错误原因 - Jsoup.isValid

Abi*_*Abi 6 java jsoup

我有以下代码可以工作,但我只是想知道在Jsoup中是否有可能找出错误的确切原因.

以下返回true(如预期的那样)

/**
 * Validates a fixed HTML fragment against a customized basic whitelist and
 * prints the boolean result to standard output.
 */
private void validateProtocol() {
    final String markup = "<p><a href='https://example.com/'>Link</a></p>";

    // Start from the basic whitelist, then add "tel" and remove "ftp"
    // as protocols for the href attribute of <a>.
    final Whitelist rules = Whitelist.basic()
            .addProtocols("a", "href", "tel")
            .removeProtocols("a", "href", "ftp");

    System.out.println(Jsoup.isValid(markup, rules));
}
Run Code Online (Sandbox Code Playgroud)

当我将上面的字符串更改为以下内容时,它返回false(如预期的那样)

// Variant of the fragment whose link uses the ftp scheme.
String html = "<p><a href='ftp://example.com/'>Link</a></p>";
Run Code Online (Sandbox Code Playgroud)

现在,当我有以下代码时,有两个错误,一个是无效协议,一个是onfocus()链接.

/**
 * Validates a fixed HTML fragment, whose link carries both an ftp URL and an
 * onfocus attribute, against a customized basic whitelist and prints the
 * boolean result to standard output.
 */
private void validateProtocol() {
    final String markup = "<p><a href='ftp://example.com/' onfocus='invalidLink()'>Link</a></p>";

    // Start from the basic whitelist, then add "tel" and "device" and remove
    // "ftp" as protocols for the href attribute of <a>.
    final Whitelist rules = Whitelist.basic()
            .addProtocols("a", "href", "tel", "device")
            .removeProtocols("a", "href", "ftp");

    System.out.println(Jsoup.isValid(markup, rules));
}
Run Code Online (Sandbox Code Playgroud)

结果为false,但有没有办法弄清楚链接的哪一部分无效?例如——是错误的协议,还是错误的方法?

Ste*_*han 1

您想要创建具有报告功能的自定义白名单。

MyReportEnabledWhitelist.java

/**
 * A {@link Whitelist} that reports the tags and attributes it rejects.
 *
 * <p>Every check is delegated to the parent class and its verdict is returned
 * unchanged; a message is handed to {@code say} when that verdict is negative.
 * NOTE(review): {@code say} is not defined in this class as shown; it is
 * presumably supplied elsewhere. Confirm before compiling.
 */
public class MyReportEnabledWhitelist extends Whitelist {

    /** Signatures of the element/attribute pairs that have already been examined. */
    private Set<String> alreadyCheckedAttributeSignatures = new HashSet<>();

    /**
     * Checks a tag with the parent whitelist and reports it when it is rejected.
     *
     * @param tag the tag name under test
     * @return the parent whitelist's verdict for {@code tag}
     */
    @Override
    protected boolean isSafeTag(String tag) {
        if (super.isSafeTag(tag)) {
            return true;
        }

        say("Disallowed tag: " + tag);
        return false;
    }

    /**
     * Checks an attribute with the parent whitelist and reports it when it is
     * rejected. Only the first check of a given signature can produce a report.
     *
     * @param tagName the name of the tag carrying the attribute
     * @param el the element carrying the attribute
     * @param attr the attribute under test
     * @return the parent whitelist's verdict for {@code attr}
     */
    @Override
    protected boolean isSafeAttribute(String tagName, Element el, Attribute attr) {
        final boolean verdict = super.isSafeAttribute(tagName, el, attr);

        // The signature combines the hash codes of the element and the attribute.
        final String key = el.hashCode() + "-" + attr.hashCode();

        // Set.add returns true only when the key was not yet present.
        final boolean firstVisit = alreadyCheckedAttributeSignatures.add(key);
        if (firstVisit && !verdict) {
            say("Wrong attribute: " + attr.getKey() + " (" + attr.html() + ") in " + el.outerHtml());
        }

        return verdict;
    }
}
Run Code Online (Sandbox Code Playgroud)

示例代码

// Fragment under test: two links, both using ftp URLs, the first also carrying an onfocus attribute.
String html = "<p><a href='ftp://example.com/' onfocus='invalidLink()'>Link</a></p><a href='ftp://example2.com/'>Link 2</a>";

// * Custom whitelist
Whitelist myReportEnabledWhitelist = new MyReportEnabledWhitelist();

// ** Basic whitelist (from Jsoup), configured one statement at a time
myReportEnabledWhitelist.addTags(
        "a", "b", "blockquote", "br", "cite", "code", "dd", "dl", "dt", "em", "i", "li",
        "ol", "p", "pre", "q", "small", "span", "strike", "strong", "sub", "sup", "u", "ul");

myReportEnabledWhitelist.addAttributes("a", "href");
myReportEnabledWhitelist.addAttributes("blockquote", "cite");
myReportEnabledWhitelist.addAttributes("q", "cite");

myReportEnabledWhitelist.addProtocols("a", "href", "ftp", "http", "https", "mailto");
myReportEnabledWhitelist.addProtocols("blockquote", "cite", "http", "https");
myReportEnabledWhitelist.addProtocols("cite", "cite", "http", "https");

myReportEnabledWhitelist.addEnforcedAttribute("a", "rel", "nofollow");

// ** Customizations
myReportEnabledWhitelist.addTags("body");
myReportEnabledWhitelist.addProtocols("a", "href", "tel", "device");
myReportEnabledWhitelist.removeProtocols("a", "href", "ftp");

// Validate the fragment and print the overall verdict.
boolean safeCustom = Jsoup.isValid(html, myReportEnabledWhitelist);
System.out.println(safeCustom);
Run Code Online (Sandbox Code Playgroud)

输出

/**
 * A {@link Whitelist} that reports the tags and attributes it rejects.
 *
 * <p>Every check is delegated to the parent class and its verdict is returned
 * unchanged; a message is handed to {@code say} when that verdict is negative.
 * NOTE(review): {@code say} is not defined in this class as shown; it is
 * presumably supplied elsewhere. Confirm before compiling.
 */
public class MyReportEnabledWhitelist extends Whitelist {

    /** Signatures of the element/attribute pairs that have already been examined. */
    private Set<String> alreadyCheckedAttributeSignatures = new HashSet<>();

    /**
     * Checks a tag with the parent whitelist and reports it when it is rejected.
     *
     * @param tag the tag name under test
     * @return the parent whitelist's verdict for {@code tag}
     */
    @Override
    protected boolean isSafeTag(String tag) {
        if (super.isSafeTag(tag)) {
            return true;
        }

        say("Disallowed tag: " + tag);
        return false;
    }

    /**
     * Checks an attribute with the parent whitelist and reports it when it is
     * rejected. Only the first check of a given signature can produce a report.
     *
     * @param tagName the name of the tag carrying the attribute
     * @param el the element carrying the attribute
     * @param attr the attribute under test
     * @return the parent whitelist's verdict for {@code attr}
     */
    @Override
    protected boolean isSafeAttribute(String tagName, Element el, Attribute attr) {
        final boolean verdict = super.isSafeAttribute(tagName, el, attr);

        // The signature combines the hash codes of the element and the attribute.
        final String key = el.hashCode() + "-" + attr.hashCode();

        // Set.add returns true only when the key was not yet present.
        final boolean firstVisit = alreadyCheckedAttributeSignatures.add(key);
        if (firstVisit && !verdict) {
            say("Wrong attribute: " + attr.getKey() + " (" + attr.html() + ") in " + el.outerHtml());
        }

        return verdict;
    }
}
Run Code Online (Sandbox Code Playgroud)