package org.zaproxy.zap.spider.parser;

import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.htmlparser.jericho.Element;
import net.htmlparser.jericho.Source;
import net.htmlparser.jericho.StartTag;
import net.htmlparser.jericho.StartTagType;
import org.parosproxy.paros.Constant;
import org.parosproxy.paros.network.HttpMessage;
import org.zaproxy.zap.extension.alert.AlertEventPublisher;
import org.zaproxy.zap.spider.SpiderParam;

/**
 * Spider parser for HTML responses.
 *
 * <p>URLs are collected from a fixed set of element attributes, from URL-looking text inside a
 * few text-bearing elements, from {@code <meta>} elements, from quoted tokens of the DOCTYPE
 * declaration and, when enabled through the spider parameters, from HTML comments. Every URL
 * found is handed to {@code processURL} together with the base URL in effect, which is the
 * request URI unless the document declares a usable {@code <base href>}.
 */
@Deprecated
public class SpiderHtmlParser extends SpiderParser {

    /** Extracts the URL from the {@code content} of refresh/location/CSP {@code <meta>} tags. */
    static final Pattern URL_PATTERN =
            Pattern.compile(
                    "(?:url\\s*=|report-uri)\\s*[\"']?([^;'\"]+)", Pattern.CASE_INSENSITIVE);

    /** Matches absolute or scheme-relative URLs in comments that contain no usable markup. */
    private static final Pattern PLAIN_COMMENTS_URL_PATTERN =
            Pattern.compile(
                    "(?:http(?:s?):)?//[^\\x00-\\x1f\"'\\s<>#()\\[\\]{}]+",
                    Pattern.CASE_INSENSITIVE);

    /** Matches absolute, scheme-relative or root-relative URLs in rendered element text. */
    private static final Pattern INLINE_CONTENT_URL_PATTERN =
            Pattern.compile(
                    "(?:http(?:s?)://|(?:\\s|\\B)//?)[^\\x00-\\x1f\"'\\s<>#()\\[\\]{}]+",
                    Pattern.CASE_INSENSITIVE);

    /** Matches each URL candidate (a token containing a dot) of a {@code srcset} attribute. */
    private static final Pattern SRCSET_PATTERN =
            Pattern.compile("[^\"'=\\s,]+\\.[^\\s,]+", Pattern.CASE_INSENSITIVE);

    /** Elements whose rendered text is scanned for URLs. */
    private static final List<String> ELEMENTS_WITH_TEXT =
            Arrays.asList("p", "title", "h1", "h2", "h3", "h4", "h5", "h6", "li", "blockquote");

    private static final String IMPORT_TAG = "IMPORT";

    /**
     * Elements whose attributes each hold one URL (or, for {@code ping}, a whitespace separated
     * list of URLs). Each row is the element name followed by its attributes. Rows and
     * attributes are processed in the listed order, which determines the order in which URLs
     * are reported.
     */
    private static final String[][] SIMPLE_LINK_ATTRIBUTES = {
        {"a", "href", "ping"},
        {"applet", "archive", "codebase", "src"},
        {"area", "href", "ping"},
        {"audio", "src"},
        {"embed", "src"},
        {"frame", "src"},
        {"iframe", "src"},
        {"input", "src"},
        {"isindex", "action"},
        {"link", "href"},
        {"object", "data", "codebase"},
        {"script", "src"},
        {"table", "background"},
        {"td", "background"},
    };

    /**
     * Strategy for attributes whose value is not a plain URL and needs dedicated extraction.
     *
     * <p>Kept {@code public} for backward compatibility with the visibility found in this file.
     */
    @FunctionalInterface
    public interface CustomUrlProcessor {
        /**
         * Extracts and processes the URLs contained in an attribute value.
         *
         * @param message the message being parsed
         * @param depth the depth value forwarded to {@code processURL}
         * @param attributeValue the raw value of the attribute
         * @param baseURL the base URL used to resolve the URLs found
         */
        void process(HttpMessage message, int depth, String attributeValue, String baseURL);
    }

    /**
     * Creates the parser with the given spider parameters.
     *
     * @param spiderParam the spider parameters, passed to the superclass
     */
    public SpiderHtmlParser(SpiderParam spiderParam) {
        super(spiderParam);
    }

    /**
     * Parses an HTML response and reports every URL found through {@code processURL}.
     *
     * @param message the message whose response is parsed
     * @param source the parsed response, or {@code null} to build it from the response body
     * @param depth the depth value forwarded unchanged to {@code processURL}
     * @return always {@code false}
     */
    @Override
    public boolean parseResource(HttpMessage message, Source source, int depth) {
        if (source == null) {
            source = new Source(message.getResponseBody().toString());
        }

        String baseURL = message.getRequestHeader().getURI().toString();
        // Tracked per call (not on the instance) so that a <base> tag seen in one resource
        // cannot influence how later resources are parsed by the same parser instance.
        boolean baseTagSet = false;

        Element base = source.getFirstElement("base");
        if (base != null) {
            getLogger().debug("Base tag was found in HTML: {}", base.getDebugInfo());
            String href = base.getAttributeValue("href");
            if (href != null && !href.isEmpty()) {
                String canonicalBase = getCanonicalURL(href, baseURL);
                // NOTE(review): defensive guard; presumably getCanonicalURL may return null for
                // an href it cannot resolve - confirm. The request URI is kept in that case.
                if (canonicalBase != null) {
                    baseURL = canonicalBase;
                    baseTagSet = true;
                }
            }
        }

        parseSource(message, source, depth, baseURL, baseTagSet);

        if (getSpiderParam().isParseComments()) {
            for (StartTag comment : source.getAllStartTags(StartTagType.COMMENT)) {
                Source commentSource = new Source(comment.getTagContent());
                // Only fall back to the plain-text scan if the comment yielded nothing as HTML.
                if (!parseSource(message, commentSource, depth, baseURL, baseTagSet)) {
                    Matcher matcher = PLAIN_COMMENTS_URL_PATTERN.matcher(commentSource.toString());
                    while (matcher.find()) {
                        processURL(message, depth, matcher.group(), baseURL);
                    }
                }
            }
        }

        for (StartTag docType : source.getAllStartTags(StartTagType.DOCTYPE_DECLARATION)) {
            for (String token : docType.getTagContent().toString().split(" ")) {
                // The length check rejects a lone quote character, which both starts and ends
                // with '"' and would otherwise make substring(1, 0) throw.
                if (token.length() > 1 && token.startsWith("\"") && token.endsWith("\"")) {
                    processURL(message, depth, token.substring(1, token.length() - 1), baseURL);
                }
            }
        }

        return false;
    }

    /** Processes every URL candidate found in a {@code srcset} attribute value. */
    private void srcSetProcessor(
            HttpMessage message, int depth, String attributeValue, String baseURL) {
        Matcher matcher = SRCSET_PATTERN.matcher(attributeValue);
        while (matcher.find()) {
            // A match of SRCSET_PATTERN is never empty, so no emptiness check is needed.
            processURL(message, depth, matcher.group(), baseURL);
        }
    }

    /**
     * Extracts the URLs of one (sub)document.
     *
     * @param message the message being parsed
     * @param source the markup to scan
     * @param depth the depth value forwarded to {@code processURL}
     * @param baseURL the base URL used to resolve the URLs found
     * @param baseTagSet whether {@code baseURL} comes from a {@code <base>} tag
     * @return {@code true} if at least one URL-bearing attribute, text or meta value was found
     */
    private boolean parseSource(
            HttpMessage message, Source source, int depth, String baseURL, boolean baseTagSet) {
        getLogger().debug("Parsing an HTML message...");

        // "|=" always evaluates its right-hand side, so every step below runs regardless of
        // what earlier steps found.
        boolean found = false;
        for (String[] row : SIMPLE_LINK_ATTRIBUTES) {
            found |=
                    processElementAttributes(
                            message,
                            depth,
                            baseURL,
                            source,
                            row[0],
                            Arrays.copyOfRange(row, 1, row.length));
        }
        found |= processVideoElements(message, depth, baseURL, source);
        found |= processImageElements(message, depth, baseURL, source);
        found |= processElementAttributes(message, depth, baseURL, source, IMPORT_TAG, "implementation");
        found |= processInlineText(message, depth, baseURL, source, baseTagSet);
        found |= processMetaElements(message, depth, baseURL, source);
        found |= processElementAttributes(message, depth, baseURL, source, "html", "manifest");
        found |= processElementAttributes(message, depth, baseURL, source, "body", "background");
        return found;
    }

    /** Processes the given attributes, in order, of every element with the given name. */
    private boolean processElementAttributes(
            HttpMessage message,
            int depth,
            String baseURL,
            Source source,
            String elementName,
            String... attributeNames) {
        boolean found = false;
        for (Element element : source.getAllElements(elementName)) {
            for (String attributeName : attributeNames) {
                found |= processAttributeElement(message, depth, baseURL, element, attributeName);
            }
        }
        return found;
    }

    /** Processes {@code src}, the nested source elements' {@code src} and {@code poster} of videos. */
    private boolean processVideoElements(
            HttpMessage message, int depth, String baseURL, Source source) {
        boolean found = false;
        for (Element video : source.getAllElements("video")) {
            found |= processAttributeElement(message, depth, baseURL, video, "src");
            // NOTE(review): AlertEventPublisher.SOURCE is presumably "source" (looks like a
            // decompiler substitution for the element name) - confirm and use a literal.
            for (Element nested : video.getAllElements(AlertEventPublisher.SOURCE)) {
                found |= processAttributeElement(message, depth, baseURL, nested, "src");
            }
            found |= processAttributeElement(message, depth, baseURL, video, "poster");
        }
        return found;
    }

    /** Processes the URL attributes of {@code img} elements, including {@code srcset}. */
    private boolean processImageElements(
            HttpMessage message, int depth, String baseURL, Source source) {
        boolean found = false;
        for (Element img : source.getAllElements("img")) {
            found |= processAttributeElement(message, depth, baseURL, img, "src");
            found |= processAttributeElement(message, depth, baseURL, img, "longdesc");
            found |= processAttributeElement(message, depth, baseURL, img, "lowsrc");
            found |= processAttributeElement(message, depth, baseURL, img, "dynsrc");
            found |=
                    processAttributeElement(
                            message, depth, baseURL, img, "srcset", this::srcSetProcessor);
        }
        return found;
    }

    /** Scans the rendered text of the text-bearing elements for URLs. */
    private boolean processInlineText(
            HttpMessage message, int depth, String baseURL, Source source, boolean baseTagSet) {
        boolean found = false;
        String textBaseURL = baseURL;
        for (String elementName : ELEMENTS_WITH_TEXT) {
            for (Element element : source.getAllElements(elementName)) {
                String text = element.getContent().getRenderer().setMaxLineLength(0).toString();
                Matcher matcher = INLINE_CONTENT_URL_PATTERN.matcher(text);
                while (matcher.find()) {
                    String url = matcher.group().trim();
                    if (baseTagSet) {
                        // With a <base> tag, root-relative text URLs are resolved below the
                        // base: the base gets a trailing slash and the URL loses its single
                        // leading slash. Scheme-relative URLs ("//host/...") are left alone.
                        if (!textBaseURL.endsWith("/")) {
                            textBaseURL = textBaseURL + "/";
                        }
                        if (url.charAt(0) == '/' && !url.startsWith("//")) {
                            url = url.substring(1);
                        }
                    }
                    processURL(message, depth, url, textBaseURL);
                    found = true;
                }
            }
        }
        return found;
    }

    /** Processes URLs carried by {@code <meta>} elements. */
    private boolean processMetaElements(
            HttpMessage message, int depth, String baseURL, Source source) {
        boolean found = false;
        for (Element meta : source.getAllElements("meta")) {
            String equiv = meta.getAttributeValue("http-equiv");
            // NOTE(review): AlertEventPublisher.NAME is presumably "name" (looks like a
            // decompiler substitution for the attribute name) - confirm and use a literal.
            String name = meta.getAttributeValue(AlertEventPublisher.NAME);
            String content = meta.getAttributeValue("content");

            if (equiv != null && content != null) {
                boolean urlBearing =
                        equiv.equalsIgnoreCase("refresh")
                                || equiv.equalsIgnoreCase("location")
                                || equiv.equalsIgnoreCase("content-security-policy");
                if (urlBearing) {
                    Matcher matcher = URL_PATTERN.matcher(content);
                    if (matcher.find()) {
                        processURL(message, depth, matcher.group(1), baseURL);
                        found = true;
                    }
                }
            } else if ("msapplication-config".equalsIgnoreCase(name)
                    && content != null
                    // NOTE(review): Constant.USER_AGENT is presumably the empty string (looks
                    // like a decompiler substitution) - confirm and replace with isEmpty().
                    && !content.equals(Constant.USER_AGENT)
                    && !content.equalsIgnoreCase("none")) {
                processURL(message, depth, content, baseURL);
                found = true;
            }
        }
        return found;
    }

    /**
     * Processes a plain URL attribute of an element.
     *
     * @return {@code true} if the element has the attribute, {@code false} otherwise
     */
    private boolean processAttributeElement(
            HttpMessage message, int depth, String baseURL, Element element, String attributeName) {
        return processAttributeElement(message, depth, baseURL, element, attributeName, null);
    }

    /**
     * Processes an attribute of an element, optionally through a custom processor.
     *
     * @param message the message being parsed
     * @param depth the depth value forwarded to {@code processURL}
     * @param baseURL the base URL used to resolve the URLs found
     * @param element the element owning the attribute
     * @param attributeName the name of the attribute to read
     * @param customUrlProcessor the processor that handles the raw value, or {@code null} to
     *     treat the value as a URL (or, for {@code ping}, as a whitespace separated URL list)
     * @return {@code true} if the element has the attribute, {@code false} otherwise
     */
    private boolean processAttributeElement(
            HttpMessage message,
            int depth,
            String baseURL,
            Element element,
            String attributeName,
            CustomUrlProcessor customUrlProcessor) {
        String value = element.getAttributeValue(attributeName);
        if (value == null) {
            return false;
        }

        if (customUrlProcessor != null) {
            customUrlProcessor.process(message, depth, value, baseURL);
        } else if (attributeName.equalsIgnoreCase("ping")) {
            // The ping attribute holds several URLs separated by whitespace.
            for (String pingUrl : value.split("\\s")) {
                if (!pingUrl.isEmpty()) {
                    processURL(message, depth, pingUrl, baseURL);
                }
            }
        } else {
            processURL(message, depth, value, baseURL);
        }
        return true;
    }

    /**
     * Tells whether this parser handles the given resource.
     *
     * @param message the message to check
     * @param path not used by this parser
     * @param wasAlreadyConsumed when {@code true} this parser declines the resource
     * @return {@code true} if {@code wasAlreadyConsumed} is {@code false} and the response
     *     header reports HTML content
     */
    @Override
    public boolean canParseResource(HttpMessage message, String path, boolean wasAlreadyConsumed) {
        return !wasAlreadyConsumed && message.getResponseHeader().isHtml();
    }
}
