package org.zaproxy.zap.spider.parser;

import java.util.Iterator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.htmlparser.jericho.Element;
import net.htmlparser.jericho.Source;
import net.htmlparser.jericho.StartTag;
import net.htmlparser.jericho.StartTagType;
import org.parosproxy.paros.network.HttpMessage;
import org.zaproxy.zap.spider.SpiderParam;
import org.zaproxy.zap.spider.URLCanonicalizer;

/* loaded from: input_file:org/zaproxy/zap/spider/parser/SpiderHtmlParser.class */
/**
 * Spider parser that collects URLs from an HTML response.
 *
 * <p>URLs are taken from the link-carrying attributes of a fixed set of elements (see
 * {@link #LINK_ATTRIBUTES}) and from {@code <meta http-equiv="refresh|location">} directives.
 * When {@link SpiderParam#isParseComments()} is enabled, the content of HTML comments is
 * processed as well. Every URL found is handed to {@code processURL} of the parent class.
 */
public class SpiderHtmlParser extends SpiderParser {
    /** Captures the target of a {@code url=...} directive inside a meta {@code content} value. */
    private static final Pattern urlPattern =
            Pattern.compile("url\\s*=\\s*([^;]+)", Pattern.CASE_INSENSITIVE);

    /**
     * Matches {@code http://}, {@code https://} and scheme-relative ({@code //}) URLs that appear
     * as plain text inside a comment.
     */
    private static final Pattern PLAIN_COMMENTS_URL_PATTERN =
            Pattern.compile(
                    "(?:http(?:s?):)?//[^\\x00-\\x1f\"'\\s<>#()\\[\\]{}]+",
                    Pattern.CASE_INSENSITIVE);

    /**
     * Element name / attribute name pairs whose attribute value is treated as a URL. The order of
     * the entries is the order in which the elements are processed.
     */
    private static final String[][] LINK_ATTRIBUTES = {
        {"a", "href"},
        {"area", "href"},
        {"frame", "src"},
        {"iframe", "src"},
        {"link", "href"},
        {"script", "src"},
        {"img", "src"},
    };

    /** Spider options; only {@code isParseComments()} is consulted by this class. */
    private final SpiderParam params;

    /**
     * Creates the parser.
     *
     * @param spiderParam the spider options, must not be {@code null}
     * @throws IllegalArgumentException if {@code spiderParam} is {@code null}
     */
    public SpiderHtmlParser(SpiderParam spiderParam) {
        if (spiderParam == null) {
            throw new IllegalArgumentException("Parameter params must not be null.");
        }
        this.params = spiderParam;
    }

    /**
     * Extracts the URLs of the given HTML message and forwards each one to {@code processURL}.
     *
     * @param httpMessage the message whose response is parsed
     * @param source the already parsed response, or {@code null} to have it built from the
     *     response body
     * @param i value forwarded unchanged to {@code processURL} (presumably the crawl depth -
     *     confirm against {@code SpiderParser})
     * @return always {@code false} in this implementation
     */
    @Override
    public boolean parseResource(HttpMessage httpMessage, Source source, int i) {
        if (source == null) {
            source = new Source(httpMessage.getResponseBody().toString());
        }
        String baseUrl =
                resolveBaseUrl(source, httpMessage.getRequestHeader().getURI().toString());

        parseSource(httpMessage, source, i, baseUrl);

        if (this.params.isParseComments()) {
            for (StartTag comment : source.getAllStartTags(StartTagType.COMMENT)) {
                Source commentSource = new Source(comment.getTagContent());
                // Only fall back to plain-text URL matching when the comment did not contain
                // any markup that yielded a URL.
                if (!parseSource(httpMessage, commentSource, i, baseUrl)) {
                    Matcher plainUrl = PLAIN_COMMENTS_URL_PATTERN.matcher(commentSource.toString());
                    while (plainUrl.find()) {
                        processURL(httpMessage, i, plainUrl.group(), baseUrl);
                    }
                }
            }
        }
        return false;
    }

    /**
     * Determines the base URL against which the links of the document are resolved.
     *
     * @param source the parsed document
     * @param requestUrl the URL of the request, used when no usable {@code <base href>} exists
     * @return the canonicalised {@code <base href>} if one is present and resolvable, otherwise
     *     {@code requestUrl}
     */
    private String resolveBaseUrl(Source source, String requestUrl) {
        Element base = source.getFirstElement("base");
        if (base == null) {
            return requestUrl;
        }
        if (log.isDebugEnabled()) {
            log.debug("Base tag was found in HTML: " + base.getDebugInfo());
        }
        String href = base.getAttributeValue("href");
        if (href == null || href.isEmpty()) {
            return requestUrl;
        }
        // NOTE(review): getCanonicalURL is assumed to be able to return null for an href it
        // cannot resolve - confirm. In that case keep the request URL rather than leaving the
        // document without any base.
        String canonicalBase = URLCanonicalizer.getCanonicalURL(href, requestUrl);
        return canonicalBase != null ? canonicalBase : requestUrl;
    }

    /**
     * Processes the link attributes and meta redirect directives of a document or fragment.
     *
     * @param httpMessage the message being parsed
     * @param source the document, or comment fragment, to inspect
     * @param depth value forwarded unchanged to {@code processURL}
     * @param baseUrl the base URL forwarded to {@code processURL}
     * @return {@code true} if at least one URL was handed to {@code processURL}
     */
    private boolean parseSource(HttpMessage httpMessage, Source source, int depth, String baseUrl) {
        log.debug("Parsing an HTML message...");
        boolean found = false;

        for (String[] link : LINK_ATTRIBUTES) {
            for (Element element : source.getAllElements(link[0])) {
                // Non-short-circuit on purpose: every element must be processed.
                found |= processAttributeElement(httpMessage, depth, baseUrl, element, link[1]);
            }
        }

        for (Element meta : source.getAllElements("meta")) {
            String equiv = meta.getAttributeValue("http-equiv");
            String content = meta.getAttributeValue("content");
            if (equiv == null || content == null) {
                continue;
            }
            if (!equiv.equalsIgnoreCase("refresh") && !equiv.equalsIgnoreCase("location")) {
                continue;
            }
            Matcher target = urlPattern.matcher(content);
            if (target.find()) {
                processURL(httpMessage, depth, target.group(1), baseUrl);
                found = true;
            }
        }
        return found;
    }

    /**
     * Hands the value of one attribute of an element to {@code processURL}.
     *
     * @param httpMessage the message being parsed
     * @param depth value forwarded unchanged to {@code processURL}
     * @param baseUrl the base URL forwarded to {@code processURL}
     * @param element the element to read the attribute from
     * @param attributeName the name of the attribute holding the URL
     * @return {@code true} if the attribute has a value (and was processed), {@code false} if
     *     {@code getAttributeValue} returned {@code null}
     */
    private boolean processAttributeElement(
            HttpMessage httpMessage, int depth, String baseUrl, Element element, String attributeName) {
        String url = element.getAttributeValue(attributeName);
        if (url == null) {
            return false;
        }
        processURL(httpMessage, depth, url, baseUrl);
        return true;
    }

    /**
     * Tells whether this parser handles the given message.
     *
     * @param httpMessage the message to check
     * @param str not used by this implementation
     * @param z when {@code true} the message is rejected (presumably signals that another parser
     *     already consumed it - confirm against {@code SpiderParser})
     * @return {@code true} if {@code z} is {@code false} and the response header reports HTML
     */
    @Override
    public boolean canParseResource(HttpMessage httpMessage, String str, boolean z) {
        return !z && httpMessage.getResponseHeader().isHtml();
    }
}
