package com.crawljax.core;

import com.crawljax.browser.EmbeddedBrowser;
import com.crawljax.condition.eventablecondition.EventableCondition;
import com.crawljax.condition.eventablecondition.EventableConditionChecker;
import com.crawljax.core.configuration.CrawlElement;
import com.crawljax.core.configuration.CrawlRules;
import com.crawljax.core.configuration.CrawljaxConfiguration;
import com.crawljax.core.configuration.PreCrawlConfiguration;
import com.crawljax.core.state.Identification;
import com.crawljax.core.state.StateVertex;
import com.crawljax.forms.FormHandler;
import com.crawljax.util.DomUtils;
import com.crawljax.util.XPathHelper;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMultimap;
import com.google.common.collect.ImmutableSortedSet;
import com.google.inject.assistedinject.Assisted;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Pattern;
import javax.inject.Inject;
import javax.xml.xpath.XPathExpressionException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/* loaded from: input_file:com/crawljax/core/CandidateElementExtractor.class */
/**
 * Extracts the {@link CandidateElement}s of the DOM currently loaded in the browser.
 *
 * <p>Elements are selected by the included crawl elements of the configuration, filtered by the
 * excluded crawl elements, and (when frame crawling is enabled) also collected from nested
 * {@code FRAME}/{@code IFRAME} documents.
 */
public class CandidateElementExtractor {
    private static final Logger LOG = LoggerFactory.getLogger(CandidateElementExtractor.class);

    /**
     * Matches hrefs that end in a downloadable file extension. The dot is escaped on purpose: an
     * unescaped dot would also match ordinary links such as "tips" or "/maps".
     */
    private static final Pattern DOWNLOADABLE_FILE = Pattern.compile(".+\\.(?:pdf|ps|zip|mp3)");

    private final ExtractorManager checkedElements;
    private final EmbeddedBrowser browser;
    private final FormHandler formHandler;
    private final boolean crawlFrames;
    private final ImmutableMultimap<String, CrawlElement> excludeCrawlElements;
    private final ImmutableList<CrawlElement> includedCrawlElements;
    private final boolean clickOnce;
    private final boolean randomizeElementsOrder;
    private final ImmutableSortedSet<String> ignoredFrameIdentifiers;
    private final boolean followExternalLinks;
    private final String siteHostName;

    /**
     * Creates an extractor bound to one browser, reading all crawl rules from the configuration.
     *
     * @param extractorManager      keeps track of already checked elements and crawl conditions.
     * @param embeddedBrowser       the browser whose DOM is inspected.
     * @param formHandler           used to build candidates for elements with linked input fields.
     * @param crawljaxConfiguration the configuration supplying the crawl rules and the site URL.
     */
    @Inject
    public CandidateElementExtractor(ExtractorManager extractorManager, @Assisted EmbeddedBrowser embeddedBrowser, FormHandler formHandler, CrawljaxConfiguration crawljaxConfiguration) {
        this.checkedElements = extractorManager;
        this.browser = embeddedBrowser;
        this.formHandler = formHandler;
        CrawlRules crawlRules = crawljaxConfiguration.getCrawlRules();
        PreCrawlConfiguration preCrawlConfig = crawlRules.getPreCrawlConfig();
        this.excludeCrawlElements = asMultiMap(preCrawlConfig.getExcludedElements());
        // The explicit type witness is required: without it the chained builder is inferred as
        // Builder<Object> and the assignment does not compile.
        this.includedCrawlElements = ImmutableList.<CrawlElement>builder()
                .addAll(preCrawlConfig.getIncludedElements())
                .addAll(crawlRules.getInputSpecification().getCrawlElements())
                .build();
        this.crawlFrames = crawlRules.shouldCrawlFrames();
        this.clickOnce = crawlRules.isClickOnce();
        this.randomizeElementsOrder = crawlRules.isRandomizeCandidateElements();
        this.ignoredFrameIdentifiers = crawlRules.getIgnoredFrameIdentifiers();
        this.followExternalLinks = crawlRules.followExternalLinks();
        this.siteHostName = crawljaxConfiguration.getUrl().getHost();
    }

    /** Indexes the given crawl elements by their tag name. */
    private ImmutableMultimap<String, CrawlElement> asMultiMap(ImmutableList<CrawlElement> elements) {
        ImmutableMultimap.Builder<String, CrawlElement> builder = ImmutableMultimap.builder();
        for (CrawlElement element : elements) {
            builder.put(element.getTagName(), element);
        }
        return builder.build();
    }

    /**
     * Extracts the candidate elements of the current browser DOM and stores them on the state.
     *
     * @param stateVertex the state the browser is currently in; receives the found elements via
     *                    {@code setElementsFound}.
     * @return the candidate elements, or an empty list when the crawl conditions are not met.
     * @throws CrawljaxException when the browser DOM cannot be read as a document.
     */
    public ImmutableList<CandidateElement> extract(StateVertex stateVertex) throws CrawljaxException {
        if (!this.checkedElements.checkCrawlCondition(this.browser)) {
            LOG.info("State {} did not satisfy the CrawlConditions.", stateVertex.getName());
            return ImmutableList.of();
        }
        LOG.debug("Looking in state: {} for candidate elements", stateVertex.getName());
        LinkedList<CandidateElement> results = new LinkedList<>();
        try {
            Document dom = DomUtils.asDocument(this.browser.getStrippedDomWithoutIframeContent());
            extractElements(dom, results, "");
        } catch (IOException e) {
            LOG.error(e.getMessage(), e);
            throw new CrawljaxException(e);
        }
        if (this.randomizeElementsOrder) {
            Collections.shuffle(results);
        }
        stateVertex.setElementsFound(results);
        LOG.debug("Found {} new candidate elements to analyze!", results.size());
        return ImmutableList.copyOf(results);
    }

    /**
     * Collects the candidates of one document: first those of its nested frames, then those of
     * the document itself for every included crawl element.
     *
     * <p>Frames are visited once per document. Visiting them once per included crawl element
     * re-fetches every frame DOM repeatedly and, when click-once is disabled, adds the same frame
     * candidates several times.
     */
    private void extractElements(Document dom, List<CandidateElement> results, String relatedFrame) {
        LOG.debug("Extracting elements for related frame '{}'", relatedFrame);
        if (this.includedCrawlElements.isEmpty()) {
            // Nothing can be extracted, so do not fetch any frame DOM either.
            return;
        }
        addFramesCandidates(dom, results, relatedFrame, dom.getElementsByTagName("FRAME"));
        addFramesCandidates(dom, results, relatedFrame, dom.getElementsByTagName("IFRAME"));
        for (CrawlElement tag : this.includedCrawlElements) {
            LOG.debug("Extracting TAG: {}", tag);
            evaluateElements(dom, tag, results, relatedFrame);
        }
    }

    /**
     * Recursively extracts the candidates of every non-ignored frame in {@code frameNodes}.
     * Nested frames are identified as {@code parent.child}. A frame whose DOM cannot be read is
     * logged and skipped.
     */
    private void addFramesCandidates(Document dom, List<CandidateElement> results, String relatedFrame, NodeList frameNodes) {
        if (frameNodes == null) {
            return;
        }
        String prefix = Strings.isNullOrEmpty(relatedFrame) ? "" : relatedFrame + ".";
        for (int i = 0; i < frameNodes.getLength(); i++) {
            String nameId = DomUtils.getFrameIdentification((Element) frameNodes.item(i));
            if (nameId == null) {
                continue;
            }
            String frameIdentification = prefix + nameId;
            if (isFrameIgnored(frameIdentification)) {
                continue;
            }
            LOG.debug("frame Identification: {}", frameIdentification);
            try {
                Document frameDom = DomUtils.asDocument(this.browser.getFrameDom(frameIdentification));
                extractElements(frameDom, results, frameIdentification);
            } catch (IOException e) {
                LOG.info("Got exception while inspecting a frame: {} continuing...", frameIdentification, e);
            }
        }
    }

    /**
     * Tells whether a frame must be skipped: always when frame crawling is disabled, otherwise
     * when it equals an ignored identifier or matches one that contains the {@code %} wildcard.
     */
    private boolean isFrameIgnored(String frameIdentification) {
        if (!this.crawlFrames) {
            return true;
        }
        for (String ignored : this.ignoredFrameIdentifiers) {
            boolean matches = ignored.contains("%")
                    ? matchesWildcard(ignored, frameIdentification)
                    : ignored.equals(frameIdentification);
            if (matches) {
                return true;
            }
        }
        return false;
    }

    /**
     * Matches {@code candidate} against {@code wildcard}, where each {@code %} stands for any
     * sequence of characters. Everything else is quoted so that characters such as the "." that
     * joins nested frame identifiers are compared literally and cannot break the expression.
     */
    private static boolean matchesWildcard(String wildcard, String candidate) {
        StringBuilder regex = new StringBuilder();
        int start = 0;
        int percent = wildcard.indexOf('%', start);
        while (percent >= 0) {
            regex.append(Pattern.quote(wildcard.substring(start, percent))).append(".*");
            start = percent + 1;
            percent = wildcard.indexOf('%', start);
        }
        regex.append(Pattern.quote(wildcard.substring(start)));
        return Pattern.matches(regex.toString(), candidate);
    }

    /**
     * Evaluates every DOM element selected for {@code crawlElement}; a {@link CrawljaxException}
     * raised while doing so is logged and ends the evaluation of this crawl element.
     */
    private void evaluateElements(Document dom, CrawlElement crawlElement, List<CandidateElement> results, String relatedFrame) {
        try {
            ImmutableList<Element> elements = getNodeListForTagElement(dom, crawlElement, this.checkedElements.getEventableConditionChecker());
            for (Element element : elements) {
                evaluateElement(results, relatedFrame, crawlElement, element);
            }
        } catch (CrawljaxException e) {
            LOG.warn("Catched exception during NodeList For Tag Element retrieval", e);
        }
    }

    /**
     * Returns the elements with the tag of {@code crawlElement} that match its XPath condition,
     * are not already checked, are not excluded and are not links that should be ignored.
     */
    private ImmutableList<Element> getNodeListForTagElement(Document dom, CrawlElement crawlElement, EventableConditionChecker eventableConditionChecker) {
        ImmutableList.Builder<Element> selected = ImmutableList.builder();
        if (crawlElement.getTagName() == null) {
            return selected.build();
        }
        EventableCondition condition = eventableConditionChecker.getEventableCondition(crawlElement.getId());
        ImmutableList<String> expressions = getFullXpathForGivenXpath(dom, condition);
        NodeList nodes = dom.getElementsByTagName(crawlElement.getTagName());
        for (int i = 0; i < nodes.getLength(); i++) {
            Element element = (Element) nodes.item(i);
            boolean matchesXpath = elementMatchesXpath(eventableConditionChecker, condition, expressions, element);
            LOG.debug("Element {} matches Xpath={}", DomUtils.getElementString(element), matchesXpath);
            String id = element.getNodeName() + ": " + DomUtils.getAllElementAttributes(element);
            if (matchesXpath && !this.checkedElements.isChecked(id) && !isExcluded(dom, element, eventableConditionChecker)) {
                addElement(element, selected, crawlElement);
            } else {
                LOG.debug("Element {} was not added", element);
            }
        }
        return selected.build();
    }

    /**
     * Checks the element against the resolved XPaths of the condition. Elements always match
     * when there is no condition or it has no XPath restriction; a failing check counts as
     * "no match".
     */
    private boolean elementMatchesXpath(EventableConditionChecker eventableConditionChecker, EventableCondition condition, ImmutableList<String> expressions, Element element) {
        if (condition == null || condition.getInXPath() == null) {
            return true;
        }
        try {
            return eventableConditionChecker.checkXPathUnderXPaths(XPathHelper.getXPathExpression(element), expressions);
        } catch (RuntimeException e) {
            // Best effort: an element whose XPath cannot be checked is treated as not matching.
            LOG.debug("Could not check the XPath of element {}", element, e);
            return false;
        }
    }

    /**
     * Resolves the XPath restriction of the condition to the concrete XPaths in the document.
     * Returns an empty list when there is no restriction or it cannot be evaluated.
     */
    private ImmutableList<String> getFullXpathForGivenXpath(Document dom, EventableCondition condition) {
        if (condition != null && condition.getInXPath() != null) {
            try {
                ImmutableList<String> expressions = XPathHelper.getXpathForXPathExpressions(dom, condition.getInXPath());
                LOG.debug("Xpath {} resolved to xpaths in document: {}", condition.getInXPath(), expressions);
                return expressions;
            } catch (XPathExpressionException e) {
                LOG.debug("Could not load XPath expressions for {}", condition, e);
            }
        }
        return ImmutableList.of();
    }

    /**
     * Adds the element to the builder and increases the elements counter, unless it is an
     * anchor whose href should be ignored.
     */
    private void addElement(Element element, ImmutableList.Builder<Element> builder, CrawlElement crawlElement) {
        if ("A".equalsIgnoreCase(crawlElement.getTagName()) && hrefShouldBeIgnored(element)) {
            return;
        }
        builder.add(element);
        LOG.debug("Adding element {}", element);
        this.checkedElements.increaseElementsCounter();
    }

    /**
     * A link is ignored when it points to a downloadable file, is a {@code mailto:} link, or is
     * external while external links are not followed.
     */
    private boolean hrefShouldBeIgnored(Element element) {
        String href = Strings.nullToEmpty(element.getAttribute("href"));
        return isFileForDownloading(href)
                || href.startsWith("mailto:")
                || (!this.followExternalLinks && isExternal(href));
    }

    /**
     * Tells whether the href is an absolute http(s) link to a host other than the crawled site.
     * Links without a host component or with an unparsable URI are treated as not external.
     */
    private boolean isExternal(String href) {
        if (!href.startsWith("http")) {
            return false;
        }
        try {
            String host = URI.create(href).getHost();
            if (host == null) {
                // e.g. a relative link such as "httpinfo.html": URI#getHost() is null here and
                // must not be dereferenced.
                return false;
            }
            return !host.equalsIgnoreCase(this.siteHostName);
        } catch (IllegalArgumentException e) {
            LOG.info("Unreadable external link {}", href);
            return false;
        }
    }

    /** Tells whether the href ends in one of the known download extensions. */
    private boolean isFileForDownloading(String href) {
        return DOWNLOADABLE_FILE.matcher(href).matches();
    }

    /**
     * Turns one DOM element into candidate elements and appends the new ones to {@code results}.
     * Elements whose condition has linked input fields are expanded by the form handler; all
     * others yield a single candidate identified by XPath. With click-once enabled, candidates
     * that were already marked as checked are skipped.
     */
    private void evaluateElement(List<CandidateElement> results, String relatedFrame, CrawlElement crawlElement, Element element) {
        EventableCondition condition = this.checkedElements.getEventableConditionChecker().getEventableCondition(crawlElement.getId());
        String xpath = XPathHelper.getXPathExpression(element);
        List<CandidateElement> candidates;
        boolean hasLinkedInputs = condition != null
                && condition.getLinkedInputFields() != null
                && condition.getLinkedInputFields().size() > 0;
        if (hasLinkedInputs) {
            candidates = this.formHandler.getCandidateElementsForInputs(element, condition);
        } else {
            candidates = new ArrayList<>();
            candidates.add(new CandidateElement(element, new Identification(Identification.How.xpath, xpath), relatedFrame));
        }
        for (CandidateElement candidate : candidates) {
            if (!this.clickOnce || this.checkedElements.markChecked(candidate)) {
                LOG.debug("Found new candidate element: {} with eventableCondition {}", candidate.getUniqueString(), condition);
                candidate.setEventableCondition(condition);
                results.add(candidate);
            }
        }
    }

    /**
     * Tells whether the element, or any of its ancestor elements, falls under an excluded crawl
     * element registered for its (upper-cased) tag name. A failing XPath check is logged and
     * counts as "not excluded".
     */
    private boolean isExcluded(Document dom, Element element, EventableConditionChecker eventableConditionChecker) {
        Node parent = element.getParentNode();
        if ((parent instanceof Element) && isExcluded(dom, (Element) parent, eventableConditionChecker)) {
            return true;
        }
        for (CrawlElement excluded : this.excludeCrawlElements.get(element.getTagName().toUpperCase())) {
            boolean matchesXpath;
            try {
                matchesXpath = eventableConditionChecker.checkXpathStartsWithXpathEventableCondition(
                        dom,
                        eventableConditionChecker.getEventableCondition(excluded.getId()),
                        XPathHelper.getXPathExpression(element));
            } catch (CrawljaxException | XPathExpressionException e) {
                LOG.debug("Could not check exclusion by Xpath for element because {}", e.getMessage());
                matchesXpath = false;
            }
            if (matchesXpath) {
                LOG.info("Excluded element because of xpath: {}", element);
                return true;
            }
        }
        return false;
    }

    /**
     * Checks the crawl conditions against the current browser state.
     *
     * @return the result of the extractor manager's crawl-condition check for this browser.
     */
    public boolean checkCrawlCondition() {
        return this.checkedElements.checkCrawlCondition(this.browser);
    }
}
