package org.sonatype.nexus.proxy.maven.routing.internal.scrape;

import com.google.common.base.Throwables;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.sonatype.nexus.proxy.maven.routing.internal.scrape.AbstractScraper;
import org.sonatype.nexus.proxy.maven.routing.internal.scrape.Page;
import org.sonatype.nexus.proxy.maven.routing.internal.task.CancelableUtil;
import org.sonatype.nexus.proxy.walker.ParentOMatic;
import org.sonatype.nexus.scheduling.NexusTask;
import org.sonatype.nexus.util.Node;
import org.sonatype.nexus.util.SystemPropertiesHelper;

/* loaded from: input_file:org/sonatype/nexus/proxy/maven/routing/internal/scrape/AbstractGeneratedIndexPageScraper.class */
/**
 * Base class for scrapers that work against server-generated HTML index pages.
 *
 * <p>Detection looks for an anchor on the page that matches the "parent directory" element supplied by the
 * subclass. Scraping walks the anchors of each page, records every accepted link in a {@link ParentOMatic}
 * and follows links ending with {@code /} until the scrape depth of the {@link ScrapeContext} is reached.
 */
public abstract class AbstractGeneratedIndexPageScraper extends AbstractScraper {
    /**
     * Pause, in milliseconds, taken before each sub-page fetch; values {@code <= 0} disable the pause.
     */
    private final long pageSleepTimeMillis;

    /**
     * Creates the scraper and reads the pause between page fetches from the system property
     * {@code <Scraper class name>.pageSleepTimeMillis} (default 200 ms).
     *
     * @param i   passed through unchanged to the {@link AbstractScraper} constructor
     * @param str passed through unchanged to the {@link AbstractScraper} constructor
     */
    public AbstractGeneratedIndexPageScraper(int i, String str) {
        super(i, str);
        this.pageSleepTimeMillis = SystemPropertiesHelper.getLong(Scraper.class.getName() + ".pageSleepTimeMillis", 200L);
    }

    /**
     * Returns the name of the server this scraper targets; it is used in detection results and in messages.
     */
    protected abstract String getTargetedServer();

    /**
     * Decides whether the given page looks like an index page generated by the targeted server.
     *
     * <p>The page is recognized when it was served with HTTP 200 and contains an anchor whose text and
     * absolute {@code href} both equal those of the element returned by
     * {@link #getParentDirectoryElement(Page)}.
     *
     * @param scrapeContext the current scrape context (not consulted by this implementation)
     * @param page          the page to inspect
     * @return a {@code RECOGNIZED_SHOULD_BE_SCRAPED} result on a match, otherwise an {@code UNRECOGNIZED} one
     */
    @Override // org.sonatype.nexus.proxy.maven.routing.internal.scrape.AbstractScraper
    public AbstractScraper.RemoteDetectionResult detectRemoteRepository(ScrapeContext scrapeContext, Page page) {
        if (page.getHttpResponse().getStatusLine().getStatusCode() == 200) {
            Elements anchors = page.getDocument().getElementsByTag("a");
            if (!anchors.isEmpty()) {
                Element parentDirectoryElement = getParentDirectoryElement(page);
                // A subclass that cannot produce a reference element gives us nothing to match against;
                // treat that as "not recognized" instead of failing with a NullPointerException.
                if (parentDirectoryElement != null) {
                    String expectedText = parentDirectoryElement.text();
                    String expectedHref = parentDirectoryElement.absUrl("href");
                    for (Element anchor : anchors) {
                        if (expectedText.equals(anchor.text()) && expectedHref.equals(anchor.absUrl("href"))) {
                            return new AbstractScraper.RemoteDetectionResult(AbstractScraper.RemoteDetectionOutcome.RECOGNIZED_SHOULD_BE_SCRAPED, getTargetedServer(), "Remote is a generated index page of " + getTargetedServer());
                        }
                    }
                }
            }
        }
        return new AbstractScraper.RemoteDetectionResult(AbstractScraper.RemoteDetectionOutcome.UNRECOGNIZED, getTargetedServer(), "Remote is not a generated index page of " + getTargetedServer());
    }

    /**
     * Scrapes the remote starting at the given page and returns the collected leaf paths.
     *
     * @param scrapeContext the current scrape context
     * @param page          the starting page
     * @return all leaf paths collected, or {@code null} after stopping the context when nothing was collected
     * @throws IOException if the recursive scrape fails
     */
    @Override // org.sonatype.nexus.proxy.maven.routing.internal.scrape.AbstractScraper
    protected List<String> diveIn(ScrapeContext scrapeContext, Page page) throws IOException {
        ParentOMatic parentOMatic = new ParentOMatic();
        diveIn(scrapeContext, page, 0, parentOMatic, parentOMatic.getRoot());
        if (parentOMatic.getRoot().isLeaf()) {
            // The root never got a child, i.e. no entry was scraped at all: report this as a failure.
            scrapeContext.stop("Remote recognized as " + getTargetedServer() + ", but scraped 0 entries. This is considered a failure.");
            return null;
        }
        return parentOMatic.getAllLeafPaths();
    }

    /**
     * Processes one page: registers every accepted link under the given node and recurses into links that
     * end with {@code /} while the next depth is still below the context's scrape depth.
     *
     * @param scrapeContext the current scrape context
     * @param page          the page whose anchors are processed
     * @param i             depth of {@code page}; the starting page is at depth 0
     * @param parentOMatic  the tree collecting the scraped paths
     * @param node          the tree node that corresponds to {@code page}
     * @throws IOException if a sub-page cannot be fetched or is answered with a status other than 200
     */
    protected void diveIn(ScrapeContext scrapeContext, Page page, int i, ParentOMatic parentOMatic, Node<ParentOMatic.Payload> node) throws IOException {
        if (i >= scrapeContext.getScrapeDepth()) {
            return;
        }
        CancelableUtil.checkInterruption();
        getLogger().debug("Processing page response from URL {}", page.getUrl());
        Elements anchors = page.getDocument().getElementsByTag("a");
        List<String> pathElements = node.getPathElements();
        String path = node.getPath();
        int nextDepth = i + 1;
        for (Element anchor : anchors) {
            // NOTE(review): PRIVATE_PROP_PREFIX is used here as the "skip this entry" text prefix; this
            // looks like a decompiler constant substitution for a string literal - confirm its value.
            if (!isDeeperRepoLink(scrapeContext, pathElements, anchor) || anchor.text().startsWith(NexusTask.PRIVATE_PROP_PREFIX)) {
                continue;
            }
            Node<ParentOMatic.Payload> child = parentOMatic.addPath(path + "/" + anchor.text());
            // Only links ending with a slash are followed, and only while depth budget remains.
            if (anchor.absUrl("href").endsWith("/") && nextDepth < scrapeContext.getScrapeDepth()) {
                maySleepBeforeSubsequentFetch();
                String childUrl = getRemoteUrlForRepositoryPath(scrapeContext, child.getPathElements()) + "/";
                Page childPage = Page.getPageFor(scrapeContext, childUrl);
                if (childPage.getHttpResponse().getStatusLine().getStatusCode() != 200) {
                    // Report the page that actually failed, not the parent page that was fetched fine.
                    throw new Page.UnexpectedPageResponse(childPage.getUrl(), childPage.getHttpResponse().getStatusLine());
                }
                diveIn(scrapeContext, childPage, nextDepth, parentOMatic, child);
            }
        }
    }

    /**
     * Sleeps for the configured pause before a sub-page fetch; does nothing when the pause is {@code <= 0}.
     *
     * <p>If the sleep is interrupted, the thread's interrupt flag is restored and the
     * {@link InterruptedException} is rethrown wrapped in an unchecked exception.
     */
    protected void maySleepBeforeSubsequentFetch() {
        if (this.pageSleepTimeMillis > 0) {
            try {
                Thread.sleep(this.pageSleepTimeMillis);
            } catch (InterruptedException e) {
                // Keep the interruption visible to code further up the stack.
                Thread.currentThread().interrupt();
                throw Throwables.propagate(e);
            }
        }
    }

    /**
     * Tells whether the anchor points below the repository path described by {@code list}.
     *
     * @param scrapeContext the current scrape context
     * @param list          path elements of the node currently being processed
     * @param element       the anchor to test
     * @return {@code false} when the raw {@code href} starts with {@code ?}; otherwise whether the absolute
     *         {@code href} starts with the remote URL computed for {@code list}
     */
    protected boolean isDeeperRepoLink(ScrapeContext scrapeContext, List<String> list, Element element) {
        if (element.attr("href").startsWith("?")) {
            return false;
        }
        return element.absUrl("href").startsWith(getRemoteUrlForRepositoryPath(scrapeContext, list));
    }

    /**
     * Returns the reference "parent directory" element that {@link #detectRemoteRepository} compares the
     * page's anchors against (by text and absolute {@code href}).
     *
     * @param page the page being inspected
     */
    protected abstract Element getParentDirectoryElement(Page page);
}
