package com.github.vector4wang;

import com.github.vector4wang.downloader.AbstractDownloader;
import com.github.vector4wang.downloader.JsoupDownloader;
import com.github.vector4wang.proxy.AbstractProxyExtractor;
import com.github.vector4wang.proxy.Proxy2;
import com.github.vector4wang.proxy.RandomProxy;
import com.github.vector4wang.service.CrawlerService;
import com.github.vector4wang.thread.CrawlerThread;
import com.github.vector4wang.util.CrawlerUtil;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingQueue;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/github/vector4wang/VWCrawler.class */
/**
 * Entry point and shared state holder of the crawler.
 *
 * <p>An instance is configured through {@link Builder}, started with {@link #start()} and
 * then handed to every {@link CrawlerThread} it creates. The worker threads use the public
 * accessors of this class to pull URLs from the pending queue, to enqueue newly discovered
 * URLs and to look up the configured downloader, parser, headers and proxy extractor.
 */
public class VWCrawler {
    private static final Logger logger = LoggerFactory.getLogger(VWCrawler.class);

    // NOTE(review): nothing in this class ever assigns this field, so getUrl() always
    // returns null. Builder.setUrl only enqueues the URL. Confirm whether it is still needed.
    private String url;
    private volatile Map<String, String> cookieMap;
    private volatile Map<String, String> headerMap;
    private CrawlerService crawlerService;
    private int timeout = 2000;
    private int retryCount = 2;
    /** URLs waiting to be crawled. */
    private volatile LinkedBlockingQueue<String> waitCrawlerUrls = new LinkedBlockingQueue<>();
    private AbstractDownloader downloader = new JsoupDownloader();
    /**
     * URLs that have already been handed out by {@link #generateUrl()}. The set is read and
     * written by every worker thread, so it must be a concurrent collection.
     */
    private volatile Set<String> crawledUrls =
            Collections.newSetFromMap(new ConcurrentHashMap<String, Boolean>());
    private volatile Set<String> targetUrlRex = new HashSet<>();
    private volatile Set<String> seedsPageUrlRex = new HashSet<>();
    private volatile AbstractProxyExtractor proxyExtractor = new RandomProxy();
    private int threadCount = 1;
    private ExecutorService crawler = Executors.newCachedThreadPool();
    private List<CrawlerThread> crawlerThreads = new CopyOnWriteArrayList<>();

    /**
     * Fluent configurator for a {@link VWCrawler}.
     *
     * <p>Every setter mutates the single crawler instance owned by this builder and returns
     * the builder itself; {@link #build()} returns that same instance.
     */
    public static class Builder {
        private VWCrawler crawler = new VWCrawler();

        /**
         * Adds a start URL directly to the pending queue (no duplicate check is performed).
         *
         * @param str the URL to crawl first
         * @return this builder
         */
        public Builder setUrl(String str) {
            this.crawler.waitCrawlerUrls.add(str);
            return this;
        }

        /**
         * Sets the timeout value exposed through {@link VWCrawler#getTimeout()}.
         *
         * @param i the timeout value
         * @return this builder
         */
        public Builder setTimeOut(int i) {
            this.crawler.timeout = i;
            return this;
        }

        /**
         * Enqueues seed URLs, skipping empty entries and URLs that are already known.
         *
         * @param strArr seed URLs; {@code null} is ignored
         * @return this builder
         */
        public Builder setSeedUrl(String... strArr) {
            if (strArr != null) {
                for (String seed : strArr) {
                    if (StringUtils.isNotEmpty(seed)) {
                        this.crawler.addWaitCrawlerUrl(seed);
                    }
                }
            }
            return this;
        }

        /**
         * Sets how many {@link CrawlerThread} workers {@link VWCrawler#start()} creates.
         *
         * @param i number of worker threads, must be greater than zero
         * @return this builder
         * @throws IllegalArgumentException if {@code i} is zero or negative
         */
        public Builder setThreadCount(int i) {
            if (i <= 0) {
                // Zero is rejected as well, so the message must say "greater than 0".
                throw new IllegalArgumentException("线程数必须大于0");
            }
            this.crawler.threadCount = i;
            return this;
        }

        /**
         * Alias of {@link #setSeedsPageUrlRex(String...)}.
         *
         * @param strArr seed-page URL patterns; {@code null} is ignored
         * @return this builder
         */
        public Builder setSeedsPage(String... strArr) {
            return setSeedsPageUrlRex(strArr);
        }

        /**
         * Registers the patterns consulted by {@link VWCrawler#isTargetUrl(String)}.
         *
         * @param strArr target URL patterns; {@code null} is ignored
         * @return this builder
         */
        public Builder setTargetUrlRex(String... strArr) {
            if (strArr != null) {
                for (String pattern : strArr) {
                    this.crawler.targetUrlRex.add(pattern);
                }
            }
            return this;
        }

        /**
         * Registers the seed-page URL patterns returned by
         * {@link VWCrawler#getSeedsPageUrlRex()}.
         *
         * @param strArr seed-page URL patterns; {@code null} is ignored
         * @return this builder
         */
        public Builder setSeedsPageUrlRex(String... strArr) {
            if (strArr != null) {
                for (String pattern : strArr) {
                    this.crawler.seedsPageUrlRex.add(pattern);
                }
            }
            return this;
        }

        /**
         * Sets the service returned by {@link VWCrawler#getCrawlerService()}.
         *
         * @param crawlerService the page parser / handler
         * @return this builder
         */
        public Builder setPageParser(CrawlerService crawlerService) {
            this.crawler.crawlerService = crawlerService;
            return this;
        }

        /**
         * Replaces the default {@link JsoupDownloader}.
         *
         * @param abstractDownloader the downloader to use
         * @return this builder
         */
        public Builder setDownloader(AbstractDownloader abstractDownloader) {
            this.crawler.downloader = abstractDownloader;
            return this;
        }

        /**
         * Adds all given entries to the header map.
         *
         * @param hashMap headers to add; {@code null} is ignored, like in the other setters
         * @return this builder
         */
        public Builder setHeaders(HashMap<String, String> hashMap) {
            if (hashMap != null) {
                getHeader().putAll(hashMap);
            }
            return this;
        }

        /**
         * Adds (or overwrites) a single header.
         *
         * @param str header name
         * @param str2 header value
         * @return this builder
         */
        public Builder setHeader(String str, String str2) {
            getHeader().put(str, str2);
            return this;
        }

        /**
         * Hands a proxy list to the current proxy extractor.
         *
         * @param list proxies to use; {@code null} or empty lists are ignored
         * @return this builder
         */
        public Builder setProxys(List<Proxy2> list) {
            if (list != null && !list.isEmpty()) {
                this.crawler.proxyExtractor.setProxy2s(list);
            }
            return this;
        }

        /**
         * Replaces the proxy extractor. If the current extractor already holds proxies they
         * are copied over to the new one first.
         *
         * @param abstractProxyExtractor the new extractor; {@code null} is ignored
         * @return this builder
         */
        public Builder setAbsProxyExtracter(AbstractProxyExtractor abstractProxyExtractor) {
            if (abstractProxyExtractor != null) {
                List<Proxy2> existing = this.crawler.proxyExtractor.getProxy2s();
                if (!existing.isEmpty()) {
                    abstractProxyExtractor.setProxy2s(this.crawler.proxyExtractor.getProxy2s());
                }
                this.crawler.proxyExtractor = abstractProxyExtractor;
            }
            return this;
        }

        /** Returns the crawler's header map, creating it on first use. */
        private Map<String, String> getHeader() {
            if (this.crawler.headerMap == null) {
                this.crawler.headerMap = new HashMap<>();
            }
            return this.crawler.headerMap;
        }

        /**
         * Sets the retry count exposed through {@link VWCrawler#getRetryCount()}.
         *
         * @param i the retry count
         * @return this builder
         */
        public Builder setRetryCount(int i) {
            this.crawler.retryCount = i;
            return this;
        }

        /**
         * Returns the configured crawler (the same instance on every call).
         *
         * @return the crawler owned by this builder
         */
        public VWCrawler build() {
            return this.crawler;
        }
    }

    /** @return the {@code url} field; see the review note on the field declaration */
    public String getUrl() {
        return this.url;
    }

    /** @return the configured timeout value (defaults to 2000) */
    public int getTimeout() {
        return this.timeout;
    }

    /** @return the configured retry count (defaults to 2) */
    public int getRetryCount() {
        return this.retryCount;
    }

    /** @return the number of worker threads created by {@link #start()} (defaults to 1) */
    public int getThreadCount() {
        return this.threadCount;
    }

    /** @return the live queue of URLs waiting to be crawled */
    public LinkedBlockingQueue<String> getWaitCrawlerUrls() {
        return this.waitCrawlerUrls;
    }

    /** @return the live set of URLs already handed out by {@link #generateUrl()} */
    public Set<String> getCrawledUrls() {
        return this.crawledUrls;
    }

    /** @return the live set of target URL patterns */
    public Set<String> getTargetUrlRex() {
        return this.targetUrlRex;
    }

    /** @return the live set of seed-page URL patterns */
    public Set<String> getSeedsPageUrlRex() {
        return this.seedsPageUrlRex;
    }

    /** @return the configured page parser, or {@code null} if none was set */
    public CrawlerService getCrawlerService() {
        return this.crawlerService;
    }

    /** @return the current proxy extractor */
    public AbstractProxyExtractor getProxyExtractor() {
        return this.proxyExtractor;
    }

    /**
     * Replaces the proxy extractor.
     *
     * @param abstractProxyExtractor the new extractor
     */
    public void setProxyExtractor(AbstractProxyExtractor abstractProxyExtractor) {
        this.proxyExtractor = abstractProxyExtractor;
    }

    /** @return the header map, or {@code null} if no header was ever configured */
    public Map<String, String> getHeaderMap() {
        return this.headerMap;
    }

    /** @return the configured downloader */
    public AbstractDownloader getDownloader() {
        return this.downloader;
    }

    /**
     * Creates {@code threadCount} workers, submits them to the executor and then calls
     * {@code shutdown()} so the pool accepts no further tasks.
     *
     * @throws IllegalStateException if no URL has been queued
     */
    public void start() {
        if (this.waitCrawlerUrls.isEmpty()) {
            throw new IllegalStateException("待抓取URL为空，请确认是否有设置waitCrawlerUrls(爬虫起始URL)");
        }
        for (int i = 0; i < this.threadCount; i++) {
            this.crawlerThreads.add(new CrawlerThread(this));
        }
        for (CrawlerThread worker : this.crawlerThreads) {
            this.crawler.execute(worker);
        }
        this.crawler.shutdown();
    }

    /**
     * Stops the crawler if the pending queue is empty and no worker reports that it is
     * still running; otherwise does nothing.
     */
    public void tryStop() {
        boolean anyRunning = false;
        for (CrawlerThread worker : this.crawlerThreads) {
            if (worker.isRunning()) {
                anyRunning = true;
                break;
            }
        }
        if (this.waitCrawlerUrls.isEmpty() && !anyRunning) {
            logger.info("vw-crawler is finished and will stop!");
            stop();
        }
    }

    /**
     * Tells whether a URL is a target page.
     *
     * @param str the URL to test
     * @return {@code true} if no target pattern is configured or if
     *         {@code CrawlerUtil.isMatch} accepts the URL for at least one pattern
     */
    public boolean isTargetUrl(String str) {
        if (this.targetUrlRex.isEmpty()) {
            return true;
        }
        for (String pattern : this.targetUrlRex) {
            if (CrawlerUtil.isMatch(pattern, str)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Takes the next pending URL, blocking until one is available, and records it as crawled.
     *
     * @return the next URL to crawl
     * @throws InterruptedException if the calling thread is interrupted while waiting
     */
    public String generateUrl() throws InterruptedException {
        String next = getWaitCrawlerUrls().take();
        getCrawledUrls().add(next);
        return next;
    }

    /**
     * Enqueues a URL unless it has already been crawled or is already pending.
     *
     * <p>NOTE(review): the duplicate check and the enqueue are two separate steps, so two
     * threads adding the same URL at the same moment may both enqueue it.
     *
     * @param str the URL to enqueue
     */
    public void addWaitCrawlerUrl(String str) {
        if (getCrawledUrls().contains(str) || getWaitCrawlerUrls().contains(str)) {
            return;
        }
        try {
            getWaitCrawlerUrls().put(str);
        } catch (InterruptedException e) {
            // stop() interrupts the workers via shutdownNow(); restore the flag so the
            // calling thread still observes the interruption instead of losing it here.
            Thread.currentThread().interrupt();
            logger.warn("Interrupted while queueing URL: " + str, e);
        }
    }

    /** Shuts the worker pool down immediately via {@code shutdownNow()}. */
    public void stop() {
        this.crawler.shutdownNow();
        logger.info("vw-crawler pool closed!");
    }
}
