package us.codecraft.webmagic;

import com.google.common.collect.Lists;
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.UUID;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.ReentrantLock;
import org.apache.commons.collections.CollectionUtils;
import org.apache.http.HttpHost;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import us.codecraft.webmagic.downloader.Downloader;
import us.codecraft.webmagic.downloader.HttpClientDownloader;
import us.codecraft.webmagic.pipeline.CollectorPipeline;
import us.codecraft.webmagic.pipeline.ConsolePipeline;
import us.codecraft.webmagic.pipeline.Pipeline;
import us.codecraft.webmagic.pipeline.ResultItemsCollectorPipeline;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.scheduler.QueueScheduler;
import us.codecraft.webmagic.scheduler.Scheduler;
import us.codecraft.webmagic.selector.thread.CountableThreadPool;
import us.codecraft.webmagic.utils.UrlUtils;

/* loaded from: input_file:us/codecraft/webmagic/Spider.class */
/**
 * Entry point of a crawl: polls {@link Request}s from a {@link Scheduler}, downloads them with a
 * {@link Downloader}, hands the resulting {@link Page} to the {@link PageProcessor} and feeds the
 * extracted results to every configured {@link Pipeline}.
 *
 * <p>Configuration methods return {@code this} so calls can be chained. Most of them throw
 * {@link IllegalStateException} when invoked while the spider is in the running state.
 */
public class Spider implements Runnable, Task {
    protected static final int STAT_INIT = 0;
    protected static final int STAT_RUNNING = 1;
    protected static final int STAT_STOPPED = 2;

    protected Downloader downloader;
    protected PageProcessor pageProcessor;
    protected List<Request> startRequests;
    protected Site site;
    protected String uuid;
    protected CountableThreadPool threadPool;
    protected ExecutorService executorService;
    private List<SpiderListener> spiderListeners;
    private Date startTime;
    protected List<Pipeline> pipelines = new ArrayList<Pipeline>();
    protected Scheduler scheduler = new QueueScheduler();
    protected Logger logger = LoggerFactory.getLogger(getClass());
    /** Number of worker threads; defaults to a single thread. */
    protected int threadNum = 1;
    /** Current lifecycle state, one of the {@code STAT_*} constants. */
    protected AtomicInteger stat = new AtomicInteger(STAT_INIT);
    protected boolean exitWhenComplete = true;
    protected boolean spawnUrl = true;
    protected boolean destroyWhenExit = true;
    /** Lock/condition pair used by the main loop to wait for newly added requests. */
    private ReentrantLock newUrlLock = new ReentrantLock();
    private Condition newUrlCondition = this.newUrlLock.newCondition();
    private final AtomicLong pageCount = new AtomicLong(0);
    /** Maximum time, in milliseconds, the main loop waits for new requests before re-polling. */
    private int emptySleepTime = 30000;

    /** Lifecycle state of a spider, mirroring the {@code STAT_*} integer constants. */
    public enum Status {
        Init(Spider.STAT_INIT),
        Running(Spider.STAT_RUNNING),
        Stopped(Spider.STAT_STOPPED);

        private final int value;

        Status(int i) {
            this.value = i;
        }

        int getValue() {
            return this.value;
        }

        /**
         * Maps an integer state to its {@code Status}.
         *
         * @param i one of the {@code STAT_*} values
         * @return the matching status, or {@link #Init} when no status has that value
         */
        public static Status fromValue(int i) {
            for (Status status : values()) {
                if (status.getValue() == i) {
                    return status;
                }
            }
            return Init;
        }
    }

    /**
     * Creates a spider for the given processor.
     *
     * @param pageProcessor processor used to handle downloaded pages
     * @return a new spider
     */
    public static Spider create(PageProcessor pageProcessor) {
        return new Spider(pageProcessor);
    }

    /**
     * Creates a spider whose site and start requests are taken from the processor's site.
     *
     * @param pageProcessor processor used to handle downloaded pages
     */
    public Spider(PageProcessor pageProcessor) {
        this.pageProcessor = pageProcessor;
        this.site = pageProcessor.getSite();
        this.startRequests = this.site.getStartRequests();
    }

    /**
     * Replaces the start requests with requests built from the given URLs.
     *
     * @param list start URLs
     * @return this
     * @throws IllegalStateException if the spider is running
     */
    public Spider startUrls(List<String> list) {
        checkIfRunning();
        this.startRequests = UrlUtils.convertToRequests(list);
        return this;
    }

    /**
     * Replaces the start requests. The list is stored as-is (not copied).
     *
     * @param list start requests
     * @return this
     * @throws IllegalStateException if the spider is running
     */
    public Spider startRequest(List<Request> list) {
        checkIfRunning();
        this.startRequests = list;
        return this;
    }

    /**
     * Sets an explicit identifier for this spider.
     *
     * @param str identifier returned by {@link #getUUID()}
     * @return this
     */
    public Spider setUUID(String str) {
        this.uuid = str;
        return this;
    }

    /**
     * Alias of {@link #setScheduler(Scheduler)}.
     *
     * @param scheduler the scheduler to use
     * @return this
     */
    public Spider scheduler(Scheduler scheduler) {
        return setScheduler(scheduler);
    }

    /**
     * Installs a new scheduler and moves every request still queued in the previous scheduler
     * into it.
     *
     * @param scheduler the scheduler to use
     * @return this
     * @throws IllegalStateException if the spider is running
     */
    public Spider setScheduler(Scheduler scheduler) {
        checkIfRunning();
        Scheduler previous = this.scheduler;
        this.scheduler = scheduler;
        // Skip migration when the same instance is re-installed: polling and pushing on one
        // scheduler would either never terminate or drop requests as duplicates.
        if (previous != null && previous != scheduler) {
            Request pending;
            while ((pending = previous.poll(this)) != null) {
                this.scheduler.push(pending, this);
            }
        }
        return this;
    }

    /**
     * Alias of {@link #addPipeline(Pipeline)}.
     *
     * @param pipeline pipeline to append
     * @return this
     */
    public Spider pipeline(Pipeline pipeline) {
        return addPipeline(pipeline);
    }

    /**
     * Appends a pipeline to the list of pipelines.
     *
     * @param pipeline pipeline to append
     * @return this
     * @throws IllegalStateException if the spider is running
     */
    public Spider addPipeline(Pipeline pipeline) {
        checkIfRunning();
        this.pipelines.add(pipeline);
        return this;
    }

    /**
     * Replaces the pipeline list. The list is stored as-is (not copied).
     *
     * @param list pipelines to use
     * @return this
     * @throws IllegalStateException if the spider is running
     */
    public Spider setPipelines(List<Pipeline> list) {
        checkIfRunning();
        this.pipelines = list;
        return this;
    }

    /**
     * Replaces the pipeline list with a new empty list.
     *
     * @return this
     */
    public Spider clearPipeline() {
        this.pipelines = new ArrayList<Pipeline>();
        return this;
    }

    /**
     * Alias of {@link #setDownloader(Downloader)}.
     *
     * @param downloader downloader to use
     * @return this
     */
    public Spider downloader(Downloader downloader) {
        return setDownloader(downloader);
    }

    /**
     * Sets the downloader.
     *
     * @param downloader downloader to use
     * @return this
     * @throws IllegalStateException if the spider is running
     */
    public Spider setDownloader(Downloader downloader) {
        checkIfRunning();
        this.downloader = downloader;
        return this;
    }

    /**
     * Fills in defaults before a run: an {@link HttpClientDownloader} when no downloader is set,
     * a {@link ConsolePipeline} when no pipeline is set, and a thread pool (backed by the
     * configured executor service when one is available and not shut down). Pending start
     * requests are pushed to the scheduler and the list is cleared; the start time is recorded.
     */
    protected void initComponent() {
        if (this.downloader == null) {
            this.downloader = new HttpClientDownloader();
        }
        if (this.pipelines.isEmpty()) {
            this.pipelines.add(new ConsolePipeline());
        }
        this.downloader.setThread(this.threadNum);
        if (this.threadPool == null || this.threadPool.isShutdown()) {
            if (this.executorService == null || this.executorService.isShutdown()) {
                this.threadPool = new CountableThreadPool(this.threadNum);
            } else {
                this.threadPool = new CountableThreadPool(this.threadNum, this.executorService);
            }
        }
        if (this.startRequests != null) {
            for (Request request : this.startRequests) {
                this.scheduler.push(request, this);
            }
            this.startRequests.clear();
        }
        this.startTime = new Date();
    }

    /**
     * Runs the crawl loop on the calling thread until the thread is interrupted, the spider is
     * stopped, or (when {@code exitWhenComplete} is set) the scheduler is empty and no worker is
     * alive. Afterwards the state becomes stopped and, when {@code destroyWhenExit} is set,
     * {@link #close()} is called.
     *
     * @throws IllegalStateException if the spider is already running
     */
    @Override // java.lang.Runnable
    public void run() {
        checkRunningStat();
        initComponent();
        this.logger.info("Spider " + getUUID() + " started!");
        while (!Thread.currentThread().isInterrupted() && this.stat.get() == STAT_RUNNING) {
            final Request request = this.scheduler.poll(this);
            if (request != null) {
                this.threadPool.execute(new Runnable() {
                    @Override // java.lang.Runnable
                    public void run() {
                        Spider.this.handleRequest(request);
                    }
                });
            } else if (this.threadPool.getThreadAlive() == 0 && this.exitWhenComplete) {
                break;
            } else {
                waitNewUrl();
            }
        }
        this.stat.set(STAT_STOPPED);
        if (this.destroyWhenExit) {
            close();
        }
    }

    /**
     * Processes one request on a worker thread, notifies listeners, and always performs the
     * per-request bookkeeping (proxy return, page counter, wake-up of the main loop).
     */
    private void handleRequest(Request request) {
        try {
            processRequest(request);
            onSuccess(request);
        } catch (Exception e) {
            onError(request);
            this.logger.error("process request " + request + " error", e);
        } finally {
            try {
                returnProxy(request);
            } finally {
                // Must run even if returning the proxy fails; otherwise the main loop is not
                // woken up and waits for the full emptySleepTime.
                this.pageCount.incrementAndGet();
                signalNewUrl();
            }
        }
    }

    /** Hands the proxy recorded on the request back to the site's proxy pool, when enabled. */
    private void returnProxy(Request request) {
        if (this.site.getHttpProxyPool() != null && this.site.getHttpProxyPool().isEnable()) {
            this.site.returnHttpProxyToPool(
                    (HttpHost) request.getExtra(Request.PROXY),
                    ((Integer) request.getExtra(Request.STATUS_CODE)).intValue());
        }
    }

    /**
     * Notifies every registered listener that a request failed.
     *
     * @param request the failed request
     */
    protected void onError(Request request) {
        if (CollectionUtils.isNotEmpty(this.spiderListeners)) {
            for (SpiderListener listener : this.spiderListeners) {
                listener.onError(request);
            }
        }
    }

    /**
     * Notifies every registered listener that a request succeeded.
     *
     * @param request the processed request
     */
    protected void onSuccess(Request request) {
        if (CollectionUtils.isNotEmpty(this.spiderListeners)) {
            for (SpiderListener listener : this.spiderListeners) {
                listener.onSuccess(request);
            }
        }
    }

    /**
     * Atomically moves the state to running.
     *
     * @throws IllegalStateException if the spider is already running
     */
    private void checkRunningStat() {
        int current;
        do {
            current = this.stat.get();
            if (current == STAT_RUNNING) {
                throw new IllegalStateException("Spider is already running!");
            }
        } while (!this.stat.compareAndSet(current, STAT_RUNNING));
    }

    /**
     * Closes the downloader, the page processor and every pipeline that implements
     * {@link Closeable}, then shuts down the thread pool if one was created.
     */
    public void close() {
        destroyEach(this.downloader);
        destroyEach(this.pageProcessor);
        for (Pipeline pipeline : this.pipelines) {
            destroyEach(pipeline);
        }
        // The pool only exists once initComponent() has run.
        if (this.threadPool != null) {
            this.threadPool.shutdown();
        }
    }

    /** Closes {@code obj} when it is {@link Closeable}; I/O failures are logged, not thrown. */
    private void destroyEach(Object obj) {
        if (obj instanceof Closeable) {
            try {
                ((Closeable) obj).close();
            } catch (IOException e) {
                this.logger.warn("Failed to close " + obj, e);
            }
        }
    }

    /**
     * Initializes the components and processes each URL synchronously on the calling thread.
     *
     * @param strArr URLs to process
     */
    public void test(String... strArr) {
        initComponent();
        for (String url : strArr) {
            processRequest(new Request(url));
        }
    }

    /**
     * Downloads and processes a single request, then sleeps for the site's sleep time.
     *
     * <p>A {@code null} page is reported through {@link #onError(Request)}. A page flagged for
     * cycle retry only has its target requests re-queued. Otherwise the page is processed, its
     * target requests are queued when {@code spawnUrl} is set, and its result items are passed to
     * every pipeline unless they are marked as skipped.
     *
     * @param request the request to process
     */
    protected void processRequest(Request request) {
        Page page = this.downloader.download(request, this);
        if (page == null) {
            sleep(this.site.getSleepTime());
            onError(request);
            return;
        }
        if (page.isNeedCycleRetry()) {
            extractAndAddRequests(page, true);
            sleep(this.site.getSleepTime());
            return;
        }
        this.pageProcessor.process(page);
        extractAndAddRequests(page, this.spawnUrl);
        if (!page.getResultItems().isSkip()) {
            for (Pipeline pipeline : this.pipelines) {
                pipeline.process(page.getResultItems(), this);
            }
        }
        sleep(this.site.getSleepTime());
    }

    /**
     * Sleeps for the given time. If the thread is interrupted the sleep ends early and the
     * interrupt flag is restored so callers can observe it.
     *
     * @param i sleep time in milliseconds
     */
    protected void sleep(int i) {
        try {
            Thread.sleep(i);
        } catch (InterruptedException e) {
            this.logger.warn("Interrupted while sleeping", e);
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Queues the page's target requests.
     *
     * @param page page whose target requests are read
     * @param z    when {@code false} nothing is queued
     */
    protected void extractAndAddRequests(Page page, boolean z) {
        if (z && CollectionUtils.isNotEmpty(page.getTargetRequests())) {
            for (Request request : page.getTargetRequests()) {
                addRequest(request);
            }
        }
    }

    /**
     * Pushes a request to the scheduler; when the site has no domain yet, the domain is derived
     * from the request URL first.
     */
    private void addRequest(Request request) {
        if (this.site.getDomain() == null && request != null && request.getUrl() != null) {
            this.site.setDomain(UrlUtils.getDomain(request.getUrl()));
        }
        this.scheduler.push(request, this);
    }

    /**
     * Guards configuration methods.
     *
     * @throws IllegalStateException if the spider is running
     */
    protected void checkIfRunning() {
        if (this.stat.get() == STAT_RUNNING) {
            throw new IllegalStateException("Spider is already running!");
        }
    }

    /** Starts {@link #run()} on a new non-daemon thread. */
    public void runAsync() {
        Thread thread = new Thread(this);
        thread.setDaemon(false);
        thread.start();
    }

    /**
     * Queues the given URLs and wakes up the main loop.
     *
     * @param strArr URLs to queue
     * @return this
     */
    public Spider addUrl(String... strArr) {
        for (String url : strArr) {
            addRequest(new Request(url));
        }
        signalNewUrl();
        return this;
    }

    /**
     * Crawls the given URLs synchronously, without following extracted links, and returns what
     * the collector pipeline gathered. Components are not destroyed afterwards, so the spider can
     * be reused.
     *
     * @param collection URLs to crawl
     * @param <T>        element type produced by the collector pipeline
     * @return the collected results
     * @throws IllegalStateException if the spider is already running
     */
    public <T> List<T> getAll(Collection<String> collection) {
        this.destroyWhenExit = false;
        this.spawnUrl = false;
        if (this.startRequests != null) {
            this.startRequests.clear();
        }
        for (Request request : UrlUtils.convertToRequests(collection)) {
            addRequest(request);
        }
        CollectorPipeline collectorPipeline = getCollectorPipeline();
        this.pipelines.add(collectorPipeline);
        try {
            run();
        } finally {
            // Drop the temporary collector so repeated calls do not accumulate collectors,
            // and restore the flags even when run() throws.
            this.pipelines.remove(collectorPipeline);
            this.spawnUrl = true;
            this.destroyWhenExit = true;
        }
        return collectorPipeline.getCollected();
    }

    /**
     * Creates the pipeline used by {@link #getAll(Collection)} to gather results.
     *
     * @return a new {@link ResultItemsCollectorPipeline}
     */
    protected CollectorPipeline getCollectorPipeline() {
        return new ResultItemsCollectorPipeline();
    }

    /**
     * Crawls a single URL synchronously via {@link #getAll(Collection)}.
     *
     * @param str URL to crawl
     * @param <T> element type produced by the collector pipeline
     * @return the first collected result, or {@code null} when nothing was collected
     */
    public <T> T get(String str) {
        List<T> all = getAll(Lists.newArrayList(new String[]{str}));
        if (all == null || all.isEmpty()) {
            return null;
        }
        return all.get(0);
    }

    /**
     * Queues the given requests and wakes up the main loop.
     *
     * @param requestArr requests to queue
     * @return this
     */
    public Spider addRequest(Request... requestArr) {
        for (Request request : requestArr) {
            addRequest(request);
        }
        signalNewUrl();
        return this;
    }

    /**
     * Blocks the main loop until a new URL is signalled or {@code emptySleepTime} elapses.
     * Returns immediately when no worker is alive and the spider should exit on completion.
     * On interruption the interrupt flag is restored so the main loop can terminate.
     */
    private void waitNewUrl() {
        this.newUrlLock.lock();
        try {
            // Re-check under the lock so a wake-up that happened just before is not missed.
            if (this.threadPool.getThreadAlive() == 0 && this.exitWhenComplete) {
                return;
            }
            this.newUrlCondition.await(this.emptySleepTime, TimeUnit.MILLISECONDS);
        } catch (InterruptedException e) {
            this.logger.warn("waitNewUrl - interrupted, error {}", e);
            Thread.currentThread().interrupt();
        } finally {
            this.newUrlLock.unlock();
        }
    }

    /** Wakes up the main loop if it is waiting for new requests. */
    public void signalNewUrl() {
        this.newUrlLock.lock();
        try {
            this.newUrlCondition.signalAll();
        } finally {
            this.newUrlLock.unlock();
        }
    }

    /** Alias of {@link #runAsync()}. */
    public void start() {
        runAsync();
    }

    /**
     * Requests the spider to stop by moving the state from running to stopped; the outcome is
     * logged. Has no effect when the spider is not running.
     */
    public void stop() {
        if (this.stat.compareAndSet(STAT_RUNNING, STAT_STOPPED)) {
            this.logger.info("Spider " + getUUID() + " stop success!");
        } else {
            this.logger.info("Spider " + getUUID() + " stop fail!");
        }
    }

    /**
     * Sets the number of worker threads.
     *
     * @param i thread count, must be positive
     * @return this
     * @throws IllegalStateException    if the spider is running
     * @throws IllegalArgumentException if {@code i} is not positive
     */
    public Spider thread(int i) {
        checkIfRunning();
        // Validate before assigning so an invalid value never sticks.
        if (i <= 0) {
            throw new IllegalArgumentException("threadNum should be more than one!");
        }
        this.threadNum = i;
        return this;
    }

    /**
     * Sets the number of worker threads and the executor service backing the thread pool.
     *
     * @param executorService executor used to build the thread pool
     * @param i               thread count, must be positive
     * @return this
     * @throws IllegalStateException    if the spider is running
     * @throws IllegalArgumentException if {@code i} is not positive
     */
    public Spider thread(ExecutorService executorService, int i) {
        checkIfRunning();
        if (i <= 0) {
            throw new IllegalArgumentException("threadNum should be more than one!");
        }
        this.threadNum = i;
        this.executorService = executorService;
        return this;
    }

    public boolean isExitWhenComplete() {
        return this.exitWhenComplete;
    }

    /**
     * Controls whether {@link #run()} returns once the scheduler is empty and no worker is alive.
     *
     * @param z {@code true} to exit when the crawl is complete
     * @return this
     */
    public Spider setExitWhenComplete(boolean z) {
        this.exitWhenComplete = z;
        return this;
    }

    public boolean isSpawnUrl() {
        return this.spawnUrl;
    }

    /**
     * @return the number of requests handled by worker threads so far (successful or not)
     */
    public long getPageCount() {
        return this.pageCount.get();
    }

    /**
     * @return the current lifecycle status
     */
    public Status getStatus() {
        return Status.fromValue(this.stat.get());
    }

    /**
     * @return the number of alive worker threads, or {@code 0} when no thread pool exists yet
     */
    public int getThreadAlive() {
        return this.threadPool == null ? 0 : this.threadPool.getThreadAlive();
    }

    /**
     * Controls whether target requests extracted from processed pages are queued.
     *
     * @param z {@code true} to queue extracted requests
     * @return this
     */
    public Spider setSpawnUrl(boolean z) {
        this.spawnUrl = z;
        return this;
    }

    /**
     * Returns the identifier of this spider: the explicit UUID when set, otherwise the site's
     * domain (which may be {@code null} if the site has none yet). A random UUID is generated
     * and cached only when there is no site.
     *
     * @return the identifier
     */
    @Override // us.codecraft.webmagic.Task
    public String getUUID() {
        if (this.uuid != null) {
            return this.uuid;
        }
        if (this.site != null) {
            return this.site.getDomain();
        }
        this.uuid = UUID.randomUUID().toString();
        return this.uuid;
    }

    /**
     * Sets the executor service used to build the thread pool on the next run.
     *
     * @param executorService executor to use
     * @return this
     * @throws IllegalStateException if the spider is running
     */
    public Spider setExecutorService(ExecutorService executorService) {
        checkIfRunning();
        this.executorService = executorService;
        return this;
    }

    @Override // us.codecraft.webmagic.Task
    public Site getSite() {
        return this.site;
    }

    public List<SpiderListener> getSpiderListeners() {
        return this.spiderListeners;
    }

    /**
     * Sets the listeners notified on request success and failure. The list is stored as-is.
     *
     * @param list listeners to notify
     * @return this
     */
    public Spider setSpiderListeners(List<SpiderListener> list) {
        this.spiderListeners = list;
        return this;
    }

    /**
     * @return the time the components were last initialized, or {@code null} before the first run
     */
    public Date getStartTime() {
        return this.startTime;
    }

    public Scheduler getScheduler() {
        return this.scheduler;
    }

    /**
     * Sets how long the main loop waits for new requests before polling the scheduler again.
     *
     * @param i wait time in milliseconds
     */
    public void setEmptySleepTime(int i) {
        this.emptySleepTime = i;
    }
}
