package com.xuxueli.crawler.thread;

import com.xuxueli.crawler.XxlCrawler;
import com.xuxueli.crawler.annotation.PageFieldSelect;
import com.xuxueli.crawler.annotation.PageSelect;
import com.xuxueli.crawler.conf.XxlCrawlerConf;
import com.xuxueli.crawler.exception.XxlCrawlerException;
import com.xuxueli.crawler.model.PageLoadInfo;
import com.xuxueli.crawler.util.FieldReflectionUtil;
import com.xuxueli.crawler.util.JsoupUtil;
import com.xuxueli.crawler.util.UrlUtil;
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.lang.reflect.ParameterizedType;
import java.net.Proxy;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/xuxueli/crawler/thread/CrawlerThread.class */
/**
 * Worker loop of the crawler: repeatedly obtains a URL from the crawler's run data, loads the
 * page, optionally queues the links found on it, and maps the page onto page-VO objects which are
 * handed to the configured page parser.
 *
 * <p>{@link #toStop()} and {@link #isRunning()} are public and are meant to be used by code other
 * than the worker itself (callers are outside this file), so the two flags are {@code volatile}
 * to make writes visible across threads.
 */
public class CrawlerThread implements Runnable {
    private static final Logger logger = LoggerFactory.getLogger(CrawlerThread.class);

    private final XxlCrawler crawler;

    /** Reused for random user-agent selection instead of allocating a generator per request. */
    private final Random random = new Random();

    /** {@code false} while the worker is between URLs (finish check / fetching the next URL). */
    private volatile boolean running = true;

    /** Set by {@link #toStop()}; checked at the top of every loop iteration in {@link #run()}. */
    private volatile boolean toStop = false;

    /**
     * @param xxlCrawler the crawler whose run configuration and run data this worker uses
     */
    public CrawlerThread(XxlCrawler xxlCrawler) {
        this.crawler = xxlCrawler;
    }

    /** Requests that the worker loop exit before starting its next iteration. */
    public void toStop() {
        this.toStop = true;
    }

    /**
     * @return {@code true} once a URL has been obtained and is being processed; {@code false}
     *     while the worker is running the finish check or obtaining the next URL
     */
    public boolean isRunning() {
        return this.running;
    }

    /**
     * Main loop. Each iteration marks the worker idle, lets the crawler check whether it can
     * finish, obtains the next URL, marks the worker running and processes the URL if it is a
     * valid URL. Any throwable is logged and the loop continues until {@link #toStop()} is called.
     */
    @Override
    public void run() {
        while (!this.toStop) {
            try {
                this.running = false;
                this.crawler.tryFinish();
                String url = this.crawler.getRunData().getUrl();
                this.running = true;
                logger.info(">>>>>>>>>>> xxl crawler, process link : {}", url);
                if (UrlUtil.isUrl(url)) {
                    processWithRetry(url);
                }
            } catch (Throwable th) {
                // Interrupts are only logged; termination is driven by the toStop flag.
                if (th instanceof InterruptedException) {
                    logger.info(">>>>>>>>>>> xxl crawler thread is interrupted. {}", th.getMessage());
                } else if (th instanceof XxlCrawlerException) {
                    logger.info(">>>>>>>>>>> xxl crawler thread {}", th.getMessage());
                } else {
                    logger.error(th.getMessage(), th);
                }
            }
        }
    }

    /**
     * Processes {@code url} up to {@code 1 + failRetryCount} times, pausing after every attempt
     * (successful or not) when a pause is configured, and stopping at the first success.
     */
    private void processWithRetry(String url) throws IllegalAccessException, InstantiationException {
        for (int attempt = 0; attempt < 1 + this.crawler.getRunConf().getFailRetryCount(); attempt++) {
            boolean succeeded = process(url);
            if (this.crawler.getRunConf().getPauseMillis() > 0) {
                try {
                    TimeUnit.MILLISECONDS.sleep(this.crawler.getRunConf().getPauseMillis());
                } catch (InterruptedException e) {
                    // Logged only: the pause is cut short and the retry loop carries on.
                    logger.info(">>>>>>>>>>> xxl crawler thread is interrupted. 2{}", e.getMessage());
                }
            }
            if (succeeded) {
                break;
            }
        }
    }

    /**
     * Loads one page and feeds it to the page parser.
     *
     * @param str the URL to load
     * @return {@code false} when the loader returned no document or the URL fails the white-list
     *     check (the caller may retry); {@code true} otherwise, including when the page-level
     *     selector matched nothing with text
     * @throws IllegalAccessException if a page-VO field or constructor is not accessible
     * @throws InstantiationException if the page-VO class cannot be instantiated
     */
    private boolean process(String str) throws IllegalAccessException, InstantiationException {
        PageLoadInfo pageLoadInfo = buildPageLoadInfo(str);

        this.crawler.getRunConf().getPageParser().preLoad(pageLoadInfo);
        Document document = this.crawler.getRunConf().getPageLoader().load(pageLoadInfo);
        // postLoad is invoked before the null check, so the parser may receive a null document.
        this.crawler.getRunConf().getPageParser().postLoad(document);
        if (document == null) {
            return false;
        }

        if (this.crawler.getRunConf().isAllowSpread()) {
            spreadLinks(document);
        }
        if (!this.crawler.getRunConf().validWhiteUrl(str)) {
            return false;
        }

        // The page-VO type is the first type argument of the parser's generic superclass.
        Class<?> pageVoClass = (Class<?>) ((ParameterizedType) this.crawler.getRunConf().getPageParser()
                .getClass().getGenericSuperclass()).getActualTypeArguments()[0];

        PageSelect pageSelect = pageVoClass.getAnnotation(PageSelect.class);
        String pageCssQuery = "html";
        if (pageSelect != null && pageSelect.cssQuery() != null && pageSelect.cssQuery().trim().length() > 0) {
            pageCssQuery = pageSelect.cssQuery();
        }

        Elements pageVoElements = document.select(pageCssQuery);
        if (pageVoElements == null || !pageVoElements.hasText()) {
            return true;
        }

        for (Element pageVoElement : pageVoElements) {
            Object pageVo = pageVoClass.newInstance();
            populateFields(pageVo, pageVoClass, pageVoElement);
            this.crawler.getRunConf().getPageParser().parse(document, pageVoElement, pageVo);
        }
        return true;
    }

    /** Builds the load request for {@code url} from the current run configuration. */
    private PageLoadInfo buildPageLoadInfo(String url) {
        Proxy proxy = null;
        if (this.crawler.getRunConf().getProxyMaker() != null) {
            proxy = this.crawler.getRunConf().getProxyMaker().make();
        }

        PageLoadInfo pageLoadInfo = new PageLoadInfo();
        pageLoadInfo.setUrl(url);
        pageLoadInfo.setParamMap(this.crawler.getRunConf().getParamMap());
        pageLoadInfo.setCookieMap(this.crawler.getRunConf().getCookieMap());
        pageLoadInfo.setHeaderMap(this.crawler.getRunConf().getHeaderMap());
        pageLoadInfo.setUserAgent(chooseUserAgent());
        pageLoadInfo.setReferrer(this.crawler.getRunConf().getReferrer());
        pageLoadInfo.setIfPost(this.crawler.getRunConf().isIfPost());
        pageLoadInfo.setTimeoutMillis(this.crawler.getRunConf().getTimeoutMillis());
        pageLoadInfo.setProxy(proxy);
        return pageLoadInfo;
    }

    /** Returns a random configured user agent, the only one if there is one, or {@code null} if none. */
    private String chooseUserAgent() {
        List<String> userAgents = this.crawler.getRunConf().getUserAgentList();
        int count = userAgents.size();
        if (count > 1) {
            return userAgents.get(this.random.nextInt(count));
        }
        return count == 1 ? userAgents.get(0) : null;
    }

    /** Adds every white-listed link found in {@code document} to the crawler's run data. */
    private void spreadLinks(Document document) {
        Set<String> links = JsoupUtil.findLinks(document);
        if (links == null || links.isEmpty()) {
            return;
        }
        for (String link : links) {
            if (this.crawler.getRunConf().validWhiteUrl(link)) {
                this.crawler.getRunData().addUrl(link);
            }
        }
    }

    /**
     * Fills the non-static fields of {@code pageVo} that carry a {@link PageFieldSelect} with a
     * non-blank CSS query, using values selected beneath {@code pageVoElement}. Fields for which
     * no value could be extracted are left untouched.
     */
    private void populateFields(Object pageVo, Class<?> pageVoClass, Element pageVoElement)
            throws IllegalAccessException {
        Field[] fields = pageVoClass.getDeclaredFields();
        if (fields == null) {
            return;
        }
        for (Field field : fields) {
            if (Modifier.isStatic(field.getModifiers())) {
                continue;
            }
            PageFieldSelect fieldSelect = field.getAnnotation(PageFieldSelect.class);
            if (fieldSelect == null) {
                continue;
            }
            String cssQuery = fieldSelect.cssQuery();
            if (cssQuery == null || cssQuery.trim().length() == 0) {
                continue;
            }
            XxlCrawlerConf.SelectType selectType = fieldSelect.selectType();
            String selectVal = fieldSelect.selectVal();

            Object value;
            if (field.getGenericType() instanceof ParameterizedType) {
                value = extractListValue(field, pageVoElement, cssQuery, selectType, selectVal);
            } else {
                value = extractSingleValue(field, pageVoElement, cssQuery, selectType, selectVal);
            }
            if (value != null) {
                field.setAccessible(true);
                field.set(pageVo, value);
            }
        }
    }

    /**
     * Extracts a scalar field value from the first element matching {@code cssQuery}.
     *
     * @return the parsed value, or {@code null} when nothing matched, the extracted text was
     *     empty, or parsing failed (the failure is logged)
     */
    private Object extractSingleValue(Field field, Element scope, String cssQuery,
                                      XxlCrawlerConf.SelectType selectType, String selectVal) {
        Elements matches = scope.select(cssQuery);
        String text = null;
        if (matches != null && matches.size() > 0) {
            text = JsoupUtil.parseElement(matches.get(0), selectType, selectVal);
        }
        if (text == null || text.length() == 0) {
            return null;
        }
        try {
            return FieldReflectionUtil.parseValue(field, text);
        } catch (Exception e) {
            logger.error(e.getMessage(), e);
            return null;
        }
    }

    /**
     * Extracts a list field value from every element matching {@code cssQuery}. Only fields whose
     * raw generic type is exactly {@link List} are supported; items whose text is empty or fails
     * to parse are skipped (parse failures are logged).
     *
     * @return the non-empty list of parsed values, or {@code null} when the field is not a
     *     {@code List}, nothing matched, or no item produced a value
     */
    private Object extractListValue(Field field, Element scope, String cssQuery,
                                    XxlCrawlerConf.SelectType selectType, String selectVal) {
        if (!((ParameterizedType) field.getGenericType()).getRawType().equals(List.class)) {
            return null;
        }
        Elements matches = scope.select(cssQuery);
        if (matches == null || matches.size() == 0) {
            return null;
        }
        ArrayList<Object> values = new ArrayList<Object>();
        for (Element match : matches) {
            String text = JsoupUtil.parseElement(match, selectType, selectVal);
            if (text == null || text.length() == 0) {
                continue;
            }
            try {
                values.add(FieldReflectionUtil.parseValue(field, text));
            } catch (Exception e) {
                logger.error(e.getMessage(), e);
            }
        }
        return values.isEmpty() ? null : values;
    }
}
