package com.xuxueli.crawler.util;

import com.xuxueli.crawler.conf.XxlCrawlerConf;
import com.xuxueli.crawler.model.PageLoadInfo;
import java.io.IOException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/xuxueli/crawler/util/JsoupUtil.class */
/**
 * Static helpers around jsoup: fetching a page described by a {@link PageLoadInfo},
 * extracting a value from an {@link Element}, and collecting link / image URLs
 * from a parsed {@link Document}.
 */
public class JsoupUtil {
    private static final Logger logger = LoggerFactory.getLogger(JsoupUtil.class);

    /**
     * Fetches and parses the page described by {@code pageLoadInfo}.
     *
     * <p>Optional request settings (params, cookies, headers, user agent, referrer,
     * proxy) are applied only when present; maps are additionally skipped when empty.
     *
     * @param pageLoadInfo request description; may be {@code null}
     * @return the parsed document, or {@code null} when {@code pageLoadInfo} is
     *         {@code null}, its URL is rejected by {@code UrlUtil.isUrl}, or the
     *         request fails with an {@link IOException} (which is logged)
     */
    public static Document load(PageLoadInfo pageLoadInfo) {
        // A null request is treated like an invalid URL instead of throwing NPE.
        if (pageLoadInfo == null || !UrlUtil.isUrl(pageLoadInfo.getUrl())) {
            return null;
        }
        try {
            Connection connect = Jsoup.connect(pageLoadInfo.getUrl());
            if (pageLoadInfo.getParamMap() != null && !pageLoadInfo.getParamMap().isEmpty()) {
                connect.data(pageLoadInfo.getParamMap());
            }
            if (pageLoadInfo.getCookieMap() != null && !pageLoadInfo.getCookieMap().isEmpty()) {
                connect.cookies(pageLoadInfo.getCookieMap());
            }
            if (pageLoadInfo.getHeaderMap() != null && !pageLoadInfo.getHeaderMap().isEmpty()) {
                connect.headers(pageLoadInfo.getHeaderMap());
            }
            if (pageLoadInfo.getUserAgent() != null) {
                connect.userAgent(pageLoadInfo.getUserAgent());
            }
            if (pageLoadInfo.getReferrer() != null) {
                connect.referrer(pageLoadInfo.getReferrer());
            }
            connect.timeout(pageLoadInfo.getTimeoutMillis());
            // Per the jsoup Connection docs, a max body size of 0 means "no limit".
            connect.maxBodySize(0);
            if (pageLoadInfo.getProxy() != null) {
                connect.proxy(pageLoadInfo.getProxy());
            }
            return pageLoadInfo.getIfPost() ? connect.post() : connect.get();
        } catch (IOException e) {
            logger.error(e.getMessage(), e);
            return null;
        }
    }

    /**
     * Extracts a string from {@code element} according to {@code selectType}.
     *
     * @param element    element to read from
     * @param selectType which aspect of the element to return
     * @param str        attribute name for {@code ATTR}, class name for
     *                   {@code HAS_CLASS}; ignored by the other select types
     * @return inner HTML, form value, text, attribute value, or the string form of
     *         the has-class boolean; for any other (or {@code null}) select type,
     *         {@code element.toString()}
     */
    public static String parseElement(Element element, XxlCrawlerConf.SelectType selectType, String str) {
        // Identity comparisons (not a switch) so a null selectType falls through
        // to the default instead of throwing.
        if (XxlCrawlerConf.SelectType.HTML == selectType) {
            return element.html();
        }
        if (XxlCrawlerConf.SelectType.VAL == selectType) {
            return element.val();
        }
        if (XxlCrawlerConf.SelectType.TEXT == selectType) {
            return element.text();
        }
        if (XxlCrawlerConf.SelectType.ATTR == selectType) {
            return element.attr(str);
        }
        if (XxlCrawlerConf.SelectType.HAS_CLASS == selectType) {
            return String.valueOf(element.hasClass(str));
        }
        return element.toString();
    }

    /**
     * Collects the absolute {@code href} of every {@code <a href>} in the document,
     * keeping only values accepted by {@code UrlUtil.isUrl}.
     *
     * @param document parsed page; may be {@code null}
     * @return the set of link URLs (possibly empty), or {@code null} when
     *         {@code document} is {@code null}
     */
    public static Set<String> findLinks(Document document) {
        if (document == null) {
            return null;
        }
        Set<String> links = new HashSet<>();
        for (Element anchor : document.select("a[href]")) {
            // "abs:" asks jsoup to resolve the attribute against the document base URI.
            String href = anchor.attr("abs:href");
            if (UrlUtil.isUrl(href)) {
                links.add(href);
            }
        }
        return links;
    }

    /**
     * Collects the absolute {@code src} of every {@code <img>} in the document.
     * Values are added as returned by jsoup, without URL validation.
     *
     * @param document parsed page; may be {@code null}
     * @return the set of image sources; empty (never {@code null}) when
     *         {@code document} is {@code null} or has no {@code <img>} elements
     */
    public static Set<String> findImages(Document document) {
        Set<String> images = new HashSet<>();
        // Previously a null document caused a NullPointerException here.
        if (document == null) {
            return images;
        }
        for (Element img : document.getElementsByTag("img")) {
            images.add(img.attr("abs:src"));
        }
        return images;
    }
}
