package org.codelibs.fess.helper;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.regex.Pattern;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.codelibs.core.lang.StringUtil;
import org.codelibs.core.lang.ThreadUtil;
import org.codelibs.core.stream.StreamUtil;
import org.codelibs.fess.Constants;
import org.codelibs.fess.crawler.Crawler;
import org.codelibs.fess.crawler.CrawlerContext;
import org.codelibs.fess.crawler.CrawlerStatus;
import org.codelibs.fess.crawler.service.impl.EsDataService;
import org.codelibs.fess.crawler.service.impl.EsUrlFilterService;
import org.codelibs.fess.crawler.service.impl.EsUrlQueueService;
import org.codelibs.fess.es.config.exbhv.BoostDocumentRuleBhv;
import org.codelibs.fess.es.config.exentity.BoostDocumentRule;
import org.codelibs.fess.es.config.exentity.CrawlingConfig;
import org.codelibs.fess.es.config.exentity.FileConfig;
import org.codelibs.fess.es.config.exentity.WebConfig;
import org.codelibs.fess.indexer.DocBoostMatcher;
import org.codelibs.fess.indexer.IndexUpdater;
import org.codelibs.fess.util.ComponentUtil;

/* loaded from: input_file:org/codelibs/fess/helper/WebFsIndexHelper.class */
/**
 * Helper that runs the web and file-system crawlers of one crawling session and hands
 * the crawled sessions over to the {@link IndexUpdater}.
 *
 * <p>For every crawling config a dedicated {@link Crawler} is prepared (target URLs,
 * include/exclude filters, limits). The crawlers are then started with at most
 * "crawling thread count" of them active at the same time, while a single
 * {@link IndexUpdater} thread runs in parallel.</p>
 */
public class WebFsIndexHelper {
    private static final Logger logger = LogManager.getLogger(WebFsIndexHelper.class);

    /** Comment directive: the next filter line is registered as written, without URL encoding. */
    private static final String DISABLE_URL_ENCODE = "#DISABLE_URL_ENCODE";

    /** Regular expression used to split multi-line settings into single lines. */
    private static final String LINE_SEPARATOR_REGEX = "[\r\n]";

    /** Access limit applied to configs that do not define their own max access count. */
    protected long maxAccessCount = Long.MAX_VALUE;

    /** Sleep/await time used between scheduling and polling steps of the crawl loop. */
    protected long crawlingExecutionInterval = Constants.DEFAULT_CRAWLING_EXECUTION_INTERVAL;

    /** Thread priority given to the index updater thread. */
    protected int indexUpdaterPriority = Thread.MAX_PRIORITY;

    /** Thread priority given to every crawler. */
    protected int crawlerPriority = Thread.NORM_PRIORITY;

    /** Crawlers of the current run; the same list instance is handed to the index updater. */
    protected final List<Crawler> crawlerList = Collections.synchronizedList(new ArrayList<>());

    /**
     * Resolves the crawling configs for the given ids and crawls them.
     *
     * <p>When both id lists are {@code null}, both config lookups are performed with a
     * {@code null} id list. When only one list is {@code null}, that kind of config is
     * not crawled at all.</p>
     *
     * @param str the crawling session id
     * @param list ids of the web configs to crawl, may be {@code null}
     * @param list2 ids of the file configs to crawl, may be {@code null}
     */
    public void crawl(String str, List<String> list, List<String> list2) {
        final boolean noSelection = list == null && list2 == null;

        final List<WebConfig> webConfigs;
        if (noSelection || list != null) {
            webConfigs = ComponentUtil.getCrawlingConfigHelper().getWebConfigListByIds(list);
        } else {
            webConfigs = Collections.emptyList();
        }

        final List<FileConfig> fileConfigs;
        if (noSelection || list2 != null) {
            fileConfigs = ComponentUtil.getCrawlingConfigHelper().getFileConfigListByIds(list2);
        } else {
            fileConfigs = Collections.emptyList();
        }

        if (webConfigs.isEmpty() && fileConfigs.isEmpty()) {
            if (logger.isInfoEnabled()) {
                logger.info("No crawling target urls.");
            }
            return;
        }
        doCrawl(str, webConfigs, fileConfigs);
    }

    /**
     * Prepares one crawler per config, runs them together with the index updater and
     * records the execution statistics.
     *
     * <p>Unless a force stop was requested, the stored config and the crawl data of
     * every session id created here are removed at the end.</p>
     *
     * @param str the crawling session id
     * @param list the web configs to crawl
     * @param list2 the file configs to crawl
     */
    protected void doCrawl(String str, List<WebConfig> list, List<FileConfig> list2) {
        final int maxActiveCrawlers = ComponentUtil.getFessConfig().getCrawlingThreadCount();
        final SystemHelper systemHelper = ComponentUtil.getSystemHelper();
        final ProtocolHelper protocolHelper = ComponentUtil.getProtocolHelper();
        final long startTime = systemHelper.getCurrentTimeAsLong();

        final List<String> sessionIdList = new ArrayList<>();
        // Kept index-aligned with crawlerList: READY -> RUNNING -> DONE.
        final List<String> crawlerStatusList = new ArrayList<>();
        this.crawlerList.clear();

        for (final WebConfig webConfig : list) {
            final Crawler crawler = createWebCrawler(str, webConfig, sessionIdList, systemHelper, protocolHelper);
            if (crawler != null) {
                this.crawlerList.add(crawler);
                crawlerStatusList.add(Constants.READY);
            }
        }
        for (final FileConfig fileConfig : list2) {
            final Crawler crawler = createFileCrawler(str, fileConfig, sessionIdList, systemHelper, protocolHelper);
            if (crawler != null) {
                this.crawlerList.add(crawler);
                crawlerStatusList.add(Constants.READY);
            }
        }

        // Run the index updater in parallel with the crawlers.
        final IndexUpdater indexUpdater = ComponentUtil.getIndexUpdater();
        indexUpdater.setName("IndexUpdater");
        indexUpdater.setPriority(this.indexUpdaterPriority);
        indexUpdater.setSessionIdList(sessionIdList);
        indexUpdater.setDaemon(true);
        indexUpdater.setCrawlerList(this.crawlerList);
        getAvailableBoostDocumentRuleList().forEach(rule -> indexUpdater.addDocBoostMatcher(new DocBoostMatcher(rule)));
        indexUpdater.start();

        runCrawlers(indexUpdater, crawlerStatusList, systemHelper, maxActiveCrawlers);

        this.crawlerList.clear();
        crawlerStatusList.clear();

        final CrawlingInfoHelper crawlingInfoHelper = ComponentUtil.getCrawlingInfoHelper();
        final long crawlingTime = systemHelper.getCurrentTimeAsLong() - startTime;
        crawlingInfoHelper.putToInfoMap(Constants.WEB_FS_CRAWLING_EXEC_TIME, Long.toString(crawlingTime));
        if (logger.isInfoEnabled()) {
            logger.info("[EXEC TIME] crawling time: {}ms", Long.valueOf(crawlingTime));
        }

        // Tell the updater no more data will arrive and wait until it has drained its work.
        indexUpdater.setFinishCrawling(true);
        try {
            indexUpdater.join();
        } catch (InterruptedException e) {
            logger.warn("Interrupted index update.", e);
        }
        crawlingInfoHelper.putToInfoMap(Constants.WEB_FS_INDEX_EXEC_TIME, Long.toString(indexUpdater.getExecuteTime()));
        crawlingInfoHelper.putToInfoMap(Constants.WEB_FS_INDEX_SIZE, Long.toString(indexUpdater.getDocumentSize()));

        if (systemHelper.isForceStop()) {
            // Stored configs and crawl data are left in place after a forced stop.
            return;
        }
        for (final String sid : sessionIdList) {
            ComponentUtil.getCrawlingConfigHelper().remove(sid);
            deleteCrawlData(sid);
        }
    }

    /**
     * Stores the web config and builds a fully configured crawler for it.
     *
     * <p>The session id is always appended to {@code sessionIdList}, even when the
     * config is skipped, so that its stored data is cleaned up afterwards.</p>
     *
     * @return the crawler, or {@code null} when the config has no target URLs; only that
     *         config is skipped and the remaining configs are still processed
     */
    private Crawler createWebCrawler(String sessionId, WebConfig webConfig, List<String> sessionIdList, SystemHelper systemHelper,
            ProtocolHelper protocolHelper) {
        final String sid = ComponentUtil.getCrawlingConfigHelper().store(sessionId, webConfig);
        final Crawler crawler = (Crawler) ComponentUtil.getComponent(Crawler.class);
        crawler.setSessionId(sid);
        sessionIdList.add(sid);

        final String urls = webConfig.getUrls();
        if (StringUtil.isBlank(urls)) {
            logger.warn("No target urls. Skipped");
            return null;
        }

        crawler.getIntervalController().setDelayMillisForWaitingNewUrl(
                webConfig.getIntervalTime() != null ? webConfig.getIntervalTime().intValue() : Constants.DEFAULT_INTERVAL_TIME_FOR_WEB);
        // NOTE(review): DEFAULT_IGNORE_FAILURE_TYPE is used as the "no filters" default;
        // presumably it is the empty string - confirm.
        final String includedUrls =
                webConfig.getIncludedUrls() != null ? webConfig.getIncludedUrls() : Constants.DEFAULT_IGNORE_FAILURE_TYPE;
        final String excludedUrls =
                webConfig.getExcludedUrls() != null ? webConfig.getExcludedUrls() : Constants.DEFAULT_IGNORE_FAILURE_TYPE;

        final CrawlerContext crawlerContext = crawler.getCrawlerContext();
        crawlerContext.setNumOfThread(webConfig.getNumOfThread() != null ? webConfig.getNumOfThread().intValue() : 1);
        crawlerContext.setMaxDepth(webConfig.getDepth() != null ? webConfig.getDepth().intValue() : -1);
        crawlerContext
                .setMaxAccessCount(webConfig.getMaxAccessCount() != null ? webConfig.getMaxAccessCount().longValue() : this.maxAccessCount);

        webConfig.initializeClientFactory(() -> crawler.getClientFactory());

        // Cleanup must run before any URL is queued for this session.
        cleanupBeforeCrawl(webConfig.getConfigParameterMap(CrawlingConfig.ConfigName.CONFIG), sid);

        final DuplicateHostHelper duplicateHostHelper = ComponentUtil.getDuplicateHostHelper();
        StreamUtil.split(urls, LINE_SEPARATOR_REGEX)
                .of(stream -> stream.filter(StringUtil::isNotBlank).map(String::trim).distinct().forEach(line -> {
                    // Lines starting with '#' are comments; unsupported protocols are ignored.
                    if (line.startsWith("#") || !protocolHelper.isValidWebProtocol(line)) {
                        return;
                    }
                    final String url = duplicateHostHelper.convert(line);
                    crawler.addUrl(url);
                    if (logger.isInfoEnabled()) {
                        logger.info("Target URL: {}", url);
                    }
                }));

        addUrlFilters(includedUrls, true, crawler, systemHelper, "Included URL: {}");
        addUrlFilters(excludedUrls, false, crawler, systemHelper, "Excluded URL: {}");
        addFailureUrlExcludes(ComponentUtil.getCrawlingConfigHelper().getExcludedUrlList(webConfig.getConfigId()), crawler,
                "Excluded URL from failures: {}");

        if (logger.isDebugEnabled()) {
            logger.debug("Crawling {}", urls);
        }

        crawler.setBackground(true);
        crawler.setThreadPriority(this.crawlerPriority);
        return crawler;
    }

    /**
     * Stores the file config and builds a fully configured crawler for it.
     *
     * <p>The session id is always appended to {@code sessionIdList}, even when the
     * config is skipped, so that its stored data is cleaned up afterwards.</p>
     *
     * @return the crawler, or {@code null} when the config has no target paths; only that
     *         config is skipped and the remaining configs are still processed
     */
    private Crawler createFileCrawler(String sessionId, FileConfig fileConfig, List<String> sessionIdList, SystemHelper systemHelper,
            ProtocolHelper protocolHelper) {
        final String sid = ComponentUtil.getCrawlingConfigHelper().store(sessionId, fileConfig);
        final Crawler crawler = (Crawler) ComponentUtil.getComponent(Crawler.class);
        crawler.setSessionId(sid);
        sessionIdList.add(sid);

        final String paths = fileConfig.getPaths();
        if (StringUtil.isBlank(paths)) {
            logger.warn("No target uris. Skipped");
            return null;
        }

        crawler.getIntervalController()
                .setDelayMillisForWaitingNewUrl(fileConfig.getIntervalTime() != null ? fileConfig.getIntervalTime().intValue() : 1000);
        // NOTE(review): DEFAULT_IGNORE_FAILURE_TYPE is used as the "no filters" default;
        // presumably it is the empty string - confirm.
        final String includedPaths =
                fileConfig.getIncludedPaths() != null ? fileConfig.getIncludedPaths() : Constants.DEFAULT_IGNORE_FAILURE_TYPE;
        final String excludedPaths =
                fileConfig.getExcludedPaths() != null ? fileConfig.getExcludedPaths() : Constants.DEFAULT_IGNORE_FAILURE_TYPE;

        final CrawlerContext crawlerContext = crawler.getCrawlerContext();
        crawlerContext.setNumOfThread(fileConfig.getNumOfThread() != null ? fileConfig.getNumOfThread().intValue() : 5);
        crawlerContext.setMaxDepth(fileConfig.getDepth() != null ? fileConfig.getDepth().intValue() : -1);
        crawlerContext.setMaxAccessCount(
                fileConfig.getMaxAccessCount() != null ? fileConfig.getMaxAccessCount().longValue() : this.maxAccessCount);

        fileConfig.initializeClientFactory(() -> crawler.getClientFactory());

        // Cleanup must run before any path is queued for this session.
        cleanupBeforeCrawl(fileConfig.getConfigParameterMap(CrawlingConfig.ConfigName.CONFIG), sid);

        StreamUtil.split(paths, LINE_SEPARATOR_REGEX)
                .of(stream -> stream.filter(StringUtil::isNotBlank).map(String::trim).distinct().forEach(line -> {
                    // Lines starting with '#' are comments.
                    if (line.startsWith("#")) {
                        return;
                    }
                    // Plain paths are turned into file: URIs; anything already using a
                    // valid file protocol is used as is.
                    final String uri;
                    if (protocolHelper.isValidFileProtocol(line)) {
                        uri = line;
                    } else if (line.startsWith("/")) {
                        uri = "file:" + line;
                    } else {
                        uri = "file:/" + line;
                    }
                    crawler.addUrl(uri);
                    if (logger.isInfoEnabled()) {
                        logger.info("Target Path: {}", uri);
                    }
                }));

        addUrlFilters(includedPaths, true, crawler, systemHelper, "Included Path: {}");
        addUrlFilters(excludedPaths, false, crawler, systemHelper, "Excluded Path: {}");
        addFailureUrlExcludes(ComponentUtil.getCrawlingConfigHelper().getExcludedUrlList(fileConfig.getConfigId()), crawler,
                "Excluded Path from failures: {}");

        if (logger.isDebugEnabled()) {
            logger.debug("Crawling {}", paths);
        }

        crawler.setBackground(true);
        crawler.setThreadPriority(this.crawlerPriority);
        return crawler;
    }

    /**
     * Applies the cleanup options of a config: either all crawl data of the session is
     * deleted, or only its URL filters. Failures are logged and do not abort the crawl.
     */
    private void cleanupBeforeCrawl(Map<String, String> configParams, String sid) {
        if (Constants.TRUE.equalsIgnoreCase(configParams.get(CrawlingConfig.Param.Config.CLEANUP_ALL))) {
            deleteCrawlData(sid);
        } else if (Constants.TRUE.equalsIgnoreCase(configParams.get(CrawlingConfig.Param.Config.CLEANUP_URL_FILTERS))) {
            try {
                ((EsUrlFilterService) ComponentUtil.getComponent(EsUrlFilterService.class)).delete(sid);
            } catch (Exception e) {
                logger.warn("Failed to delete url filters for {}", sid, e);
            }
        }
    }

    /**
     * Registers every non-blank, non-comment line of {@code filterText} as an include or
     * exclude filter of the crawler.
     *
     * <p>Filters are URL-encoded through the system helper, except for the first filter
     * line following a {@code #DISABLE_URL_ENCODE} comment, which is registered as
     * written.</p>
     *
     * @param include {@code true} to add include filters, {@code false} for exclude filters
     * @param logFormat info-level log format with one placeholder for the registered filter
     */
    private void addUrlFilters(String filterText, boolean include, Crawler crawler, SystemHelper systemHelper, String logFormat) {
        // Set by the directive, consumed (and reset) by the next filter line.
        final AtomicBoolean skipEncoding = new AtomicBoolean(false);
        StreamUtil.split(filterText, LINE_SEPARATOR_REGEX)
                .of(stream -> stream.filter(StringUtil::isNotBlank).map(String::trim).forEach(line -> {
                    if (line.startsWith("#")) {
                        if (line.startsWith(DISABLE_URL_ENCODE)) {
                            skipEncoding.set(true);
                        }
                        return;
                    }
                    final String filter = skipEncoding.getAndSet(false) ? line : systemHelper.encodeUrlFilter(line);
                    if (include) {
                        crawler.addIncludeFilter(filter);
                    } else {
                        crawler.addExcludeFilter(filter);
                    }
                    if (logger.isInfoEnabled()) {
                        logger.info(logFormat, filter);
                    }
                }));
    }

    /**
     * Excludes the given URLs literally (regex-quoted) from the crawl.
     *
     * @param failureUrls URLs to exclude, may be {@code null}
     * @param logFormat info-level log format with one placeholder for the registered filter
     */
    private void addFailureUrlExcludes(List<String> failureUrls, Crawler crawler, String logFormat) {
        if (failureUrls == null) {
            return;
        }
        failureUrls.stream().filter(StringUtil::isNotBlank).map(String::trim).distinct().forEach(url -> {
            final String quoted = Pattern.quote(url);
            crawler.addExcludeFilter(quoted);
            if (logger.isInfoEnabled()) {
                logger.info(logFormat, quoted);
            }
        });
    }

    /**
     * Starts the prepared crawlers, keeping at most {@code maxActiveCrawlers} active, and
     * blocks until every crawler is marked done.
     *
     * <p>Each crawler is closed once: when it is marked done, or in the {@code finally}
     * block if the run is aborted by an exception.</p>
     *
     * @param statusList per-crawler status, index-aligned with {@link #crawlerList}
     */
    private void runCrawlers(IndexUpdater indexUpdater, List<String> statusList, SystemHelper systemHelper, int maxActiveCrawlers) {
        int startedCount = 0;
        int activeCount = 0;
        try {
            while (startedCount < this.crawlerList.size()) {
                if (systemHelper.isForceStop()) {
                    for (final Crawler crawler : this.crawlerList) {
                        crawler.stop();
                    }
                    // Leave the scheduling loop: nothing further may be started.
                    // NOTE(review): the wait loop below relies on stop() driving every
                    // crawler, including never-started ones, to DONE - confirm.
                    break;
                }

                if (activeCount < maxActiveCrawlers) {
                    this.crawlerList.get(startedCount).execute();
                    statusList.set(startedCount, Constants.RUNNING);
                    startedCount++;
                    activeCount++;
                } else {
                    // All slots are busy: free the slots of crawlers that have finished.
                    for (int i = 0; i < startedCount; i++) {
                        final Crawler crawler = this.crawlerList.get(i);
                        if (crawler.getCrawlerContext().getStatus() == CrawlerStatus.DONE
                                && Constants.RUNNING.equals(statusList.get(i))) {
                            crawler.awaitTermination();
                            finishCrawler(i, statusList, indexUpdater);
                            activeCount--;
                        }
                    }
                }
                ThreadUtil.sleep(this.crawlingExecutionInterval);
            }

            // Wait until every crawler has been marked done.
            boolean finishedAll = false;
            while (!finishedAll) {
                finishedAll = true;
                for (int i = 0; i < this.crawlerList.size(); i++) {
                    final Crawler crawler = this.crawlerList.get(i);
                    crawler.awaitTermination(this.crawlingExecutionInterval);
                    if (crawler.getCrawlerContext().getStatus() == CrawlerStatus.DONE && !Constants.DONE.equals(statusList.get(i))) {
                        finishCrawler(i, statusList, indexUpdater);
                    }
                    if (!Constants.DONE.equals(statusList.get(i))) {
                        finishedAll = false;
                    }
                }
            }
        } finally {
            // Only reached with unfinished crawlers when the run was aborted.
            for (int i = 0; i < this.crawlerList.size(); i++) {
                if (!Constants.DONE.equals(statusList.get(i))) {
                    closeCrawler(this.crawlerList.get(i));
                }
            }
        }
    }

    /** Marks the crawler at {@code index} as done, reports its session to the index updater and closes it. */
    private void finishCrawler(int index, List<String> statusList, IndexUpdater indexUpdater) {
        final Crawler crawler = this.crawlerList.get(index);
        statusList.set(index, Constants.DONE);
        indexUpdater.addFinishedSessionId(crawler.getCrawlerContext().getSessionId());
        closeCrawler(crawler);
    }

    /** Closes the crawler; a failure is logged and otherwise ignored. */
    private void closeCrawler(Crawler crawler) {
        try {
            crawler.close();
        } catch (Exception e) {
            logger.warn("Failed to close the crawler.", e);
        }
    }

    /**
     * Loads the boost document rules in ascending sort order, limited to the configured
     * maximum fetch size.
     *
     * @return the boost document rules to apply while indexing
     */
    protected List<BoostDocumentRule> getAvailableBoostDocumentRuleList() {
        return ((BoostDocumentRuleBhv) ComponentUtil.getComponent(BoostDocumentRuleBhv.class)).selectList(cb -> {
            cb.query().matchAll();
            cb.query().addOrderBy_SortOrder_Asc();
            cb.fetchFirst(ComponentUtil.getFessConfig().getPageDocboostMaxFetchSizeAsInteger().intValue());
        });
    }

    /**
     * Deletes the URL filters, the URL queue and the crawled data of a session.
     *
     * <p>The three deletions are independent: a failure of one is logged and the
     * remaining ones are still attempted.</p>
     *
     * @param str the session id whose crawl data is deleted
     */
    protected void deleteCrawlData(String str) {
        final EsUrlFilterService urlFilterService = (EsUrlFilterService) ComponentUtil.getComponent(EsUrlFilterService.class);
        final EsUrlQueueService urlQueueService = (EsUrlQueueService) ComponentUtil.getComponent(EsUrlQueueService.class);
        final EsDataService dataService = (EsDataService) ComponentUtil.getComponent(EsDataService.class);

        try {
            urlFilterService.delete(str);
        } catch (Exception e) {
            logger.warn("Failed to delete UrlFilter for {}", str, e);
        }

        try {
            urlQueueService.clearCache();
            urlQueueService.delete(str);
        } catch (Exception e) {
            logger.warn("Failed to delete UrlQueue for {}", str, e);
        }

        try {
            dataService.delete(str);
        } catch (Exception e) {
            logger.warn("Failed to delete AccessResult for {}", str, e);
        }
    }

    /**
     * Sets the access limit used for configs without their own max access count.
     *
     * @param j the default maximum access count
     */
    public void setMaxAccessCount(long j) {
        this.maxAccessCount = j;
    }

    /**
     * Sets the interval used between scheduling and polling steps of the crawl loop.
     *
     * @param j the interval passed to the sleep/await calls of the crawl loop
     */
    public void setCrawlingExecutionInterval(long j) {
        this.crawlingExecutionInterval = j;
    }

    /**
     * Sets the thread priority of the index updater.
     *
     * @param i the thread priority
     */
    public void setIndexUpdaterPriority(int i) {
        this.indexUpdaterPriority = i;
    }

    /**
     * Sets the thread priority given to every crawler.
     *
     * @param i the thread priority
     */
    public void setCrawlerPriority(int i) {
        this.crawlerPriority = i;
    }
}
