package org.codelibs.fess.helper;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import org.codelibs.core.lang.StringUtil;
import org.codelibs.fess.Constants;
import org.codelibs.fess.crawler.Crawler;
import org.codelibs.fess.crawler.CrawlerContext;
import org.codelibs.fess.crawler.CrawlerStatus;
import org.codelibs.fess.crawler.service.impl.EsDataService;
import org.codelibs.fess.crawler.service.impl.EsUrlFilterService;
import org.codelibs.fess.crawler.service.impl.EsUrlQueueService;
import org.codelibs.fess.es.config.exbhv.BoostDocumentRuleBhv;
import org.codelibs.fess.es.config.exentity.BoostDocumentRule;
import org.codelibs.fess.es.config.exentity.CrawlingConfig;
import org.codelibs.fess.es.config.exentity.FileConfig;
import org.codelibs.fess.es.config.exentity.WebConfig;
import org.codelibs.fess.indexer.DocBoostMatcher;
import org.codelibs.fess.indexer.IndexUpdater;
import org.codelibs.fess.mylasta.direction.FessConfig;
import org.codelibs.fess.util.ComponentUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/codelibs/fess/helper/WebFsIndexHelper.class */
/**
 * Builds {@link Crawler} instances from web and file crawling configurations,
 * starts them with a bounded number of concurrently active crawlers, and hands
 * the results to the {@link IndexUpdater} thread.
 */
public class WebFsIndexHelper {
    private static final Logger logger = LoggerFactory.getLogger(WebFsIndexHelper.class);

    /** Separator for the multi-line url/path/filter settings (one entry per line). */
    private static final Pattern LINE_BREAK = Pattern.compile("[\r\n]");

    /** Max access count applied to a crawler when its configuration does not define one. */
    protected long maxAccessCount = Long.MAX_VALUE;

    /** Milliseconds slept between scheduling steps; also passed to {@code awaitTermination} while waiting for completion. */
    protected long crawlingExecutionInterval = Constants.DEFAULT_CRAWLING_EXECUTION_INTERVAL;

    /** Thread priority set on the index updater. */
    protected int indexUpdaterPriority = 10;

    /** Thread priority set on every crawler. */
    protected int crawlerPriority = 5;

    /** Crawlers prepared for the current {@link #doCrawl} run; cleared at the start and end of each run. */
    protected final List<Crawler> crawlerList = Collections.synchronizedList(new ArrayList<>());

    /**
     * Resolves the web and file configurations for the given ids and crawls them.
     * When both id lists are {@code null}, both lookups are performed with a
     * {@code null} id list; when only one list is {@code null}, that kind of
     * configuration is not crawled at all.
     *
     * @param sessionId session id handed to {@link #doCrawl}
     * @param webConfigIdList ids of the web configurations, or {@code null}
     * @param fileConfigIdList ids of the file configurations, or {@code null}
     */
    public void crawl(final String sessionId, final List<String> webConfigIdList, final List<String> fileConfigIdList) {
        final boolean noIdsGiven = webConfigIdList == null && fileConfigIdList == null;

        final List<WebConfig> webConfigList;
        if (noIdsGiven || webConfigIdList != null) {
            webConfigList = ComponentUtil.getCrawlingConfigHelper().getWebConfigListByIds(webConfigIdList);
        } else {
            webConfigList = Collections.emptyList();
        }

        final List<FileConfig> fileConfigList;
        if (noIdsGiven || fileConfigIdList != null) {
            fileConfigList = ComponentUtil.getCrawlingConfigHelper().getFileConfigListByIds(fileConfigIdList);
        } else {
            fileConfigList = Collections.emptyList();
        }

        if (webConfigList.isEmpty() && fileConfigList.isEmpty()) {
            logger.info("No crawling target urls.");
            return;
        }
        doCrawl(sessionId, webConfigList, fileConfigList);
    }

    /**
     * Prepares one crawler per configuration, runs them, waits for all of them to
     * finish, waits for the index updater, records execution statistics and, unless
     * a force stop was requested, removes the per-session crawl data.
     *
     * @param sessionId base session id used to store each configuration
     * @param webConfigList web configurations to crawl
     * @param fileConfigList file configurations to crawl
     */
    protected void doCrawl(final String sessionId, final List<WebConfig> webConfigList, final List<FileConfig> fileConfigList) {
        final FessConfig fessConfig = ComponentUtil.getFessConfig();
        final int maxActiveCrawlers = fessConfig.getCrawlingThreadCount();
        final SystemHelper systemHelper = ComponentUtil.getSystemHelper();
        final long startTime = System.currentTimeMillis();

        final List<String> sessionIdList = new ArrayList<>();
        // Index-aligned with crawlerList: READY -> RUNNING -> DONE.
        final List<String> crawlerStatusList = new ArrayList<>();
        crawlerList.clear();

        for (final WebConfig webConfig : webConfigList) {
            final String sid = ComponentUtil.getCrawlingConfigHelper().store(sessionId, webConfig);
            final Crawler crawler = (Crawler) ComponentUtil.getComponent(Crawler.class);
            crawler.setSessionId(sid);
            sessionIdList.add(sid);

            final String urls = webConfig.getUrls();
            if (StringUtil.isBlank(urls)) {
                logger.warn("No target urls. Skipped");
                // Skip only this configuration; the remaining ones must still be crawled.
                continue;
            }
            configureWebCrawler(crawler, webConfig, sid, urls, fessConfig);
            registerCrawler(crawler, crawlerStatusList);
        }

        for (final FileConfig fileConfig : fileConfigList) {
            final String sid = ComponentUtil.getCrawlingConfigHelper().store(sessionId, fileConfig);
            final Crawler crawler = (Crawler) ComponentUtil.getComponent(Crawler.class);
            crawler.setSessionId(sid);
            sessionIdList.add(sid);

            final String paths = fileConfig.getPaths();
            if (StringUtil.isBlank(paths)) {
                logger.warn("No target uris. Skipped");
                // Skip only this configuration; the remaining ones must still be crawled.
                continue;
            }
            configureFileCrawler(crawler, fileConfig, sid, paths, fessConfig, systemHelper);
            registerCrawler(crawler, crawlerStatusList);
        }

        final IndexUpdater indexUpdater = ComponentUtil.getIndexUpdater();
        indexUpdater.setName("IndexUpdater");
        indexUpdater.setPriority(indexUpdaterPriority);
        indexUpdater.setSessionIdList(sessionIdList);
        indexUpdater.setDaemon(true);
        indexUpdater.setCrawlerList(crawlerList);
        getAvailableBoostDocumentRuleList().forEach(rule -> indexUpdater.addDocBoostMatcher(new DocBoostMatcher(rule)));
        indexUpdater.start();

        startCrawlers(systemHelper, indexUpdater, crawlerStatusList, maxActiveCrawlers);
        awaitCrawlers(indexUpdater, crawlerStatusList);

        crawlerList.clear();
        crawlerStatusList.clear();

        final CrawlingInfoHelper crawlingInfoHelper = ComponentUtil.getCrawlingInfoHelper();
        final long crawlingTime = System.currentTimeMillis() - startTime;
        crawlingInfoHelper.putToInfoMap(Constants.WEB_FS_CRAWLING_EXEC_TIME, Long.toString(crawlingTime));
        logger.info("[EXEC TIME] crawling time: {}ms", crawlingTime);

        indexUpdater.setFinishCrawling(true);
        try {
            indexUpdater.join();
        } catch (final InterruptedException e) {
            logger.warn("Interrupted index update.", e);
        }
        crawlingInfoHelper.putToInfoMap(Constants.WEB_FS_INDEX_EXEC_TIME, Long.toString(indexUpdater.getExecuteTime()));
        crawlingInfoHelper.putToInfoMap(Constants.WEB_FS_INDEX_SIZE, Long.toString(indexUpdater.getDocumentSize()));

        if (systemHelper.isForceStop()) {
            // On a forced stop the stored configurations and crawl data are left in place.
            return;
        }
        for (final String sid : sessionIdList) {
            ComponentUtil.getCrawlingConfigHelper().remove(sid);
            deleteCrawlData(sid);
        }
    }

    /**
     * Applies a web configuration (interval, limits, client factory, optional
     * cleanup, target urls and include/exclude filters) to the given crawler.
     */
    private void configureWebCrawler(final Crawler crawler, final WebConfig webConfig, final String sid, final String urls,
            final FessConfig fessConfig) {
        crawler.getIntervalController().setDelayMillisForWaitingNewUrl(
                webConfig.getIntervalTime() != null ? webConfig.getIntervalTime().intValue() : Constants.DEFAULT_INTERVAL_TIME_FOR_WEB);
        final String includedUrls = webConfig.getIncludedUrls() != null ? webConfig.getIncludedUrls() : Constants.DEFAULT_IGNORE_FAILURE_TYPE;
        final String excludedUrls = webConfig.getExcludedUrls() != null ? webConfig.getExcludedUrls() : Constants.DEFAULT_IGNORE_FAILURE_TYPE;

        final CrawlerContext crawlerContext = crawler.getCrawlerContext();
        crawlerContext.setNumOfThread(webConfig.getNumOfThread() != null ? webConfig.getNumOfThread().intValue() : 1);
        crawlerContext.setMaxDepth(webConfig.getDepth() != null ? webConfig.getDepth().intValue() : -1);
        crawlerContext.setMaxAccessCount(webConfig.getMaxAccessCount() != null ? webConfig.getMaxAccessCount().longValue() : maxAccessCount);

        webConfig.initializeClientFactory(crawler.getClientFactory());
        cleanupBeforeCrawl(webConfig.getConfigParameterMap(CrawlingConfig.ConfigName.CONFIG), sid);

        for (final String line : LINE_BREAK.split(urls)) {
            if (StringUtil.isBlank(line)) {
                continue;
            }
            final String url = line.trim();
            // Validate the trimmed value: that is what is actually handed to the crawler.
            if (!url.startsWith("#") && fessConfig.isValidCrawlerWebProtocol(url)) {
                crawler.addUrl(url);
                logger.info("Target URL: {}", url);
            }
        }
        for (final String line : LINE_BREAK.split(includedUrls)) {
            if (StringUtil.isBlank(line)) {
                continue;
            }
            final String filter = line.trim();
            if (!filter.startsWith("#")) {
                crawler.addIncludeFilter(filter);
                logger.info("Included URL: {}", filter);
            }
        }
        for (final String line : LINE_BREAK.split(excludedUrls)) {
            if (StringUtil.isBlank(line)) {
                continue;
            }
            final String filter = line.trim();
            if (!filter.startsWith("#")) {
                crawler.addExcludeFilter(filter);
                logger.info("Excluded URL: {}", filter);
            }
        }
        addFailureExcludeFilters(crawler, ComponentUtil.getCrawlingConfigHelper().getExcludedUrlList(webConfig.getConfigId()), "URL");

        logger.debug("Crawling {}", urls);
    }

    /**
     * Applies a file configuration (interval, limits, client factory, optional
     * cleanup, target paths and include/exclude filters) to the given crawler.
     */
    private void configureFileCrawler(final Crawler crawler, final FileConfig fileConfig, final String sid, final String paths,
            final FessConfig fessConfig, final SystemHelper systemHelper) {
        crawler.getIntervalController().setDelayMillisForWaitingNewUrl(
                fileConfig.getIntervalTime() != null ? fileConfig.getIntervalTime().intValue() : Constants.DEFAULT_INTERVAL_TIME_FOR_FS);
        final String includedPaths =
                fileConfig.getIncludedPaths() != null ? fileConfig.getIncludedPaths() : Constants.DEFAULT_IGNORE_FAILURE_TYPE;
        final String excludedPaths =
                fileConfig.getExcludedPaths() != null ? fileConfig.getExcludedPaths() : Constants.DEFAULT_IGNORE_FAILURE_TYPE;

        final CrawlerContext crawlerContext = crawler.getCrawlerContext();
        crawlerContext.setNumOfThread(fileConfig.getNumOfThread() != null ? fileConfig.getNumOfThread().intValue() : 5);
        crawlerContext.setMaxDepth(fileConfig.getDepth() != null ? fileConfig.getDepth().intValue() : -1);
        crawlerContext.setMaxAccessCount(fileConfig.getMaxAccessCount() != null ? fileConfig.getMaxAccessCount().longValue() : maxAccessCount);

        fileConfig.initializeClientFactory(crawler.getClientFactory());
        cleanupBeforeCrawl(fileConfig.getConfigParameterMap(CrawlingConfig.ConfigName.CONFIG), sid);

        for (final String line : LINE_BREAK.split(paths)) {
            if (StringUtil.isBlank(line)) {
                continue;
            }
            String path = line.trim();
            if (path.startsWith("#")) {
                continue;
            }
            if (!fessConfig.isValidCrawlerFileProtocol(path)) {
                // Plain filesystem paths are turned into file: URLs.
                path = path.startsWith("/") ? "file:" + path : "file:/" + path;
            }
            crawler.addUrl(path);
            logger.info("Target Path: {}", path);
        }
        addPathFilters(crawler, includedPaths, systemHelper, true);
        addPathFilters(crawler, excludedPaths, systemHelper, false);
        addFailureExcludeFilters(crawler, ComponentUtil.getCrawlingConfigHelper().getExcludedUrlList(fileConfig.getConfigId()), "Path");

        logger.debug("Crawling {}", paths);
    }

    /**
     * Adds the include or exclude path filters listed one per line in {@code filterText}.
     * Each filter is passed through {@code SystemHelper.encodeUrlFilter} unless the
     * preceding comment line starts with {@code #DISABLE_URL_ENCODE}, which switches
     * encoding off for the next filter only.
     *
     * @param include {@code true} to add include filters, {@code false} for exclude filters
     */
    private void addPathFilters(final Crawler crawler, final String filterText, final SystemHelper systemHelper, final boolean include) {
        boolean urlEncodeDisabled = false;
        for (final String line : LINE_BREAK.split(filterText)) {
            if (StringUtil.isBlank(line)) {
                continue;
            }
            final String value = line.trim();
            if (value.startsWith("#")) {
                if (value.startsWith("#DISABLE_URL_ENCODE")) {
                    urlEncodeDisabled = true;
                }
                continue;
            }
            final String filter;
            if (urlEncodeDisabled) {
                filter = value;
                urlEncodeDisabled = false;
            } else {
                filter = systemHelper.encodeUrlFilter(value);
            }
            if (include) {
                crawler.addIncludeFilter(filter);
                logger.info("Included Path: {}", filter);
            } else {
                crawler.addExcludeFilter(filter);
                logger.info("Excluded Path: {}", filter);
            }
        }
    }

    /**
     * Adds every non-blank entry of {@code failureUrlList} as a literal (regex-quoted)
     * exclude filter. A {@code null} list is treated as empty.
     *
     * @param targetLabel word used in the log message ("URL" or "Path")
     */
    private void addFailureExcludeFilters(final Crawler crawler, final List<String> failureUrlList, final String targetLabel) {
        if (failureUrlList == null) {
            return;
        }
        for (final String failureUrl : failureUrlList) {
            if (StringUtil.isNotBlank(failureUrl)) {
                final String quoted = Pattern.quote(failureUrl.trim());
                crawler.addExcludeFilter(quoted);
                logger.info("Excluded {} from failures: {}", targetLabel, quoted);
            }
        }
    }

    /**
     * Honors the cleanup switches of a configuration: deletes all crawl data of the
     * session when CONFIG_CLEANUP_ALL is true, otherwise only its url filters when
     * CONFIG_CLEANUP_FILTERS is true. A failing filter deletion is logged and ignored.
     */
    private void cleanupBeforeCrawl(final Map<String, String> configParams, final String sid) {
        if (Constants.TRUE.equalsIgnoreCase(configParams.get(Constants.CONFIG_CLEANUP_ALL))) {
            deleteCrawlData(sid);
        } else if (Constants.TRUE.equalsIgnoreCase(configParams.get(Constants.CONFIG_CLEANUP_FILTERS))) {
            try {
                ((EsUrlFilterService) ComponentUtil.getComponent(EsUrlFilterService.class)).delete(sid);
            } catch (final Exception e) {
                logger.warn("Failed to delete url filters for " + sid, e);
            }
        }
    }

    /** Finalizes a configured crawler and appends it, in READY state, to the run. */
    private void registerCrawler(final Crawler crawler, final List<String> crawlerStatusList) {
        crawler.setBackground(true);
        crawler.setThreadPriority(crawlerPriority);
        crawlerList.add(crawler);
        crawlerStatusList.add(Constants.READY);
    }

    /**
     * Starts the registered crawlers one by one, keeping at most
     * {@code maxActiveCrawlers} of them active. While the limit is reached, finished
     * crawlers are reaped to free a slot. Returns once every crawler was started or
     * a force stop was requested.
     */
    private void startCrawlers(final SystemHelper systemHelper, final IndexUpdater indexUpdater, final List<String> crawlerStatusList,
            final int maxActiveCrawlers) {
        int startedCount = 0;
        int activeCount = 0;
        while (startedCount < crawlerList.size()) {
            if (systemHelper.isForceStop()) {
                for (final Crawler crawler : crawlerList) {
                    crawler.stop();
                }
                // Leave the start-up loop; without this the loop would spin forever
                // because nothing advances startedCount while the stop flag is set.
                // NOTE(review): the completion wait that follows assumes stop() drives
                // even never-executed crawlers to CrawlerStatus.DONE - confirm.
                break;
            }

            if (activeCount < maxActiveCrawlers) {
                crawlerList.get(startedCount).execute();
                crawlerStatusList.set(startedCount, Constants.RUNNING);
                startedCount++;
                activeCount++;
            } else {
                for (int i = 0; i < startedCount; i++) {
                    final Crawler crawler = crawlerList.get(i);
                    if (crawler.getCrawlerContext().getStatus() == CrawlerStatus.DONE
                            && crawlerStatusList.get(i).equals(Constants.RUNNING)) {
                        crawler.awaitTermination();
                        crawlerStatusList.set(i, Constants.DONE);
                        indexUpdater.addFinishedSessionId(crawler.getCrawlerContext().getSessionId());
                        activeCount--;
                    }
                }
            }
            sleepForExecutionInterval();
        }
    }

    /**
     * Polls all crawlers until each one has been marked DONE, reporting every newly
     * finished session id to the index updater.
     */
    private void awaitCrawlers(final IndexUpdater indexUpdater, final List<String> crawlerStatusList) {
        boolean finishedAll = false;
        while (!finishedAll) {
            finishedAll = true;
            for (int i = 0; i < crawlerList.size(); i++) {
                final Crawler crawler = crawlerList.get(i);
                crawler.awaitTermination(crawlingExecutionInterval);
                if (crawler.getCrawlerContext().getStatus() == CrawlerStatus.DONE && !crawlerStatusList.get(i).equals(Constants.DONE)) {
                    crawlerStatusList.set(i, Constants.DONE);
                    indexUpdater.addFinishedSessionId(crawler.getCrawlerContext().getSessionId());
                }
                if (!crawlerStatusList.get(i).equals(Constants.DONE)) {
                    finishedAll = false;
                }
            }
        }
    }

    /** Sleeps for {@link #crawlingExecutionInterval} milliseconds; an interrupt only ends the sleep early. */
    private void sleepForExecutionInterval() {
        try {
            Thread.sleep(crawlingExecutionInterval);
        } catch (final InterruptedException e) {
            // The interrupt flag is intentionally not restored: with it set, every
            // following sleep would return immediately and the scheduling loop would spin.
            logger.debug("Interrupted.", e);
        }
    }

    /**
     * Loads the boost document rules ordered by sort order, limited to the
     * configured maximum fetch size.
     *
     * @return the selected boost document rules
     */
    protected List<BoostDocumentRule> getAvailableBoostDocumentRuleList() {
        return ((BoostDocumentRuleBhv) ComponentUtil.getComponent(BoostDocumentRuleBhv.class)).selectList(cb -> {
            cb.query().matchAll();
            cb.query().addOrderBy_SortOrder_Asc();
            cb.fetchFirst(ComponentUtil.getFessConfig().getPageDocboostMaxFetchSizeAsInteger().intValue());
        });
    }

    /**
     * Deletes the url filters, url queue and access results stored for a session.
     * The three deletions are independent: a failure in one is logged and does not
     * prevent the others.
     *
     * @param sessionId id of the crawler session whose data is removed
     */
    protected void deleteCrawlData(final String sessionId) {
        final EsUrlFilterService urlFilterService = (EsUrlFilterService) ComponentUtil.getComponent(EsUrlFilterService.class);
        final EsUrlQueueService urlQueueService = (EsUrlQueueService) ComponentUtil.getComponent(EsUrlQueueService.class);
        final EsDataService dataService = (EsDataService) ComponentUtil.getComponent(EsDataService.class);

        try {
            urlFilterService.delete(sessionId);
        } catch (final Exception e) {
            logger.warn("Failed to delete UrlFilter for " + sessionId, e);
        }

        try {
            urlQueueService.clearCache();
            urlQueueService.delete(sessionId);
        } catch (final Exception e) {
            logger.warn("Failed to delete UrlQueue for " + sessionId, e);
        }

        try {
            dataService.delete(sessionId);
        } catch (final Exception e) {
            logger.warn("Failed to delete AccessResult for " + sessionId, e);
        }
    }

    /**
     * @param maxAccessCount max access count used when a configuration does not define one
     */
    public void setMaxAccessCount(final long maxAccessCount) {
        this.maxAccessCount = maxAccessCount;
    }

    /**
     * @param crawlingExecutionInterval milliseconds to wait between scheduling steps
     */
    public void setCrawlingExecutionInterval(final long crawlingExecutionInterval) {
        this.crawlingExecutionInterval = crawlingExecutionInterval;
    }

    /**
     * @param indexUpdaterPriority thread priority set on the index updater
     */
    public void setIndexUpdaterPriority(final int indexUpdaterPriority) {
        this.indexUpdaterPriority = indexUpdaterPriority;
    }

    /**
     * @param crawlerPriority thread priority set on every crawler
     */
    public void setCrawlerPriority(final int crawlerPriority) {
        this.crawlerPriority = crawlerPriority;
    }
}
