package org.codelibs.fess.crawler.transformer;

import java.io.InputStream;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.StringUtils;
import org.codelibs.core.io.SerializeUtil;
import org.codelibs.core.lang.StringUtil;
import org.codelibs.core.misc.Pair;
import org.codelibs.core.stream.StreamUtil;
import org.codelibs.fess.Constants;
import org.codelibs.fess.crawler.entity.AccessResultData;
import org.codelibs.fess.crawler.entity.ExtractData;
import org.codelibs.fess.crawler.entity.ResponseData;
import org.codelibs.fess.crawler.entity.ResultData;
import org.codelibs.fess.crawler.entity.UrlQueue;
import org.codelibs.fess.crawler.exception.CrawlerSystemException;
import org.codelibs.fess.crawler.exception.CrawlingAccessException;
import org.codelibs.fess.crawler.extractor.Extractor;
import org.codelibs.fess.crawler.transformer.impl.AbstractTransformer;
import org.codelibs.fess.crawler.util.CrawlingParameterUtil;
import org.codelibs.fess.es.config.exentity.CrawlingConfig;
import org.codelibs.fess.helper.CrawlingInfoHelper;
import org.codelibs.fess.helper.DocumentHelper;
import org.codelibs.fess.helper.FileTypeHelper;
import org.codelibs.fess.helper.PathMappingHelper;
import org.codelibs.fess.helper.PermissionHelper;
import org.codelibs.fess.helper.SystemHelper;
import org.codelibs.fess.mylasta.direction.FessConfig;
import org.codelibs.fess.util.ComponentUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.class */
public abstract class AbstractFessFileTransformer extends AbstractTransformer implements FessTransformer {
    /** Class-level logger; used in this file for metadata-mapping warnings and for debug output when extraction is skipped. */
    private static final Logger logger = LoggerFactory.getLogger(AbstractFessFileTransformer.class);
    /** Entries registered through {@code addMetaContentMapping}; remains {@code null} until the first entry is added. */
    protected Map<String, String> metaContentMapping;
    /**
     * Configuration consulted throughout this class (field names, crawler switches, size limits).
     * It is not assigned anywhere in this file - presumably set by subclasses or by injection; confirm.
     */
    protected FessConfig fessConfig;

    /**
     * Supplies the extractor that {@code generateData} uses to read text and metadata from the response body.
     *
     * @param responseData the crawler response about to be transformed
     * @return the extractor to run against the response body
     */
    protected abstract Extractor getExtractor(ResponseData responseData);

    /**
     * Converts a crawler response into a {@link ResultData} holding the serialized field map.
     *
     * <p>The field map comes from {@code generateData}, is serialized with {@code SerializeUtil} and
     * stored together with the configured crawling-data encoding.
     *
     * @param responseData the crawler response; must be non-null and carry a response body
     * @return the populated result data
     * @throws CrawlingAccessException if there is no response body, or if building, serializing or
     *         tagging the data fails for any reason
     */
    public ResultData transform(final ResponseData responseData) {
        final boolean bodyAvailable = responseData != null && responseData.hasResponseBody();
        if (!bodyAvailable) {
            throw new CrawlingAccessException("No response body.");
        }

        final ResultData result = new ResultData();
        result.setTransformerName(getName());
        try {
            final Map<String, Object> document = generateData(responseData);
            result.setData(SerializeUtil.fromObjectToBinary(document));
            // The encoding lookup stays inside the try so that its failures are wrapped as well.
            result.setEncoding(this.fessConfig.getCrawlerCrawlingDataEncoding());
        } catch (final Exception e) {
            throw new CrawlingAccessException("Could not serialize object", e);
        }
        return result;
    }

    /**
     * Extracts the text and metadata of a crawled file and assembles the map of index fields.
     *
     * <p>Processing steps:
     * <ol>
     * <li>Run the extractor over the response body; the stream is closed as soon as extraction and
     * metadata mapping are done, including when they fail.</li>
     * <li>Copy every extracted metadata entry into a lookup map, optionally append it to the
     * "meta content" text and optionally map it onto a document field.</li>
     * <li>Fill the standard fields (content, cache, digest, title, host, site, url, timestamps,
     * labels, roles, virtual hosts, language, id, parent id, thumbnail).</li>
     * <li>Apply the META and VALUE config parameters through {@code putResultDataWithTemplate}.</li>
     * </ol>
     *
     * @param responseData the crawler response to transform
     * @return the field map, or {@code null} when ignoring empty content is enabled and the extracted
     *         content is blank
     * @throws CrawlingAccessException with log level {@code "WARN"}, wrapping any exception raised
     *         while reading the response or building the map
     */
    protected Map<String, Object> generateData(final ResponseData responseData) {
        final CrawlingConfig crawlingConfig = ComponentUtil.getCrawlingConfigHelper().get(responseData.getSessionId());
        final Extractor extractor = getExtractor(responseData);
        final String mimeType = responseData.getMimeType();
        // NOTE(review): this constant is only used as an initial buffer capacity here; its name looks
        // unrelated (likely a decompiler substitution for a literal) - confirm and rename.
        final StringBuilder contentMetaBuf = new StringBuilder(Constants.DEFAULT_INTERVAL_TIME_FOR_FS);
        final Map<String, Object> dataMap = new HashMap<>();
        final Map<String, Object> metaDataMap = new HashMap<>();
        try {
            String content;
            // try-with-resources tolerates a null resource and closes the stream on every exit path.
            try (InputStream in = responseData.getResponseBody()) {
                final ExtractData extractData = getExtractData(extractor, in, createExtractParams(responseData, crawlingConfig));
                content = extractData.getContent();
                if (this.fessConfig.isCrawlerDocumentFileIgnoreEmptyContent() && StringUtil.isBlank(content)) {
                    return null;
                }
                if (getLogger().isDebugEnabled()) {
                    getLogger().debug("ExtractData: " + extractData);
                }
                for (final String key : extractData.getKeySet()) {
                    final String[] values = extractData.getValues(key);
                    if (values != null) {
                        storeExtractedMetadata(key, values, contentMetaBuf, dataMap, metaDataMap);
                    }
                }
            }
            if (content == null) {
                // NOTE(review): used as the replacement for null content - presumably the empty string; confirm.
                content = Constants.DEFAULT_IGNORE_FAILURE_TYPE;
            }
            final String contentMeta = contentMetaBuf.toString().trim();

            final CrawlingInfoHelper crawlingInfoHelper = ComponentUtil.getCrawlingInfoHelper();
            final String sessionId = crawlingInfoHelper.getCanonicalSessionId(responseData.getSessionId());
            final PathMappingHelper pathMappingHelper = ComponentUtil.getPathMappingHelper();
            final Object documentExpires = crawlingInfoHelper.getDocumentExpires(crawlingConfig);
            final SystemHelper systemHelper = ComponentUtil.getSystemHelper();
            final FileTypeHelper fileTypeHelper = ComponentUtil.getFileTypeHelper();
            final DocumentHelper documentHelper = ComponentUtil.getDocumentHelper();
            final String originalUrl = responseData.getUrl();
            final String indexingTarget = crawlingConfig.getIndexingTarget(originalUrl);
            final String url = pathMappingHelper.replaceUrl(sessionId, originalUrl);
            final Map<String, String> fieldConfigMap = crawlingConfig.getConfigParameterMap(CrawlingConfig.ConfigName.FIELD);

            // The encoding recorded on the URL queue entry wins over the one reported by the response.
            final UrlQueue urlQueue = CrawlingParameterUtil.getUrlQueue();
            final String urlEncoding;
            if (urlQueue != null && urlQueue.getEncoding() != null) {
                urlEncoding = urlQueue.getEncoding();
            } else {
                urlEncoding = responseData.getCharSet();
            }

            final String configId = crawlingConfig.getConfigId();
            if (configId != null) {
                putResultDataBody(dataMap, this.fessConfig.getIndexFieldConfigId(), configId);
            }
            if (documentExpires != null) {
                putResultDataBody(dataMap, this.fessConfig.getIndexFieldExpires(), documentExpires);
            }
            putResultDataBody(dataMap, this.fessConfig.getIndexFieldSegment(), sessionId);

            // content: body text and/or concatenated metadata text, depending on configuration
            final StringBuilder bodyBuf = new StringBuilder(content.length() + Constants.DEFAULT_INTERVAL_TIME_FOR_FS);
            if (this.fessConfig.isCrawlerDocumentFileAppendBodyContent()) {
                bodyBuf.append(content);
            }
            if (this.fessConfig.isCrawlerDocumentFileAppendMetaContent()) {
                if (bodyBuf.length() > 0) {
                    bodyBuf.append(' ');
                }
                bodyBuf.append(contentMeta);
            }
            final String fullText = bodyBuf.toString().trim();
            final String body = documentHelper.getContent(responseData, fullText, dataMap);
            putResultDataBody(dataMap, this.fessConfig.getIndexFieldContent(), body);

            // cache: only when enabled (per config or globally), for supported mime types, within the size limit
            final boolean cacheRequested = Constants.TRUE.equalsIgnoreCase(fieldConfigMap.get(this.fessConfig.getIndexFieldCache()))
                    || this.fessConfig.isCrawlerDocumentCacheEnabled();
            if (cacheRequested && this.fessConfig.isSupportedDocumentCacheMimetypes(mimeType) && responseData.getContentLength() > 0
                    && responseData.getContentLength() <= this.fessConfig.getCrawlerDocumentCacheMaxSizeAsInteger().longValue()) {
                putResultDataBody(dataMap, this.fessConfig.getIndexFieldCache(), content.trim().replaceAll("[ \\t\\x0B\\f]+", " "));
                putResultDataBody(dataMap, this.fessConfig.getIndexFieldHasCache(), Constants.TRUE);
            }

            putResultDataBody(dataMap, this.fessConfig.getIndexFieldDigest(), documentHelper.getDigest(responseData, fullText, dataMap,
                    this.fessConfig.getCrawlerDocumentFileMaxDigestLengthAsInteger().intValue()));

            // title: keep a title mapped from metadata; otherwise derive it from the URL or content
            final String fileName = getFileName(url, urlEncoding);
            if (!dataMap.containsKey(this.fessConfig.getIndexFieldTitle())) {
                if (url.endsWith("/")) {
                    if (StringUtil.isNotBlank(content)) {
                        putResultDataBody(dataMap, this.fessConfig.getIndexFieldTitle(), documentHelper.getDigest(responseData, body,
                                dataMap, this.fessConfig.getCrawlerDocumentFileMaxTitleLengthAsInteger().intValue()));
                    } else {
                        putResultDataBody(dataMap, this.fessConfig.getIndexFieldTitle(),
                                this.fessConfig.getCrawlerDocumentFileNoTitleLabel());
                    }
                } else if (StringUtil.isBlank(fileName)) {
                    putResultDataBody(dataMap, this.fessConfig.getIndexFieldTitle(), decodeUrlAsName(url, url.startsWith("file:")));
                } else {
                    putResultDataBody(dataMap, this.fessConfig.getIndexFieldTitle(), fileName);
                }
            }

            putResultDataBody(dataMap, this.fessConfig.getIndexFieldHost(), getHostOnFile(url));
            putResultDataBody(dataMap, this.fessConfig.getIndexFieldSite(), getSiteOnFile(url, urlEncoding));
            if (StringUtil.isNotBlank(fileName)) {
                putResultDataBody(dataMap, this.fessConfig.getIndexFieldFilename(), fileName);
            }
            putResultDataBody(dataMap, this.fessConfig.getIndexFieldUrl(), url);
            final Object now = systemHelper.getCurrentTime();
            putResultDataBody(dataMap, this.fessConfig.getIndexFieldCreated(), now);
            putResultDataBody(dataMap, this.fessConfig.getIndexFieldAnchor(), Constants.DEFAULT_IGNORE_FAILURE_TYPE);
            putResultDataBody(dataMap, this.fessConfig.getIndexFieldMimetype(), mimeType);
            if (fileTypeHelper != null) {
                putResultDataBody(dataMap, this.fessConfig.getIndexFieldFiletype(), fileTypeHelper.get(mimeType));
            }
            putResultDataBody(dataMap, this.fessConfig.getIndexFieldContentLength(), Long.toString(responseData.getContentLength()));

            // timestamp falls back to the crawl time when the response has no last-modified value
            final Object lastModified = responseData.getLastModified();
            if (lastModified != null) {
                putResultDataBody(dataMap, this.fessConfig.getIndexFieldLastModified(), lastModified);
                putResultDataBody(dataMap, this.fessConfig.getIndexFieldTimestamp(), lastModified);
            } else {
                putResultDataBody(dataMap, this.fessConfig.getIndexFieldTimestamp(), now);
            }
            putResultDataBody(dataMap, Constants.INDEXING_TARGET, indexingTarget);
            putResultDataBody(dataMap, this.fessConfig.getIndexFieldBoost(), crawlingConfig.getDocumentBoost());

            // labels: configured label values plus those matched against the URL
            final HashSet<String> labels = new HashSet<>();
            for (final String labelValue : crawlingConfig.getLabelTypeValues()) {
                labels.add(labelValue);
            }
            labels.addAll(ComponentUtil.getLabelTypeHelper().getMatchedLabelValueSet(url));
            putResultDataBody(dataMap, this.fessConfig.getIndexFieldLabel(), labels);

            // roles: roles derived from the response plus the permissions configured for this crawl
            final List<String> roleTypes = getRoleTypes(responseData);
            StreamUtil.stream(crawlingConfig.getPermissions()).of(stream -> stream.forEach(roleTypes::add));
            putResultDataBody(dataMap, this.fessConfig.getIndexFieldRole(), roleTypes);

            putResultDataBody(dataMap, this.fessConfig.getIndexFieldVirtualHost(), StreamUtil.stream(crawlingConfig.getVirtualHosts())
                    .get(stream -> stream.filter(StringUtil::isNotBlank).toArray(String[]::new)));

            if (StringUtil.isNotBlank(this.fessConfig.getCrawlerDocumentFileDefaultLang())) {
                putResultDataBody(dataMap, this.fessConfig.getIndexFieldLang(), this.fessConfig.getCrawlerDocumentFileDefaultLang());
            }
            putResultDataBody(dataMap, this.fessConfig.getIndexFieldId(), crawlingInfoHelper.generateId(dataMap));

            final String parentUrl = responseData.getParentUrl();
            if (StringUtil.isNotBlank(parentUrl)) {
                // The id generator reads the map, so the url field is swapped to the parent URL just
                // long enough to compute the parent id and is then restored.
                putResultDataBody(dataMap, this.fessConfig.getIndexFieldUrl(), pathMappingHelper.replaceUrl(sessionId, parentUrl));
                putResultDataBody(dataMap, this.fessConfig.getIndexFieldParentId(), crawlingInfoHelper.generateId(dataMap));
                putResultDataBody(dataMap, this.fessConfig.getIndexFieldUrl(), url);
            }
            putResultDataBody(dataMap, this.fessConfig.getIndexFieldThumbnail(), responseData.getUrl());

            // config-driven fields: META entries read extracted metadata, VALUE entries supply fixed values
            final Map<String, String> scriptConfigMap = crawlingConfig.getConfigParameterMap(CrawlingConfig.ConfigName.SCRIPT);
            for (final Map.Entry<String, String> entry : crawlingConfig.getConfigParameterMap(CrawlingConfig.ConfigName.META).entrySet()) {
                final String field = entry.getKey();
                for (final String metaName : entry.getValue().split(",")) {
                    putResultDataWithTemplate(dataMap, field, metaDataMap.get(metaName), scriptConfigMap.get(field));
                }
            }
            for (final Map.Entry<String, String> entry : crawlingConfig.getConfigParameterMap(CrawlingConfig.ConfigName.VALUE).entrySet()) {
                final String field = entry.getKey();
                putResultDataWithTemplate(dataMap, field, entry.getValue(), scriptConfigMap.get(field));
            }
            return dataMap;
        } catch (final Exception e) {
            final CrawlingAccessException wrapped = new CrawlingAccessException("Could not get a text from " + responseData.getUrl(), e);
            wrapped.setLogLevel("WARN");
            throw wrapped;
        }
    }

    /**
     * Records one extracted metadata entry: stores the raw values for later META lookups, appends
     * them to the meta-content text when configured, and maps them onto a document field when a
     * name mapping exists for the key.
     *
     * @param key            metadata name reported by the extractor
     * @param values         non-null values for that name
     * @param contentMetaBuf accumulator for the space-separated meta-content text
     * @param dataMap        document fields being built
     * @param metaDataMap    raw metadata lookup keyed by metadata name
     */
    private void storeExtractedMetadata(final String key, final String[] values, final StringBuilder contentMetaBuf,
            final Map<String, Object> dataMap, final Map<String, Object> metaDataMap) {
        metaDataMap.put(key, values);

        if (this.fessConfig.isCrawlerMetadataContentIncluded(key)) {
            final String joined = StringUtils.join(values, ' ');
            if (StringUtil.isNotBlank(joined)) {
                if (contentMetaBuf.length() > 0) {
                    contentMetaBuf.append(' ');
                }
                contentMetaBuf.append(joined.trim());
            }
        }

        final Pair<String, String> mapping = this.fessConfig.getCrawlerMetadataNameMapping(key);
        if (mapping == null) {
            return;
        }
        final String fieldName = mapping.getFirst();
        final String mappingType = mapping.getSecond();
        if (Constants.MAPPING_TYPE_ARRAY.equalsIgnoreCase(mappingType)) {
            dataMap.put(fieldName, values);
        } else if (Constants.MAPPING_TYPE_STRING.equalsIgnoreCase(mappingType)) {
            dataMap.put(fieldName, StringUtils.join(values, ' ').trim());
        } else if (values.length == 1) {
            // Numeric mappings apply only to single-valued metadata; other cases are skipped silently.
            try {
                if (Constants.MAPPING_TYPE_LONG.equalsIgnoreCase(mappingType)) {
                    dataMap.put(fieldName, Long.parseLong(values[0]));
                } else if (Constants.MAPPING_TYPE_DOUBLE.equalsIgnoreCase(mappingType)) {
                    dataMap.put(fieldName, Double.parseDouble(values[0]));
                } else {
                    logger.warn("Unknown mapping type: {}={}", key, mapping);
                }
            } catch (final NumberFormatException e) {
                // An unparsable number must not abort the whole document; the field is left unset.
                logger.warn("Failed to parse " + values[0], e);
            }
        }
    }

    /**
     * Builds the parameter map handed to the extractor.
     *
     * <p>Starts from a copy of the crawl's CONFIG parameters and then sets {@code resourceName},
     * {@code Content-Type}, {@code Content-Encoding} and {@code url} from the response, overwriting
     * any configured entries with the same keys.
     *
     * @param responseData   the response whose attributes describe the resource
     * @param crawlingConfig the crawl configuration supplying the base parameters
     * @return a new mutable parameter map
     */
    protected Map<String, String> createExtractParams(final ResponseData responseData, final CrawlingConfig crawlingConfig) {
        final Map<String, String> baseParams = crawlingConfig.getConfigParameterMap(CrawlingConfig.ConfigName.CONFIG);
        final Map<String, String> params = new HashMap<>(baseParams);
        params.put("resourceName", getResourceName(responseData));
        params.put("Content-Type", responseData.getMimeType());
        params.put("Content-Encoding", responseData.getCharSet());
        params.put("url", responseData.getUrl());
        return params;
    }

    /**
     * Runs the extractor over the stream, optionally tolerating extraction failures.
     *
     * @param extractor   the extractor to run
     * @param inputStream the content to extract from
     * @param map         extraction parameters
     * @return the extracted data, or an empty {@link ExtractData} when the extractor threw a
     *         runtime exception and ignoring content exceptions is enabled
     * @throws RuntimeException the extractor's own exception when ignoring is disabled
     */
    protected ExtractData getExtractData(final Extractor extractor, final InputStream inputStream, final Map<String, String> map) {
        try {
            return extractor.getText(inputStream, map);
        } catch (final RuntimeException failure) {
            if (this.fessConfig.isCrawlerIgnoreContentException()) {
                // Best-effort mode: keep crawling and index the document without extracted text.
                if (logger.isDebugEnabled()) {
                    logger.debug("Could not get a text.", failure);
                }
                return new ExtractData();
            }
            throw failure;
        }
    }

    /**
     * Derives a resource name from the last path segment of the response URL.
     *
     * <p>Trailing slashes are replaced first, then everything up to and including the last
     * {@code '/'} is dropped, and the remainder is URL-decoded with the response charset.
     *
     * @param responseData the response providing URL and charset
     * @return the decoded last segment, the undecoded segment if decoding fails, or {@code null}
     *         when the URL or the charset is missing
     */
    private String getResourceName(final ResponseData responseData) {
        final String url = responseData.getUrl();
        final String encoding = responseData.getCharSet();
        if (url == null || encoding == null) {
            return null;
        }

        final String withoutTrailingSlash = url.replaceAll("/+$", Constants.DEFAULT_IGNORE_FAILURE_TYPE);
        final int slashPos = withoutTrailingSlash.lastIndexOf('/');
        final String segment = slashPos >= 0 ? withoutTrailingSlash.substring(slashPos + 1) : withoutTrailingSlash;
        try {
            return URLDecoder.decode(segment, encoding);
        } catch (final Exception ignored) {
            // Unknown charset or malformed escape: fall back to the raw segment.
            return segment;
        }
    }

    /**
     * Determines the host value for a file-style URL.
     *
     * <ul>
     * <li>Blank input yields {@code Constants.DEFAULT_IGNORE_FAILURE_TYPE}.</li>
     * <li>A URL starting with {@code file:////} yields the decoded text before the first
     * {@code '/'} after that prefix, the whole decoded text when there is no slash, or
     * {@code "localhost"} when the decoded text begins with a slash.</li>
     * <li>Any other {@code file:} URL yields {@code "localhost"}.</li>
     * <li>Everything else is delegated to {@code getHost}.</li>
     * </ul>
     *
     * @param str the URL to inspect
     * @return the host name as described above
     */
    protected String getHostOnFile(final String str) {
        if (StringUtil.isBlank(str)) {
            return Constants.DEFAULT_IGNORE_FAILURE_TYPE;
        }

        if (str.startsWith("file:////")) {
            final String sharePath = decodeUrlAsName(str.substring(9), true);
            final int slashPos = sharePath.indexOf('/');
            if (slashPos > 0) {
                return sharePath.substring(0, slashPos);
            }
            if (slashPos == -1) {
                return sharePath;
            }
            // slashPos == 0: nothing precedes the first slash, so there is no host part.
            return "localhost";
        }

        if (str.startsWith("file:")) {
            return "localhost";
        }
        return getHost(str);
    }

    /**
     * Collects the role types that the permission helper derives from the response.
     *
     * <p>SMB, file and FTP role lists are concatenated in that order into a new mutable list.
     *
     * @param responseData the response to derive roles from
     * @return a new, modifiable list of role types
     */
    protected List<String> getRoleTypes(final ResponseData responseData) {
        final List<String> roleTypes = new ArrayList<>();
        final PermissionHelper helper = ComponentUtil.getPermissionHelper();
        roleTypes.addAll(helper.getSmbRoleTypeList(responseData));
        roleTypes.addAll(helper.getFileRoleTypeList(responseData));
        roleTypes.addAll(helper.getFtpRoleTypeList(responseData));
        return roleTypes;
    }

    /**
     * Determines the site value for a file-style URL.
     *
     * <ul>
     * <li>Blank input yields {@code Constants.DEFAULT_IGNORE_FAILURE_TYPE}.</li>
     * <li>A URL starting with {@code file:////} becomes a backslash path prefixed with
     * {@code \\}.</li>
     * <li>A non-{@code file:} URL is delegated to {@code getSite}.</li>
     * <li>Another {@code file:} URL is decoded; when the character at index 2 is {@code ':'} the
     * first character is dropped and slashes become backslashes, otherwise the decoded text is
     * used unchanged.</li>
     * </ul>
     * All file results pass through {@code abbreviateSite}.
     *
     * @param str  the URL to inspect
     * @param str2 the encoding forwarded to {@code getSite} for non-file URLs
     * @return the site value as described above
     */
    protected String getSiteOnFile(final String str, final String str2) {
        if (StringUtil.isBlank(str)) {
            return Constants.DEFAULT_IGNORE_FAILURE_TYPE;
        }

        if (str.startsWith("file:////")) {
            final String sharePath = decodeUrlAsName(str.substring(9), true);
            return abbreviateSite("\\\\" + sharePath.replace('/', '\\'));
        }

        if (!str.startsWith("file:")) {
            return getSite(str, str2);
        }

        final String localPath = decodeUrlAsName(str.substring(5), true);
        final boolean colonAtIndexTwo = localPath.length() > 2 && localPath.charAt(2) == ':';
        if (colonAtIndexTwo) {
            return abbreviateSite(localPath.substring(1).replace('/', '\\'));
        }
        return abbreviateSite(localPath);
    }

    /**
     * Restores the object stored in an access result.
     *
     * @param accessResultData the stored access result
     * @return the deserialized object, or a new empty {@link HashMap} when no bytes are stored
     * @throws CrawlerSystemException if the bytes cannot be deserialized
     */
    public Object getData(final AccessResultData<?> accessResultData) {
        final byte[] serialized = accessResultData.getData();
        if (serialized != null) {
            try {
                return SerializeUtil.fromBinaryToObject(serialized);
            } catch (final Exception e) {
                throw new CrawlerSystemException("Could not create an instanced from bytes.", e);
            }
        }
        return new HashMap<>();
    }

    /**
     * Registers a meta-content mapping entry, creating the backing map on first use.
     *
     * @param str  the mapping key
     * @param str2 the value stored for that key; replaces any previous value
     */
    public void addMetaContentMapping(final String str, final String str2) {
        Map<String, String> mapping = this.metaContentMapping;
        if (mapping == null) {
            mapping = new HashMap<>();
            this.metaContentMapping = mapping;
        }
        mapping.put(str, str2);
    }
}
