/*
 * Decompiled with CFR 0.152.
 */
package org.apache.solr.update.processor;

import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.Locale;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.update.AddUpdateCommand;
import org.apache.solr.update.processor.UpdateRequestProcessor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Update processor that inspects the URL stored in one field of an incoming
 * document and writes characteristics of that URL to other fields of the same
 * document: its length, the number of path levels, whether it is a top level
 * page, whether it looks like a landing page, and optionally its host and a
 * canonical form with the landing-page file name removed.
 *
 * <p>Classification is best-effort: when the URL value cannot be classified a
 * warning is logged and the document is passed on unchanged.
 */
public class URLClassifyProcessor
extends UpdateRequestProcessor {
    private static final String INPUT_FIELD_PARAM = "inputField";
    private static final String OUTPUT_LENGTH_FIELD_PARAM = "lengthOutputField";
    private static final String OUTPUT_LEVELS_FIELD_PARAM = "levelsOutputField";
    private static final String OUTPUT_TOPLEVEL_FIELD_PARAM = "toplevelOutputField";
    private static final String OUTPUT_LANDINGPAGE_FIELD_PARAM = "landingpageOutputField";
    private static final String OUTPUT_DOMAIN_FIELD_PARAM = "domainOutputField";
    private static final String OUTPUT_CANONICALURL_FIELD_PARAM = "canonicalUrlOutputField";
    private static final String DEFAULT_URL_FIELDNAME = "url";
    private static final String DEFAULT_LENGTH_FIELDNAME = "url_length";
    private static final String DEFAULT_LEVELS_FIELDNAME = "url_levels";
    private static final String DEFAULT_TOPLEVEL_FIELDNAME = "url_toplevel";
    private static final String DEFAULT_LANDINGPAGE_FIELDNAME = "url_landingpage";
    private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

    /** Lower-case path endings that mark a URL as a landing page. */
    private static final String[] landingPageSuffixes = new String[]{"/", "index.html", "index.htm", "index.phtml", "index.shtml", "index.xml", "index.php", "index.asp", "index.aspx", "welcome.html", "welcome.htm", "welcome.phtml", "welcome.shtml", "welcome.xml", "welcome.php", "welcome.asp", "welcome.aspx"};

    private boolean enabled = true;
    private String urlFieldname = DEFAULT_URL_FIELDNAME;
    private String lengthFieldname = DEFAULT_LENGTH_FIELDNAME;
    private String levelsFieldname = DEFAULT_LEVELS_FIELDNAME;
    private String toplevelpageFieldname = DEFAULT_TOPLEVEL_FIELDNAME;
    private String landingpageFieldname = DEFAULT_LANDINGPAGE_FIELDNAME;
    // The two optional outputs are only written when a field name is configured.
    private String domainFieldname = null;
    private String canonicalUrlFieldname = null;

    /**
     * Creates the processor and reads its configuration.
     *
     * @param parameters configuration; may be {@code null}, in which case all defaults apply
     * @param request the current request (not used by this processor)
     * @param response the current response (not used by this processor)
     * @param nextProcessor the next processor in the chain
     */
    public URLClassifyProcessor(SolrParams parameters, SolrQueryRequest request, SolrQueryResponse response, UpdateRequestProcessor nextProcessor) {
        super(nextProcessor);
        initParameters(parameters);
    }

    /** Copies the configured field names, falling back to the defaults for missing entries. */
    private void initParameters(SolrParams parameters) {
        if (parameters == null) {
            return;
        }
        setEnabled(parameters.getBool("enabled", true));
        urlFieldname = parameters.get(INPUT_FIELD_PARAM, DEFAULT_URL_FIELDNAME);
        lengthFieldname = parameters.get(OUTPUT_LENGTH_FIELD_PARAM, DEFAULT_LENGTH_FIELDNAME);
        levelsFieldname = parameters.get(OUTPUT_LEVELS_FIELD_PARAM, DEFAULT_LEVELS_FIELDNAME);
        toplevelpageFieldname = parameters.get(OUTPUT_TOPLEVEL_FIELD_PARAM, DEFAULT_TOPLEVEL_FIELDNAME);
        landingpageFieldname = parameters.get(OUTPUT_LANDINGPAGE_FIELD_PARAM, DEFAULT_LANDINGPAGE_FIELDNAME);
        domainFieldname = parameters.get(OUTPUT_DOMAIN_FIELD_PARAM);
        canonicalUrlFieldname = parameters.get(OUTPUT_CANONICALURL_FIELD_PARAM);
    }

    /**
     * Classifies the URL of the document being added (when the processor is
     * enabled and the document has the input field) and then hands the command
     * to the next processor. A URL that cannot be classified never prevents the
     * command from being passed on.
     *
     * @param command the add command whose document is examined
     * @throws IOException if the rest of the processor chain fails
     */
    @Override
    public void processAdd(AddUpdateCommand command) throws IOException {
        if (isEnabled()) {
            SolrInputDocument document = command.getSolrInputDocument();
            if (document.containsKey(urlFieldname)) {
                classify(document, document.getFieldValue(urlFieldname));
            }
        }
        super.processAdd(command);
    }

    /** Writes the classification fields for {@code rawUrl} into {@code document}; logs and skips on bad input. */
    private void classify(SolrInputDocument document, Object rawUrl) {
        if (!(rawUrl instanceof String)) {
            // Covers null as well as non-string values; neither can be parsed as a URL.
            log.warn("cannot classify the value of field '{}' because it is not a string: {}", urlFieldname, rawUrl);
            return;
        }
        String url = (String) rawUrl;

        URL normalizedURL;
        try {
            normalizedURL = getNormalizedURL(url);
        } catch (MalformedURLException | URISyntaxException | IllegalArgumentException e) {
            // IllegalArgumentException is what URI.toURL() throws for a URI that is not absolute.
            log.warn("cannot get the normalized url for '{}' due to ", url, e);
            return;
        }

        document.setField(lengthFieldname, length(normalizedURL));
        document.setField(levelsFieldname, levels(normalizedURL));
        document.setField(toplevelpageFieldname, isTopLevelPage(normalizedURL) ? 1 : 0);
        document.setField(landingpageFieldname, isLandingPage(normalizedURL) ? 1 : 0);
        if (domainFieldname != null) {
            document.setField(domainFieldname, normalizedURL.getHost());
        }
        if (canonicalUrlFieldname != null) {
            document.setField(canonicalUrlFieldname, getCanonicalUrl(normalizedURL));
        }
        log.debug("{}", document);
    }

    /**
     * Returns the URL with a trailing landing-page file name (for example
     * {@code index.html}) removed, so that it ends with the directory slash.
     * URLs with a query or fragment, and URLs without such a file name, are
     * returned unchanged.
     *
     * @param url the URL to canonicalize
     * @return the canonical URL, or {@code url} itself when nothing is stripped
     *     or the stripped form cannot be parsed
     */
    public URL getCanonicalUrl(URL url) {
        String suffix = landingPageSuffix(url);
        // Only when there is no query and no fragment is the end of the URL
        // string guaranteed to be the end of the path.
        if (suffix.isEmpty() || url.getQuery() != null || url.getRef() != null) {
            return url;
        }
        String urlString = url.toString();
        String slashAndSuffix = "/" + suffix;
        int tailStart = urlString.length() - slashAndSuffix.length();
        // Compare literally and ignoring case: the suffix was detected on the
        // lower-cased path, while urlString keeps the original case.
        // regionMatches returns false for a negative tailStart.
        if (!urlString.regionMatches(true, tailStart, slashAndSuffix, 0, slashAndSuffix.length())) {
            return url;
        }
        try {
            return new URL(urlString.substring(0, urlString.length() - suffix.length()));
        } catch (MalformedURLException e) {
            log.warn("cannot build the canonical url for '{}' due to ", url, e);
            return url;
        }
    }

    /**
     * @param url the URL to measure
     * @return the number of characters in the string form of {@code url}
     */
    public int length(URL url) {
        return url.toString().length();
    }

    /**
     * Counts the {@code '/'} characters left in the path once the landing-page
     * suffix and any trailing slashes have been removed.
     *
     * @param url the URL to inspect
     * @return the number of path levels; {@code 0} for a top level page
     */
    public int levels(URL url) {
        String path = stripTrailingSlashes(getPathWithoutSuffix(url));
        int levels = 0;
        for (int i = 0; i < path.length(); i++) {
            if (path.charAt(i) == '/') {
                levels++;
            }
        }
        return levels;
    }

    /**
     * @param url the URL to inspect
     * @return {@code true} if the URL has no query and its path is empty after
     *     removing the landing-page suffix and trailing slashes
     */
    public boolean isTopLevelPage(URL url) {
        String path = stripTrailingSlashes(getPathWithoutSuffix(url));
        return path.isEmpty() && url.getQuery() == null;
    }

    /**
     * @param url the URL to inspect
     * @return {@code true} if the URL has no query and its path ends with one
     *     of the known landing-page suffixes (compared in lower case)
     */
    public boolean isLandingPage(URL url) {
        if (url.getQuery() != null) {
            return false;
        }
        return !landingPageSuffix(url).isEmpty();
    }

    /**
     * Parses {@code url} as a URI, normalizes it and converts it to a URL.
     *
     * @param url the URL string to normalize
     * @return the normalized URL
     * @throws MalformedURLException if the normalized URI cannot be converted to a URL
     * @throws URISyntaxException if {@code url} is not a valid URI
     * @throws IllegalArgumentException if the URI is not absolute (raised by {@link URI#toURL()})
     */
    public URL getNormalizedURL(String url) throws MalformedURLException, URISyntaxException {
        return new URI(url).normalize().toURL();
    }

    /** @return whether this processor classifies URLs at all */
    public boolean isEnabled() {
        return enabled;
    }

    /** @param enabled {@code false} to make {@link #processAdd} pass documents through untouched */
    public void setEnabled(boolean enabled) {
        this.enabled = enabled;
    }

    /** Returns the first landing-page suffix the lower-cased path ends with, or {@code ""} if none. */
    private String landingPageSuffix(URL url) {
        String path = url.getPath().toLowerCase(Locale.ROOT);
        for (String suffix : landingPageSuffixes) {
            if (path.endsWith(suffix)) {
                return suffix;
            }
        }
        return "";
    }

    /** Returns the lower-cased path with its landing-page suffix, if any, cut off the end. */
    private String getPathWithoutSuffix(URL url) {
        String path = url.getPath().toLowerCase(Locale.ROOT);
        // landingPageSuffix lower-cases the path the same way, so the suffix it
        // returns is always a literal tail of this string.
        String suffix = landingPageSuffix(url);
        return path.substring(0, path.length() - suffix.length());
    }

    /** Removes every trailing {@code '/'} from {@code path}. */
    private static String stripTrailingSlashes(String path) {
        return path.replaceAll("/+$", "");
    }
}

