/*
 * Decompiled with CFR 0.152.
 */
package org.eobjects.analyzer.beans.standardize;

import java.util.ArrayList;
import java.util.List;
import org.eobjects.analyzer.beans.api.Categorized;
import org.eobjects.analyzer.beans.api.Configured;
import org.eobjects.analyzer.beans.api.Description;
import org.eobjects.analyzer.beans.api.Initialize;
import org.eobjects.analyzer.beans.api.OutputColumns;
import org.eobjects.analyzer.beans.api.Transformer;
import org.eobjects.analyzer.beans.api.TransformerBean;
import org.eobjects.analyzer.beans.categories.MatchingAndStandardizationCategory;
import org.eobjects.analyzer.data.InputColumn;
import org.eobjects.analyzer.data.InputRow;
import org.eobjects.analyzer.util.HasGroupLiteral;
import org.eobjects.analyzer.util.NamedPattern;
import org.eobjects.analyzer.util.NamedPatternMatch;

/**
 * Transformer that splits a URL string into five parts: protocol, domain,
 * port, path and query string.
 *
 * <p>The URL is tested against each entry of {@link #PATTERNS} in declaration
 * order; the first pattern that yields a match supplies the output values.
 * Parts that do not occur in the matching pattern, and all parts when no
 * pattern matches, are reported as {@code null}.
 */
@TransformerBean(value="URL standardizer")
@Description(value="Retrieve the individual parts of an URL, including protocol, domain, port, path and querystring.")
@Categorized(value={MatchingAndStandardizationCategory.class})
public class UrlStandardizerTransformer
implements Transformer<String> {
    /**
     * URL layouts that are tried in order. Each upper-case token is the name of
     * a {@link UrlPart} constant; the remaining characters are regular
     * expression syntax (note the escaped {@code ?} before the query string).
     */
    public static final String[] PATTERNS = new String[]{"PROTOCOL://DOMAIN:PORTPATH\\?QUERYSTRING", "PROTOCOL://DOMAINPATH\\?QUERYSTRING", "PROTOCOL://DOMAIN:PORTPATH", "PROTOCOL://DOMAIN:PORT\\?QUERYSTRING", "PROTOCOL://DOMAIN\\?QUERYSTRING", "PROTOCOL://DOMAINPATH", "PROTOCOL://DOMAIN:PORT", "PROTOCOL://DOMAIN"};

    /** Column holding the URL strings to split. */
    @Configured
    InputColumn<String> inputColumn;

    /** One compiled pattern per entry of {@link #PATTERNS}; populated by {@link #init()}. */
    private List<NamedPattern<UrlPart>> namedPatterns;

    /**
     * Builds the {@link NamedPattern} list from {@link #PATTERNS}. Must have run
     * before {@link #transform(String)} is called with a non-null value, because
     * that method iterates over the list created here.
     */
    @Initialize
    public void init() {
        this.namedPatterns = new ArrayList<NamedPattern<UrlPart>>(PATTERNS.length);
        for (String pattern : PATTERNS) {
            this.namedPatterns.add(new NamedPattern<UrlPart>(pattern, UrlPart.class));
        }
    }

    /**
     * Declares the five output columns, in the same order as the array returned
     * by {@link #transform(String)}.
     *
     * @return the output column names: Protocol, Domain, Port, Path, Querystring
     */
    public OutputColumns getOutputColumns() {
        return new OutputColumns("Protocol", new String[]{"Domain", "Port", "Path", "Querystring"});
    }

    /**
     * Reads the configured input column from the row and splits its value.
     *
     * @param inputRow the row to read the URL from
     * @return the result of {@link #transform(String)} for the row's value
     */
    public String[] transform(InputRow inputRow) {
        String value = (String)inputRow.getValue(this.inputColumn);
        return this.transform(value);
    }

    /**
     * Splits a URL into its parts.
     *
     * @param value the URL to split; may be {@code null}
     * @return a five-element array {@code {protocol, domain, port, path, queryString}};
     *         every element is {@code null} when {@code value} is {@code null} or
     *         no pattern matches
     */
    public String[] transform(String value) {
        if (value != null) {
            for (NamedPattern<UrlPart> namedPattern : this.namedPatterns) {
                NamedPatternMatch<UrlPart> match = namedPattern.match(value);
                if (match == null) {
                    continue;
                }
                // First matching pattern wins; PATTERNS is ordered accordingly.
                return new String[]{
                    match.get(UrlPart.PROTOCOL),
                    match.get(UrlPart.DOMAIN),
                    match.get(UrlPart.PORT),
                    match.get(UrlPart.PATH),
                    match.get(UrlPart.QUERYSTRING)
                };
            }
        }
        // Null input or no match: five null entries.
        return new String[5];
    }

    /**
     * The named groups that may appear in {@link UrlStandardizerTransformer#PATTERNS},
     * each with the regular expression group used to capture it.
     */
    public static enum UrlPart implements HasGroupLiteral
    {
        PROTOCOL,
        DOMAIN,
        PORT,
        PATH,
        QUERYSTRING;


        /**
         * Returns the capturing group for this URL part.
         *
         * @return the regular expression group, or {@code null} for
         *         {@link #PROTOCOL}
         */
        @Override
        public String getGroupLiteral() {
            switch (this) {
                case DOMAIN:
                    return "([a-zA-Z0-9\\._\\-@]+)";
                case PORT:
                    return "([0-9]+)";
                case PATH:
                    return "(/[a-zA-Z0-9\\._\\-/#:%]+)";
                case QUERYSTRING:
                    // '&' separates query parameters ("a=1&b=2"); without it in
                    // the class a multi-parameter query string cannot be captured
                    // in full. A single '&' is a literal inside a character class
                    // (only "&&" denotes intersection).
                    return "([a-zA-Z0-9\\.=\\?_\\-/%&]+)";
                default:
                    // NOTE(review): PROTOCOL has no literal of its own; presumably
                    // NamedPattern substitutes a default group for null - confirm.
                    return null;
            }
        }
    }
}

