/*
 * Decompiled with CFR 0.152.
 */
package org.eobjects.analyzer.beans;

import com.ibm.icu.text.UnicodeSet;
import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import javax.inject.Inject;
import org.eobjects.analyzer.beans.CharacterSetDistributionAnalyzerColumnDelegate;
import org.eobjects.analyzer.beans.api.Analyzer;
import org.eobjects.analyzer.beans.api.AnalyzerBean;
import org.eobjects.analyzer.beans.api.Concurrent;
import org.eobjects.analyzer.beans.api.Configured;
import org.eobjects.analyzer.beans.api.Description;
import org.eobjects.analyzer.beans.api.Initialize;
import org.eobjects.analyzer.beans.api.Provided;
import org.eobjects.analyzer.data.InputColumn;
import org.eobjects.analyzer.data.InputRow;
import org.eobjects.analyzer.result.AnalyzerResult;
import org.eobjects.analyzer.result.AnnotatedRowsResult;
import org.eobjects.analyzer.result.CharacterSetDistributionResult;
import org.eobjects.analyzer.result.Crosstab;
import org.eobjects.analyzer.result.CrosstabDimension;
import org.eobjects.analyzer.result.CrosstabNavigator;
import org.eobjects.analyzer.storage.RowAnnotation;
import org.eobjects.analyzer.storage.RowAnnotationFactory;

/**
 * Analyzer that reports, per configured string column, how many rows were
 * annotated for each named Unicode character set (Latin, Arabic, Han, ...).
 *
 * <p>The per-value work is handed to one
 * {@link CharacterSetDistributionAnalyzerColumnDelegate} per column; this class
 * only wires the delegates up and assembles their row annotations into a
 * crosstab with the dimensions "Column" and "Measures".
 */
@AnalyzerBean(value="Character set distribution")
@Description(value="Inspects and maps text characters according to character set affinity, such as Latin, Hebrew, Cyrillic, Chinese and more.")
@Concurrent(value=true)
public class CharacterSetDistributionAnalyzer implements Analyzer<CharacterSetDistributionResult> {

    /**
     * Named Unicode sets shared by every analyzer instance and every column
     * delegate. Backed by a sorted map, so the names iterate in natural
     * String order.
     */
    private static final Map<String, UnicodeSet> UNICODE_SETS = CharacterSetDistributionAnalyzer.createUnicodeSets();

    /** The string columns to analyze; injected as configuration. */
    @Inject
    @Configured
    InputColumn<String>[] _columns;

    /** Injected factory handed to each column delegate and to the attached row results. */
    @Inject
    @Provided
    RowAnnotationFactory _annotationFactory;

    /** One delegate per configured column; populated by {@link #init()}. */
    private final Map<InputColumn<String>, CharacterSetDistributionAnalyzerColumnDelegate> _columnDelegates = new HashMap<InputColumn<String>, CharacterSetDistributionAnalyzerColumnDelegate>();

    /**
     * Creates a fresh delegate for every configured column. Must run after
     * injection and before {@link #run(InputRow, int)} or {@link #getResult()},
     * both of which look the delegates up by column.
     */
    @Initialize
    public void init() {
        for (InputColumn<String> column : this._columns) {
            CharacterSetDistributionAnalyzerColumnDelegate delegate = new CharacterSetDistributionAnalyzerColumnDelegate(this._annotationFactory, UNICODE_SETS);
            this._columnDelegates.put(column, delegate);
        }
    }

    /**
     * Builds the table of named Unicode sets used as measures.
     *
     * @return a new sorted map from display name to the matching
     *         {@link UnicodeSet}; each call constructs new sets
     */
    protected static Map<String, UnicodeSet> createUnicodeSets() {
        Map<String, UnicodeSet> unicodeSets = new TreeMap<String, UnicodeSet>();
        unicodeSets.put("Latin, ASCII", new UnicodeSet("[:ASCII:]"));
        // Latin characters with the ASCII range removed, so the two Latin measures do not overlap.
        unicodeSets.put("Latin, non-ASCII", CharacterSetDistributionAnalyzer.subUnicodeSet("[:Latin:]", "[:ASCII:]"));
        unicodeSets.put("Arabic", new UnicodeSet("[:Script=Arabic:]"));
        unicodeSets.put("Armenian", new UnicodeSet("[:Script=Armenian:]"));
        unicodeSets.put("Bengali", new UnicodeSet("[:Script=Bengali:]"));
        unicodeSets.put("Cyrillic", new UnicodeSet("[:Script=Cyrillic:]"));
        unicodeSets.put("Devanagari", new UnicodeSet("[:Script=Devanagari:]"));
        unicodeSets.put("Greek", new UnicodeSet("[:Script=Greek:]"));
        unicodeSets.put("Han", new UnicodeSet("[:Script=Han:]"));
        unicodeSets.put("Gujarati", new UnicodeSet("[:Script=Gujarati:]"));
        unicodeSets.put("Georgian", new UnicodeSet("[:Script=Georgian:]"));
        unicodeSets.put("Gurmukhi", new UnicodeSet("[:Script=Gurmukhi:]"));
        unicodeSets.put("Hangul", new UnicodeSet("[:Script=Hangul:]"));
        unicodeSets.put("Hebrew", new UnicodeSet("[:Script=Hebrew:]"));
        unicodeSets.put("Hiragana", new UnicodeSet("[:Script=Hiragana:]"));
        unicodeSets.put("Kannada", new UnicodeSet("[:Script=Kannada:]"));
        unicodeSets.put("Katakana", new UnicodeSet("[:Script=Katakana:]"));
        unicodeSets.put("Malayalam", new UnicodeSet("[:Script=Malayalam:]"));
        unicodeSets.put("Oriya", new UnicodeSet("[:Script=Oriya:]"));
        unicodeSets.put("Syriac", new UnicodeSet("[:Script=Syriac:]"));
        unicodeSets.put("Tamil", new UnicodeSet("[:Script=Tamil:]"));
        unicodeSets.put("Telugu", new UnicodeSet("[:Script=Telugu:]"));
        unicodeSets.put("Thaana", new UnicodeSet("[:Script=Thaana:]"));
        unicodeSets.put("Thai", new UnicodeSet("[:Script=Thai:]"));
        return unicodeSets;
    }

    /**
     * Computes the set difference of two UnicodeSet patterns.
     *
     * @param pattern1 pattern whose members form the starting set
     * @param pattern2 pattern whose members are removed from that set
     * @return a new set containing the members of {@code pattern1} that are
     *         not members of {@code pattern2}
     */
    private static UnicodeSet subUnicodeSet(String pattern1, String pattern2) {
        UnicodeSet unicodeSet = new UnicodeSet();
        unicodeSet.addAll(new UnicodeSet(pattern1));
        unicodeSet.removeAll(new UnicodeSet(pattern2));
        return unicodeSet;
    }

    /**
     * Feeds one row to the delegate of every configured column.
     *
     * @param row           the row being processed
     * @param distinctCount passed through unchanged to each delegate
     */
    public void run(InputRow row, int distinctCount) {
        for (InputColumn<String> column : this._columns) {
            String value = row.getValue(column);
            CharacterSetDistributionAnalyzerColumnDelegate delegate = this._columnDelegates.get(column);
            delegate.run(value, row, distinctCount);
        }
    }

    /**
     * Assembles the crosstab result from the delegates' row annotations.
     *
     * <p>Each cell (column, Unicode set name) holds the row count of the
     * delegate's annotation for that set. Cells with a positive count also get
     * an {@link AnnotatedRowsResult} attached.
     *
     * @return the result wrapping the configured columns, the Unicode set
     *         names and the populated crosstab
     */
    public CharacterSetDistributionResult getResult() {
        CrosstabDimension measureDimension = new CrosstabDimension("Measures");
        Set<String> unicodeSetNames = UNICODE_SETS.keySet();
        for (String name : unicodeSetNames) {
            measureDimension.addCategory(name);
        }

        CrosstabDimension columnDimension = new CrosstabDimension("Column");
        // Parameterized (not raw) so the result can be returned without an unchecked cast.
        Crosstab<Number> crosstab = new Crosstab<Number>(Number.class, new CrosstabDimension[]{columnDimension, measureDimension});

        for (InputColumn<String> column : this._columns) {
            String columnName = column.getName();
            CharacterSetDistributionAnalyzerColumnDelegate delegate = this._columnDelegates.get(column);
            columnDimension.addCategory(columnName);

            CrosstabNavigator<Number> nav = crosstab.navigate().where(columnDimension, columnName);
            for (String name : unicodeSetNames) {
                RowAnnotation annotation = delegate.getAnnotation(name);
                int rowCount = annotation.getRowCount();
                nav.where(measureDimension, name).put(Integer.valueOf(rowCount));
                if (rowCount > 0) {
                    // NOTE(review): attach() is called on nav itself; this presumably targets the
                    // cell just written because where() repositions the navigator - confirm
                    // against CrosstabNavigator.
                    nav.attach(new AnnotatedRowsResult(annotation, this._annotationFactory, new InputColumn<?>[]{column}));
                }
            }
        }
        return new CharacterSetDistributionResult(this._columns, unicodeSetNames, crosstab);
    }
}

