/*
 * Decompiled with CFR 0.152.
 */
package opennlp.tools.formats.ad;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import opennlp.tools.commons.Internal;
import opennlp.tools.formats.ad.ADSentenceStream;
import opennlp.tools.formats.ad.PortugueseContractionUtility;
import opennlp.tools.namefind.NameSample;
import opennlp.tools.util.InputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.Span;

/**
 * An {@link ObjectStream} of {@link NameSample}s built from the sentences
 * produced by an {@link ADSentenceStream}.
 *
 * <p>For every sentence the parse tree is walked depth-first; leaf lexemes are
 * tokenized (underscores separate tokens, selected punctuation is split off,
 * hyphenated words are optionally split) and leaves whose secondary tag maps to
 * a known category are emitted as name {@link Span}s.
 *
 * <p>The stream keeps per-instance mutable state (pending contraction part and
 * text counters) and performs no synchronization of its own.
 */
@Internal
public class ADNameSampleStream
implements ObjectStream<NameSample> {
    /** Matches a single tag such as {@code <hum>} or {@code <NER:hum>}; group 2 is the tag name. */
    private static final Pattern TAG_PATTERN = Pattern.compile("<(NER:)?(.*?)>");
    private static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");
    /** Underscores join the tokens of a multi-token lexeme. */
    private static final Pattern UNDERLINE_PATTERN = Pattern.compile("[_]+");
    /**
     * Three alternatives: {@code word-} (groups 1-2), {@code -word rest} (groups 3-5)
     * and {@code word-word rest} (groups 6-9).
     */
    private static final Pattern HYPHEN_PATTERN = Pattern.compile("((\\p{L}+)-$)|(^-(\\p{L}+)(.*))|((\\p{L}+)-(\\p{L}+)(.*))");
    private static final Pattern ALPHANUMERIC_PATTERN = Pattern.compile("^[\\p{L}\\p{Nd}]+$");
    /** Metadata layout used when the metadata starts with {@code LIT}; group 1 is the text key. */
    private static final Pattern META_LIT_PATTERN = Pattern.compile("^([a-zA-Z\\-]+)(\\d+).*?p=(\\d+).*");
    /** Metadata layout used when the metadata starts with {@code CIE}; group 1 is the quoted source. */
    private static final Pattern META_CIE_PATTERN = Pattern.compile("^.*?source=\"(.*?)\".*");
    /** Metadata layout used for everything else; group 1 is the numeric text id. */
    private static final Pattern META_AMA_PATTERN = Pattern.compile("^(?:[a-zA-Z\\-]*(\\d+)).*?p=(\\d+).*");
    /** Maps a secondary tag name to the name type reported in the resulting spans. */
    private static final Map<String, String> HAREM;

    static {
        Map<String, String> harem = new HashMap<>();

        final String person = "person";
        harem.put("hum", person);
        harem.put("official", person);
        harem.put("member", person);

        final String organization = "organization";
        harem.put("admin", organization);
        harem.put("org", organization);
        harem.put("inst", organization);
        harem.put("media", organization);
        harem.put("party", organization);
        harem.put("suborg", organization);

        final String group = "group";
        harem.put("groupind", group);
        harem.put("groupofficial", group);

        final String place = "place";
        harem.put("top", place);
        harem.put("civ", place);
        harem.put("address", place);
        harem.put("site", place);
        harem.put("virtual", place);
        harem.put("astro", place);

        final String event = "event";
        harem.put("occ", event);
        harem.put("event", event);
        harem.put("history", event);

        final String artprod = "artprod";
        harem.put("tit", artprod);
        harem.put("pub", artprod);
        harem.put("product", artprod);
        harem.put("V", artprod);
        harem.put("artwork", artprod);

        final String abstractType = "abstract";
        harem.put("brand", abstractType);
        harem.put("genre", abstractType);
        harem.put("school", abstractType);
        harem.put("idea", abstractType);
        harem.put("plan", abstractType);
        harem.put("author", abstractType);
        harem.put("absname", abstractType);
        harem.put("disease", abstractType);

        final String thing = "thing";
        harem.put("object", thing);
        harem.put("common", thing);
        harem.put("mat", thing);
        harem.put("class", thing);
        harem.put("plant", thing);

        final String time = "time";
        harem.put("date", time);
        harem.put("hour", time);
        harem.put("period", time);
        harem.put("cyclic", time);

        final String numeric = "numeric";
        harem.put("quantity", numeric);
        harem.put("prednum", numeric);
        // "currency" used to be registered twice (first as "thing", then as
        // "numeric"); the later entry always won, so only that one is kept.
        harem.put("currency", numeric);

        HAREM = Collections.unmodifiableMap(harem);
    }

    private final ObjectStream<ADSentenceStream.Sentence> adSentenceStream;
    /**
     * Left half of a contraction (a leaf tagged {@code <sam->}) waiting for the
     * next leaf, or {@code null} when nothing is pending.
     * NOTE(review): read() does not clear this at sentence boundaries, so a
     * trailing {@code <sam->} leaf carries over into the next sentence - confirm
     * that this is intended.
     */
    private String leftContractionPart = null;
    private final boolean splitHyphenatedTokens;
    /** Text id of the most recently read sentence; -1 before the first read. */
    private int textID = -1;
    /** Running text counter for corpora whose metadata carries a textual key instead of a number. */
    private int textIdMeta2 = -1;
    /** Text key seen on the previous sentence of such a corpus. */
    private String textMeta2 = "";

    /**
     * Creates a sample stream on top of a line stream.
     *
     * @param lineStream            lines of the corpus, handed to an {@link ADSentenceStream}
     * @param splitHyphenatedTokens whether tokens containing a hyphen are split
     *                              into their parts and the hyphen itself
     */
    public ADNameSampleStream(ObjectStream<String> lineStream, boolean splitHyphenatedTokens) {
        this.adSentenceStream = new ADSentenceStream(lineStream);
        this.splitHyphenatedTokens = splitHyphenatedTokens;
    }

    /**
     * Creates a sample stream reading lines from an input stream factory.
     *
     * @param in                    source of the corpus bytes
     * @param charsetName           name of the charset used to decode the input
     * @param splitHyphenatedTokens whether tokens containing a hyphen are split
     * @throws IOException if the underlying line stream cannot be created
     * @deprecated use {@link #ADNameSampleStream(ObjectStream, boolean)} instead
     */
    @Deprecated
    public ADNameSampleStream(InputStreamFactory in, String charsetName, boolean splitHyphenatedTokens) throws IOException {
        this(new PlainTextByLineStream(in, charsetName), splitHyphenatedTokens);
    }

    /**
     * Reads the next sentence and converts it into a {@link NameSample}.
     *
     * @return the next sample, or {@code null} when the sentence stream is exhausted;
     *         the sample's third constructor argument is {@code true} when the
     *         sentence's text id differs from that of the previous sentence
     * @throws IOException      if the underlying stream fails
     * @throws RuntimeException if the sentence metadata matches no known layout
     */
    @Override
    public NameSample read() throws IOException {
        ADSentenceStream.Sentence paragraph = this.adSentenceStream.read();
        if (paragraph == null) {
            return null;
        }
        int currentTextID = this.getTextID(paragraph);
        boolean clearData = currentTextID != this.textID;
        this.textID = currentTextID;

        List<String> sentence = new ArrayList<>();
        List<Span> names = new ArrayList<>();
        this.process(paragraph.getRoot(), sentence, names);
        return new NameSample(sentence.toArray(new String[0]), names.toArray(new Span[0]), clearData);
    }

    /**
     * Walks the tree below {@code node} depth-first, in element order, and
     * appends the tokens and name spans of every leaf.
     */
    private void process(ADSentenceStream.SentenceParser.Node node, List<String> sentence, List<Span> names) {
        if (node == null) {
            return;
        }
        for (ADSentenceStream.SentenceParser.TreeElement element : node.getElements()) {
            if (element.isLeaf()) {
                this.processLeaf((ADSentenceStream.SentenceParser.Leaf)element, sentence, names);
            } else {
                this.process((ADSentenceStream.SentenceParser.Node)element, sentence, names);
            }
        }
    }

    /**
     * Appends the tokens of one leaf to {@code sentence} and, when the leaf
     * carries a known name tag, a matching span to {@code names}.
     */
    private void processLeaf(ADSentenceStream.SentenceParser.Leaf leaf, List<String> sentence, List<Span> names) {
        boolean alreadyAdded = false;
        if (this.leftContractionPart != null) {
            // A previous leaf was held back: try to merge it with this one.
            String contraction = PortugueseContractionUtility.toContraction(this.leftContractionPart, leaf.getLexeme());
            if (contraction != null) {
                sentence.addAll(Arrays.asList(WHITESPACE_PATTERN.split(contraction)));
                alreadyAdded = true;
            } else {
                // No contraction known: emit the held-back part on its own.
                sentence.add(this.leftContractionPart);
            }
            this.leftContractionPart = null;
        }

        String namedEntityTag = null;
        boolean expandLastNER = false;
        String leafTag = leaf.getSecondaryTag();
        if (leafTag != null) {
            if (leafTag.contains("<sam->") && !alreadyAdded) {
                String[] lexemes = UNDERLINE_PATTERN.split(leaf.getLexeme());
                // split() yields an empty array for a lexeme made only of
                // underscores; there is nothing to hold back in that case.
                if (lexemes.length == 0) {
                    return;
                }
                int lastIndex = lexemes.length - 1;
                sentence.addAll(Arrays.asList(lexemes).subList(0, lastIndex));
                // Only the last part can contract with the following leaf.
                this.leftContractionPart = lexemes[lastIndex];
                return;
            }
            expandLastNER = leafTag.contains("<NER2>");
            namedEntityTag = ADNameSampleStream.getNER(leafTag);
        }

        int startOfNamedEntity = sentence.size();
        if (!alreadyAdded) {
            sentence.addAll(this.processLexeme(leaf.getLexeme()));
        }
        if (namedEntityTag != null) {
            names.add(new Span(startOfNamedEntity, sentence.size(), namedEntityTag));
        }
        if (expandLastNER && !names.isEmpty()) {
            int lastIndex = names.size() - 1;
            Span last = names.get(lastIndex);
            // Grow the previous span only when it ends right before the single
            // token that now closes the sentence.
            if (last.getEnd() == sentence.size() - 1) {
                names.set(lastIndex, new Span(last.getStart(), sentence.size(), last.getType()));
            }
        }
    }

    /**
     * Splits a lexeme on underscores and further tokenizes every part that is
     * longer than one character and not purely letters/digits.
     */
    private List<String> processLexeme(String lexemeStr) {
        List<String> out = new ArrayList<>();
        for (String part : UNDERLINE_PATTERN.split(lexemeStr)) {
            boolean plain = part.length() <= 1 || ALPHANUMERIC_PATTERN.matcher(part).matches();
            if (plain) {
                out.add(part);
            } else {
                out.addAll(this.processTok(part));
            }
        }
        return out;
    }

    /**
     * Tokenizes one token: strips a leading left guillemet (U+00AB), strips one
     * trailing right guillemet (U+00BB), colon, comma or exclamation mark, and,
     * when enabled, splits around a hyphen.
     *
     * @return the resulting tokens in order; never contains empty strings
     */
    private List<String> processTok(String tok) {
        List<String> out = new ArrayList<>();
        if (tok.isEmpty()) {
            return out;
        }
        String original = tok;
        String suffix = null;

        char first = tok.charAt(0);
        if (first == '\u00ab') {
            out.add(Character.toString(first));
            tok = tok.substring(1);
        }
        // The token may be empty now (it was a lone guillemet), so guard the
        // charAt below.
        if (!tok.isEmpty()) {
            char last = tok.charAt(tok.length() - 1);
            if (last == '\u00bb' || last == ':' || last == ',' || last == '!') {
                suffix = Character.toString(last);
                tok = tok.substring(0, tok.length() - 1);
            }
        }

        boolean tokAdded = false;
        if (this.splitHyphenatedTokens && tok.length() > 1 && tok.contains("-")) {
            Matcher matcher = HYPHEN_PATTERN.matcher(tok);
            if (matcher.matches()) {
                String firstTok = null;
                String secondTok = null;
                String rest = null;
                if (matcher.group(1) != null) {
                    firstTok = matcher.group(2);
                } else if (matcher.group(3) != null) {
                    secondTok = matcher.group(4);
                    rest = matcher.group(5);
                } else if (matcher.group(6) != null) {
                    firstTok = matcher.group(7);
                    secondTok = matcher.group(8);
                    rest = matcher.group(9);
                }
                this.addIfNotEmpty(firstTok, out);
                this.addIfNotEmpty("-", out);
                this.addIfNotEmpty(secondTok, out);
                this.addIfNotEmpty(rest, out);
                tokAdded = true;
            }
        }

        // Skip the remainder entirely when stripping consumed the whole token,
        // so no empty token is emitted.
        if (!tokAdded && !tok.isEmpty()) {
            boolean changed = !original.equals(tok);
            if (changed && tok.length() > 1 && !ALPHANUMERIC_PATTERN.matcher(tok).matches()) {
                // Something was stripped and more punctuation may remain.
                out.addAll(this.processTok(tok));
            } else {
                out.add(tok);
            }
        }
        if (suffix != null) {
            out.add(suffix);
        }
        return out;
    }

    /** Tokenizes {@code part} into {@code out} unless it is {@code null} or empty. */
    private void addIfNotEmpty(String part, List<String> out) {
        if (part != null && part.length() > 0) {
            out.addAll(this.processTok(part));
        }
    }

    /**
     * Finds the name type for a whitespace-separated list of tags.
     *
     * @return the type of the first tag found in {@link #HAREM}, or {@code null}
     *         when no tag is known or the list contains {@code <NER2>}
     */
    private static String getNER(String tags) {
        if (tags.contains("<NER2>")) {
            return null;
        }
        for (String tag : WHITESPACE_PATTERN.split(tags)) {
            Matcher matcher = TAG_PATTERN.matcher(tag);
            if (!matcher.matches()) {
                continue;
            }
            String type = HAREM.get(matcher.group(2));
            if (type != null) {
                return type;
            }
        }
        return null;
    }

    /**
     * Resets the underlying sentence stream and all per-pass state, so a second
     * pass behaves exactly like the first one.
     */
    @Override
    public void reset() throws IOException, UnsupportedOperationException {
        this.adSentenceStream.reset();
        this.leftContractionPart = null;
        this.textID = -1;
        this.textIdMeta2 = -1;
        this.textMeta2 = "";
    }

    @Override
    public void close() throws IOException {
        this.adSentenceStream.close();
    }

    /**
     * Derives a text id from the sentence metadata.
     *
     * <p>Metadata starting with {@code LIT} or {@code CIE} carries a textual
     * key; for those a counter kept on this stream is advanced each time the
     * key differs from the previous sentence's key. Any other metadata is
     * expected to carry the numeric id directly.
     *
     * @throws RuntimeException if the metadata does not match the expected layout
     */
    private int getTextID(ADSentenceStream.Sentence paragraph) {
        String meta = paragraph.getMetadata();

        Type corpusType;
        Pattern metaPattern;
        if (meta.startsWith("LIT")) {
            corpusType = Type.lit;
            metaPattern = META_LIT_PATTERN;
        } else if (meta.startsWith("CIE")) {
            corpusType = Type.cie;
            metaPattern = META_CIE_PATTERN;
        } else {
            corpusType = Type.ama;
            metaPattern = META_AMA_PATTERN;
        }

        Matcher matcher = metaPattern.matcher(meta);
        if (!matcher.matches()) {
            throw new RuntimeException("Invalid metadata: " + meta);
        }
        if (corpusType == Type.ama) {
            return Integer.parseInt(matcher.group(1));
        }

        // The counter must live on the instance: a method-local counter would
        // restart on every call and report the same id for every text.
        String textKey = matcher.group(1);
        if (!textKey.equals(this.textMeta2)) {
            ++this.textIdMeta2;
            this.textMeta2 = textKey;
        }
        return this.textIdMeta2;
    }

    /** Metadata layouts distinguished by {@code getTextID}. */
    static enum Type {
        ama,
        cie,
        lit;

    }
}

