/*
 * Decompiled with CFR 0.152.
 */
package opennlp.tools.formats.brat;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import opennlp.tools.formats.brat.BratAnnotation;
import opennlp.tools.formats.brat.BratDocument;
import opennlp.tools.formats.brat.SpanAnnotation;
import opennlp.tools.namefind.NameSample;
import opennlp.tools.sentdetect.SentenceDetector;
import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.util.Span;

/**
 * Turns a {@link BratDocument} into a list of {@link NameSample}s, one per sentence.
 *
 * <p>The document text is split with the configured {@link SentenceDetector}, each sentence
 * is tokenized with the configured {@link Tokenizer}, and the character spans of the
 * document's {@link SpanAnnotation}s are translated into token-index spans. Entities that
 * cannot be aligned with the sentence segmentation or with the tokenization are dropped and
 * reported on {@code System.err}.
 */
public class BratDocumentParser {
    private final SentenceDetector sentDetector;
    private final Tokenizer tokenizer;
    /** Annotation types to keep; {@code null} means every span annotation is kept. */
    private final Set<String> nameTypes;

    /**
     * Creates a parser that accepts span annotations of any type.
     *
     * @param sentenceDetector detector used to segment the document text
     * @param tokenizer tokenizer applied to each sentence
     */
    public BratDocumentParser(SentenceDetector sentenceDetector, Tokenizer tokenizer) {
        this(sentenceDetector, tokenizer, null);
    }

    /**
     * Creates a parser that only accepts span annotations of the given types.
     *
     * @param sentenceDetector detector used to segment the document text
     * @param tokenizer tokenizer applied to each sentence
     * @param nameTypes annotation types to keep, or {@code null} to keep all types
     * @throws IllegalArgumentException if {@code nameTypes} is non-null but empty
     */
    public BratDocumentParser(SentenceDetector sentenceDetector, Tokenizer tokenizer, Set<String> nameTypes) {
        boolean emptyFilter = nameTypes != null && nameTypes.isEmpty();
        if (emptyFilter) {
            throw new IllegalArgumentException("nameTypes should be null or have one or more elements");
        }
        this.sentDetector = sentenceDetector;
        this.tokenizer = tokenizer;
        this.nameTypes = nameTypes;
    }

    /**
     * Converts the document into one {@link NameSample} per (possibly corrected) sentence.
     *
     * <p>A detected sentence whose first character lies inside an accepted entity span that
     * started earlier is merged into the previous sentence, and a note is printed to
     * {@code System.out}. Entities never contained in any sentence, and entity fragments whose
     * boundaries do not coincide with token boundaries, are reported on {@code System.err}.
     *
     * @param sample the brat document to convert
     * @return the samples in sentence order; the last constructor argument passed to
     *         {@link NameSample} is {@code true} only for the first sample of the document
     */
    public List<NameSample> parse(BratDocument sample) {
        // Ids of accepted entities that have not been located in a sentence yet; whatever
        // remains at the end is reported as dropped.
        Set<String> unmatchedEntityIds = new HashSet<String>();
        // Character offset -> entity span covering that offset (later spans overwrite earlier ones).
        HashMap<Integer, Span> spanAtOffset = new HashMap<Integer, Span>();
        for (BratAnnotation annotation : sample.getAnnotations()) {
            if (this.isSpanAnnotation(annotation)) {
                unmatchedEntityIds.add(annotation.getId());
                for (Span fragment : ((SpanAnnotation) annotation).getSpans()) {
                    for (int offset = fragment.getStart(); offset < fragment.getEnd(); offset++) {
                        spanAtOffset.put(offset, fragment);
                    }
                }
            }
        }

        List<Span> sentences = new ArrayList<Span>();
        for (Span detected : this.sentDetector.sentPosDetect(sample.getText())) {
            Span straddling = spanAtOffset.get(detected.getStart());
            // The sentence boundary falls inside an entity: glue this sentence onto the previous one.
            boolean cutsEntity = !sentences.isEmpty()
                    && straddling != null
                    && straddling.getStart() < detected.getStart();
            if (cutsEntity) {
                Span previous = sentences.remove(sentences.size() - 1);
                sentences.add(new Span(previous.getStart(), detected.getEnd()));
                System.out.println("Correcting sentence segmentation in document " + sample.getId());
            } else {
                sentences.add(detected);
            }
        }

        List<NameSample> samples = new ArrayList<NameSample>(sentences.size());
        for (Span sentence : sentences) {
            String sentenceText = sentence.getCoveredText(sample.getText()).toString();
            Span[] tokens = this.tokenizer.tokenizePos(sentenceText);
            HashMap<Integer, Integer> tokenIndexAt = this.indexTokenBoundaries(sentence, tokens);

            List<Span> names = new ArrayList<Span>();
            for (BratAnnotation annotation : sample.getAnnotations()) {
                if (!this.isSpanAnnotation(annotation)) {
                    continue;
                }
                SpanAnnotation entity = (SpanAnnotation) annotation;
                List<Span> fragments = new ArrayList<Span>();
                for (Span rawSpan : entity.getSpans()) {
                    if (sentence.contains(rawSpan)) {
                        unmatchedEntityIds.remove(annotation.getId());
                        Span trimmed = rawSpan.trim(sample.getText());
                        // Start offsets are looked up negated, end offsets as-is (see indexTokenBoundaries).
                        Integer firstToken = tokenIndexAt.get(-trimmed.getStart());
                        Integer endToken = tokenIndexAt.get(trimmed.getEnd());
                        if (firstToken == null || endToken == null) {
                            System.err.println("Dropped entity " + entity.getId() + " (" + trimmed.getCoveredText(sample.getText()) + ")  in document " + sample.getId() + ", it is not matching tokenization!");
                        } else {
                            fragments.add(new Span(firstToken.intValue(), endToken.intValue(), entity.getType()));
                        }
                    }
                }
                this.mergeAdjacentFragments(fragments);
                for (Span fragment : fragments) {
                    if (fragment != null) {
                        names.add(fragment);
                    }
                }
            }

            boolean firstSampleOfDocument = samples.isEmpty();
            samples.add(new NameSample(sample.getId(), Span.spansToStrings(tokens, sentenceText), names.toArray(new Span[names.size()]), null, firstSampleOfDocument));
        }

        for (String droppedId : unmatchedEntityIds) {
            System.err.println("Dropped entity " + droppedId + " in document " + sample.getId() + ", is not matching sentence segmentation!");
        }
        return samples;
    }

    /**
     * Builds the lookup from document character offsets to token indexes for one sentence.
     *
     * <p>Token start offsets are stored under their negated value and map to the token's
     * index; token end offsets are stored as-is and map to the index just past the token.
     * Negating the starts keeps a start and an end at the same character offset from
     * overwriting each other in the single map.
     */
    private HashMap<Integer, Integer> indexTokenBoundaries(Span sentence, Span[] tokens) {
        HashMap<Integer, Integer> tokenIndexAt = new HashMap<Integer, Integer>();
        int sentenceOffset = sentence.getStart();
        for (int t = 0; t < tokens.length; t++) {
            Span token = tokens[t];
            tokenIndexAt.put(-(sentenceOffset + token.getStart()), t);
            tokenIndexAt.put(sentenceOffset + token.getEnd(), t + 1);
        }
        return tokenIndexAt;
    }

    /**
     * Sorts the fragments and fuses neighbours where one ends exactly where the next begins.
     *
     * <p>The fused span replaces the later fragment (keeping the later fragment's type) and
     * the earlier slot is set to {@code null}; callers must skip {@code null} entries.
     */
    private void mergeAdjacentFragments(List<Span> fragments) {
        Collections.sort(fragments);
        for (int k = 1; k < fragments.size(); k++) {
            Span left = fragments.get(k - 1);
            Span right = fragments.get(k);
            if (left.getEnd() == right.getStart()) {
                fragments.set(k, new Span(left.getStart(), right.getEnd(), right.getType()));
                fragments.set(k - 1, null);
            }
        }
    }

    /**
     * Tells whether the annotation is a {@link SpanAnnotation} whose type passes the
     * configured type filter (no filter means every type passes).
     */
    private boolean isSpanAnnotation(BratAnnotation ann) {
        if (!(ann instanceof SpanAnnotation)) {
            return false;
        }
        if (this.nameTypes == null) {
            return true;
        }
        return this.nameTypes.contains(ann.getType());
    }
}

