package edu.stanford.nlp.dcoref;

import edu.stanford.nlp.classify.LogisticClassifier;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.TokenizerFactory;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeCoreAnnotations;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.StringReader;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Stack;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/stanford/nlp/dcoref/MUCMentionExtractor.class */
public class MUCMentionExtractor extends MentionExtractor {
    private static Redwood.RedwoodChannels log = Redwood.channels(MUCMentionExtractor.class);
    private final TokenizerFactory<CoreLabel> tokenizerFactory;
    private final String fileContents;
    private int currentOffset;

    public MUCMentionExtractor(Dictionaries dictionaries, Properties properties, Semantics semantics) throws Exception {
        super(dictionaries, semantics);
        this.fileContents = IOUtils.slurpFile(properties.getProperty(Constants.MUC_PROP));
        this.currentOffset = 0;
        this.tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(false), "");
        this.stanfordProcessor = loadStanfordProcessor(properties);
    }

    public MUCMentionExtractor(Dictionaries dictionaries, Properties properties, Semantics semantics, LogisticClassifier<String, String> logisticClassifier) throws Exception {
        this(dictionaries, properties, semantics);
        this.singletonPredictor = logisticClassifier;
    }

    @Override // edu.stanford.nlp.dcoref.MentionExtractor
    public void resetDocs() {
        super.resetDocs();
        this.currentOffset = 0;
    }

    @Override // edu.stanford.nlp.dcoref.MentionExtractor
    public Document nextDoc() throws Exception {
        List<List<CoreLabel>> arrayList = new ArrayList<>();
        ArrayList arrayList2 = new ArrayList();
        List<List<Mention>> arrayList3 = new ArrayList<>();
        ArrayList arrayList4 = new ArrayList();
        Annotation annotation = new Annotation("");
        Pattern compile = Pattern.compile("<DOC>(.*?)</DOC>", 34);
        Pattern compile2 = Pattern.compile("(<s>|<hl>|<dd>|<DATELINE>)(.*?)(</s>|</hl>|</dd>|</DATELINE>)", 34);
        Matcher matcher = compile.matcher(this.fileContents);
        if (!matcher.find(this.currentOffset)) {
            return null;
        }
        this.currentOffset = matcher.end();
        String group = matcher.group(1);
        Matcher matcher2 = compile2.matcher(group);
        String str = null;
        Matcher matcher3 = Pattern.compile("<DOCNO>(.*?)</DOCNO>", 34).matcher(group);
        if (matcher3.find()) {
            this.currentDocumentID = matcher3.group(1);
        } else {
            this.currentDocumentID = "documentAfter " + this.currentDocumentID;
        }
        while (matcher2.find()) {
            List<CoreLabel> list = this.tokenizerFactory.getTokenizer(new StringReader(matcher2.group(2))).tokenize();
            int i = 0;
            while (i < list.size()) {
                CoreLabel coreLabel = list.get(i);
                if (i <= 0 || !coreLabel.word().equals("$")) {
                    if (coreLabel.word().equals("\\/") && !list.get(i - 1).word().equals("</COREF>")) {
                        coreLabel.set(CoreAnnotations.TextAnnotation.class, list.get(i - 1).word() + "\\/" + list.get(i + 1).word());
                        list.remove(i + 1);
                        list.remove(i - 1);
                    }
                } else if (list.get(i - 1).word().endsWith("PRP") || list.get(i - 1).word().endsWith("WP")) {
                    list.get(i - 1).set(CoreAnnotations.TextAnnotation.class, list.get(i - 1).word() + "$");
                    list.remove(i);
                    i--;
                }
                i++;
            }
            ArrayList arrayList5 = new ArrayList();
            Stack stack = new Stack();
            List<Mention> arrayList6 = new ArrayList<>();
            arrayList.add(arrayList5);
            arrayList3.add(arrayList6);
            for (CoreLabel coreLabel2 : list) {
                String str2 = (String) coreLabel2.get(CoreAnnotations.TextAnnotation.class);
                if (!str2.startsWith("<") && str2.contains("\\/") && str2.lastIndexOf("\\/") != str2.length() - 2) {
                    coreLabel2.set(CoreAnnotations.TextAnnotation.class, str2.substring(0, str2.lastIndexOf("\\/")));
                    coreLabel2.remove(CoreAnnotations.OriginalTextAnnotation.class);
                    arrayList5.add(coreLabel2);
                } else if (str2.startsWith("<") && !str2.startsWith("<COREF") && !str2.startsWith("</")) {
                    Matcher matcher4 = Pattern.compile("<(.*?)>").matcher(str2);
                    matcher4.find();
                    str = matcher4.group(1);
                } else if (str2.startsWith("</") && !str2.startsWith("</COREF")) {
                    Matcher matcher5 = Pattern.compile("</(.*?)>").matcher(str2);
                    matcher5.find();
                    String group2 = matcher5.group(1);
                    if (str != null && !str.equals(group2)) {
                        throw new RuntimeException("Unmatched NE labels in MUC file: " + str + " v. " + group2);
                    }
                    str = null;
                } else if (str2.startsWith("<COREF")) {
                    Mention mention = new Mention();
                    mention.startIndex = arrayList5.size();
                    Pattern compile3 = Pattern.compile("ID=\"(.*?)\"");
                    Pattern compile4 = Pattern.compile("REF=\"(.*?)\"");
                    Matcher matcher6 = compile3.matcher(str2);
                    matcher6.find();
                    mention.mentionID = Integer.parseInt(matcher6.group(1));
                    Matcher matcher7 = compile4.matcher(str2);
                    if (matcher7.find()) {
                        mention.originalRef = Integer.parseInt(matcher7.group(1));
                    }
                    stack.push(mention);
                } else if (str2.equals("</COREF>")) {
                    Mention mention2 = (Mention) stack.pop();
                    mention2.endIndex = arrayList5.size();
                    arrayList6.add(mention2);
                } else {
                    coreLabel2.remove(CoreAnnotations.OriginalTextAnnotation.class);
                    arrayList5.add(coreLabel2);
                }
            }
            StringBuilder sb = new StringBuilder();
            for (int i2 = 0; i2 < arrayList5.size(); i2++) {
                CoreLabel coreLabel3 = (CoreLabel) arrayList5.get(i2);
                coreLabel3.set(CoreAnnotations.IndexAnnotation.class, Integer.valueOf(i2 + 1));
                coreLabel3.set(CoreAnnotations.UtteranceAnnotation.class, 0);
                if (i2 > 0) {
                    sb.append(" ");
                }
                sb.append(coreLabel3.getString(CoreAnnotations.TextAnnotation.class));
            }
            Annotation annotation2 = new Annotation(sb.toString());
            arrayList4.add(annotation2);
            annotation2.set(CoreAnnotations.TokensAnnotation.class, arrayList5);
        }
        Map newHashMap = Generics.newHashMap();
        Iterator<List<Mention>> it = arrayList3.iterator();
        while (it.hasNext()) {
            for (Mention mention3 : it.next()) {
                newHashMap.put(Integer.valueOf(mention3.mentionID), mention3);
            }
        }
        Iterator<List<Mention>> it2 = arrayList3.iterator();
        while (it2.hasNext()) {
            for (Mention mention4 : it2.next()) {
                if (mention4.goldCorefClusterID == -1) {
                    if (mention4.originalRef == -1) {
                        mention4.goldCorefClusterID = mention4.mentionID;
                    } else {
                        int i3 = mention4.originalRef;
                        while (true) {
                            Mention mention5 = (Mention) newHashMap.get(Integer.valueOf(i3));
                            if (mention5.goldCorefClusterID != -1) {
                                mention4.goldCorefClusterID = mention5.goldCorefClusterID;
                                break;
                            }
                            if (mention5.originalRef == -1) {
                                mention5.goldCorefClusterID = mention5.mentionID;
                                mention4.goldCorefClusterID = mention5.goldCorefClusterID;
                                break;
                            }
                            i3 = mention5.originalRef;
                        }
                    }
                }
            }
        }
        annotation.set(CoreAnnotations.SentencesAnnotation.class, arrayList4);
        this.stanfordProcessor.annotate(annotation);
        if (arrayList4.size() != arrayList.size()) {
            throw new IllegalStateException("allSentences != allWords");
        }
        for (int i4 = 0; i4 < arrayList4.size(); i4++) {
            List<CoreLabel> list2 = (List) ((CoreMap) arrayList4.get(i4)).get(CoreAnnotations.TokensAnnotation.class);
            List<CoreLabel> list3 = arrayList.get(i4);
            Iterator<Mention> it3 = arrayList3.get(i4).iterator();
            while (it3.hasNext()) {
                it3.next().dependency = (SemanticGraph) ((CoreMap) arrayList4.get(i4)).get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
            }
            if (list2.size() != list3.size()) {
                throw new IllegalStateException("annotatedSent != unannotatedSent");
            }
            int size = list2.size();
            for (int i5 = 0; i5 < size; i5++) {
                if (!((String) list2.get(i5).get(CoreAnnotations.TextAnnotation.class)).equals(list3.get(i5).get(CoreAnnotations.TextAnnotation.class))) {
                    throw new IllegalStateException("annotatedWord != unannotatedWord");
                }
            }
            arrayList.set(i4, list2);
            arrayList2.add(((CoreMap) arrayList4.get(i4)).get(TreeCoreAnnotations.TreeAnnotation.class));
        }
        return arrange(annotation, arrayList, arrayList2, this.mentionFinder.extractPredictedMentions(annotation, this.maxID, this.dictionaries), arrayList3, true);
    }
}
