package edu.stanford.nlp.ie.machinereading;

import edu.stanford.nlp.ie.machinereading.common.NoPunctuationHeadFinder;
import edu.stanford.nlp.ie.machinereading.structure.EntityMention;
import edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations;
import edu.stanford.nlp.ie.machinereading.structure.Span;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.parser.lexparser.ParserAnnotations$ConstraintAnnotation;
import edu.stanford.nlp.parser.lexparser.ParserConstraint;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.Annotator;
import edu.stanford.nlp.pipeline.CleanXmlAnnotator;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.trees.HeadFinder;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeCoreAnnotations;
import edu.stanford.nlp.util.CoreMap;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/stanford/nlp/ie/machinereading/GenericDataSetReader.class */
/**
 * Base reader for machine-reading data sets.
 *
 * <p>{@link #parse(String)} is the entry point: it delegates to {@link #read(String)} to build an
 * {@link Annotation} and, when enabled, runs {@link #preProcessSentences(Annotation)} which makes
 * sure every sentence tree uses {@link CoreLabel} labels with index spans and assigns a syntactic
 * head token to every entity mention.
 *
 * <p>The {@link #read(String)} implementation in this class returns {@code null}; it is presumably
 * meant to be overridden by corpus-specific readers (not visible from this file).
 */
public class GenericDataSetReader {
    /**
     * State pattern placed on the parser constraint that covers the mention tokens in the local
     * re-parse done by {@link #findSyntacticHead}. Compiled once instead of on every fallback.
     * NOTE(review): the pattern source is taken from an unrelated-looking constant; it is assumed
     * to be a match-anything expression -- confirm.
     */
    private static final Pattern CONSTRAINT_STATE = Pattern.compile(CleanXmlAnnotator.DEFAULT_XML_TAGS);

    protected Logger logger;
    /** Head finder used to pick the head terminal of a (sub)tree. */
    protected final HeadFinder headFinder;
    /** Pipeline used to annotate a corpus that has no parse trees yet; may be {@code null}. */
    protected StanfordCoreNLP processor;
    /** Lazily resolved parser annotator, see {@link #getParser()}. */
    protected Annotator parserProcessor;
    /** If true, {@link #parse(String)} runs {@link #preProcessSentences(Annotation)}. */
    protected final boolean preProcessSentences;
    /** If true, a one-token head span is stored on each mention in addition to the head position. */
    protected final boolean calculateHeadSpan;
    /** If true, index spans are regenerated on every tree even when already present. */
    protected final boolean forceGenerationOfIndexSpans;
    /** Selects {@link #findSyntacticHead} (true) or {@link #originalFindSyntacticHead} (false). */
    protected boolean useNewHeadFinder;

    /** Creates a reader without a processor and with all optional processing switched off. */
    public GenericDataSetReader() {
        this(null, false, false, false);
    }

    /**
     * Creates a reader. The class logger is obtained by class name and set to {@link Level#SEVERE}.
     *
     * @param processor pipeline to use for annotating unparsed corpora, or {@code null}
     * @param preProcessSentences whether {@link #parse(String)} should pre-process what it reads
     * @param calculateHeadSpan whether head spans should be set on mentions
     * @param forceGenerationOfIndexSpans whether index spans are always regenerated
     */
    public GenericDataSetReader(StanfordCoreNLP processor, boolean preProcessSentences,
                                boolean calculateHeadSpan, boolean forceGenerationOfIndexSpans) {
        this.headFinder = new NoPunctuationHeadFinder();
        this.useNewHeadFinder = true;
        this.logger = Logger.getLogger(GenericDataSetReader.class.getName());
        this.logger.setLevel(Level.SEVERE);
        if (processor != null) {
            setProcessor(processor);
        }
        this.parserProcessor = null;
        this.preProcessSentences = preProcessSentences;
        this.calculateHeadSpan = calculateHeadSpan;
        this.forceGenerationOfIndexSpans = forceGenerationOfIndexSpans;
    }

    /** Sets the pipeline used by {@link #preProcessSentences(Annotation)}. */
    public void setProcessor(StanfordCoreNLP processor) {
        this.processor = processor;
    }

    /** Chooses between the constraint-based head finder (true) and the original one (false). */
    public void setUseNewHeadFinder(boolean useNewHeadFinder) {
        this.useNewHeadFinder = useNewHeadFinder;
    }

    /**
     * Returns the parser annotator, looking it up among the existing annotators on first use.
     *
     * @return the parser annotator; asserted to be non-null when assertions are enabled
     */
    public Annotator getParser() {
        if (parserProcessor == null) {
            parserProcessor = StanfordCoreNLP.getExistingAnnotator(StanfordCoreNLP.STANFORD_PARSE);
            assert parserProcessor != null;
        }
        return parserProcessor;
    }

    /** Sets the level of this reader's logger. */
    public void setLoggerLevel(Level level) {
        logger.setLevel(level);
    }

    /** Returns the level of this reader's logger. */
    public Level getLoggerLevel() {
        return logger.getLevel();
    }

    /**
     * Reads a data set and optionally pre-processes it.
     *
     * @param path argument handed unchanged to {@link #read(String)} (presumably a file or
     *             directory path -- its meaning is defined by the {@code read} implementation)
     * @return whatever {@link #read(String)} returned
     * @throws IOException wrapping any exception raised while reading or pre-processing
     */
    public final Annotation parse(String path) throws IOException {
        try {
            Annotation corpus = read(path);
            if (preProcessSentences) {
                preProcessSentences(corpus);
            }
            return corpus;
        } catch (Exception e) {
            // Give the wrapper a message of its own; the real failure stays available as the cause.
            IOException wrapped = new IOException("Failed to read data set from: " + path);
            wrapped.initCause(e);
            throw wrapped;
        }
    }

    /**
     * Reads the data set. This base implementation reads nothing and returns {@code null}.
     *
     * @param path location of the data set (interpretation is up to the implementation)
     * @return the annotation that was read; {@code null} in this base implementation
     * @throws Exception declared so that implementations may propagate any failure
     */
    public Annotation read(String path) throws Exception {
        return null;
    }

    /** Joins the words of the given tokens with single spaces; {@code null} yields "". */
    private static String sentenceToString(List<CoreLabel> tokens) {
        StringBuilder sb = new StringBuilder();
        if (tokens != null) {
            boolean first = true;
            for (CoreLabel token : tokens) {
                if (!first) {
                    sb.append(" ");
                }
                sb.append(token.word());
                first = false;
            }
        }
        return sb.toString();
    }

    /**
     * Finds the syntactic head of a mention and stores it on the mention.
     *
     * <p>If the mention already has a head position (anything other than -1) that position is
     * returned untouched. Otherwise {@link #findSyntacticHead} is tried; when it fails or finds
     * nothing, the last token of the mention extent is used instead.
     *
     * @param ent the mention to update
     * @param root parse tree of the sentence containing the mention
     * @param tokens tokens of that sentence
     * @param calculateHeadSpan if true, also sets the head span to {@code [head, head + 1)}
     * @return the head token position that is now stored on the mention
     */
    public int assignSyntacticHead(EntityMention ent, Tree root, List<CoreLabel> tokens, boolean calculateHeadSpan) {
        if (ent.getSyntacticHeadTokenPosition() != -1) {
            return ent.getSyntacticHeadTokenPosition();
        }
        // Only render the tree and the sentence when the message will actually be published.
        if (logger.isLoggable(Level.FINEST)) {
            logger.finest("Finding syntactic head for entity: " + ent + " in tree: " + root.toString());
            logger.finest("Flat sentence is: " + tokens);
        }
        Tree headTree = null;
        try {
            headTree = findSyntacticHead(ent, root, tokens);
        } catch (AssertionError e) {
            // Assertion failures inside the head search are treated like any other failure.
            logHeadSearchFailure(tokens, e);
        } catch (Exception e) {
            logHeadSearchFailure(tokens, e);
        }
        // Right-most token of the extent is the fallback head.
        int headPosition = ent.getExtentTokenEnd() - 1;
        if (headTree != null) {
            headPosition = ((CoreLabel) headTree.label()).get(CoreAnnotations.BeginIndexAnnotation.class);
        } else {
            logger.fine("WARNING: failed to find syntactic head for entity: " + ent + " in tree: " + root);
            logger.fine("Fallback strategy: will set head to last token in mention: " + tokens.get(headPosition));
        }
        ent.setHeadTokenPosition(headPosition);
        if (calculateHeadSpan) {
            ent.setHeadTokenSpan(new Span(headPosition, headPosition + 1));
        }
        return headPosition;
    }

    /** Reports a failed head search, including the stack trace, through the class logger. */
    private void logHeadSearchFailure(List<CoreLabel> tokens, Throwable failure) {
        logger.log(Level.SEVERE,
                "WARNING: failed to parse sentence. Will continue with the right-most head heuristic: "
                        + sentenceToString(tokens),
                failure);
    }

    /**
     * Prepares every sentence of the corpus for machine reading.
     *
     * <p>If a processor is set and the first sentence has no tree annotation, the whole corpus is
     * annotated with the processor first. Then, for each sentence, the tree labels are converted to
     * {@link CoreLabel}s, index spans are generated when forced or when the root label has neither a
     * begin nor an end index, and a syntactic head is assigned to every entity mention.
     *
     * @param corpus the annotation to pre-process; must carry a sentences annotation
     * @throws RuntimeException if a sentence has no parse tree
     */
    public void preProcessSentences(Annotation corpus) {
        logger.severe("GenericDataSetReader: Started pre-processing the corpus...");
        if (processor != null) {
            List<CoreMap> existing = corpus.get(CoreAnnotations.SentencesAnnotation.class);
            boolean needsParsing = existing.size() > 0
                    && !existing.get(0).containsKey(TreeCoreAnnotations.TreeAnnotation.class);
            if (needsParsing) {
                logger.info("Annotating dataset with " + processor);
                processor.annotate(corpus);
            } else {
                logger.info("Found existing syntactic annotations. Will not use the NLP processor.");
            }
        }
        // Re-read after a possible annotate() call above.
        List<CoreMap> sentences = corpus.get(CoreAnnotations.SentencesAnnotation.class);
        logger.fine("Extracted " + sentences.size() + " sentences.");
        for (CoreMap sentence : sentences) {
            List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
            logger.fine("Processing sentence " + tokens);
            Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
            if (tree == null) {
                throw new RuntimeException("ERROR: MR requires full syntactic analysis!");
            }
            convertToCoreLabels(tree);
            CoreLabel rootLabel = (CoreLabel) tree.label();
            boolean hasAnyIndex = rootLabel.containsKey(CoreAnnotations.BeginIndexAnnotation.class)
                    || rootLabel.containsKey(CoreAnnotations.EndIndexAnnotation.class);
            if (forceGenerationOfIndexSpans || !hasAnyIndex) {
                tree.indexSpans(0);
                logger.fine("Index spans were generated.");
            } else {
                logger.fine("Index spans were NOT generated.");
            }
            if (fineEnabled()) {
                logger.fine("Parse tree using CoreLabel:\n" + tree.pennString());
            }
            List<EntityMention> mentions = sentence.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
            if (mentions != null) {
                for (EntityMention mention : mentions) {
                    logger.fine("Finding head for entity: " + mention);
                    // The assignment mutates the mention, so it must not live inside a log argument.
                    int headIndex = assignSyntacticHead(mention, tree, tokens, calculateHeadSpan);
                    logger.fine("Syntactic head of mention \"" + mention + "\" is: " + tokens.get(headIndex).word());
                    assert mention.getExtent() != null;
                    assert mention.getHead() != null;
                    assert mention.getSyntacticHeadTokenPosition() >= 0;
                }
            }
        }
        logger.severe("GenericDataSetReader: Pre-processing complete.");
    }

    /**
     * Recursively replaces every label in the tree that is not a {@link CoreLabel} with a new
     * {@code CoreLabel} carrying the same value. Labels that already are core labels are kept.
     */
    public static void convertToCoreLabels(Tree tree) {
        Label current = tree.label();
        if (!(current instanceof CoreLabel)) {
            CoreLabel replacement = new CoreLabel();
            replacement.setValue(current.value());
            tree.setLabel(replacement);
        }
        for (Tree child : tree.children()) {
            convertToCoreLabels(child);
        }
    }

    /** Renders the tree into a string via {@code toStringBuilder(..., true)}. */
    private static String printTree(Tree tree) {
        return tree.toStringBuilder(new StringBuilder(), true).toString();
    }

    /** True when FINE messages would be published, i.e. when building them is worth the cost. */
    private boolean fineEnabled() {
        return logger.isLoggable(Level.FINE);
    }

    /**
     * Returns the head terminal of the tree; if the head finder yields none, the last leaf;
     * if the tree has no leaves either, the tree itself.
     */
    private Tree safeHead(Tree top) {
        Tree head = top.headTerminal(headFinder);
        if (head != null) {
            return head;
        }
        List<Tree> leaves = top.getLeaves();
        return leaves.size() > 0 ? leaves.get(leaves.size() - 1) : top;
    }

    /**
     * Finds the tree node that is the syntactic head of the mention.
     *
     * <p>When a subtree of {@code root} spans exactly the mention extent, its head is returned.
     * Otherwise the mention tokens (with "-" tokens dropped) are wrapped as "It was ... ." and
     * parsed on their own, with a parser constraint covering the mention tokens; the head found in
     * that local parse is then mapped back to a leaf of {@code root}.
     *
     * @param ent the mention
     * @param root parse tree of the full sentence, with index spans
     * @param tokens tokens of the full sentence
     * @return the head leaf, or {@code null} if the head of the local parse cannot be located in
     *         {@code root}
     */
    public Tree findSyntacticHead(EntityMention ent, Tree root, List<CoreLabel> tokens) {
        if (!useNewHeadFinder) {
            return originalFindSyntacticHead(ent, root, tokens);
        }
        logger.fine("Searching for tree matching " + ent);
        Tree exactMatch = findTreeWithSpan(root, ent.getExtentTokenStart(), ent.getExtentTokenEnd());
        if (exactMatch != null) {
            if (fineEnabled()) {
                logger.fine("Mention \"" + ent + "\" mapped to tree: " + printTree(exactMatch));
            }
            return safeHead(exactMatch);
        }

        // No constituent matches the extent: parse the mention inside a tiny carrier sentence.
        int droppedDashes = 0;
        List<CoreLabel> carrier = new ArrayList<CoreLabel>();
        carrier.add(initCoreLabel("It"));
        carrier.add(initCoreLabel("was"));
        for (int pos = ent.getExtentTokenStart(); pos < ent.getExtentTokenEnd(); pos++) {
            CoreLabel token = tokens.get(pos);
            if ("-".equals(token.word())) {
                droppedDashes++;
            } else {
                carrier.add(token);
            }
        }
        carrier.add(initCoreLabel("."));

        // The constraint covers everything between the two-word prefix and the final period.
        ParserConstraint constraint = new ParserConstraint();
        constraint.start = 2;
        constraint.end = carrier.size() - 1;
        constraint.state = CONSTRAINT_STATE;

        Tree localParse = parse(carrier, Collections.singletonList(constraint));
        if (fineEnabled()) {
            logger.fine("No exact match found. Local parse:\n" + localParse.pennString());
        }
        convertToCoreLabels(localParse);
        // Shift by the two prefix tokens so local indices line up with sentence positions.
        localParse.indexSpans(ent.getExtentTokenStart() - 2);
        Tree localHead = safeHead(findPartialSpan(localParse, ent.getExtentTokenStart()));
        logger.fine("Head is: " + localHead);
        assert localHead != null;

        CoreLabel headLabel = (CoreLabel) localHead.label();
        int headBegin = headLabel.get(CoreAnnotations.BeginIndexAnnotation.class);
        // Dropped dashes shift positions, so allow the head to sit up to that many tokens later.
        Tree head = funkyFindLeafWithApproximateSpan(root, headLabel.value(), headBegin, droppedDashes);
        if (head != null) {
            logger.fine("Chosen head: " + head);
        }
        return head;
    }

    /**
     * Descends from {@code node} to the first node whose begin index equals {@code start},
     * following at each level the child whose span contains {@code start}.
     *
     * @throws RuntimeException if no child span contains {@code start}
     */
    private Tree findPartialSpan(Tree node, int start) {
        int nodeBegin = ((CoreLabel) node.label()).get(CoreAnnotations.BeginIndexAnnotation.class);
        if (nodeBegin == start) {
            if (fineEnabled()) {
                logger.fine("findPartialSpan: Returning " + node);
            }
            return node;
        }
        for (Tree child : node.children()) {
            CoreLabel childLabel = (CoreLabel) child.label();
            int childBegin = childLabel.get(CoreAnnotations.BeginIndexAnnotation.class);
            int childEnd = childLabel.get(CoreAnnotations.EndIndexAnnotation.class);
            if (childBegin <= start && childEnd > start) {
                return findPartialSpan(child, start);
            }
        }
        throw new RuntimeException("Shouldn't happen: " + start + " " + node);
    }

    /**
     * Returns the first leaf of {@code root} whose value equals {@code token} and whose begin index
     * lies in {@code [index, index + approximateness]}; logs a warning and returns {@code null}
     * when there is none.
     */
    private Tree funkyFindLeafWithApproximateSpan(Tree root, String token, int index, int approximateness) {
        if (fineEnabled()) {
            logger.fine("Looking for " + token + " at pos " + index + " plus upto " + approximateness + " in tree: " + root.pennString());
        }
        List<Tree> leaves = root.getLeaves();
        for (Tree leaf : leaves) {
            int leafBegin = ((CoreLabel) leaf.label()).get(CoreAnnotations.BeginIndexAnnotation.class);
            if (token.equals(leaf.value()) && leafBegin >= index && leafBegin <= index + approximateness) {
                return leaf;
            }
        }
        logger.severe("GenericDataSetReader: WARNING: Failed to find head token");
        logger.severe("  when looking for " + token + " at pos " + index + " plus upto " + approximateness + " in tree: " + root.pennString());
        return null;
    }

    /**
     * Earlier head-finding strategy, used when {@link #setUseNewHeadFinder(boolean)} is false.
     *
     * <p>Like {@link #findSyntacticHead}, an exact span match wins. Otherwise the mention tokens
     * are parsed on their own (no carrier sentence, no constraints) and the span of the resulting
     * head is looked up in {@code root}.
     *
     * @return the node of {@code root} matching the head span; asserted non-null when assertions
     *         are enabled, otherwise possibly {@code null}
     */
    public Tree originalFindSyntacticHead(EntityMention ent, Tree root, List<CoreLabel> tokens) {
        logger.fine("Searching for tree matching " + ent);
        Tree exactMatch = findTreeWithSpan(root, ent.getExtentTokenStart(), ent.getExtentTokenEnd());
        if (exactMatch != null) {
            if (fineEnabled()) {
                logger.fine("Mention \"" + ent + "\" mapped to tree: " + printTree(exactMatch));
            }
            return safeHead(exactMatch);
        }

        List<CoreLabel> extent = new ArrayList<CoreLabel>();
        for (int pos = ent.getExtentTokenStart(); pos < ent.getExtentTokenEnd(); pos++) {
            extent.add(tokens.get(pos));
        }
        Tree localParse = parse(extent);
        if (fineEnabled()) {
            logger.fine("No exact match found. Local parse:\n" + localParse.pennString());
        }
        convertToCoreLabels(localParse);
        localParse.indexSpans(ent.getExtentTokenStart());
        Tree localHead = safeHead(localParse);
        assert localHead != null;

        CoreLabel headLabel = (CoreLabel) localHead.label();
        int headBegin = headLabel.get(CoreAnnotations.BeginIndexAnnotation.class);
        int headEnd = headLabel.get(CoreAnnotations.EndIndexAnnotation.class);
        Tree head = findTreeWithSpan(root, headBegin, headEnd);
        assert head != null;
        return head;
    }

    /** Creates a core label whose word and text annotation are both set to {@code token}. */
    private static CoreLabel initCoreLabel(String token) {
        CoreLabel label = new CoreLabel();
        label.setWord(token);
        label.set(CoreAnnotations.TextAnnotation.class, token);
        return label;
    }

    /** Parses the given words after wrapping each one in a fresh core label. */
    protected Tree parseStrings(List<String> words) {
        List<CoreLabel> labels = new ArrayList<CoreLabel>();
        for (String word : words) {
            labels.add(initCoreLabel(word));
        }
        return parse(labels);
    }

    /** Parses the given tokens without any parser constraints. */
    protected Tree parse(List<CoreLabel> tokens) {
        return parse(tokens, null);
    }

    /**
     * Parses the given tokens as a single sentence using {@link #getParser()}.
     *
     * @param tokens tokens of the sentence
     * @param constraints parser constraints to attach to the sentence, or {@code null}
     * @return the tree annotation the parser left on the sentence
     */
    protected Tree parse(List<CoreLabel> tokens, List<ParserConstraint> constraints) {
        Annotation sentence = new Annotation("");
        sentence.set(CoreAnnotations.TokensAnnotation.class, tokens);
        sentence.set(ParserAnnotations$ConstraintAnnotation.class, constraints);

        // The annotator works on documents, so wrap the sentence in a one-sentence document.
        Annotation document = new Annotation("");
        List<CoreMap> sentences = new ArrayList<CoreMap>();
        sentences.add(sentence);
        document.set(CoreAnnotations.SentencesAnnotation.class, sentences);

        getParser().annotate(document);
        CoreMap parsed = document.get(CoreAnnotations.SentencesAnnotation.class).get(0);
        return parsed.get(TreeCoreAnnotations.TreeAnnotation.class);
    }

    /**
     * Depth-first search for the first node whose begin/end indices equal {@code start}/{@code end}.
     * Subtrees whose indexed span cannot contain the target are skipped; nodes lacking either index
     * are not tested themselves but their children are still searched.
     *
     * @return the matching node, or {@code null} if there is none
     */
    private static Tree findTreeWithSpan(Tree tree, int start, int end) {
        CoreLabel label = (CoreLabel) tree.label();
        if (label != null
                && label.has(CoreAnnotations.BeginIndexAnnotation.class)
                && label.has(CoreAnnotations.EndIndexAnnotation.class)) {
            int nodeBegin = label.get(CoreAnnotations.BeginIndexAnnotation.class);
            int nodeEnd = label.get(CoreAnnotations.EndIndexAnnotation.class);
            if (start == nodeBegin && end == nodeEnd) {
                return tree;
            }
            if (end < nodeBegin || start >= nodeEnd) {
                return null;
            }
        }
        for (Tree child : tree.children()) {
            if (child == null) {
                continue;
            }
            Tree match = findTreeWithSpan(child, start, end);
            if (match != null) {
                return match;
            }
        }
        return null;
    }
}
