package edu.stanford.nlp.international.spanish.pipeline;

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotator;
import edu.stanford.nlp.stats.Counters;
import edu.stanford.nlp.stats.TwoDimensionalCounter;
import edu.stanford.nlp.trees.LabeledScoredTreeFactory;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeFactory;
import edu.stanford.nlp.trees.TreeNormalizer;
import edu.stanford.nlp.trees.TreeReader;
import edu.stanford.nlp.trees.Trees;
import edu.stanford.nlp.trees.international.spanish.SpanishSplitTreeNormalizer;
import edu.stanford.nlp.trees.international.spanish.SpanishTreeNormalizer;
import edu.stanford.nlp.trees.international.spanish.SpanishTreebankLanguagePack;
import edu.stanford.nlp.trees.international.spanish.SpanishXMLTreeReaderFactory;
import edu.stanford.nlp.trees.tregex.TregexMatcher;
import edu.stanford.nlp.trees.tregex.TregexPattern;
import edu.stanford.nlp.util.CollectionUtils;
import edu.stanford.nlp.util.Factory;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.PropertiesUtils;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.concurrent.MulticoreWrapper;
import edu.stanford.nlp.util.concurrent.ThreadsafeProcessor;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.ObjectInputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.ExecutionException;
import java.util.function.Predicate;

/* loaded from: input_file:edu/stanford/nlp/international/spanish/pipeline/AnCoraProcessor.class */
public class AnCoraProcessor {
    /** Input files handed to the constructor; each one is read by {@code loadTrees()}. */
    private final List<File> inputFiles;
    /** Options handed to the constructor; read for the {@code unigramTagger} path and the NER flag. */
    private final Properties options;
    /**
     * Counter keyed by (word, tag). It is either deserialized from the {@code unigramTagger} option
     * or starts empty, and is then incremented with counts from every file that is loaded.
     */
    private final TwoDimensionalCounter<String, String> unigramTagger;
    /** Logging channel for this class (used by {@code main} to print the usage text). */
    private static Redwood.RedwoodChannels log = Redwood.channels(AnCoraProcessor.class);
    /** Normalizer applied to both halves produced by {@link #split(Tree, Tree)}. */
    private static TreeNormalizer splittingNormalizer = new SpanishSplitTreeNormalizer();
    /** Tree factory passed to {@link #splittingNormalizer} when normalizing split halves. */
    private static TreeFactory splittingTreeFactory = new LabeledScoredTreeFactory();
    /**
     * Tregex pattern used by {@link #findSplitPoint(Tree)}: an {@code fp} node that is the immediate
     * left sister of a node whose label does not start with {@code f}, and whose parent is labeled
     * {@code S} or {@code sentence}.
     */
    private static final TregexPattern pSplitPoint = TregexPattern.compile("fp $+ /^[^f]/ > S|sentence");
    /** Command-line help text logged by {@code main}. */
    private static final String usage = String.format("Usage: java %s [OPTIONS] file(s)%n%n", AnCoraProcessor.class.getName()) + "Options:\n    -unigramTagger <tagger_path>: Path to a serialized `TwoDimensionalCounter` which\n        should be used for unigram tagging in multi-word token expansion. If this option\n        is not provided, a unigram tagger will be built from the provided corpus data.\n        (This option is useful if you are processing splits of the corpus separately but\n        want each step to benefit from a complete tagger.)\n    -ner: Add NER-specific information to trees\n";
    /**
     * Option definitions passed to {@code StringUtils.argsToProperties}; filled in by the static
     * initializer. NOTE(review): the values look like the number of arguments each flag takes --
     * confirm against {@code argsToProperties}.
     */
    private static final Map<String, Integer> argOptionDefs = new HashMap();

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:edu/stanford/nlp/international/spanish/pipeline/AnCoraProcessor$LeftOfFilter.class */
    /**
     * Predicate that accepts the nodes kept for the left half of a split.
     *
     * <p>A node is accepted when it is the reference node itself, when it dominates or is dominated
     * by the reference node, or when the right edge of its rightmost leaf does not extend past the
     * left edge of the reference node (both edges measured relative to the stored root).
     */
    public static class LeftOfFilter implements Predicate<Tree>, Serializable {
        private static final long serialVersionUID = -5146948439247427344L;
        private Tree reference;
        private Tree root;

        /**
         * @param referenceNode node that marks the split position
         * @param treeRoot root of the tree containing {@code referenceNode}
         */
        private LeftOfFilter(Tree referenceNode, Tree treeRoot) {
            this.reference = referenceNode;
            this.root = treeRoot;
        }

        /**
         * Decides whether {@code tree} is accepted by this filter.
         *
         * @param tree candidate node
         * @return {@code true} if the node is accepted
         */
        @Override // java.util.function.Predicate
        public boolean test(Tree tree) {
            // The reference node and everything on its ancestor/descendant line is always accepted.
            if (tree == this.reference) {
                return true;
            }
            if (tree.dominates(this.reference)) {
                return true;
            }
            if (this.reference.dominates(tree)) {
                return true;
            }
            // Otherwise compare span edges: the candidate must end at or before the reference start.
            int candidateRightEdge = Trees.rightEdge(getRightmostDescendant(tree), this.root);
            int referenceLeftEdge = Trees.leftEdge(this.reference, this.root);
            return candidateRightEdge <= referenceLeftEdge;
        }

        /** Walks down the last child at every level until a leaf is reached. */
        private Tree getRightmostDescendant(Tree start) {
            Tree node = start;
            while (!node.isLeaf()) {
                Tree[] kids = node.children();
                node = kids[kids.length - 1];
            }
            return node;
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:edu/stanford/nlp/international/spanish/pipeline/AnCoraProcessor$MultiWordProcessor.class */
    /**
     * Worker that fixes and expands multi-word tokens in batches of trees.
     *
     * <p>Each instance creates its own {@link TreeNormalizer} from the supplied factory, and
     * {@link #newInstance()} builds a fresh instance from the same factory, tree factory and NER
     * flag. The unigram tagger of the enclosing {@code AnCoraProcessor} is read during processing.
     */
    public class MultiWordProcessor implements ThreadsafeProcessor<Collection<Tree>, Collection<Tree>> {
        private final TreeNormalizer tn;
        private final Factory<TreeNormalizer> tnf;
        private final TreeFactory tf;
        private final boolean ner;

        /**
         * @param factory source of the tree normalizer used by this instance (and by copies)
         * @param treeFactory tree factory passed to the expander and the normalizer
         * @param z NER flag forwarded to {@code MultiWordPreprocessor.traverseAndFix}
         */
        public MultiWordProcessor(Factory<TreeNormalizer> factory, TreeFactory treeFactory, boolean z) {
            this.tnf = factory;
            this.tn = factory.create();
            this.tf = treeFactory;
            this.ner = z;
        }

        /**
         * Runs the multi-word pipeline on every tree of the batch: preprocessing with the unigram
         * tagger, phrase expansion, then whole-tree normalization.
         *
         * @param collection batch of trees to process
         * @return the processed trees, in the iteration order of the input
         */
        @Override // edu.stanford.nlp.util.concurrent.ThreadsafeProcessor
        public Collection<Tree> process(Collection<Tree> collection) {
            List<Tree> processed = new ArrayList<>(collection.size());
            MultiWordTreeExpander expander = new MultiWordTreeExpander();
            for (Tree tree : collection) {
                MultiWordPreprocessor.traverseAndFix(tree, null, AnCoraProcessor.this.unigramTagger, this.ner);
                Tree expanded = expander.expandPhrases(tree, this.tn, this.tf);
                processed.add(this.tn.normalizeWholeTree(expanded, this.tf));
            }
            return processed;
        }

        /**
         * Creates an independent processor with its own normalizer.
         *
         * <p>The decompiler had renamed this method to {@code newInstance2}, which left the
         * {@code ThreadsafeProcessor} contract unimplemented; the original name is restored here.
         *
         * @return a new processor sharing this one's factory, tree factory and NER flag
         */
        @Override // edu.stanford.nlp.util.concurrent.ThreadsafeProcessor
        public ThreadsafeProcessor<Collection<Tree>, Collection<Tree>> newInstance() {
            return new MultiWordProcessor(this.tnf, this.tf, this.ner);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:edu/stanford/nlp/international/spanish/pipeline/AnCoraProcessor$RightOfExclusiveFilter.class */
    /**
     * Predicate that accepts the nodes kept for the right half of a split, excluding the split
     * node itself.
     *
     * <p>On construction the filter locates the first leaf that follows the split node. A node is
     * then accepted when it dominates that leaf, or when the right edge of its leftmost leaf lies
     * past the left edge of that leaf (both edges measured relative to the stored root).
     */
    public static class RightOfExclusiveFilter implements Predicate<Tree>, Serializable {
        private static final long serialVersionUID = 8283161954004080591L;
        private Tree root;
        private Tree firstToKeep;

        /**
         * @param splitNode node after which the kept material starts
         * @param treeRoot root of the tree containing {@code splitNode}
         */
        private RightOfExclusiveFilter(Tree splitNode, Tree treeRoot) {
            this.root = treeRoot;
            this.firstToKeep = getFollowingTerminal(splitNode, treeRoot);
        }

        /**
         * Decides whether {@code tree} is accepted by this filter.
         *
         * @param tree candidate node
         * @return {@code true} if the node is accepted
         */
        @Override // java.util.function.Predicate
        public boolean test(Tree tree) {
            if (tree.dominates(this.firstToKeep)) {
                return true;
            }
            int candidateEdge = Trees.rightEdge(getLeftmostDescendant(tree), this.root);
            int keepEdge = Trees.leftEdge(this.firstToKeep, this.root);
            return candidateEdge > keepEdge;
        }

        /**
         * Finds the leftmost leaf under the nearest node to the right of {@code node}.
         *
         * @return that leaf, or {@code null} when nothing lies to the right of {@code node}
         */
        private Tree getFollowingTerminal(Tree node, Tree treeRoot) {
            Tree next = getRightSiblingOrRightAncestor(node, treeRoot);
            return next == null ? null : getLeftmostDescendant(next);
        }

        /**
         * Climbs from {@code node} towards {@code treeRoot} until some node on the path has a right
         * sibling, and returns that sibling; returns {@code null} if the climb runs out of parents.
         */
        private Tree getRightSiblingOrRightAncestor(Tree node, Tree treeRoot) {
            Tree current = node;
            while (true) {
                Tree parent = current.parent(treeRoot);
                if (parent == null) {
                    return null;
                }
                int position = parent.objectIndexOf(current);
                if (position < parent.numChildren() - 1) {
                    return parent.getChild(position + 1);
                }
                // Last child of its parent: keep looking one level higher.
                current = parent;
            }
        }

        /** Walks down the first child at every level until a leaf is reached. */
        private Tree getLeftmostDescendant(Tree start) {
            Tree node = start;
            while (!node.isLeaf()) {
                node = node.children()[0];
            }
            return node;
        }
    }

    public AnCoraProcessor(List<File> list, Properties properties) throws IOException, ClassNotFoundException {
        this.inputFiles = list;
        this.options = properties;
        if (properties.containsKey("unigramTagger")) {
            this.unigramTagger = (TwoDimensionalCounter) new ObjectInputStream(new FileInputStream(properties.getProperty("unigramTagger"))).readObject();
        } else {
            this.unigramTagger = new TwoDimensionalCounter<>();
        }
    }

    /**
     * Loads all trees from the input files and then fixes their multi-word tokens.
     *
     * @return the processed trees
     */
    public List<Tree> process() throws InterruptedException, IOException, ExecutionException {
        List<Tree> loadedTrees = loadTrees();
        return fixMultiWordTokens(loadedTrees);
    }

    /**
     * Reads every input file, collecting its trees and folding its tag counts into the unigram
     * tagger of this processor.
     *
     * @return all trees from all input files, in file order
     */
    private List<Tree> loadTrees() throws InterruptedException, IOException, ExecutionException {
        boolean nerEnabled = PropertiesUtils.getBool(this.options, Annotator.STANFORD_NER, false);
        String encoding = new SpanishTreebankLanguagePack().getEncoding();
        SpanishXMLTreeReaderFactory readerFactory = new SpanishXMLTreeReaderFactory(true, true, nerEnabled, false);

        List<Tree> allTrees = new ArrayList<>();
        for (File inputFile : this.inputFiles) {
            Pair<TwoDimensionalCounter<String, String>, List<Tree>> fileResult = processTreeFile(inputFile, readerFactory, encoding);
            // Merge this file's (word, tag) counts into the shared tagger before keeping its trees.
            Counters.addInPlace(this.unigramTagger, fileResult.first());
            allTrees.addAll(fileResult.second());
        }
        return allTrees;
    }

    /**
     * Reads all trees from one file, splitting each tree at every split point found by
     * {@link #findSplitPoint(Tree)}, and counts (word, tag) pairs of the resulting trees.
     *
     * <p>The file streams and the tree reader are closed on every path. I/O failures are propagated
     * to the caller instead of being printed and turned into a {@code null} result, which the
     * caller would dereference.
     *
     * @param file file to read
     * @param spanishXMLTreeReaderFactory factory that builds the tree reader for the file
     * @param str name of the character encoding used to decode the file
     * @return a pair of the tag counts gathered from this file and the trees read from it
     * @throws IOException if the file cannot be opened, decoded or read
     */
    private static Pair<TwoDimensionalCounter<String, String>, List<Tree>> processTreeFile(File file, SpanishXMLTreeReaderFactory spanishXMLTreeReaderFactory, String str) throws IOException {
        TwoDimensionalCounter<String, String> tagger = new TwoDimensionalCounter<>();
        List<Tree> trees = new ArrayList<>();
        try (FileInputStream fileIn = new FileInputStream(file);
             BufferedReader reader = new BufferedReader(new InputStreamReader(fileIn, str));
             TreeReader treeReader = spanishXMLTreeReaderFactory.newTreeReader(file.getPath(), reader)) {
            Tree remaining;
            while ((remaining = treeReader.readTree()) != null) {
                // A tree may contain several split points: peel off the left part repeatedly
                // until no further split point is found in what remains.
                Tree splitPoint;
                do {
                    splitPoint = findSplitPoint(remaining);
                    Pair<Tree, Tree> halves = split(remaining, splitPoint);
                    Tree finished = halves.first();
                    remaining = halves.second();
                    trees.add(finished);
                    updateTagger(tagger, finished);
                } while (splitPoint != null);
            }
        }
        return new Pair<>(tagger, trees);
    }

    /**
     * Adds one count per (word, tag) pair of the tree's tagged yield, skipping tokens tagged with
     * {@code SpanishTreeNormalizer.MW_TAG}.
     *
     * @param twoDimensionalCounter counter to increment
     * @param tree tree whose tagged tokens are counted
     */
    private static void updateTagger(TwoDimensionalCounter<String, String> twoDimensionalCounter, Tree tree) {
        for (CoreLabel token : tree.taggedLabeledYield()) {
            boolean isMultiWord = token.tag().equals(SpanishTreeNormalizer.MW_TAG);
            if (isMultiWord) {
                continue;
            }
            twoDimensionalCounter.incrementCount(token.word(), token.tag());
        }
    }

    /**
     * Splits a tree in two around a split point.
     *
     * @param tree tree to split
     * @param tree2 split point inside {@code tree}, or {@code null} for no split
     * @return the left and right halves, each normalized; when {@code tree2} is {@code null} the
     *     pair holds the unchanged tree and {@code null}
     */
    static Pair<Tree, Tree> split(Tree tree, Tree tree2) {
        if (tree2 == null) {
            return new Pair<>(tree, null);
        }
        // Left half first, then right half -- same evaluation order as a single nested expression.
        Tree leftPruned = tree.prune(new LeftOfFilter(tree2, tree));
        Tree leftHalf = splittingNormalizer.normalizeWholeTree(leftPruned, splittingTreeFactory);
        Tree rightPruned = tree.prune(new RightOfExclusiveFilter(tree2, tree));
        Tree rightHalf = splittingNormalizer.normalizeWholeTree(rightPruned, splittingTreeFactory);
        return new Pair<>(leftHalf, rightHalf);
    }

    /**
     * Looks for the first node of the tree that matches the split-point pattern.
     *
     * @param tree tree to search
     * @return the matched node, or {@code null} when the pattern does not match
     */
    static Tree findSplitPoint(Tree tree) {
        TregexMatcher splitMatcher = pSplitPoint.matcher(tree);
        return splitMatcher.find() ? splitMatcher.getMatch() : null;
    }

    /**
     * Runs {@code MultiWordProcessor} over the given trees using a multi-threaded wrapper.
     *
     * <p>The trees are partitioned into {@code availableProcessors * 20} folds; each fold is
     * submitted as one unit of work and results are collected as they become available (the
     * wrapper is created with result ordering disabled).
     *
     * @param list trees to process
     * @return the processed trees
     */
    private List<Tree> fixMultiWordTokens(List<Tree> list) throws InterruptedException, ExecutionException {
        Factory<TreeNormalizer> normalizerFactory = new Factory<TreeNormalizer>() {
            @Override // edu.stanford.nlp.util.Factory
            public TreeNormalizer create() {
                return new SpanishTreeNormalizer(true, false, false);
            }
        };
        TreeFactory treeFactory = new LabeledScoredTreeFactory();
        boolean nerEnabled = PropertiesUtils.getBool(this.options, Annotator.STANFORD_NER, false);
        MultiWordProcessor worker = new MultiWordProcessor(normalizerFactory, treeFactory, nerEnabled);

        int threadCount = Runtime.getRuntime().availableProcessors();
        MulticoreWrapper<Collection<Tree>, Collection<Tree>> wrapper = new MulticoreWrapper<>(threadCount, worker, false);

        List<Tree> results = new ArrayList<>();
        for (Collection<Tree> fold : CollectionUtils.partitionIntoFolds(list, threadCount * 20)) {
            wrapper.put(fold);
            // Drain whatever has finished so far before submitting the next fold.
            while (wrapper.peek()) {
                results.addAll(wrapper.poll());
            }
        }

        // Wait for the outstanding work, then collect the remaining results.
        wrapper.join();
        while (wrapper.peek()) {
            results.addAll(wrapper.poll());
        }
        return results;
    }

    /**
     * Command-line entry point: processes the given files and prints one tree per line to
     * standard output.
     *
     * <p>When no arguments or no input files are given, the usage text is logged and the method
     * returns without processing anything.
     *
     * @param strArr command-line arguments: options followed by input file paths
     */
    public static void main(String[] strArr) throws InterruptedException, IOException, ExecutionException, ClassNotFoundException {
        if (strArr.length < 1) {
            log.info(usage);
            return;
        }
        Properties argsToProperties = StringUtils.argsToProperties(strArr, argOptionDefs);
        // Non-option arguments are read from the empty-string key as a space-separated list.
        // NOTE(review): the key is presumably absent when only options were supplied -- confirm
        // against StringUtils.argsToProperties; the null check below covers that case.
        String fileArgs = argsToProperties.getProperty("");
        if (fileArgs == null || fileArgs.trim().isEmpty()) {
            log.info(usage);
            return;
        }
        List<File> inputFiles = new ArrayList<>();
        for (String path : fileArgs.split(" ")) {
            inputFiles.add(new File(path));
        }
        for (Tree tree : new AnCoraProcessor(inputFiles, argsToProperties).process()) {
            System.out.println(tree);
        }
    }

    // Registers the command-line options understood by main().
    // NOTE(review): the values appear to be the number of arguments each option consumes
    // (a path for "unigramTagger", none for the NER flag) -- confirm against argsToProperties.
    static {
        argOptionDefs.put("unigramTagger", 1);
        argOptionDefs.put(Annotator.STANFORD_NER, 0);
    }
}
