package edu.stanford.nlp.parser.lexparser;

import edu.stanford.nlp.io.NumberRangeFileFilter;
import edu.stanford.nlp.io.NumberRangesFileFilter;
import edu.stanford.nlp.ling.CategoryWordTag;
import edu.stanford.nlp.ling.CategoryWordTagFactory;
import edu.stanford.nlp.ling.StringLabelFactory;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.parser.lexparser.CNFTransformers;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.trees.CompositeTreeTransformer;
import edu.stanford.nlp.trees.CompositeTreebank;
import edu.stanford.nlp.trees.DiskTreebank;
import edu.stanford.nlp.trees.HeadFinder;
import edu.stanford.nlp.trees.LabeledScoredTreeFactory;
import edu.stanford.nlp.trees.LeftHeadFinder;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeFactory;
import edu.stanford.nlp.trees.TreeLeafLabelTransformer;
import edu.stanford.nlp.trees.TreeTransformer;
import edu.stanford.nlp.trees.Treebank;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.util.Triple;
import java.io.FileFilter;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;

/* loaded from: input_file:edu/stanford/nlp/parser/lexparser/TreeAnnotatorAndBinarizer.class */
public class TreeAnnotatorAndBinarizer implements TreeTransformer {
    /** Factory building nodes labelled via {@link CategoryWordTagFactory}; used by {@link #addRoot(Tree)}. */
    private final TreeFactory tf;
    /** Language pack taken from the parser params; supplies the start symbol used by {@link #addRoot(Tree)}. */
    private final TreebankLanguagePack tlp;
    /** First transformation stage: a {@code TreeAnnotator} or a {@link TreeNullAnnotator}, chosen in the constructor. */
    private final TreeTransformer annotator;
    /** Binarization stage applied after the boundary subtree has been added. */
    private final TreeBinarizer binarizer;
    /** Optional stage run right after annotation; {@code null} unless {@code trainOptions.selectivePostSplit} was set at construction. */
    private final PostSplitter postSplitter;
    /** When true, the binarized tree is additionally passed through {@code CNFTransformers.ToCNFTransformer}. */
    private final boolean forceCNF;
    /** The {@code trainOptions} object of the {@code Options} given at construction (shared, not copied). */
    private final TrainOptions trainOptions;
    /** Counts of local trees seen after annotation; {@code null} unless {@code trainOptions.printAnnotatedRuleCounts} was set at construction. */
    private final ClassicCounter<Tree> annotatedRuleCounts;
    /** Counts of non-leaf label values seen after annotation; {@code null} unless {@code trainOptions.printAnnotatedStateCounts} was set at construction. */
    private final ClassicCounter<String> annotatedStateCounts;

    /* loaded from: input_file:edu/stanford/nlp/parser/lexparser/TreeAnnotatorAndBinarizer$TreeNullAnnotator.class */
    /**
     * Annotator that adds no extra annotation. It copies the input tree and relabels the copy:
     * leaves receive a {@link Word} label and every internal node receives a
     * {@link CategoryWordTag} whose word and tag are taken from its head child.
     */
    static class TreeNullAnnotator implements TreeTransformer {
        private final TreeFactory tf = new LabeledScoredTreeFactory(new CategoryWordTagFactory());
        private final HeadFinder hf;

        /**
         * @param headFinder head finder used to pick the head child of each internal node
         */
        public TreeNullAnnotator(HeadFinder headFinder) {
            this.hf = headFinder;
        }

        /**
         * Relabels a skeleton copy of {@code tree}; the argument itself is not relabelled.
         *
         * @param tree the tree to copy and relabel
         * @return the relabelled copy
         */
        @Override // edu.stanford.nlp.trees.TreeTransformer
        public Tree transformTree(Tree tree) {
            Tree skeleton = tree.treeSkeletonCopy(tf);
            return transformTreeHelper(skeleton);
        }

        /**
         * Relabels {@code node} and all of its descendants in place, children first.
         *
         * @param node subtree to relabel; may be {@code null}
         * @return {@code node} itself (or {@code null} when given {@code null})
         */
        private Tree transformTreeHelper(Tree node) {
            if (node == null) {
                return null;
            }
            final String category = node.label().value();
            if (node.isLeaf()) {
                node.setLabel(new Word(category));
                return node;
            }

            // Children must be relabelled first: a non-leaf head child is expected to
            // already carry a CategoryWordTag when its parent is processed.
            for (Tree child : node.children()) {
                transformTreeHelper(child);
            }

            final Tree head = hf.determineHead(node);
            String headWord = null;
            String headTag = null;
            if (head == null) {
                // No head found: report it and leave word/tag as null.
                System.err.println("ERROR: null head for tree\n" + node.toString());
            } else if (head.isLeaf()) {
                // Preterminal: the node's own category acts as the tag of the word below it.
                headTag = category;
                headWord = head.label().value();
            } else {
                CategoryWordTag headLabel = (CategoryWordTag) head.label();
                headWord = headLabel.word();
                headTag = headLabel.tag();
            }
            node.setLabel(new CategoryWordTag(category, headWord, headTag));
            return node;
        }
    }

    /**
     * Convenience constructor: delegates to the seven-argument constructor, passing the result of
     * {@code treebankLangParserParams.headFinder()} as both the annotation and the binarization
     * head finder (the method is invoked once per argument).
     *
     * @param treebankLangParserParams language-specific parser parameters
     * @param z whether the transformed tree is additionally converted with {@code CNFTransformers.ToCNFTransformer}
     * @param z2 flag forwarded unchanged to the {@code TreeBinarizer} constructor
     * @param z3 when true a {@code TreeAnnotator} is used, otherwise a {@link TreeNullAnnotator}
     * @param options parser options; its {@code trainOptions} are retained by the new instance
     */
    public TreeAnnotatorAndBinarizer(TreebankLangParserParams treebankLangParserParams, boolean z, boolean z2, boolean z3, Options options) {
        this(treebankLangParserParams.headFinder(), treebankLangParserParams.headFinder(), treebankLangParserParams, z, z2, z3, options);
    }

    /**
     * Builds the annotate / post-split / binarize pipeline.
     *
     * @param headFinder head finder handed to the annotator
     * @param headFinder2 head finder handed to the {@code TreeBinarizer}
     * @param treebankLangParserParams language-specific parser parameters
     * @param z whether the transformed tree is additionally converted with {@code CNFTransformers.ToCNFTransformer}
     * @param z2 flag forwarded unchanged to the {@code TreeBinarizer} constructor
     * @param z3 when true a {@code TreeAnnotator} is used, otherwise a {@link TreeNullAnnotator}
     * @param options parser options; its {@code trainOptions} object is kept by reference
     */
    public TreeAnnotatorAndBinarizer(HeadFinder headFinder, HeadFinder headFinder2, TreebankLangParserParams treebankLangParserParams, boolean z, boolean z2, boolean z3, Options options) {
        this.trainOptions = options.trainOptions;

        this.annotator = z3
                ? new TreeAnnotator(headFinder, treebankLangParserParams, options)
                : new TreeNullAnnotator(headFinder);

        this.binarizer = new TreeBinarizer(
                headFinder2,
                treebankLangParserParams.treebankLanguagePack(),
                z2,
                this.trainOptions.markovFactor,
                this.trainOptions.markovOrder,
                this.trainOptions.compactGrammar() > 0,
                this.trainOptions.compactGrammar() > 1,
                this.trainOptions.HSEL_CUT,
                this.trainOptions.markFinalStates,
                this.trainOptions.simpleBinarizedLabels,
                this.trainOptions.noRebinarization);

        // The post-splitter exists only when selective post-splitting is enabled at construction time.
        this.postSplitter = this.trainOptions.selectivePostSplit
                ? new PostSplitter(treebankLangParserParams, options)
                : null;

        this.tf = new LabeledScoredTreeFactory(new CategoryWordTagFactory());
        this.tlp = treebankLangParserParams.treebankLanguagePack();
        this.forceCNF = z;

        // The counters are allocated only when the corresponding print option is on; otherwise they stay null.
        this.annotatedRuleCounts = this.trainOptions.printAnnotatedRuleCounts
                ? new ClassicCounter<Tree>()
                : null;
        this.annotatedStateCounts = this.trainOptions.printAnnotatedStateCounts
                ? new ClassicCounter<String>()
                : null;
    }

    /**
     * Delegates to the post-splitter's {@code dumpStats()} when
     * {@code trainOptions.selectivePostSplit} is set; otherwise does nothing.
     */
    public void dumpStats() {
        if (!trainOptions.selectivePostSplit) {
            return;
        }
        postSplitter.dumpStats();
    }

    /**
     * Forwards the flag to the underlying {@code TreeBinarizer}.
     *
     * @param z value passed to {@code TreeBinarizer.setDoSelectiveSplit}
     */
    public void setDoSelectiveSplit(boolean z) {
        this.binarizer.setDoSelectiveSplit(z);
    }

    /**
     * Relabels the root with the language pack's start symbol and appends a boundary
     * preterminal (tag {@code ".$$."} over word {@code ".$."}) as its last child.
     *
     * <p>NOTE(review): when {@code tree} is a leaf, a new parent node is created and modified
     * instead, but only the local reference is rebound, so the caller's leaf is left untouched
     * and the new parent is not returned. Confirm whether callers can ever pass a leaf.
     *
     * @param tree the tree to modify in place
     */
    public void addRoot(Tree tree) {
        final String boundaryWord = ".$.";
        final String boundaryTag = ".$$.";

        Tree root = tree;
        if (root.isLeaf()) {
            System.err.println("Warning: tree is leaf: " + root);
            root = tf.newTreeNode(tlp.startSymbol(), Collections.singletonList(root));
        }
        root.setLabel(new CategoryWordTag(tlp.startSymbol(), boundaryWord, boundaryTag));

        // Build the boundary preterminal: (".$$." ".$.")
        List<Tree> boundaryKids = new ArrayList<>();
        boundaryKids.add(tf.newLeaf(new Word(boundaryWord)));
        Tree boundaryPreterminal =
                tf.newTreeNode(new CategoryWordTag(boundaryTag, boundaryWord, boundaryTag), boundaryKids);

        // Append it after the existing children.
        List<Tree> kids = root.getChildrenAsList();
        kids.add(boundaryPreterminal);
        root.setChildren(kids);
    }

    /**
     * Runs the full pipeline on one tree: annotate, optionally post-split, optionally record
     * rule/state counts, add the boundary subtree, binarize, and optionally convert to CNF.
     *
     * <p>While {@code trainOptions.printTreeTransformations} is positive the intermediate trees
     * are printed via {@code TrainOptions.printTrainTree}, and that counter is decremented once
     * per call after the binarized tree has been printed.
     *
     * @param tree the tree to transform
     * @return the binarized (and, if {@code forceCNF} is set, CNF-converted) tree
     */
    @Override // edu.stanford.nlp.trees.TreeTransformer
    public Tree transformTree(Tree tree) {
        if (trainOptions.printTreeTransformations > 0) {
            TrainOptions.printTrainTree(null, "ORIGINAL TREE:", tree);
        }

        Tree annotated = annotator.transformTree(tree);
        if (trainOptions.selectivePostSplit) {
            annotated = postSplitter.transformTree(annotated);
        }
        if (trainOptions.printTreeTransformations > 0) {
            TrainOptions.printTrainTree(trainOptions.printAnnotatedPW, "ANNOTATED TREE:", annotated);
        }

        if (trainOptions.printAnnotatedRuleCounts) {
            // Count local trees on a copy whose labels come from StringLabelFactory.
            Tree stripped = annotated.deepCopy(new LabeledScoredTreeFactory(), new StringLabelFactory());
            for (Tree localTree : stripped.localTrees()) {
                annotatedRuleCounts.incrementCount(localTree);
            }
        }
        if (trainOptions.printAnnotatedStateCounts) {
            for (Tree subtree : annotated) {
                if (subtree.isLeaf()) {
                    continue;
                }
                annotatedStateCounts.incrementCount(subtree.label().value());
            }
        }

        // Counting happens before the boundary subtree is added, so it is not included in the counts.
        addRoot(annotated);
        Tree binarized = binarizer.transformTree(annotated);
        if (trainOptions.printTreeTransformations > 0) {
            TrainOptions.printTrainTree(trainOptions.printBinarizedPW, "BINARIZED TREE:", binarized);
            trainOptions.printTreeTransformations--;
        }

        return forceCNF
                ? new CNFTransformers.ToCNFTransformer().transformTree(binarized)
                : binarized;
    }

    /**
     * Writes the collected annotated rule counts to {@code System.err}: a blank line, then one
     * line per rule in the form {@code count<TAB>parent --> child1 child2 ...}.
     */
    public void printRuleCounts() {
        System.err.println();
        for (Tree rule : annotatedRuleCounts.keySet()) {
            StringBuilder line = new StringBuilder();
            line.append(annotatedRuleCounts.getCount(rule))
                    .append("\t")
                    .append(rule.label().value())
                    .append(" -->");
            for (Tree child : rule.getChildrenAsList()) {
                line.append(" ").append(child.label().value());
            }
            System.err.println(line);
        }
    }

    /**
     * Writes the collected annotated state counts to {@code System.err}: a blank line, a header
     * line, then one {@code state<TAB>count} line per state in sorted order.
     */
    public void printStateCounts() {
        System.err.println();
        System.err.println("Annotated state counts");

        List<String> states = new ArrayList<>(annotatedStateCounts.keySet());
        Collections.sort(states);
        for (String state : states) {
            StringBuilder line = new StringBuilder(state);
            line.append("\t").append(annotatedStateCounts.getCount(state));
            System.err.println(line);
        }
    }

    /**
     * Counts the consecutive arguments that follow position {@code i} and do not start with
     * {@code '-'}, i.e. the sub-arguments belonging to the option at {@code strArr[i]}.
     *
     * <p>An empty-string argument counts as a sub-argument. The test uses {@code startsWith}
     * rather than {@code charAt(0)} so that an empty argument cannot raise
     * {@link StringIndexOutOfBoundsException}; this also matches the option test in {@code main}.
     *
     * @param strArr the full command-line argument array
     * @param i index of the option whose sub-arguments are counted
     * @return the number of sub-arguments following index {@code i}
     */
    private static int numSubArgs(String[] strArr, int i) {
        int last = i;
        while (last + 1 < strArr.length && !strArr[last + 1].startsWith("-")) {
            last++;
        }
        return last - i;
    }

    /**
     * Removes from {@code options.trainOptions.splitters} every entry matched by an entry of
     * {@code options.trainOptions.deleteSplitters}. Does nothing when {@code deleteSplitters}
     * is {@code null}.
     *
     * <p>A delete entry that equals its own basic category removes every splitter with that
     * basic category; any other delete entry removes only the splitter equal to it. In verbose
     * mode the removed splitters are reported on {@code System.err}.
     *
     * @param treebankLanguagePack used to reduce labels to their basic category
     * @param options options whose {@code trainOptions.splitters} collection is edited in place
     */
    private static void removeDeleteSplittersFromSplitters(TreebankLanguagePack treebankLanguagePack, Options options) {
        if (options.trainOptions.deleteSplitters == null) {
            return;
        }
        List<String> removed = new ArrayList<>();
        for (String toDelete : options.trainOptions.deleteSplitters) {
            String baseCategory = treebankLanguagePack.basicCategory(toDelete);
            boolean isBasic = toDelete.equals(baseCategory);
            for (Iterator<String> splitterIter = options.trainOptions.splitters.iterator(); splitterIter.hasNext(); ) {
                String splitter = splitterIter.next();
                boolean sameBase = isBasic && treebankLanguagePack.basicCategory(splitter).equals(baseCategory);
                if (sameBase || splitter.equals(toDelete)) {
                    // Remove through the iterator so the collection can be edited while it is traversed.
                    splitterIter.remove();
                    removed.add(splitter);
                }
            }
        }
        if (options.testOptions.verbose) {
            System.err.println("Removed from vertical splitters: " + removed);
        }
    }

    /**
     * Annotates and binarizes up to three treebanks with one shared transformer chain.
     *
     * <p>The chain is, in order: the optional {@code trainOptions.preTransformer}, an optional
     * {@code CollinsPuncTransformer}, a {@link TreeAnnotatorAndBinarizer}, and an optional
     * {@code TreeLeafLabelTransformer} built from {@code options.wordFunction}. Before the
     * treebanks are transformed, split categories may be computed from the training data and
     * stored into {@code options.trainOptions}, so this method mutates {@code options}.
     *
     * @param treebank primary training treebank (reported as "Training" in verbose mode)
     * @param treebank2 secondary training treebank, or {@code null}
     * @param treebank3 additional treebank transformed with the same chain, or {@code null};
     *     it is not used when computing split categories
     * @param options parser options; {@code trainOptions.splitters} and
     *     {@code trainOptions.postSplitters} may be overwritten
     * @return the three transformed treebanks in argument order; the second and third are
     *     {@code null} when the corresponding argument was {@code null}
     */
    public static Triple<Treebank, Treebank, Treebank> getAnnotatedBinaryTreebankFromTreebank(Treebank treebank, Treebank treebank2, Treebank treebank3, Options options) {
        TreeAnnotatorAndBinarizer treeAnnotatorAndBinarizer;
        TreebankLangParserParams treebankLangParserParams = options.tlpParams;
        TreebankLanguagePack treebankLanguagePack = treebankLangParserParams.treebankLanguagePack();
        // Verbose mode: print a textual summary of each training treebank.
        if (options.testOptions.verbose) {
            PrintWriter pw = treebankLangParserParams.pw(System.err);
            pw.print("Training ");
            pw.println(treebank.textualSummary(treebankLanguagePack));
            if (treebank2 != null) {
                pw.print("Secondary training ");
                pw.println(treebank2.textualSummary(treebankLanguagePack));
            }
        }
        // Assemble the transformer chain; the order of addTransformer calls is the order of application.
        CompositeTreeTransformer compositeTreeTransformer = new CompositeTreeTransformer();
        if (options.trainOptions.preTransformer != null) {
            compositeTreeTransformer.addTransformer(options.trainOptions.preTransformer);
        }
        if (options.trainOptions.collinsPunc) {
            compositeTreeTransformer.addTransformer(new CollinsPuncTransformer(treebankLanguagePack));
        }
        System.err.print("Binarizing trees...");
        // With leftToRight the binarizer gets a LeftHeadFinder while annotation keeps the
        // language-specific head finder; otherwise both use the language-specific one.
        if (options.trainOptions.leftToRight) {
            treeAnnotatorAndBinarizer = new TreeAnnotatorAndBinarizer(treebankLangParserParams.headFinder(), new LeftHeadFinder(), treebankLangParserParams, options.forceCNF, !options.trainOptions.outsideFactor(), !options.trainOptions.predictSplits, options);
        } else {
            treeAnnotatorAndBinarizer = new TreeAnnotatorAndBinarizer(treebankLangParserParams, options.forceCNF, !options.trainOptions.outsideFactor(), !options.trainOptions.predictSplits, options);
        }
        compositeTreeTransformer.addTransformer(treeAnnotatorAndBinarizer);
        if (options.wordFunction != null) {
            compositeTreeTransformer.addTransformer(new TreeLeafLabelTransformer(options.wordFunction));
        }
        // Statistics below are gathered over primary + secondary training data together.
        Treebank compositeTreebank = treebank2 == null ? treebank : new CompositeTreebank(treebank, treebank2);
        // NOTE(review): the splitters are assigned after the annotator was constructed above;
        // presumably the annotator reads them from the shared options at transform time - confirm.
        if (options.trainOptions.selectiveSplit) {
            options.trainOptions.splitters = ParentAnnotationStats.getSplitCategories(compositeTreebank, options.trainOptions.tagSelectiveSplit, 0, options.trainOptions.selectiveSplitCutOff, options.trainOptions.tagSelectiveSplitCutOff, treebankLanguagePack);
            removeDeleteSplittersFromSplitters(treebankLanguagePack, options);
            if (options.testOptions.verbose) {
                ArrayList arrayList = new ArrayList(options.trainOptions.splitters);
                Collections.sort(arrayList);
                System.err.println("Parent split categories: " + arrayList);
            }
        }
        // Post-split categories are computed on an annotated copy of the training data. Note that
        // compositeTreebank is replaced by that annotated copy, so the hSelSplit pass below
        // iterates over the annotated trees when both options are enabled.
        if (options.trainOptions.selectivePostSplit) {
            compositeTreebank = compositeTreebank.transform(new TreeAnnotator(treebankLangParserParams.headFinder(), treebankLangParserParams, options));
            options.trainOptions.postSplitters = ParentAnnotationStats.getSplitCategories(compositeTreebank, true, 0, options.trainOptions.selectivePostSplitCutOff, options.trainOptions.tagSelectivePostSplitCutOff, treebankLanguagePack);
            if (options.testOptions.verbose) {
                System.err.println("Parent post annotation split categories: " + options.trainOptions.postSplitters);
            }
        }
        // Extra pass with selective split switched off and tree printing suppressed; the
        // transformed trees are discarded. Presumably this lets the binarizer collect the
        // statistics it needs for selective splitting - TODO confirm against TreeBinarizer.
        // The saved print counter and the selective-split flag are restored afterwards
        // (not in a finally block, so an exception in the pass leaves them changed).
        if (options.trainOptions.hSelSplit) {
            int i = options.trainOptions.printTreeTransformations;
            options.trainOptions.printTreeTransformations = 0;
            treeAnnotatorAndBinarizer.setDoSelectiveSplit(false);
            Iterator<Tree> it = compositeTreebank.iterator();
            while (it.hasNext()) {
                compositeTreeTransformer.transformTree(it.next());
            }
            treeAnnotatorAndBinarizer.setDoSelectiveSplit(true);
            options.trainOptions.printTreeTransformations = i;
        }
        // The real transformation of the training treebanks.
        Treebank transform = treebank.transform(compositeTreeTransformer);
        if (treebank2 != null) {
            treebank2 = treebank2.transform(compositeTreeTransformer);
        }
        // Counts are printed before the third treebank is transformed.
        if (options.trainOptions.printAnnotatedStateCounts) {
            treeAnnotatorAndBinarizer.printStateCounts();
        }
        if (options.trainOptions.printAnnotatedRuleCounts) {
            treeAnnotatorAndBinarizer.printRuleCounts();
        }
        if (treebank3 != null) {
            treebank3 = treebank3.transform(compositeTreeTransformer);
        }
        if (options.testOptions.verbose) {
            treeAnnotatorAndBinarizer.dumpStats();
        }
        return new Triple<>(transform, treebank2, treebank3);
    }

    /**
     * Command-line entry point: loads a treebank, annotates and binarizes it, and prints each
     * original tree followed by its binarized form to {@code System.out}.
     *
     * <p>Recognized here: {@code -train treebankPath [fileRange]} or
     * {@code -train treebankPath low high}. Every other option starting with {@code '-'} is
     * handed to {@code Options.setOption}. If an argument is left unparsed, or no
     * {@code -train} path was given, a usage message is printed and the method returns.
     *
     * @param strArr command-line arguments
     * @throws RuntimeException if {@code -train} is not followed by a treebank path
     */
    public static void main(String[] strArr) {
        Options options = new Options();
        String treebankPath = null;
        FileFilter trainFilter = null;
        int i = 0;
        while (i < strArr.length && strArr[i].startsWith("-")) {
            if (strArr[i].equalsIgnoreCase("-train")) {
                int numSubArgs = numSubArgs(strArr, i);
                if (numSubArgs < 1) {
                    throw new RuntimeException("Error: -train option must have treebankPath as first argument.");
                }
                i++; // step over "-train"
                treebankPath = strArr[i];
                i++; // step over the path
                if (numSubArgs == 2) {
                    // Read the range at the current index and only then advance; advancing first
                    // would read one slot too far (past the end when the range is the last argument).
                    trainFilter = new NumberRangesFileFilter(strArr[i], true);
                    i++;
                } else if (numSubArgs >= 3) {
                    int low = Integer.parseInt(strArr[i]);
                    int high = Integer.parseInt(strArr[i + 1]);
                    trainFilter = new NumberRangeFileFilter(low, high, true);
                    i += 2;
                }
            } else {
                i = options.setOption(strArr, i);
            }
        }
        // Leftover arguments or a missing -train path both mean the invocation is unusable.
        if (i < strArr.length || treebankPath == null) {
            System.err.println("usage: java TreeAnnotatorAndBinarizer options*");
            System.err.println("  Options are like for lexicalized parser including -train treebankPath [fileRange]");
            return;
        }

        System.err.println("Annotating from treebank dir: " + treebankPath);
        DiskTreebank diskTreebank = options.tlpParams.diskTreebank();
        if (trainFilter == null) {
            diskTreebank.loadPath(treebankPath);
        } else {
            diskTreebank.loadPath(treebankPath, trainFilter);
        }

        Treebank binarizedTreebank = getAnnotatedBinaryTreebankFromTreebank(diskTreebank, null, null, options).first();
        // Walk the original and the binarized treebank in lockstep so each pair is printed together.
        Iterator<Tree> originals = diskTreebank.iterator();
        for (Tree binarized : binarizedTreebank) {
            System.out.println("Original tree:");
            originals.next().pennPrint();
            System.out.println("Binarized tree:");
            binarized.pennPrint();
            System.out.println();
        }
    }
}
