package de.tudarmstadt.ukp.dkpro.core.stanfordnlp;

import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS;
import de.tudarmstadt.ukp.dkpro.core.api.metadata.SingletonTagset;
import de.tudarmstadt.ukp.dkpro.core.api.resources.CasConfigurableProviderBase;
import de.tudarmstadt.ukp.dkpro.core.api.resources.MappingProvider;
import de.tudarmstadt.ukp.dkpro.core.api.resources.MappingProviderFactory;
import de.tudarmstadt.ukp.dkpro.core.api.resources.ModelProviderBase;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.constituent.Constituent;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.ROOT;
import de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.CoreNlpUtils;
import de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.StanfordAnnotator;
import de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.TreeWithTokens;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.parser.common.ParserGrammar;
import edu.stanford.nlp.parser.common.ParserQuery;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.parser.shiftreduce.BaseModel;
import edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser;
import edu.stanford.nlp.trees.AbstractTreebankLanguagePack;
import edu.stanford.nlp.trees.EnglishGrammaticalRelations;
import edu.stanford.nlp.trees.EnglishGrammaticalStructureFactory;
import edu.stanford.nlp.trees.GrammaticalRelation;
import edu.stanford.nlp.trees.GrammaticalStructure;
import edu.stanford.nlp.trees.GrammaticalStructureFactory;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.trees.TypedDependency;
import edu.stanford.nlp.trees.UniversalEnglishGrammaticalRelations;
import edu.stanford.nlp.trees.UniversalEnglishGrammaticalStructureFactory;
import edu.stanford.nlp.trees.international.pennchinese.ChineseGrammaticalRelations;
import edu.stanford.nlp.util.Index;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
import java.util.zip.GZIPInputStream;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.reflect.FieldUtils;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.descriptor.TypeCapability;
import org.apache.uima.fit.internal.EnhancedClassFile;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Level;

@TypeCapability(inputs = {"de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence", "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token", "de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS"}, outputs = {"de.tudarmstadt.ukp.dkpro.core.api.syntax.type.constituent.Constituent"})
@ResourceMetaData(name = "de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordParser", description = "Stanford Parser component.", version = "1.8.0", vendor = "DKPro Core Project", copyright = "Copyright 2010-2015\n                            Ubiquitous Knowledge Processing (UKP) Lab\n                            Technische Universität Darmstadt")
@EnhancedClassFile
/* loaded from: input_file:de/tudarmstadt/ukp/dkpro/core/stanfordnlp/StanfordParser.class */
public class StanfordParser extends JCasAnnotator_ImplBase {
    /** Name of the parameter bound to {@link #printTagSet}; when set, the tag sets are logged after a model is loaded. */
    public static final String PARAM_PRINT_TAGSET = "printTagSet";

    @ConfigurationParameter(name = "printTagSet", mandatory = true, defaultValue = {"false"}, description = "Write the tag set(s) to the log when a model is loaded.")
    protected boolean printTagSet;
    /** Name of the parameter bound to {@link #language}; used as the "language" override of the model provider and passed to the mapping providers. */
    public static final String PARAM_LANGUAGE = "language";

    @ConfigurationParameter(name = "language", mandatory = false, description = "Use this language instead of the document language to resolve the model and tag set mapping.")
    protected String language;
    /** Name of the parameter bound to {@link #variant}; used as the "variant" override of the model provider. */
    public static final String PARAM_VARIANT = "modelVariant";

    @ConfigurationParameter(name = "modelVariant", mandatory = false, description = "Variant of a model the model. Used to address a specific model if here are multiple models\nfor one language.")
    protected String variant;
    /** Name of the parameter bound to {@link #modelLocation}; used as the "location" override of the model provider. */
    public static final String PARAM_MODEL_LOCATION = "modelLocation";

    @ConfigurationParameter(name = "modelLocation", mandatory = false, description = "Location from which the model is read.")
    protected String modelLocation;
    /** Name of the parameter bound to {@link #posMappingLocation}; handed to the POS mapping provider factory in {@code initialize}. */
    public static final String PARAM_POS_MAPPING_LOCATION = "POSMappingLocation";

    @ConfigurationParameter(name = "POSMappingLocation", mandatory = false, description = "Location of the mapping file for part-of-speech tags to UIMA types.")
    protected String posMappingLocation;
    /** Name of the parameter bound to {@link #constituentMappingLocation}; handed to the constituent mapping provider factory in {@code initialize}. */
    public static final String PARAM_CONSTITUENT_MAPPING_LOCATION = "ConstituentMappingLocation";

    @ConfigurationParameter(name = PARAM_CONSTITUENT_MAPPING_LOCATION, mandatory = false, description = "Location of the mapping file for constituent tags to UIMA types.")
    protected String constituentMappingLocation;
    /** Name of the parameter bound to {@link #writeDependency}; {@code initialize} forces it to true when no output kind is enabled. */
    public static final String PARAM_WRITE_DEPENDENCY = "writeDependency";

    @ConfigurationParameter(name = PARAM_WRITE_DEPENDENCY, mandatory = true, defaultValue = {"true"}, description = "Sets whether to create or not to create dependency annotations.\n\n<p>Default: true")
    private boolean writeDependency;
    /** Name of the parameter bound to {@link #mode}; selects the typed-dependency variant read in {@code doCreateDependencyTags}. */
    public static final String PARAM_MODE = "mode";

    @ConfigurationParameter(name = "mode", mandatory = false, defaultValue = {"TREE"}, description = "Sets the kind of dependencies being created.\n\n<p>Default: DependenciesMode#COLLAPSED TREE")
    protected DependenciesMode mode;
    /** Name of the parameter bound to {@link #writeConstituent}; {@code initialize} forces it to true when POS writing is requested. */
    public static final String PARAM_WRITE_CONSTITUENT = "writeConstituent";

    @ConfigurationParameter(name = PARAM_WRITE_CONSTITUENT, mandatory = true, defaultValue = {"true"}, description = "Sets whether to create or not to create constituent tags. This is required for POS-tagging\nand lemmatization.\n<p>\nDefault: true")
    private boolean writeConstituent;
    /** Name of the parameter bound to {@link #writePennTree}; when set, a Penn tree annotation is created over each parsed unit. */
    public static final String PARAM_WRITE_PENN_TREE = "writePennTree";

    @ConfigurationParameter(name = PARAM_WRITE_PENN_TREE, mandatory = true, defaultValue = {"false"}, description = "If this parameter is set to true, each sentence is annotated with a PennTree-Annotation,\ncontaining the whole parse tree in Penn Treebank style format.\n<p>\nDefault: false")
    private boolean writePennTree;
    /** Name of the parameter bound to {@link #annotationTypeToParse}; when non-null, {@code process} iterates annotations of this type instead of sentences. */
    public static final String PARAM_ANNOTATIONTYPE_TO_PARSE = "annotationTypeToParse";

    @ConfigurationParameter(name = PARAM_ANNOTATIONTYPE_TO_PARSE, mandatory = false, description = "This parameter can be used to override the standard behavior which uses the <i>Sentence</i>\nannotation as the basic unit for parsing.\n<p>If the parameter is set with the name of an annotation type <i>x</i>, the parser will no\nlonger parse <i>Sentence</i>-annotations, but <i>x</i>-Annotations.</p>\n<p>Default: null")
    private String annotationTypeToParse;
    /** Name of the parameter bound to {@link #writePos}; passed along when constituent annotations are created from the parse tree. */
    public static final String PARAM_WRITE_POS = "writePOS";

    @ConfigurationParameter(name = PARAM_WRITE_POS, mandatory = true, defaultValue = {"false"}, description = "Sets whether to create or not to create POS tags. The creation of constituent tags must be\nturned on for this to work.\n<p>\nDefault: false")
    private boolean writePos;
    /** Name of the parameter bound to {@link #readPos}; when false, {@code tokenToWord} clears the tag of every word sent to the parser. */
    public static final String PARAM_READ_POS = "readPOS";

    @ConfigurationParameter(name = PARAM_READ_POS, mandatory = true, defaultValue = {"true"}, description = "Sets whether to use or not to use already existing POS tags from another annotator for the\nparsing process.\n<p>\nDefault: true")
    private boolean readPos;
    /** Name of the parameter bound to {@link #maxTokens}; units with more tokens are skipped and the value is passed to the parser as {@code -maxLength}. */
    public static final String PARAM_MAX_SENTENCE_LENGTH = "maxSentenceLength";

    @ConfigurationParameter(name = "maxSentenceLength", mandatory = true, defaultValue = {"130"}, description = "Maximum number of tokens in a sentence. Longer sentences are not parsed. This is to avoid out\nof memory exceptions.\n<p>\nDefault: 130")
    private int maxTokens;
    /** Name of the parameter bound to {@link #maxItems}; the value is passed to the parser as {@code -MAX_ITEMS}. */
    public static final String PARAM_MAX_ITEMS = "maxItems";

    @ConfigurationParameter(name = PARAM_MAX_ITEMS, mandatory = true, defaultValue = {"200000"}, description = "Controls when the factored parser considers a sentence to be too complex and falls back to\nthe PCFG parser.\n<p>\nDefault: 200000")
    private int maxItems;
    /** Name of the parameter bound to {@link #ptb3Escaping}; when set, the words are PTB-escaped before parsing. */
    public static final String PARAM_PTB3_ESCAPING = "ptb3Escaping";

    @ConfigurationParameter(name = "ptb3Escaping", mandatory = true, defaultValue = {"true"}, description = "Enable all traditional PTB3 token transforms (like -LRB-, -RRB-).")
    private boolean ptb3Escaping;
    /** Name of the parameter bound to {@link #quoteBegin}; passed to the PTB escaping step. */
    public static final String PARAM_QUOTE_BEGIN = "quoteBegin";

    @ConfigurationParameter(name = "quoteBegin", mandatory = false, description = "List of extra token texts (usually single character strings) that should be treated like\nopening quotes and escaped accordingly before being sent to the parser.")
    private List<String> quoteBegin;
    /** Name of the parameter bound to {@link #quoteEnd}; passed to the PTB escaping step. */
    public static final String PARAM_QUOTE_END = "quoteEnd";

    @ConfigurationParameter(name = "quoteEnd", mandatory = false, description = "List of extra token texts (usually single character strings) that should be treated like\nclosing quotes and escaped accordingly before being sent to the parser.")
    private List<String> quoteEnd;
    // Providers are created in initialize() and configured against the CAS at the start of process().
    private CasConfigurableProviderBase<ParserGrammar> modelProvider;
    private MappingProvider posMappingProvider;
    private MappingProvider constituentMappingProvider;

    /* loaded from: input_file:de/tudarmstadt/ukp/dkpro/core/stanfordnlp/StanfordParser$DependenciesMode.class */
    /**
     * Selects which typed-dependency collection is read from the grammatical structure when
     * dependency annotations are created (see {@code doCreateDependencyTags}).
     */
    public enum DependenciesMode {
        /** Reads {@code GrammaticalStructure.typedDependencies()}. */
        BASIC,
        /** Reads {@code GrammaticalStructure.allTypedDependencies()}. */
        NON_COLLAPSED,
        /** Reads {@code GrammaticalStructure.typedDependenciesCollapsed(false)}. */
        COLLAPSED,
        /** Reads {@code GrammaticalStructure.typedDependenciesCollapsed(true)}. */
        COLLAPSED_WITH_EXTRA,
        /** Reads {@code GrammaticalStructure.typedDependenciesCCprocessed(true)}. */
        CC_PROPAGATED,
        /** Reads {@code GrammaticalStructure.typedDependenciesCCprocessed(false)}. */
        CC_PROPAGATED_NO_EXTRA,
        /** Reads {@code GrammaticalStructure.typedDependenciesCollapsedTree()}; this is the configured default. */
        TREE
    }

    /* loaded from: input_file:de/tudarmstadt/ukp/dkpro/core/stanfordnlp/StanfordParser$StanfordParserModelProvider.class */
    /**
     * Model provider that locates the serialized Stanford parser model, deserializes it and
     * registers the POS, constituent and dependency tag sets found in the model.
     */
    private class StanfordParserModelProvider extends ModelProviderBase<ParserGrammar> {
        /**
         * Wires the provider to the enclosing component: defaults describe where models are
         * looked up, overrides carry the explicitly configured location, language and variant.
         */
        private StanfordParserModelProvider() {
            setContextObject(StanfordParser.this);
            setDefault("artifactId", "${groupId}.stanfordnlp-model-parser-${language}-${variant}");
            setDefault("location", "classpath:/${package}/lib/parser-${language}-${variant}.properties");
            setDefaultVariantsLocation("${package}/lib/parser-default-variants.map");
            setOverride("location", StanfordParser.this.modelLocation);
            setOverride("language", StanfordParser.this.language);
            setOverride("variant", StanfordParser.this.variant);
        }

        /**
         * Loads the parser model from the given URL (transparently un-gzipping {@code .gz}
         * resources), collects its tag sets and applies the length/complexity limits.
         * This is the resource-production hook of {@code ModelProviderBase}.
         *
         * @param url location of the serialized parser model
         * @return the loaded and configured parser
         * @throws IOException if the model cannot be read or a serialized class is missing
         * @throws IllegalStateException if the parser type is unknown or its internal state
         *         cannot be accessed reflectively
         */
        protected ParserGrammar produceResource(URL url) throws IOException {
            StanfordParser.this.getContext().getLogger().log(Level.INFO, "Loading parser from serialized file " + url + " ...");

            // Declared outside the try so that the finally block can close whatever was opened,
            // even if a wrapping stream constructor or the deserialization itself fails.
            InputStream rawStream = null;
            ObjectInputStream modelStream = null;
            try {
                rawStream = url.openStream();
                InputStream payload = rawStream;
                if (url.toString().endsWith(".gz")) {
                    payload = new GZIPInputStream(rawStream);
                }
                // NOTE(review): native Java deserialization - only load models from trusted locations.
                modelStream = new ObjectInputStream(new BufferedInputStream(payload));
                ParserGrammar grammar = (ParserGrammar) modelStream.readObject();

                // getGfCharacter() is only reachable through the abstract base class.
                AbstractTreebankLanguagePack languagePack = (AbstractTreebankLanguagePack) grammar.getTLPParams().treebankLanguagePack();
                grammar.getTLPParams().setGenerateOriginalDependencies(true);

                Properties metaData = getResourceMetaData();

                // The POS tag set is always created (it is subtracted from the constituent tags
                // below) but can only be filled and registered for lexicalized parsers.
                SingletonTagset posTags = new SingletonTagset(POS.class, metaData.getProperty("pos.tagset"));
                if (grammar instanceof LexicalizedParser) {
                    for (String tag : ((LexicalizedParser) grammar).tagIndex) {
                        posTags.add(languagePack.basicCategory(baseLabel(languagePack, tag)));
                    }
                    addTagset(posTags, StanfordParser.this.writePos);
                }

                SingletonTagset constituentTags = new SingletonTagset(Constituent.class, metaData.getProperty("constituent.tagset"));
                for (String state : knownStates(grammar)) {
                    String category = baseLabel(languagePack, state);
                    // Labels starting with "@" are skipped, as are empty labels.
                    if (!category.startsWith("@") && category.length() > 0) {
                        constituentTags.add(category);
                    }
                }
                constituentTags.removeAll(posTags);
                addTagset(constituentTags, StanfordParser.this.writeConstituent);

                registerDependencyTagset(languagePack);

                if (StanfordParser.this.printTagSet) {
                    StanfordParser.this.getContext().getLogger().log(Level.INFO, getTagset().toString());
                }
                grammar.setOptionFlags(new String[]{"-maxLength", String.valueOf(StanfordParser.this.maxTokens), "-MAX_ITEMS", String.valueOf(StanfordParser.this.maxItems)});
                return grammar;
            } catch (ClassNotFoundException e) {
                throw new IOException(e);
            } catch (IllegalAccessException e) {
                throw new IllegalStateException(e);
            } finally {
                IOUtils.closeQuietly(modelStream);
                IOUtils.closeQuietly(rawStream);
            }
        }

        /** Reduces a label to its basic category and cuts off anything from the grammatical-function character on. */
        private String baseLabel(AbstractTreebankLanguagePack languagePack, String label) {
            String category = languagePack.basicCategory(label);
            int functionStart = category.indexOf(languagePack.getGfCharacter());
            return functionStart > 0 ? category.substring(0, functionStart) : category;
        }

        /** Returns the state labels of the grammar; shift-reduce models only expose them via private fields. */
        @SuppressWarnings("unchecked")
        private Iterable<String> knownStates(ParserGrammar grammar) throws IllegalAccessException {
            if (grammar instanceof LexicalizedParser) {
                return ((LexicalizedParser) grammar).stateIndex;
            }
            if (grammar instanceof ShiftReduceParser) {
                BaseModel model = (BaseModel) FieldUtils.readField(grammar, "model", true);
                return (Iterable<String>) FieldUtils.readField(model, "knownStates", true);
            }
            throw new IllegalStateException("Unknown parser type [" + grammar.getClass().getName() + "]");
        }

        /** Registers the dependency tag set matching the grammatical structure factory of the language pack, if any. */
        private void registerDependencyTagset(TreebankLanguagePack languagePack) {
            GrammaticalStructureFactory factory;
            try {
                factory = languagePack.grammaticalStructureFactory(languagePack.punctuationWordRejectFilter(), languagePack.typedDependencyHeadFinder());
            } catch (UnsupportedOperationException e) {
                StanfordParser.this.getContext().getLogger().log(Level.WARNING, "Current model does not seem to support dependencies.");
                return;
            }
            if (factory == null) {
                return;
            }

            Class<?> factoryType = factory.getClass();
            if (EnglishGrammaticalStructureFactory.class.equals(factoryType)) {
                addDependencyTagset("stanford341", EnglishGrammaticalRelations.values());
            } else if (UniversalEnglishGrammaticalStructureFactory.class.equals(factoryType)) {
                addDependencyTagset("universal", UniversalEnglishGrammaticalRelations.values());
            } else if (edu.stanford.nlp.trees.international.pennchinese.ChineseGrammaticalStructureFactory.class.equals(factoryType)) {
                // The factory class must be compared here; comparing against the relations
                // class could never match, so the Chinese tag set was never registered.
                addDependencyTagset("stanford", ChineseGrammaticalRelations.values());
            }
        }

        /** Builds a dependency tag set from the short names of the given relations and registers it. */
        private void addDependencyTagset(String tagsetName, Collection<GrammaticalRelation> relations) {
            SingletonTagset dependencyTags = new SingletonTagset(Dependency.class, tagsetName);
            for (GrammaticalRelation relation : relations) {
                dependencyTags.add(relation.getShortName());
            }
            addTagset(dependencyTags, StanfordParser.this.writeDependency);
        }
    }

    /**
     * Initializes the component: repairs inconsistent output settings and creates the model
     * and tag mapping providers.
     *
     * @param uimaContext the UIMA context handed to the superclass initialization
     * @throws ResourceInitializationException if the superclass initialization fails
     */
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);

        // At least one kind of output has to be produced; fall back to dependencies.
        boolean producesOutput = this.writeConstituent || this.writeDependency || this.writePennTree;
        if (!producesOutput) {
            getLogger().warn("Invalid parameter configuration... will create dependency tags.");
            this.writeDependency = true;
        }

        // POS tags are written as part of constituent creation, so that has to be switched on.
        if (this.writePos && !this.writeConstituent) {
            getLogger().warn("Constituent tag creation is required for POS tagging. Will create constituent tags.");
            this.writeConstituent = true;
        }

        StanfordParserModelProvider parserModels = new StanfordParserModelProvider();
        this.modelProvider = parserModels;
        this.posMappingProvider = MappingProviderFactory.createPosMappingProvider(
                this.posMappingLocation, this.language, this.modelProvider);
        this.constituentMappingProvider = MappingProviderFactory.createConstituentMappingProvider(
                this.constituentMappingLocation, this.language, this.modelProvider);
    }

    /**
     * Parses every non-blank unit (sentences, or annotations of the configured type) whose
     * token count does not exceed the configured maximum, and writes the enabled annotations
     * (Penn tree, dependencies, constituents) for it.
     *
     * @param jCas the CAS to process
     * @throws AnalysisEngineProcessException if parsing a unit or creating its annotations fails;
     *         the original failure is the direct cause
     */
    public void process(JCas jCas) throws AnalysisEngineProcessException {
        this.modelProvider.configure(jCas.getCas());
        this.posMappingProvider.configure(jCas.getCas());
        this.constituentMappingProvider.configure(jCas.getCas());

        // The loaded model does not change while this CAS is processed, so fetch it once.
        ParserGrammar parser = this.modelProvider.getResource();

        FSIterator<Annotation> units = jCas.getAnnotationIndex(this.annotationTypeToParse != null ? jCas.getCas().getTypeSystem().getType(this.annotationTypeToParse) : JCasUtil.getType(jCas, Sentence.class)).iterator();
        while (units.hasNext()) {
            Annotation unit = units.next();
            if (StringUtils.isBlank(unit.getCoveredText())) {
                continue;
            }

            // Words for the parser and the tokens they came from, kept in parallel order.
            List<CoreLabel> words = new ArrayList<CoreLabel>();
            List<Token> tokens = new ArrayList<Token>();
            for (Token token : JCasUtil.selectCovered(Token.class, unit)) {
                words.add(tokenToWord(token));
                tokens.add(token);
            }
            if (getContext().getLogger().isLoggable(Level.FINE)) {
                getContext().getLogger().log(Level.FINE, words.toString());
            }

            // Overly long units are skipped entirely.
            if (words.size() > this.maxTokens) {
                continue;
            }

            Tree parseTree;
            try {
                if (this.ptb3Escaping) {
                    words = CoreNlpUtils.applyPtbEscaping(words, this.quoteBegin, this.quoteEnd);
                }
                ParserQuery query = parser.parserQuery();
                query.parse(words);
                parseTree = query.getBestParse();
            } catch (Exception e) {
                throw new AnalysisEngineProcessException(e);
            }

            // Kept as a separate try (not nested) so that a failure here is wrapped exactly once.
            try {
                StanfordAnnotator annotator = new StanfordAnnotator(new TreeWithTokens(parseTree, tokens));
                annotator.setPosMappingProvider(this.posMappingProvider);
                annotator.setConstituentMappingProvider(this.constituentMappingProvider);
                if (this.writePennTree) {
                    annotator.createPennTreeAnnotation(unit.getBegin(), unit.getEnd());
                }
                if (this.writeDependency) {
                    doCreateDependencyTags(parser, annotator, parseTree, tokens);
                }
                if (this.writeConstituent) {
                    annotator.createConstituentAnnotationFromTree(parser.getTLPParams().treebankLanguagePack(), this.writePos);
                }
            } catch (Exception e) {
                getLogger().error("Unable to parse [" + unit.getCoveredText() + "]");
                throw new AnalysisEngineProcessException(e);
            }
        }
    }

    /**
     * Derives typed dependencies from the parse tree according to the configured mode and
     * writes them as dependency annotations. Dependencies governed by index 0 become
     * {@code ROOT} annotations on the dependent token. If the language pack does not support
     * dependencies, nothing is written.
     *
     * @param parserGrammar the parser whose language pack supplies the grammatical structure factory
     * @param stanfordAnnotator the annotator used to create the dependency annotations
     * @param tree the parse tree of the current unit
     * @param list the tokens of the current unit, in parser order (word index 1 is element 0)
     */
    protected void doCreateDependencyTags(ParserGrammar parserGrammar, StanfordAnnotator stanfordAnnotator, Tree tree, List<Token> list) {
        Collection<TypedDependency> dependencies;
        try {
            TreebankLanguagePack languagePack = parserGrammar.getTLPParams().treebankLanguagePack();
            GrammaticalStructure structure = languagePack.grammaticalStructureFactory(languagePack.punctuationWordRejectFilter(), languagePack.typedDependencyHeadFinder()).newGrammaticalStructure(tree);
            dependencies = selectDependencies(structure, this.mode);
        } catch (UnsupportedOperationException e) {
            // Only the dependency extraction is guarded: a language pack without dependency
            // support is an expected condition, failures while writing annotations are not.
            getContext().getLogger().log(Level.FINE, "Dependencies are not supported by the current model; none were created.");
            return;
        }
        if (dependencies == null) {
            return;
        }

        for (TypedDependency dependency : dependencies) {
            int governorIndex = dependency.gov().index();
            int dependentIndex = dependency.dep().index();
            Token dependent = list.get(dependentIndex - 1);
            if (governorIndex != 0) {
                stanfordAnnotator.createDependencyAnnotation(dependency.reln(), list.get(governorIndex - 1), dependent);
            } else {
                // Index 0 has no token in the list: the dependent is attached to itself as ROOT.
                ROOT root = new ROOT(stanfordAnnotator.getJCas());
                root.setDependencyType(dependency.reln().toString());
                root.setGovernor(dependent);
                root.setDependent(dependent);
                root.setBegin(dependent.getBegin());
                root.setEnd(dependent.getEnd());
                root.addToIndexes();
            }
        }
    }

    /** Picks the typed-dependency collection for the given mode; a missing mode falls back to the declared default TREE. */
    private Collection<TypedDependency> selectDependencies(GrammaticalStructure structure, DependenciesMode selectedMode) {
        DependenciesMode effectiveMode = selectedMode != null ? selectedMode : DependenciesMode.TREE;
        switch (effectiveMode) {
            case BASIC:
                return structure.typedDependencies();
            case NON_COLLAPSED:
                return structure.allTypedDependencies();
            case COLLAPSED_WITH_EXTRA:
                return structure.typedDependenciesCollapsed(true);
            case COLLAPSED:
                return structure.typedDependenciesCollapsed(false);
            case CC_PROPAGATED:
                return structure.typedDependenciesCCprocessed(true);
            case CC_PROPAGATED_NO_EXTRA:
                return structure.typedDependenciesCCprocessed(false);
            case TREE:
            default:
                return structure.typedDependenciesCollapsedTree();
        }
    }

    /**
     * Converts a token into the word representation handed to the parser. The word value is
     * set to the token's covered text; the tag is cleared unless existing POS tags are to be
     * read.
     *
     * @param token the token to convert
     * @return the word for the parser
     */
    protected CoreLabel tokenToWord(Token token) {
        CoreLabel word = CoreNlpUtils.tokenToWord(token);
        word.setValue(token.getCoveredText());
        if (this.readPos) {
            return word;
        }
        // Existing POS tags must not be used: drop the tag.
        word.setTag((String) null);
        return word;
    }
}
