package gate.stanford;

import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
import gate.Annotation;
import gate.AnnotationSet;
import gate.Factory;
import gate.FeatureMap;
import gate.Resource;
import gate.Utils;
import gate.creole.AbstractLanguageAnalyser;
import gate.creole.ExecutionException;
import gate.creole.ResourceInstantiationException;
import gate.creole.ResourceReference;
import gate.creole.metadata.CreoleParameter;
import gate.creole.metadata.CreoleResource;
import gate.creole.metadata.Optional;
import gate.creole.metadata.RunTime;
import gate.creole.metadata.Sharable;
import gate.util.GateRuntimeException;
import gate.util.OffsetComparator;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;

/**
 * GATE processing resource that assigns part-of-speech tags using the Stanford
 * {@link MaxentTagger}.
 *
 * <p>Tokens (annotations of {@code baseTokenAnnotationType}) are grouped by the sentence
 * annotations ({@code baseSentenceAnnotationType}) of the input annotation set, handed to the
 * tagger one group at a time, and the resulting tag is stored as a {@code category} feature,
 * either directly on the token or on an annotation of {@code outputAnnotationType} in the
 * output annotation set (see {@link #addFeatures(Annotation, String, String)}).
 */
@CreoleResource(name = "Stanford POS Tagger", comment = "Stanford Part-of-Speech Tagger", icon = "pos-tagger", helpURL = "http://gate.ac.uk/userguide/sec:misc:creole:stanford")
public class Tagger extends AbstractLanguageAnalyser {
    private static final long serialVersionUID = -6001372186847970081L;
    public static final String TAG_DOCUMENT_PARAMETER_NAME = "document";
    public static final String TAG_INPUT_AS_PARAMETER_NAME = "inputASName";
    public static final String TAG_ENCODING_PARAMETER_NAME = "encoding";
    public static final String BASE_TOKEN_ANNOTATION_TYPE_PARAMETER_NAME = "baseTokenAnnotationType";
    public static final String OUTPUT_ANNOTATION_TYPE_PARAMETER_NAME = "outputAnnotationType";
    public static final String BASE_SENTENCE_ANNOTATION_TYPE_PARAMETER_NAME = "baseSentenceAnnotationType";
    public static final String TAG_OUTPUT_AS_PARAMETER_NAME = "outputASName";

    /** Name of the feature that is read for existing tags and written for new tags. */
    private static final String TOKEN_CATEGORY_FEATURE_NAME = "category";

    // Initialised to the declared parameter default so that execute() cannot hit a null
    // Boolean when the setter was never called.
    private Boolean useExistingTags = true;
    protected MaxentTagger tagger;
    private String inputASName;
    private String encoding;
    private String baseTokenAnnotationType;
    private String baseSentenceAnnotationType;
    private String outputAnnotationType;
    private String outputASName;
    private ResourceReference modelFile;
    protected Boolean failOnMissingInputAnnotations = true;
    protected Boolean posTagAllTokens = true;
    protected Logger logger = Logger.getLogger(getClass().getName());

    /**
     * Sets whether {@link #execute()} throws when the document has no sentence or token
     * annotations (true) or just logs and returns (false).
     */
    @CreoleParameter(comment = "Throw an exception when there are none of the required input annotations", defaultValue = "true")
    @RunTime
    @Optional
    public void setFailOnMissingInputAnnotations(Boolean bool) {
        this.failOnMissingInputAnnotations = bool;
    }

    /** Returns the value last passed to {@link #setFailOnMissingInputAnnotations(Boolean)}. */
    public Boolean getFailOnMissingInputAnnotations() {
        return this.failOnMissingInputAnnotations;
    }

    /**
     * Sets whether every token is tagged (true) or only tokens lying within the span of a
     * sentence annotation (false).
     */
    @CreoleParameter(comment = "Should all Tokens be POS tagged or just those within baseSentenceAnnotationType?", defaultValue = "true")
    @RunTime
    @Optional
    public void setPosTagAllTokens(Boolean bool) {
        this.posTagAllTokens = bool;
    }

    /** Returns the value last passed to {@link #setPosTagAllTokens(Boolean)}. */
    public Boolean getPosTagAllTokens() {
        return this.posTagAllTokens;
    }

    /**
     * Sets whether a {@code category} feature already present on a token is passed to the
     * tagger as a pre-assigned tag (true) or ignored (false).
     */
    @CreoleParameter(comment = "Should existing category features on input annotations be respected (true) or ignored (false)?", defaultValue = "true")
    @RunTime
    @Optional
    public void setUseExistingTags(Boolean bool) {
        this.useExistingTags = bool;
    }

    /** Returns the value last passed to {@link #setUseExistingTags(Boolean)}. */
    public Boolean getUseExistingTags() {
        return this.useExistingTags;
    }

    /**
     * Loads the tagger model from {@code modelFile} unless a tagger instance is already set
     * (for example through {@link #setTagger(MaxentTagger)}).
     *
     * @return this resource
     * @throws ResourceInstantiationException if no model file is configured or the model
     *         cannot be loaded
     */
    public Resource init() throws ResourceInstantiationException {
        if (this.tagger == null) {
            if (this.modelFile == null) {
                throw new ResourceInstantiationException("No model file provided!");
            }
            try {
                this.tagger = new MaxentTagger(this.modelFile.toURL().toExternalForm());
            } catch (Exception e) {
                throw new ResourceInstantiationException(e);
            }
        }
        return this;
    }

    /**
     * Discards the current tagger instance and runs {@link #init()} again.
     *
     * @throws ResourceInstantiationException if re-initialisation fails
     */
    public void reInit() throws ResourceInstantiationException {
        this.tagger = null;
        init();
    }

    /**
     * Tags the tokens of the current document.
     *
     * <p>Sentences and tokens are processed in offset order. For each sentence, all pending
     * tokens that end at or before the sentence end are collected (restricted to tokens
     * within the sentence span when {@code posTagAllTokens} is false), tagged together and
     * stored. Tokens remaining after the last sentence are tagged as one final group when
     * {@code posTagAllTokens} is true.
     *
     * @throws ExecutionException if there is no document, a required annotation type
     *         parameter is blank, the input annotations are missing while
     *         {@code failOnMissingInputAnnotations} is enabled, or the tagger returns a
     *         different number of tags than tokens
     */
    public void execute() throws ExecutionException {
        if (this.document == null) {
            throw new ExecutionException("No document to process!");
        }
        AnnotationSet inputAS = this.document.getAnnotations(this.inputASName);
        if (isNullOrBlank(this.baseTokenAnnotationType)) {
            throw new ExecutionException("No base Token Annotation Type provided!");
        }
        if (isNullOrBlank(this.baseSentenceAnnotationType)) {
            throw new ExecutionException("No base Sentence Annotation Type provided!");
        }
        if (isNullOrBlank(this.outputAnnotationType)) {
            throw new ExecutionException("No AnnotationType provided to store the new feature!");
        }

        AnnotationSet sentenceSet = inputAS.get(this.baseSentenceAnnotationType);
        AnnotationSet tokenSet = inputAS.get(this.baseTokenAnnotationType);
        if (sentenceSet == null || sentenceSet.size() <= 0 || tokenSet == null || tokenSet.size() <= 0) {
            if (isEnabled(this.failOnMissingInputAnnotations)) {
                throw new ExecutionException("No sentences or tokens to process in document " + this.document.getName() + "\nPlease run a sentence splitter and tokeniser first!");
            }
            Utils.logOnce(this.logger, Level.INFO, "POS tagger: no sentence or token annotations in input document - see debug log for details.");
            this.logger.debug("No input annotations in document " + this.document.getName());
            return;
        }

        // Read the flags once; a null value is treated as the declared default (true).
        final boolean tagAllTokens = isEnabled(this.posTagAllTokens);
        final boolean reuseTags = isEnabled(this.useExistingTags);

        long startTime = System.currentTimeMillis();
        fireStatusChanged("POS tagging " + this.document.getName());
        fireProgressChanged(0);

        OffsetComparator offsetComparator = new OffsetComparator();
        List<Annotation> sentences = new ArrayList<>(sentenceSet);
        Collections.sort(sentences, offsetComparator);
        List<Annotation> tokens = new ArrayList<>(tokenSet);
        Collections.sort(tokens, offsetComparator);

        List<Word> groupWords = new ArrayList<>();
        List<Annotation> groupTokens = new ArrayList<>();
        Iterator<Annotation> tokenIterator = tokens.iterator();
        // The token list is known to be non-empty at this point.
        Annotation token = tokenIterator.next();
        int taggedGroups = 0;
        int sentenceCount = sentenceSet.size();

        for (Annotation sentence : sentences) {
            groupTokens.clear();
            groupWords.clear();
            Long sentenceEnd = sentence.getEndNode().getOffset();
            while (token != null && token.getEndNode().getOffset().compareTo(sentenceEnd) <= 0) {
                if (tagAllTokens || token.withinSpanOf(sentence)) {
                    groupTokens.add(token);
                    groupWords.add(toTaggerInput(token, reuseTags));
                }
                token = tokenIterator.hasNext() ? tokenIterator.next() : null;
            }
            if (!groupWords.isEmpty()) {
                tagAndStore(groupTokens, groupWords, reuseTags);
                fireProgressChanged((taggedGroups * 100) / sentenceCount);
                taggedGroups++;
            }
        }

        // Tokens located after the last sentence are tagged together as one trailing group.
        if (token != null && tagAllTokens) {
            groupTokens.clear();
            groupWords.clear();
            while (token != null) {
                groupTokens.add(token);
                groupWords.add(toTaggerInput(token, reuseTags));
                token = tokenIterator.hasNext() ? tokenIterator.next() : null;
            }
            tagAndStore(groupTokens, groupWords, reuseTags);
        }

        fireProcessFinished();
        fireStatusChanged(this.document.getName() + " tagged in " + NumberFormat.getInstance().format((System.currentTimeMillis() - startTime) / 1000.0d) + " seconds!");
    }

    /** Returns true when the given parameter value is null or contains only whitespace. */
    private static boolean isNullOrBlank(String value) {
        return value == null || value.trim().length() == 0;
    }

    /** Reads a Boolean flag, treating null as true (the declared default of every flag here). */
    private static boolean isEnabled(Boolean flag) {
        return flag == null || flag.booleanValue();
    }

    /**
     * Converts a token annotation into tagger input: a {@link TaggedWord} carrying the
     * existing {@code category} value when tags are reused and the feature is present,
     * otherwise a plain {@link Word}.
     */
    private Word toTaggerInput(Annotation token, boolean reuseTags) {
        FeatureMap features = token.getFeatures();
        String text = (String) features.get(Tokenizer.TOKEN_STRING_FEATURE);
        if (reuseTags && features.containsKey(TOKEN_CATEGORY_FEATURE_NAME)) {
            return new TaggedWord(text, (String) features.get(TOKEN_CATEGORY_FEATURE_NAME));
        }
        return new Word(text);
    }

    /**
     * Tags one group of words and stores each resulting tag for the token at the same
     * position.
     *
     * @throws ExecutionException if the tagger output size differs from the input size
     */
    private void tagAndStore(List<Annotation> tokens, List<Word> words, boolean reuseTags) throws ExecutionException {
        List<TaggedWord> tagged = this.tagger.tagSentence(words, reuseTags);
        if (tagged.size() != tokens.size()) {
            throw new ExecutionException("POS Tagger malfunction: the output size (" + tagged.size() + ") is different from the input size (" + tokens.size() + ")!");
        }
        Iterator<Annotation> tokenIterator = tokens.iterator();
        for (TaggedWord taggedWord : tagged) {
            addFeatures(tokenIterator.next(), TOKEN_CATEGORY_FEATURE_NAME, taggedWord.tag());
        }
    }

    /**
     * Stores a feature for the given token.
     *
     * <p>When the output annotation type equals the token type and the input and output
     * annotation set names match (null counts as the empty name), the feature is put directly
     * on {@code annotation}. Otherwise the feature is put on the first annotation of the
     * output type in the output set whose start and end offsets equal the token's, and if no
     * such annotation exists a new one is created over the token's span.
     *
     * @param annotation the token the feature belongs to
     * @param str the feature name
     * @param str2 the feature value
     * @throws GateRuntimeException if the new output annotation cannot be added; the
     *         underlying exception is attached as the cause
     */
    protected void addFeatures(Annotation annotation, String str, String str2) throws GateRuntimeException {
        String inputSetName = this.inputASName == null ? "" : this.inputASName;
        String outputSetName = this.outputASName == null ? "" : this.outputASName;
        if (this.outputAnnotationType.equals(this.baseTokenAnnotationType) && inputSetName.equals(outputSetName)) {
            annotation.getFeatures().put(str, str2);
            return;
        }

        // Keep the offsets as Long: narrowing to int would truncate large offsets.
        Long startOffset = annotation.getStartNode().getOffset();
        Long endOffset = annotation.getEndNode().getOffset();
        AnnotationSet outputAS = this.document.getAnnotations(this.outputASName);
        AnnotationSet candidates = outputAS.get(this.outputAnnotationType);
        if (candidates != null) {
            for (Annotation candidate : candidates) {
                if (startOffset.equals(candidate.getStartNode().getOffset())
                        && endOffset.equals(candidate.getEndNode().getOffset())) {
                    candidate.getFeatures().put(str, str2);
                    return;
                }
            }
        }

        FeatureMap newFeatures = Factory.newFeatureMap();
        newFeatures.put(str, str2);
        try {
            outputAS.add(startOffset, endOffset, this.outputAnnotationType, newFeatures);
        } catch (Exception e) {
            throw new GateRuntimeException("Invalid Offsets", e);
        }
    }

    /** Stores the encoding value; it is not read anywhere else in this class. */
    public void setEncoding(String str) {
        this.encoding = str;
    }

    /** Sets the name of the annotation set that sentences and tokens are read from. */
    @CreoleParameter(comment = "Input annotation set name", defaultValue = "")
    @Optional
    @RunTime
    public void setInputASName(String str) {
        this.inputASName = str;
    }

    /** Returns the name of the input annotation set. */
    public String getInputASName() {
        return this.inputASName;
    }

    /** Returns the value last passed to {@link #setEncoding(String)}. */
    public String getEncoding() {
        return this.encoding;
    }

    /** Returns the annotation type used as tokens. */
    public String getBaseTokenAnnotationType() {
        return this.baseTokenAnnotationType;
    }

    /** Returns the annotation type used as sentences. */
    public String getBaseSentenceAnnotationType() {
        return this.baseSentenceAnnotationType;
    }

    /** Returns the annotation type that receives the tag feature. */
    public String getOutputAnnotationType() {
        return this.outputAnnotationType;
    }

    /** Sets the annotation type used as tokens; must be non-blank when executing. */
    @CreoleParameter(comment = "Annotation type for what should be considered as atomic words to PoS tag", defaultValue = "Token")
    @RunTime
    public void setBaseTokenAnnotationType(String str) {
        this.baseTokenAnnotationType = str;
    }

    /** Sets the annotation type used as sentences; must be non-blank when executing. */
    @CreoleParameter(comment = "Sentence-level annotation type", defaultValue = "Sentence")
    @RunTime
    public void setBaseSentenceAnnotationType(String str) {
        this.baseSentenceAnnotationType = str;
    }

    /** Sets the annotation type that receives the tag feature; must be non-blank when executing. */
    @CreoleParameter(comment = "Output annotation type for words (e.g. Token)", defaultValue = "Token")
    @RunTime
    public void setOutputAnnotationType(String str) {
        this.outputAnnotationType = str;
    }

    /** Returns the name of the output annotation set. */
    public String getOutputASName() {
        return this.outputASName;
    }

    /** Sets the name of the annotation set that output annotations are looked up and created in. */
    @CreoleParameter(comment = "Output annotation set name", defaultValue = "")
    @Optional
    @RunTime
    public void setOutputASName(String str) {
        this.outputASName = str;
    }

    /** Sets the location of the tagger model that {@link #init()} loads. */
    @CreoleParameter(comment = "Path to the tagger's model file", defaultValue = "resources/english-left3words-distsim.tagger", suffixes = "tagger;model")
    public void setModelFile(ResourceReference resourceReference) {
        this.modelFile = resourceReference;
    }

    /** Returns the configured tagger model location. */
    public ResourceReference getModelFile() {
        return this.modelFile;
    }

    /**
     * Sets the tagger instance directly; when non-null, {@link #init()} skips loading the
     * model file.
     */
    @Sharable
    public void setTagger(MaxentTagger maxentTagger) {
        this.tagger = maxentTagger;
    }

    /** Returns the tagger instance in use, or null if none has been loaded or set. */
    public MaxentTagger getTagger() {
        return this.tagger;
    }
}
