package edu.stanford.nlp.pipeline;

import edu.stanford.nlp.ie.KBPRelationExtractor;
import edu.stanford.nlp.ie.regexp.ChineseNumberSequenceClassifier;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.tokensregex.types.Expressions;
import edu.stanford.nlp.sequences.SeqClassifierFlags;
import edu.stanford.nlp.time.TimeAnnotations;
import edu.stanford.nlp.time.Timex;
import edu.stanford.nlp.util.ArgumentParser;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.SystemUtils;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/stanford/nlp/pipeline/WikidictAnnotator.class */
public class WikidictAnnotator extends SentenceAnnotator {
    private static final Redwood.RedwoodChannels log = Redwood.channels(WikidictAnnotator.class);
    private static final Pattern NUMBER_PATTERN = Pattern.compile("[0-9.]+");

    @ArgumentParser.Option(name = "threads", gloss = "The number of threads to run this annotator on")
    private int threads;

    @ArgumentParser.Option(name = "wikidict", gloss = "The location of the <text, link, score> TSV file")
    private String wikidictPath;

    @ArgumentParser.Option(name = "threshold", gloss = "The score threshold under which to discard links")
    private double threshold;

    @ArgumentParser.Option(name = "caseless", gloss = "Ignore case when looking up entries in wikidict")
    private boolean wikidictCaseless;
    private final Map<String, String> dictionary;

    public WikidictAnnotator(String str, Properties properties) {
        this.threads = 1;
        this.wikidictPath = DefaultPaths.DEFAULT_WIKIDICT_TSV;
        this.threshold = 0.0d;
        this.wikidictCaseless = false;
        this.dictionary = new HashMap(21000000);
        ArgumentParser.fillOptions(this, str, properties);
        long currentTimeMillis = System.currentTimeMillis();
        log.info("Reading Wikidict from " + this.wikidictPath);
        try {
            int i = 0;
            String[] strArr = new String[3];
            for (String str2 : IOUtils.readLines(this.wikidictPath, "UTF-8")) {
                if (str2.charAt(0) != '\t') {
                    StringUtils.splitOnChar(strArr, str2, '\t');
                    if (i % 1000000 == 0) {
                        log.info("Loaded " + i + " entries from Wikidict [" + SystemUtils.getMemoryInUse() + "MB memory used; " + Redwood.formatTimeDifference(System.currentTimeMillis() - currentTimeMillis) + " elapsed]");
                    }
                    if (this.threshold <= 0.0d || Double.parseDouble(strArr[2]) >= this.threshold) {
                        String str3 = strArr[0];
                        this.dictionary.put(this.wikidictCaseless ? str3.toLowerCase() : str3, strArr[1].intern());
                        i++;
                    }
                }
            }
            log.info("Done reading Wikidict (" + this.dictionary.size() + " links read; " + Redwood.formatTimeDifference(System.currentTimeMillis() - currentTimeMillis) + " elapsed)");
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    public WikidictAnnotator(Properties properties) {
        this(Annotator.STANFORD_LINK, properties);
    }

    public static String normalizeTimex(String str) {
        return (!str.contains("T") || "PRESENT".equals(str)) ? str : str.substring(0, str.indexOf("T"));
    }

    public Optional<String> link(CoreMap coreMap) {
        String str = coreMap.get(CoreAnnotations.OriginalTextAnnotation.class) == null ? (String) coreMap.get(CoreAnnotations.TextAnnotation.class) : (String) coreMap.get(CoreAnnotations.OriginalTextAnnotation.class);
        String lowerCase = this.wikidictCaseless ? str.toLowerCase() : str;
        String str2 = (String) coreMap.get(CoreAnnotations.NamedEntityTagAnnotation.class);
        if (str2 == null || (!(KBPRelationExtractor.NERTag.DATE.name.equalsIgnoreCase(str2) || ChineseNumberSequenceClassifier.TIME_TAG.equalsIgnoreCase(str2) || Expressions.TYPE_SET.equalsIgnoreCase(str2)) || coreMap.get(TimeAnnotations.TimexAnnotation.class) == null || ((Timex) coreMap.get(TimeAnnotations.TimexAnnotation.class)).value() == null)) {
            return (str2 == null || !ChineseNumberSequenceClassifier.ORDINAL_TAG.equalsIgnoreCase(str2) || coreMap.get(CoreAnnotations.NumericValueAnnotation.class) == null) ? NUMBER_PATTERN.matcher(str).matches() ? Optional.of(str) : (str2 == null || SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL.equals(str2) || !this.dictionary.containsKey(lowerCase)) ? Optional.empty() : Optional.of(this.dictionary.get(lowerCase)) : Optional.of(((Number) coreMap.get(CoreAnnotations.NumericValueAnnotation.class)).toString());
        }
        Timex timex = (Timex) coreMap.get(TimeAnnotations.TimexAnnotation.class);
        return (timex.value() == null || timex.value().equals("PRESENT") || timex.value().equals("PRESENT_REF") || timex.value().equals("PAST") || timex.value().equals("PAST_REF") || timex.value().equals("FUTURE") || timex.value().equals("FUTURE_REF")) ? Optional.empty() : Optional.of(normalizeTimex(timex.value()));
    }

    @Override // edu.stanford.nlp.pipeline.SentenceAnnotator
    protected int nThreads() {
        return this.threads;
    }

    @Override // edu.stanford.nlp.pipeline.SentenceAnnotator
    protected long maxTime() {
        return -1L;
    }

    @Override // edu.stanford.nlp.pipeline.SentenceAnnotator
    protected void doOneSentence(Annotation annotation, CoreMap coreMap) {
        Iterator it = ((List) coreMap.get(CoreAnnotations.TokensAnnotation.class)).iterator();
        while (it.hasNext()) {
            ((CoreLabel) it.next()).set(CoreAnnotations.WikipediaEntityAnnotation.class, SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL);
        }
        for (CoreMap coreMap2 : (List) coreMap.get(CoreAnnotations.MentionsAnnotation.class)) {
            Optional<String> link = link(coreMap2);
            if (link.isPresent()) {
                coreMap2.set(CoreAnnotations.WikipediaEntityAnnotation.class, link.get());
                Iterator it2 = ((List) coreMap2.get(CoreAnnotations.TokensAnnotation.class)).iterator();
                while (it2.hasNext()) {
                    ((CoreLabel) it2.next()).set(CoreAnnotations.WikipediaEntityAnnotation.class, link.get());
                }
            }
        }
    }

    @Override // edu.stanford.nlp.pipeline.SentenceAnnotator
    protected void doOneFailedSentence(Annotation annotation, CoreMap coreMap) {
    }

    @Override // edu.stanford.nlp.pipeline.Annotator
    public Set<Class<? extends CoreAnnotation>> requirementsSatisfied() {
        return Collections.singleton(CoreAnnotations.WikipediaEntityAnnotation.class);
    }

    @Override // edu.stanford.nlp.pipeline.Annotator
    public Set<Class<? extends CoreAnnotation>> requires() {
        return Collections.unmodifiableSet(new HashSet(Arrays.asList(CoreAnnotations.TextAnnotation.class, CoreAnnotations.TokensAnnotation.class, CoreAnnotations.SentencesAnnotation.class, CoreAnnotations.OriginalTextAnnotation.class, CoreAnnotations.MentionsAnnotation.class)));
    }

    public static void main(String[] strArr) throws IOException {
        Properties argsToProperties = StringUtils.argsToProperties(strArr);
        argsToProperties.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,entitymentions,entitylink");
        StanfordCoreNLP stanfordCoreNLP = new StanfordCoreNLP(argsToProperties);
        IOUtils.console("sentence> ", str -> {
            Annotation annotation = new Annotation(str);
            stanfordCoreNLP.annotate(annotation);
            System.err.println(StringUtils.join(((List) ((CoreMap) ((List) annotation.get(CoreAnnotations.SentencesAnnotation.class)).get(0)).get(CoreAnnotations.TokensAnnotation.class)).stream().map(coreLabel -> {
                return (String) coreLabel.get(CoreAnnotations.WikipediaEntityAnnotation.class);
            }), "  "));
        });
    }
}
