package edu.stanford.nlp.ie.regexp;

import edu.stanford.nlp.classify.LinearClassifier;
import edu.stanford.nlp.ie.AbstractSequenceClassifier;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.io.RuntimeIOException;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.sequences.DocumentReaderAndWriter;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Generics;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/stanford/nlp/ie/regexp/RegexNERSequenceClassifier.class */
public class RegexNERSequenceClassifier extends AbstractSequenceClassifier<CoreLabel> {
    /**
     * Mapping entries used by {@link #classify(List)}. Populated from {@code readEntries}, which
     * returns them sorted by {@link Entry#compareTo(Entry)}.
     */
    private final List<Entry> entries;
    /**
     * Existing labels that any entry is allowed to overwrite. Always holds the background symbol
     * and {@code null}; additionally holds every entry's type when the constructor's {@code z2}
     * flag is set.
     */
    private final Set<String> myLabels;
    /**
     * Pattern searched for in the POS tags of a candidate match, or {@code null} when the
     * constructor was given a null or empty regex, in which case no POS check is made.
     */
    private final Pattern validPosPattern;
    /** POS regex used by the constructor that takes no explicit POS regex. */
    public static final String DEFAULT_VALID_POS = "^(NN|JJ)";

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:edu/stanford/nlp/ie/regexp/RegexNERSequenceClassifier$Entry.class */
    /**
     * One rule of the mapping: a sequence of per-token patterns, the label assigned when the whole
     * sequence matches, the existing labels this rule may overwrite, and a priority.
     *
     * <p>The natural ordering places entries with a higher priority first and, among entries of
     * equal priority, entries with more token patterns first. The ordering is not consistent with
     * {@code equals}: this class does not override {@code equals}, and two distinct entries may
     * compare as 0.
     */
    public static class Entry implements Comparable<Entry> {
        /** Patterns applied to consecutive tokens, one pattern per token. */
        public List<Pattern> regex;
        /** Label assigned to every token of a match (interned by the constructor). */
        public String type;
        /** Existing labels that this entry is allowed to overwrite. */
        public Set<String> overwritableTypes;
        /** Ordering weight; larger values sort earlier. */
        public double priority;

        /**
         * Creates an entry.
         *
         * @param list per-token patterns, stored as given (not copied)
         * @param str label to assign; must not be {@code null} because it is interned
         * @param set labels this entry may overwrite, stored as given (not copied)
         * @param d priority of the entry
         * @throws NullPointerException if {@code str} is {@code null}
         */
        public Entry(List<Pattern> list, String str, Set<String> set, double d) {
            this.regex = list;
            this.type = str.intern();
            this.overwritableTypes = set;
            this.priority = d;
        }

        /**
         * Orders by descending priority, then by descending number of token patterns.
         *
         * @param entry the entry to compare against
         * @return a negative value if this entry sorts before {@code entry}, a positive value if it
         *     sorts after, and 0 if both priority and pattern count tie
         */
        @Override
        public int compareTo(Entry entry) {
            // Plain relational operators are kept on purpose: unlike Double.compare they treat
            // 0.0 and -0.0 as a tie, and let a NaN priority fall through to the size tie-break.
            if (this.priority > entry.priority) {
                return -1;
            }
            if (this.priority < entry.priority) {
                return 1;
            }
            // Longer pattern sequences first. Integer.compare avoids the subtraction idiom.
            return Integer.compare(entry.regex.size(), this.regex.size());
        }

        /** Returns a debugging representation listing all four fields. */
        @Override
        public String toString() {
            return "Entry{" + this.regex + ' ' + this.type + ' ' + this.overwritableTypes + ' ' + this.priority + '}';
        }
    }

    /**
     * Creates a classifier that uses {@link #DEFAULT_VALID_POS} as its POS regex; otherwise
     * identical to the four-argument {@code String} constructor, to which it delegates.
     *
     * @param str location of the mapping, passed on unchanged
     * @param z passed on unchanged (case-insensitive pattern matching when {@code true})
     * @param z2 passed on unchanged (entry types become overwritable labels when {@code true})
     */
    public RegexNERSequenceClassifier(String str, boolean z, boolean z2) {
        this(str, z, z2, DEFAULT_VALID_POS);
    }

    public RegexNERSequenceClassifier(String str, boolean z, boolean z2, String str2) {
        super(new Properties());
        if (str2 == null || str2.equals("")) {
            this.validPosPattern = null;
        } else {
            this.validPosPattern = Pattern.compile(str2);
        }
        BufferedReader bufferedReader = null;
        try {
            try {
                bufferedReader = IOUtils.readerFromString(str);
                this.entries = readEntries(bufferedReader, z);
                IOUtils.closeIgnoringExceptions(bufferedReader);
                this.myLabels = Generics.newHashSet();
                this.myLabels.add(this.flags.backgroundSymbol);
                this.myLabels.add(null);
                if (z2) {
                    Iterator<Entry> it = this.entries.iterator();
                    while (it.hasNext()) {
                        this.myLabels.add(it.next().type);
                    }
                }
            } catch (IOException e) {
                throw new RuntimeIOException("Couldn't read RegexNER from " + str, e);
            }
        } catch (Throwable th) {
            IOUtils.closeIgnoringExceptions(bufferedReader);
            throw th;
        }
    }

    /**
     * Creates a classifier from a mapping supplied through an already opened reader. This
     * constructor does not close {@code bufferedReader}.
     *
     * @param bufferedReader source of the mapping lines
     * @param z when {@code true}, the mapping's patterns are compiled case-insensitively
     * @param z2 when {@code true}, the type of every entry is added to the set of labels that
     *     any entry may overwrite
     * @param str regex searched for in POS tags of a candidate match; {@code null} or the empty
     *     string disables the POS check
     * @throws RuntimeIOException if reading the mapping fails with an {@code IOException}
     */
    public RegexNERSequenceClassifier(BufferedReader bufferedReader, boolean z, boolean z2, String str) {
        super(new Properties());
        this.validPosPattern = (str == null || str.isEmpty()) ? null : Pattern.compile(str);

        try {
            this.entries = readEntries(bufferedReader, z);
        } catch (IOException e) {
            throw new RuntimeIOException("Couldn't read RegexNER from reader", e);
        }

        // The background symbol and null (no label yet) may always be overwritten.
        Set<String> labels = Generics.newHashSet();
        labels.add(this.flags.backgroundSymbol);
        labels.add(null);
        if (z2) {
            for (Entry entry : this.entries) {
                labels.add(entry.type);
            }
        }
        this.myLabels = labels;
    }

    /**
     * Tells whether at least one token in {@code list[i, i2)} carries a POS tag in which the
     * configured POS pattern is found. Always {@code true} when no POS pattern is configured.
     *
     * @param list the tokens being classified
     * @param i index of the first token to inspect (inclusive)
     * @param i2 index just past the last token to inspect (exclusive)
     * @return {@code true} if the POS check is disabled or some inspected tag matches;
     *     {@code false} otherwise, including when the range is empty
     * @throws IllegalArgumentException if an inspected token has no tag
     */
    private boolean containsValidPos(List<CoreLabel> list, int i, int i2) {
        final Pattern posPattern = this.validPosPattern;
        if (posPattern == null) {
            return true;
        }
        int index = i;
        while (index < i2) {
            String tag = list.get(index).tag();
            if (tag == null) {
                throw new IllegalArgumentException("RegexNER was asked to check for valid tags on an untagged sequence. Either tag the sequence, perhaps with the pos annotator, or create RegexNER with an empty validPosPattern, perhaps with the property regexner.validpospattern");
            }
            // find(), not matches(): the pattern only has to occur somewhere in the tag.
            if (posPattern.matcher(tag).find()) {
                return true;
            }
            index++;
        }
        return false;
    }

    /**
     * Labels {@code list} in place: for every entry, in stored order, each token span found by
     * {@code findStartIndex} that also passes the POS check gets the entry's type written to its
     * {@code AnswerAnnotation}.
     *
     * @param list tokens to label; modified in place
     * @return the same {@code list} instance
     */
    @Override
    public List<CoreLabel> classify(List<CoreLabel> list) {
        for (Entry entry : this.entries) {
            // The search resumes one token after the previous hit, whether or not that hit
            // passed the POS check.
            for (int start = findStartIndex(entry, list, 0, this.myLabels);
                    start >= 0;
                    start = findStartIndex(entry, list, start + 1, this.myLabels)) {
                int end = start + entry.regex.size();
                if (!containsValidPos(list, start, end)) {
                    continue;
                }
                for (int position = start; position < end; position++) {
                    list.get(position).set(CoreAnnotations.AnswerAnnotation.class, entry.type);
                }
            }
        }
        return list;
    }

    /**
     * Reads every line of {@code bufferedReader} as one mapping entry and returns the entries
     * sorted by {@link Entry#compareTo(Entry)}.
     *
     * <p>Each line is split on {@code LinearClassifier.TEXT_SERIALIZATION_DELIMITER} into two to
     * four fields:
     * <ol>
     *   <li>whitespace-separated regular expressions, one per token;</li>
     *   <li>the type to assign;</li>
     *   <li>optional: comma-separated types the entry may overwrite;</li>
     *   <li>optional: the priority as a double (0.0 when absent).</li>
     * </ol>
     *
     * @param bufferedReader source of the mapping lines; not closed by this method
     * @param z when {@code true}, patterns are compiled with {@link Pattern#CASE_INSENSITIVE}
     * @return the parsed entries, sorted
     * @throws IOException if reading a line fails
     * @throws IllegalArgumentException if a line does not have two to four fields, or its priority
     *     field is not a parsable double
     */
    private static List<Entry> readEntries(BufferedReader bufferedReader, boolean z) throws IOException {
        List<Entry> entries = new ArrayList<Entry>();
        // Pattern.compile(regex) is the same as Pattern.compile(regex, 0).
        int patternFlags = z ? Pattern.CASE_INSENSITIVE : 0;
        int lineNumber = 0;
        String line;
        while ((line = bufferedReader.readLine()) != null) {
            lineNumber++;
            String[] fields = line.split(LinearClassifier.TEXT_SERIALIZATION_DELIMITER);
            if (fields.length < 2 || fields.length > 4) {
                throw new IllegalArgumentException("Provided mapping file is in wrong format: line " + lineNumber
                        + " has " + fields.length + " field(s) but 2 to 4 are expected: \"" + line + "\"");
            }
            String[] tokenRegexes = fields[0].trim().split("\\s+");
            String type = fields[1].trim();

            Set<String> overwritableTypes = Generics.newHashSet();
            if (fields.length >= 3) {
                overwritableTypes.addAll(Arrays.asList(fields[2].trim().split(",")));
            }

            double priority = 0.0d;
            if (fields.length == 4) {
                try {
                    priority = Double.parseDouble(fields[3].trim());
                } catch (NumberFormatException e) {
                    throw new IllegalArgumentException("ERROR: Invalid line " + lineNumber + " in regexner file " + bufferedReader + ": \"" + line + "\"!", e);
                }
            }

            List<Pattern> tokenPatterns = new ArrayList<Pattern>(tokenRegexes.length);
            for (String tokenRegex : tokenRegexes) {
                tokenPatterns.add(Pattern.compile(tokenRegex, patternFlags));
            }
            entries.add(new Entry(tokenPatterns, type, overwritableTypes, priority));
        }
        Collections.sort(entries);
        return entries;
    }

    /**
     * Finds the first position at or after {@code i} where every pattern of {@code entry} matches
     * the corresponding token and every one of those tokens may still be labeled.
     *
     * <p>A token may be labeled when its {@code AnswerAnnotation} is unset and its
     * {@code NamedEntityTagAnnotation} is contained in {@code entry.overwritableTypes} or in
     * {@code set}.
     *
     * @param entry the entry whose pattern sequence is searched for
     * @param list the tokens to search
     * @param i first candidate start index
     * @param set labels that any entry may overwrite
     * @return the start index of the first match, or -1 if there is none
     */
    private static int findStartIndex(Entry entry, List<CoreLabel> list, int i, Set<String> set) {
        final List<Pattern> patterns = entry.regex;
        final int patternCount = patterns.size();
        final int lastStart = list.size() - patterns.size();
        candidates:
        for (int start = i; start <= lastStart; start++) {
            for (int offset = 0; offset < patternCount; offset++) {
                Pattern pattern = patterns.get(offset);
                CoreLabel token = list.get(start + offset);
                String nerTag = (String) token.get(CoreAnnotations.NamedEntityTagAnnotation.class);
                // Any failed check rules out this start position; move on to the next one.
                if (!pattern.matcher(token.word()).matches()) {
                    continue candidates;
                }
                if (token.get(CoreAnnotations.AnswerAnnotation.class) != null) {
                    continue candidates;
                }
                if (!entry.overwritableTypes.contains(nerTag) && !set.contains(nerTag)) {
                    continue candidates;
                }
            }
            return start;
        }
        return -1;
    }

    /**
     * Delegates to {@link #classify(List)}; the two {@code CoreMap} arguments are not used.
     *
     * @param list tokens to label; modified in place
     * @param coreMap ignored
     * @param coreMap2 ignored
     * @return the same {@code list} instance, as returned by {@code classify}
     */
    @Override
    public List<CoreLabel> classifyWithGlobalInformation(List<CoreLabel> list, CoreMap coreMap, CoreMap coreMap2) {
        return classify(list);
    }

    /**
     * Does nothing: this implementation has an empty body, so both arguments are ignored.
     *
     * @param collection ignored
     * @param documentReaderAndWriter ignored
     */
    @Override
    public void train(Collection<List<CoreLabel>> collection, DocumentReaderAndWriter<CoreLabel> documentReaderAndWriter) {
    }

    /**
     * Does nothing: this implementation has an empty body and prints no output.
     *
     * @param list ignored
     */
    @Override
    public void printProbsDocument(List<CoreLabel> list) {
    }

    /**
     * Does nothing: this implementation has an empty body, so nothing is written to {@code str}.
     *
     * @param str ignored
     */
    @Override
    public void serializeClassifier(String str) {
    }

    /**
     * Does nothing: this implementation has an empty body, so nothing is read from
     * {@code objectInputStream}. The declared exceptions are never thrown by this body.
     *
     * @param objectInputStream ignored
     * @param properties ignored
     */
    @Override
    public void loadClassifier(ObjectInputStream objectInputStream, Properties properties) throws IOException, ClassCastException, ClassNotFoundException {
    }
}
