package edu.stanford.nlp.process;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.Document;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Generics;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/stanford/nlp/process/WordToSentenceProcessor.class */
/**
 * Groups a flat list of tokens into sentences, where each sentence is itself a list of tokens.
 *
 * <p>A token may be a {@link HasWord}, a {@link String}, or a {@link CoreMap} whose text is read
 * from {@code CoreAnnotations.TextAnnotation}. Tokens are classified by their text:
 * <ul>
 *   <li>a token whose whole text matches one of the "boundary to discard" patterns ends the
 *       current sentence and is dropped from the output;</li>
 *   <li>a token whose whole text matches the sentence boundary token pattern ends the current
 *       sentence and is kept as its last token;</li>
 *   <li>a token contained in the "boundary followers" set that appears immediately after a
 *       completed sentence is appended to that completed sentence;</li>
 *   <li>a {@link CoreMap} token whose {@code CoreAnnotations.ForcedSentenceEndAnnotation} is
 *       {@code true} ends the current sentence and is kept.</li>
 * </ul>
 *
 * <p>If a sentence region begin pattern is configured, tokens are ignored until one matches that
 * pattern; the matching token itself is dropped. A token matching the region end pattern closes
 * the region, ends the current sentence and is dropped as well.
 *
 * @param <IN> the token type
 */
public class WordToSentenceProcessor<IN> implements ListProcessor<IN, List<IN>> {
    private static final boolean DEBUG = false;

    /** A token whose whole text matches this pattern ends a sentence and is kept in it. */
    private final Pattern sentenceBoundaryTokenPattern;

    /** Token texts that get attached to the preceding sentence when they directly follow its end. */
    private final Set<String> sentenceBoundaryFollowers;

    /** A token whose whole text matches any of these patterns ends a sentence and is dropped. */
    private List<Pattern> sentenceBoundaryToDiscard;

    /** If non-null, tokens are skipped until one matches this pattern. */
    private final Pattern sentenceRegionBeginPattern;

    /** If non-null, a token matching this pattern ends the current region and sentence. */
    private final Pattern sentenceRegionEndPattern;

    /** When true, {@link #process(List)} returns the whole input as a single sentence. */
    private boolean isOneSentence;

    /**
     * Replaces the "boundary to discard" patterns with patterns that match exactly the given
     * literal strings (each string is quoted, so regex metacharacters have no special meaning).
     *
     * @param set the literal token texts to treat as discarded sentence boundaries; must not be
     *            {@code null}
     */
    public void setSentenceBoundaryToDiscard(Set<String> set) {
        // Build the complete list first so a failure leaves the previous configuration intact.
        this.sentenceBoundaryToDiscard = compileLiteralPatterns(set);
    }

    /**
     * @return whether {@link #process(List)} treats the entire input as one sentence
     */
    public boolean isOneSentence() {
        return this.isOneSentence;
    }

    /**
     * @param z {@code true} to make {@link #process(List)} return the entire input as one sentence
     */
    public void setOneSentence(boolean z) {
        this.isOneSentence = z;
    }

    /**
     * Adds "boundary to discard" patterns for HTML/XML-like tags. For every element name two
     * case-insensitive patterns are added: one for the bare opening, closing or self-closing tag
     * (optional whitespace and slashes around the name), and one for an opening tag followed by
     * whitespace and further content up to the closing {@code >}.
     *
     * <p>The element name is inserted into the regular expression as-is (it is not quoted).
     *
     * @param set the element names to add patterns for
     */
    public void addHtmlSentenceBoundaryToDiscard(Set<String> set) {
        if (this.sentenceBoundaryToDiscard == null) {
            this.sentenceBoundaryToDiscard = new ArrayList<Pattern>();
        }
        for (String element : set) {
            this.sentenceBoundaryToDiscard.add(
                    Pattern.compile("<\\s*/?\\s*" + element + "\\s*/?\\s*>", Pattern.CASE_INSENSITIVE));
            this.sentenceBoundaryToDiscard.add(
                    Pattern.compile("<\\s*" + element + "\\s+[^>]+>", Pattern.CASE_INSENSITIVE));
        }
    }

    /** Compiles each string into a pattern that matches exactly that literal text. */
    private static List<Pattern> compileLiteralPatterns(Set<String> literals) {
        List<Pattern> patterns = new ArrayList<Pattern>(literals.size());
        for (String literal : literals) {
            patterns.add(Pattern.compile(Pattern.quote(literal)));
        }
        return patterns;
    }

    /** Returns true if the whole token text matches any "boundary to discard" pattern. */
    private boolean matchesSentenceBoundaryToDiscard(String str) {
        for (Pattern discard : this.sentenceBoundaryToDiscard) {
            if (discard.matcher(str).matches()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Splits the tokens into sentences, or wraps a copy of the whole input in a single sentence
     * when {@link #isOneSentence()} is {@code true}.
     *
     * @param list the tokens to group
     * @return the list of sentences
     */
    @Override // edu.stanford.nlp.process.ListProcessor
    public List<List<IN>> process(List<? extends IN> list) {
        if (!this.isOneSentence) {
            return wordsToSentences(list);
        }
        List<List<IN>> sentences = Generics.newArrayList();
        sentences.add(new ArrayList<IN>(list));
        return sentences;
    }

    /**
     * Splits the tokens into sentences according to the configured boundary patterns, followers
     * and optional sentence region patterns (see the class description for the rules). Empty
     * sentences are never emitted.
     *
     * @param list the tokens to group; each must be a {@link HasWord}, {@link String} or
     *             {@link CoreMap}
     * @return the sentences, in input order
     * @throws RuntimeException if a token is of an unsupported type
     */
    public List<List<IN>> wordsToSentences(List<? extends IN> list) {
        List<List<IN>> sentences = Generics.newArrayList();
        List<IN> currentSentence = null;
        List<IN> lastSentence = null;
        boolean insideRegion = false;

        for (IN token : list) {
            String word = wordOf(token);
            boolean forcedEnd = isForcedSentenceEnd(token);

            if (currentSentence == null) {
                currentSentence = new ArrayList<IN>();
            }

            // Outside a configured region: only look for the region start; drop everything else.
            if (this.sentenceRegionBeginPattern != null && !insideRegion) {
                if (this.sentenceRegionBeginPattern.matcher(word).matches()) {
                    insideRegion = true;
                }
                continue;
            }

            // Closing quotes/brackets right after a finished sentence belong to that sentence.
            if (this.sentenceBoundaryFollowers.contains(word) && lastSentence != null && currentSentence.isEmpty()) {
                lastSentence.add(token);
                continue;
            }

            boolean endsSentence;
            if (matchesSentenceBoundaryToDiscard(word)) {
                // Boundary token is dropped.
                endsSentence = true;
            } else if (this.sentenceRegionEndPattern != null && this.sentenceRegionEndPattern.matcher(word).matches()) {
                // Region end token is dropped and closes the region.
                insideRegion = false;
                endsSentence = true;
            } else {
                currentSentence.add(token);
                endsSentence = this.sentenceBoundaryTokenPattern.matcher(word).matches() || forcedEnd;
            }

            if (endsSentence && !currentSentence.isEmpty()) {
                sentences.add(currentSentence);
                lastSentence = currentSentence;
                currentSentence = null;
            }
        }

        // Flush a trailing sentence that had no explicit boundary.
        if (currentSentence != null && !currentSentence.isEmpty()) {
            sentences.add(currentSentence);
        }
        return sentences;
    }

    /**
     * Extracts the text of a token.
     *
     * @throws RuntimeException if the token is not a HasWord, String or CoreMap
     */
    private static String wordOf(Object token) {
        if (token instanceof HasWord) {
            return ((HasWord) token).word();
        }
        if (token instanceof String) {
            return (String) token;
        }
        if (token instanceof CoreMap) {
            return (String) ((CoreMap) token).get(CoreAnnotations.TextAnnotation.class);
        }
        String typeName = token == null ? "null" : token.getClass().getName();
        throw new RuntimeException("Expected token to be a HasWord, String, or CoreMap, but got: " + typeName);
    }

    /** Returns true if the token is a CoreMap whose ForcedSentenceEndAnnotation is set to true. */
    private static boolean isForcedSentenceEnd(Object token) {
        if (!(token instanceof CoreMap)) {
            return false;
        }
        Boolean forced = (Boolean) ((CoreMap) token).get(CoreAnnotations.ForcedSentenceEndAnnotation.class);
        return forced != null && forced.booleanValue();
    }

    /**
     * Splits the tokens of a document into sentences and returns them in a new document obtained
     * from {@code document.blankDocument()}.
     *
     * @param document the document whose tokens are to be grouped
     * @return a document whose elements are the sentences produced by {@link #process(List)}
     */
    public <L, F> Document<L, F, List<IN>> processDocument(Document<L, F, IN> document) {
        Document<L, F, List<IN>> document2 = (Document<L, F, List<IN>>) document.blankDocument();
        document2.addAll(process(document));
        return document2;
    }

    /**
     * Creates a processor whose sentence boundary tokens are a single period or a run of
     * {@code !} and {@code ?} characters, with the default followers and discard set.
     */
    public WordToSentenceProcessor() {
        this("\\.|[!?]+");
    }

    /**
     * Creates a processor with the given boundary token regex and a default set of boundary
     * followers (closing brackets and quotes, including several {@code PTBLexer} constants).
     *
     * @param str regular expression that a whole token must match to end a sentence
     */
    public WordToSentenceProcessor(String str) {
        this(str, Generics.newHashSet(Arrays.asList(")", "]", "\"", "'", PTBLexer.closedblquote, PTBLexer.closeparen, "-RSB-", PTBLexer.closebrace)));
    }

    /**
     * Creates a processor with the given boundary token regex and followers; a newline token
     * ({@code "\n"}) is the only discarded boundary.
     *
     * @param str regular expression that a whole token must match to end a sentence
     * @param set token texts that are attached to the preceding sentence when they follow its end
     */
    public WordToSentenceProcessor(String str, Set<String> set) {
        this(str, set, Collections.singleton("\n"));
    }

    /**
     * Creates a processor with the given boundary token regex, followers and discarded boundary
     * tokens, and no sentence region patterns.
     *
     * @param str  regular expression that a whole token must match to end a sentence
     * @param set  token texts that are attached to the preceding sentence when they follow its end
     * @param set2 literal token texts that end a sentence and are dropped
     */
    public WordToSentenceProcessor(String str, Set<String> set, Set<String> set2) {
        this(str, set, set2, null, null);
    }

    /**
     * Creates a processor driven by sentence region patterns. The boundary token pattern is the
     * empty regex (which only matches an empty token text), and there are no followers or
     * discarded boundaries.
     *
     * @param pattern  pattern of the token that starts a region; may be {@code null}
     * @param pattern2 pattern of the token that ends a region; may be {@code null}
     */
    public WordToSentenceProcessor(Pattern pattern, Pattern pattern2) {
        this("", Collections.<String>emptySet(), Collections.<String>emptySet(), pattern, pattern2);
    }

    /** Common initialisation for all public constructors. */
    private WordToSentenceProcessor(String boundaryTokenRegex, Set<String> boundaryFollowers, Set<String> boundaryToDiscard, Pattern regionBeginPattern, Pattern regionEndPattern) {
        this.sentenceBoundaryTokenPattern = Pattern.compile(boundaryTokenRegex);
        this.sentenceBoundaryFollowers = boundaryFollowers;
        // Initialise the field directly rather than through the overridable public setter.
        this.sentenceBoundaryToDiscard = compileLiteralPatterns(boundaryToDiscard);
        this.sentenceRegionBeginPattern = regionBeginPattern;
        this.sentenceRegionEndPattern = regionEndPattern;
    }
}
