/*
 * Decompiled with CFR 0.152.
 */
package org.apache.ctakes.core.ae;

import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.log4j.Logger;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.TOP;
import org.apache.uima.jcas.tcas.Annotation;

/**
 * Re-joins {@link Sentence} annotations that were split directly after an abbreviation
 * such as "Mr.", "Mrs.", "Dr.", "St.", "a.m." or "p.m.".
 *
 * <p>A sentence whose covered text ends with one of those abbreviations is merged with the
 * sentence(s) that follow it, unless it is the last sentence of the document or the character
 * immediately after it is a line break. The merged span is indexed as a new {@link Sentence},
 * the sentences it replaces are removed from the indexes, and finally every remaining sentence
 * is renumbered starting at 1 in order of begin offset.
 */
@PipeBitInfo(name="MrsDrSentenceJoiner", description="Joins Sentences with person titles Mr. Mrs. Dr. that have been split by SentenceDetectorBIO.", dependencies={PipeBitInfo.TypeProduct.SENTENCE}, role=PipeBitInfo.Role.SPECIAL)
public final class MrsDrSentenceJoiner
extends JCasAnnotator_ImplBase {
    private static final Logger LOGGER = Logger.getLogger("MrsDrSentenceJoiner");

    /**
     * Suffixes that mark a sentence as prematurely split. Each abbreviation must be preceded by
     * a space or a newline; note that "St." is only recognised after a space.
     */
    private static final String[] JOIN_SUFFIXES = {
        " Mr.", " Mrs.", " Dr.", " St.", " a.m.", " p.m.",
        "\nMr.", "\nMrs.", "\nDr.", "\na.m.", "\np.m."
    };

    /** Abbreviations that mark a prematurely split sentence when they are the entire sentence text. */
    private static final String[] JOIN_WHOLE_TEXTS = {"Mr.", "Mrs.", "Dr.", "a.m.", "p.m."};

    /**
     * Joins sentences that end with a known abbreviation to the sentence(s) following them and
     * renumbers all sentences of the document.
     *
     * @param jCas the CAS whose {@link Sentence} annotations are inspected and rewritten
     * @throws AnalysisEngineProcessException declared by the annotator contract; not thrown directly here
     */
    public void process(JCas jCas) throws AnalysisEngineProcessException {
        LOGGER.info("Joining Sentences ending with person titles (Mr. Mrs. Dr.) ...");
        String documentText = jCas.getDocumentText();
        if (documentText == null) {
            // Without document text there is no sentence text to inspect, so there is nothing to join.
            return;
        }
        // Work on a sorted snapshot so that adding and removing annotations below cannot disturb iteration.
        List<Sentence> sentences = JCasUtil.select(jCas, Sentence.class).stream()
            .sorted(Comparator.comparingInt(Annotation::getBegin))
            .collect(Collectors.toList());
        int lastIndex = sentences.size() - 1;
        HashSet<Sentence> replacedSentences = new HashSet<>();
        boolean joinPending = false;
        int joinedBegin = 0;
        for (int i = 0; i <= lastIndex; ++i) {
            Sentence sentence = sentences.get(i);
            // The last sentence has nothing to be joined with, and a line break right after the
            // abbreviation is treated as a genuine sentence boundary.
            boolean continuesIntoNext = i < lastIndex
                && endsWithJoinableAbbreviation(sentence.getCoveredText())
                && !isLineBreakAt(documentText, sentence.getEnd());
            if (continuesIntoNext) {
                if (!joinPending) {
                    // First sentence of a run: remember where the joined sentence has to start.
                    joinedBegin = sentence.getBegin();
                    joinPending = true;
                }
                replacedSentences.add(sentence);
            } else if (joinPending) {
                // This sentence closes the run started earlier.
                // NOTE(review): only the span is carried over to the joined Sentence; confirm that no
                // other feature of the replaced sentences needs to be copied.
                new Sentence(jCas, joinedBegin, sentence.getEnd()).addToIndexes();
                replacedSentences.add(sentence);
                joinPending = false;
            }
        }
        replacedSentences.forEach(TOP::removeFromIndexes);
        // Renumber everything that is left (originals and joined sentences) starting at 1.
        AtomicInteger sentenceNumber = new AtomicInteger(0);
        JCasUtil.select(jCas, Sentence.class).stream()
            .sorted(Comparator.comparingInt(Annotation::getBegin))
            .forEach(s -> s.setSentenceNumber(sentenceNumber.incrementAndGet()));
    }

    /**
     * Tells whether sentence text ends with (or consists solely of) one of the abbreviations
     * after which a sentence split is considered wrong.
     *
     * @param text covered text of a sentence
     * @return true if the text matches one of {@link #JOIN_SUFFIXES} or {@link #JOIN_WHOLE_TEXTS}
     */
    private static boolean endsWithJoinableAbbreviation(String text) {
        for (String suffix : JOIN_SUFFIXES) {
            if (text.endsWith(suffix)) {
                return true;
            }
        }
        for (String whole : JOIN_WHOLE_TEXTS) {
            if (text.equals(whole)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tells whether the character at the given offset is a carriage return or a line feed.
     *
     * @param text   the document text
     * @param offset character offset to test; offsets outside the text yield false
     * @return true if {@code text} has a '\r' or '\n' at {@code offset}
     */
    private static boolean isLineBreakAt(String text, int offset) {
        if (offset < 0 || offset >= text.length()) {
            return false;
        }
        char c = text.charAt(offset);
        return c == '\r' || c == '\n';
    }

    /**
     * Tells whether the text ends with "CM." or "cm." and has a digit either directly before
     * that suffix or one character earlier (e.g. "3cm." or "3 cm."). Texts of four characters
     * or fewer never match.
     *
     * <p>Not called from anywhere in this class at present.
     *
     * @param text text to test
     * @return true if the text ends with a centimeter abbreviation preceded by a digit
     */
    private static boolean isCM(String text) {
        if (text.length() <= 4 || !(text.endsWith("CM.") || text.endsWith("cm."))) {
            return false;
        }
        int suffixStart = text.length() - 3;
        return Character.isDigit(text.charAt(suffixStart - 1))
            || Character.isDigit(text.charAt(suffixStart - 2));
    }
}

