/*
 * Decompiled with CFR 0.152.
 */
package org.apache.ctakes.core.ae;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.ctakes.core.util.Pair;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.log4j.Logger;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;

@PipeBitInfo(name="End of Line Sentence Splitter", description="Re-annotates Sentences based upon short lines, preventing a Sentence from spanning over an intentional line break.", dependencies={PipeBitInfo.TypeProduct.SENTENCE})
public final class EolSentenceFixer
extends JCasAnnotator_ImplBase {
    private static final Logger LOGGER = Logger.getLogger((String)"EolSentenceFixer");
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    public void process(JCas jcas) throws AnalysisEngineProcessException {
        LOGGER.info((Object)"Adjusting Sentences for intentional line breaks ...");
        EolSentenceFixer.adjustEolSentences(jcas);
        LOGGER.info((Object)"Finished Processing");
    }

    private static void adjustEolSentences(JCas jCas) {
        char[] docChars = jCas.getDocumentText().toCharArray();
        ArrayList<Pair<Integer>> lineBounds = new ArrayList<Pair<Integer>>();
        ArrayList<Integer> followingWordLengths = new ArrayList<Integer>();
        int maxLength = 0;
        int lineBegin = 0;
        boolean inBreak = false;
        boolean inFirstWord = false;
        for (int i = 0; i < docChars.length; ++i) {
            if (docChars[i] == '\r' || docChars[i] == '\n') {
                if (inBreak) continue;
                int length = i - lineBegin;
                if (inFirstWord) {
                    followingWordLengths.add(length);
                    inFirstWord = false;
                }
                if (length > 0) {
                    lineBounds.add(new Pair<Integer>(lineBegin, i));
                    maxLength = Math.max(length, maxLength);
                }
                inBreak = true;
                continue;
            }
            if (inBreak) {
                lineBegin = i;
                inBreak = false;
                inFirstWord = true;
            }
            if (!inFirstWord || docChars[i] != ' ' && docChars[i] != '\t' || i - lineBegin <= 0) continue;
            followingWordLengths.add(i - lineBegin);
            inFirstWord = false;
        }
        if (inFirstWord) {
            int length = docChars.length - lineBegin;
            followingWordLengths.add(length);
        }
        EolSentenceFixer.adjustEolSentences(jCas, lineBounds, followingWordLengths, maxLength);
    }

    private static void adjustEolSentences(JCas jCas, List<Pair<Integer>> lineBounds, List<Integer> followingWordLengths, int maxLength) {
        ArrayList<Sentence> allSentences = new ArrayList<Sentence>(JCasUtil.select((JCas)jCas, Sentence.class));
        allSentences.sort(Comparator.comparingInt(Annotation::getBegin));
        int nextLineBounds = 0;
        HashMap<Sentence, Collection> sentenceCrossBounds = new HashMap<Sentence, Collection>();
        block0: for (Sentence sentence : allSentences) {
            for (int i = nextLineBounds; i < lineBounds.size() - 1; ++i) {
                Pair<Integer> lineBound = lineBounds.get(i);
                if (lineBound.getValue2() < sentence.getBegin()) continue;
                if (lineBound.getValue2() >= sentence.getEnd()) {
                    nextLineBounds = i;
                    continue block0;
                }
                int lineLength = lineBound.getValue2() - lineBound.getValue1();
                if (lineLength + followingWordLengths.get(i) >= maxLength) continue;
                Collection crossBounds = sentenceCrossBounds.computeIfAbsent(sentence, s -> new HashSet());
                sentenceCrossBounds.put(sentence, crossBounds);
                crossBounds.add(sentence.getBegin());
                crossBounds.add(sentence.getEnd());
                crossBounds.add(lineBound.getValue2());
                Pair<Integer> nextLineBound = lineBounds.get(i + 1);
                crossBounds.add(Math.min(sentence.getEnd(), nextLineBound.getValue1()));
            }
        }
        for (Map.Entry entry : sentenceCrossBounds.entrySet()) {
            ArrayList sortedBounds = new ArrayList((Collection)entry.getValue());
            Collections.sort(sortedBounds);
            for (int i = 0; i < sortedBounds.size() - 1; ++i) {
                String sentenceText = jCas.getDocumentText().substring((Integer)sortedBounds.get(i), (Integer)sortedBounds.get(i + 1));
                if (WHITESPACE.matcher(sentenceText).replaceAll(" ").trim().length() <= 0) continue;
                Sentence sentence = new Sentence(jCas, ((Integer)sortedBounds.get(i)).intValue(), ((Integer)sortedBounds.get(i + 1)).intValue());
                sentence.addToIndexes();
            }
            ((Sentence)entry.getKey()).removeFromIndexes();
            jCas.removeFsFromIndexes((FeatureStructure)entry.getKey());
        }
    }
}

