/*
 * Decompiled with CFR 0.152.
 */
package org.apache.ctakes.core.ae;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.sentdetect.DefaultSDContextGenerator;
import opennlp.tools.sentdetect.SentenceDetectorFactory;
import opennlp.tools.sentdetect.SentenceDetectorME;
import opennlp.tools.sentdetect.SentenceModel;
import opennlp.tools.sentdetect.SentenceSampleStream;
import opennlp.tools.util.InputStreamFactory;
import opennlp.tools.util.MarkableFileInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.TrainingParameters;
import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.ctakes.core.resource.FileLocator;
import org.apache.ctakes.core.sentence.EndOfSentenceScannerImpl;
import org.apache.ctakes.core.sentence.SentenceDetectorCtakes;
import org.apache.ctakes.core.sentence.SentenceSpan;
import org.apache.ctakes.typesystem.type.textspan.Segment;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;

@PipeBitInfo(name="Sentence Detector", description="Annotates Sentences based upon an OpenNLP model.", dependencies={PipeBitInfo.TypeProduct.SECTION}, products={PipeBitInfo.TypeProduct.SENTENCE})
public class SentenceDetector
extends JCasAnnotator_ImplBase {
    public static final String PARAM_SEGMENTS_TO_SKIP = "SegmentsToSkip";
    @ConfigurationParameter(name="SegmentsToSkip", mandatory=false, description="Set of segments that can be skipped")
    private String[] skipSegmentsArray;
    private Set<String> skipSegmentsSet;
    public static final String PARAM_SD_MODEL_FILE = "SentenceModelFile";
    public static final String SD_MODEL_FILE_PARAM = "SentenceModelFile";
    @ConfigurationParameter(name="SentenceModelFile", description="Path to sentence detector model file", defaultValue={"org/apache/ctakes/core/models/sentdetect/sd-med-model.zip"})
    private String sdModelPath;
    private SentenceModel sdmodel;
    private SentenceDetectorCtakes sentenceDetector;
    private String NEWLINE = "\n";
    private Logger logger = Logger.getLogger((String)((Object)((Object)this)).getClass().getName());

    public void initialize(UimaContext aContext) throws ResourceInitializationException {
        super.initialize(aContext);
        try (InputStream is = FileLocator.getAsStream(this.sdModelPath);){
            this.logger.info((Object)("Sentence detector model file: " + this.sdModelPath));
            this.sdmodel = new SentenceModel(is);
            EndOfSentenceScannerImpl eoss = new EndOfSentenceScannerImpl();
            DefaultSDContextGenerator cg = new DefaultSDContextGenerator(eoss.getEndOfSentenceCharacters());
            this.sentenceDetector = new SentenceDetectorCtakes(this.sdmodel.getMaxentModel(), cg, eoss);
            this.skipSegmentsSet = new HashSet<String>();
            if (this.skipSegmentsArray != null) {
                Collections.addAll(this.skipSegmentsSet, this.skipSegmentsArray);
            }
        }
        catch (IOException e) {
            e.printStackTrace();
            throw new ResourceInitializationException((Throwable)e);
        }
    }

    public void process(JCas jcas) throws AnalysisEngineProcessException {
        this.logger.info((Object)"Starting processing.");
        int sentenceCount = 0;
        String text = jcas.getDocumentText();
        Collection segments = JCasUtil.select((JCas)jcas, Segment.class);
        for (Segment segment : segments) {
            String sectionID = segment.getId();
            if (this.skipSegmentsSet.contains(sectionID)) continue;
            sentenceCount = this.annotateRange(jcas, text, segment, sentenceCount);
        }
    }

    protected int annotateRange(JCas jcas, String text, Segment section, int sentenceCount) {
        String coveredText;
        int b = section.getBegin();
        int e = section.getEnd();
        int[] sentenceBreaks = this.sentenceDetector.sentPosDetect(text.substring(b, e));
        int numSentences = sentenceBreaks.length;
        SentenceSpan[] potentialSentSpans = new SentenceSpan[numSentences + 1];
        int sentStart = b;
        int sentEnd = b;
        for (int i = 0; i < numSentences; ++i) {
            sentEnd = sentenceBreaks[i] + b;
            String coveredText2 = text.substring(sentStart, sentEnd);
            potentialSentSpans[i] = new SentenceSpan(sentStart, sentEnd, coveredText2);
            sentStart = sentEnd;
        }
        if (sentEnd < e && (coveredText = text.substring(sentEnd, e)).trim() != "") {
            potentialSentSpans[numSentences] = new SentenceSpan(sentEnd, e, coveredText);
            ++numSentences;
        }
        ArrayList<SentenceSpan> sentenceSpans = new ArrayList<SentenceSpan>(0);
        for (int i = 0; i < potentialSentSpans.length; ++i) {
            if (potentialSentSpans[i] == null) continue;
            sentenceSpans.addAll(potentialSentSpans[i].splitAtLineBreaksAndTrim(this.NEWLINE));
        }
        int previousEnd = -1;
        for (int i = 0; i < sentenceSpans.size(); ++i) {
            SentenceSpan span = (SentenceSpan)sentenceSpans.get(i);
            if (span.getStart() == span.getEnd()) continue;
            Sentence sa = new Sentence(jcas);
            sa.setBegin(span.getStart());
            sa.setEnd(span.getEnd());
            if (previousEnd <= sa.getBegin()) {
                sa.setSentenceNumber(sentenceCount);
                sa.addToIndexes();
                ++sentenceCount;
                previousEnd = span.getEnd();
                continue;
            }
            this.logger.error((Object)("Skipping sentence from " + span.getStart() + " to " + span.getEnd()));
            this.logger.error((Object)("Overlap with previous sentence that ended at " + previousEnd));
        }
        return sentenceCount;
    }

    public static AnalysisEngineDescription createAnnotatorDescription() throws ResourceInitializationException {
        return AnalysisEngineFactory.createEngineDescription(SentenceDetector.class, (Object[])new Object[0]);
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    public static void main(String[] args) throws IOException {
        Logger logger = Logger.getLogger((String)(SentenceDetector.class.getName() + ".main()"));
        if (args.length < 2 || args.length > 4) {
            SentenceDetector.usage(logger);
            System.exit(-1);
        }
        File inFile = SentenceDetector.getReadableFile(args[0]);
        File outFile = SentenceDetector.getFileInExistingDir(args[1]);
        int iters = 100;
        if (args.length > 2) {
            iters = SentenceDetector.parseInt(args[2], logger);
        }
        int cut = 5;
        if (args.length > 3) {
            cut = SentenceDetector.parseInt(args[3], logger);
        }
        EndOfSentenceScannerImpl scanner = new EndOfSentenceScannerImpl();
        int numEosc = scanner.getEndOfSentenceCharacters().length;
        logger.info((Object)("Training new model from " + inFile.getAbsolutePath()));
        logger.info((Object)("Using " + numEosc + " end of sentence characters."));
        Charset charset = Charset.forName("UTF-8");
        SentenceModel mod = null;
        MarkableFileInputStreamFactory mfisf = new MarkableFileInputStreamFactory(inFile);
        try (PlainTextByLineStream lineStream = new PlainTextByLineStream((InputStreamFactory)mfisf, charset);){
            SentenceSampleStream sampleStream = new SentenceSampleStream((ObjectStream)lineStream);
            TrainingParameters mlParams = new TrainingParameters();
            mlParams.put("Algorithm", "MAXENT");
            mlParams.put("Iterations", Integer.toString(iters));
            mlParams.put("Cutoff", Integer.toString(cut));
            Dictionary dict = new Dictionary();
            SentenceDetectorFactory sdFactory = new SentenceDetectorFactory("en", true, dict, null);
            try {
                mod = SentenceDetectorME.train((String)"en", (ObjectStream)sampleStream, (SentenceDetectorFactory)sdFactory, (TrainingParameters)mlParams);
            }
            finally {
                sampleStream.close();
            }
        }
        var12_12 = null;
        try (FileOutputStream outStream = new FileOutputStream(outFile);){
            logger.info((Object)("Saving the model as: " + outFile.getAbsolutePath()));
            mod.serialize((OutputStream)outStream);
        }
        catch (Throwable throwable) {
            var12_12 = throwable;
            throw throwable;
        }
    }

    public static void usage(Logger log) {
        log.info((Object)("Usage: java " + SentenceDetector.class.getName() + " training_data_filename name_of_model_to_create <iters> <cut>"));
    }

    public static int parseInt(String s, Logger log) {
        try {
            return Integer.parseInt(s);
        }
        catch (NumberFormatException nfe) {
            log.error((Object)("Unable to parse '" + s + "' as an integer."));
            throw nfe;
        }
    }

    public static File getReadableFile(String fn) throws IOException {
        File f = new File(fn);
        if (!f.canRead()) {
            throw new IOException("Unable to read from file " + f.getAbsolutePath());
        }
        return f;
    }

    public static File getFileInExistingDir(String fn) throws IOException {
        File f = new File(fn);
        File parent = f.getAbsoluteFile().getParentFile();
        if (!parent.isDirectory()) {
            throw new IOException("Directory not found: " + f.getParentFile().getAbsolutePath());
        }
        return f;
    }
}

