/*
 * Decompiled with CFR 0.152.
 */
package org.apache.ctakes.ytex.uima.annotators;

import com.google.common.base.Strings;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import opennlp.model.MaxentModel;
import opennlp.tools.sentdetect.DefaultSDContextGenerator;
import opennlp.tools.sentdetect.EndOfSentenceScanner;
import opennlp.tools.sentdetect.SDContextGenerator;
import opennlp.tools.sentdetect.SentenceModel;
import opennlp.tools.util.InvalidFormatException;
import org.apache.ctakes.core.resource.FileLocator;
import org.apache.ctakes.core.sentence.EndOfSentenceScannerImpl;
import org.apache.ctakes.core.sentence.SentenceDetectorCtakes;
import org.apache.ctakes.core.util.ParamUtil;
import org.apache.ctakes.typesystem.type.textspan.Segment;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.ctakes.ytex.uima.annotators.SentenceSpan;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.JFSIndexRepository;
import org.apache.uima.resource.ResourceAccessException;
import org.apache.uima.resource.ResourceInitializationException;

public class SentenceDetector
extends JCasAnnotator_ImplBase {
    public static final String PARAM_SEGMENTS_TO_SKIP = "SegmentsToSkip";
    private Logger logger = Logger.getLogger((String)((Object)((Object)this)).getClass().getName());
    public static final String SD_MODEL_FILE_PARAM = "SentenceModelFile";
    private SentenceModel sdmodel;
    public static final String PARAGRAPH_PATTERN = "(?m):\\r{0,1}\\n|\\r{0,1}\\n\\r{0,1}\\n";
    public static final String ACRONYM_PATTERN = "(?m)Dr\\z|Ms\\z|Mr\\z|Mrs\\z|Ms\\z|\\p{Upper}\\z";
    public static final String PERIOD_PATTERN = "(?m)\\A\\s+\\p{Upper}|\\A\\s+\\d\\.";
    public static final String SPLIT_PATTERN = "(?im)\\n[\\(\\[]\\s*[yesxno]{0,3}\\s*[\\)\\]]|[\\(\\[]\\s*[yesxno]{0,3}\\s*[\\)\\]]\\s*\\r{0,1}\\n|^[^:\\r\\n]{3,20}\\:[^\\r\\n]{3,20}$";
    private Pattern paragraphPattern;
    private Pattern splitPattern;
    private Pattern periodPattern;
    private Pattern acronymPattern;
    private UimaContext context;
    private Set<?> skipSegmentsSet;
    private SentenceDetectorCtakes sentenceDetector;
    private String NEWLINE = "\n";
    private int sentenceCount = 0;

    public void initialize(UimaContext aContext) throws ResourceInitializationException {
        super.initialize(aContext);
        this.logger.info(Arrays.asList(aContext.getConfigParameterNames()));
        this.context = aContext;
        try {
            this.configInit();
        }
        catch (Exception ace) {
            throw new ResourceInitializationException((Throwable)ace);
        }
    }

    private void configInit() throws ResourceAccessException, InvalidFormatException, IOException {
        String sdModelPath = (String)this.context.getConfigParameterValue(SD_MODEL_FILE_PARAM);
        InputStream is = FileLocator.getAsStream((String)sdModelPath);
        this.logger.info((Object)("Sentence detector model file: " + sdModelPath));
        this.sdmodel = new SentenceModel(is);
        is.close();
        EndOfSentenceScannerImpl eoss = new EndOfSentenceScannerImpl();
        char[] eosc = eoss.getEndOfSentenceCharacters();
        DefaultSDContextGenerator cg = new DefaultSDContextGenerator(eosc);
        this.sentenceDetector = new SentenceDetectorCtakes((MaxentModel)this.sdmodel.getMaxentModel(), (SDContextGenerator)cg, (EndOfSentenceScanner)eoss);
        this.skipSegmentsSet = ParamUtil.getStringParameterValuesSet((String)PARAM_SEGMENTS_TO_SKIP, (UimaContext)this.context);
        this.paragraphPattern = this.compilePatternCheck("paragraphPattern", PARAGRAPH_PATTERN);
        this.splitPattern = this.compilePatternCheck("splitPattern", SPLIT_PATTERN);
        this.periodPattern = this.compilePatternCheck("periodPattern", PERIOD_PATTERN);
        this.acronymPattern = this.compilePatternCheck("acronymPattern", ACRONYM_PATTERN);
    }

    private Pattern compilePatternCheck(String patternKey, String patternDefault) {
        String strPattern = (String)this.context.getConfigParameterValue(patternKey);
        if (strPattern == null) {
            strPattern = patternDefault;
        }
        Pattern pat = null;
        try {
            pat = Strings.isNullOrEmpty((String)strPattern) ? null : Pattern.compile(strPattern);
        }
        catch (PatternSyntaxException pse) {
            this.logger.warn((Object)("ignoring bad pattern, reverting to default: " + strPattern), (Throwable)pse);
            pat = Pattern.compile(patternDefault);
        }
        return pat;
    }

    public void process(JCas jcas) throws AnalysisEngineProcessException {
        this.logger.info((Object)"Starting processing.");
        this.sentenceCount = 0;
        String text = jcas.getDocumentText();
        JFSIndexRepository indexes = jcas.getJFSIndexRepository();
        for (Segment sa : indexes.getAnnotationIndex(Segment.type)) {
            String sectionID = sa.getId();
            if (this.skipSegmentsSet.contains(sectionID)) continue;
            this.sentenceCount = this.annotateParagraph(jcas, text, sa.getBegin(), sa.getEnd(), this.sentenceCount);
        }
    }

    protected int annotateParagraph(JCas jcas, String text, int b, int e, int sentenceCount) throws AnalysisEngineProcessException {
        if (this.paragraphPattern == null) {
            return this.annotateRange(jcas, text, b, e, sentenceCount);
        }
        int lastEnd = b;
        Matcher m = this.paragraphPattern.matcher(text);
        while (m.find()) {
            if (m.end() > b && m.end() < e) {
                sentenceCount = this.annotateRange(jcas, text, lastEnd, m.end(), sentenceCount);
                lastEnd = m.end();
                continue;
            }
            if (m.end() < e) continue;
        }
        sentenceCount = this.annotateRange(jcas, text, lastEnd, e, sentenceCount);
        return sentenceCount;
    }

    protected int annotateRange(JCas jcas, String text, int b, int e, int sentenceCount) throws AnalysisEngineProcessException {
        String coveredText;
        int[] sentenceBreaks = this.sentenceDetector.sentPosDetect(text.substring(b, e));
        int numSentences = sentenceBreaks.length;
        SentenceSpan[] potentialSentSpans = new SentenceSpan[numSentences + 1];
        int sentStart = b;
        int sentEnd = b;
        for (int i = 0; i < numSentences; ++i) {
            sentEnd = sentenceBreaks[i] + b;
            String coveredText2 = text.substring(sentStart, sentEnd);
            potentialSentSpans[i] = new SentenceSpan(sentStart, sentEnd, coveredText2);
            sentStart = sentEnd;
        }
        if (sentEnd < e && (coveredText = text.substring(sentEnd, e)).trim() != "") {
            potentialSentSpans[numSentences] = new SentenceSpan(sentEnd, e, coveredText);
            ++numSentences;
        }
        ArrayList<SentenceSpan> sentenceSpans1 = new ArrayList<SentenceSpan>(0);
        for (int i = 0; i < potentialSentSpans.length; ++i) {
            if (potentialSentSpans[i] == null) continue;
            sentenceSpans1.addAll(potentialSentSpans[i].splitAtLineBreaksAndTrim(this.NEWLINE));
        }
        ArrayList<SentenceSpan> sentenceSpans = new ArrayList<SentenceSpan>(sentenceSpans1.size());
        for (SentenceSpan span : sentenceSpans1) {
            if (span == null) continue;
            sentenceSpans.addAll(span.splitAtPeriodAndTrim(this.acronymPattern, this.periodPattern, this.splitPattern));
        }
        int previousEnd = -1;
        for (int i = 0; i < sentenceSpans.size(); ++i) {
            SentenceSpan span = (SentenceSpan)sentenceSpans.get(i);
            if (span.getStart() == span.getEnd()) continue;
            Sentence sa = new Sentence(jcas);
            sa.setBegin(span.getStart());
            sa.setEnd(span.getEnd());
            if (previousEnd <= sa.getBegin()) {
                sa.setSentenceNumber(sentenceCount);
                sa.addToIndexes();
                ++sentenceCount;
                previousEnd = span.getEnd();
                continue;
            }
            this.logger.error((Object)("Skipping sentence from " + span.getStart() + " to " + span.getEnd()));
            this.logger.error((Object)("Overlap with previous sentence that ended at " + previousEnd));
        }
        return sentenceCount;
    }

    public static void main(String[] args) throws IOException {
        Logger logger = Logger.getLogger((String)(SentenceDetector.class.getName() + ".main()"));
        if (args.length < 2 || args.length > 4) {
            SentenceDetector.usage(logger);
            System.exit(-1);
        }
        File inFile = SentenceDetector.getReadableFile(args[0]);
        File outFile = SentenceDetector.getFileInExistingDir(args[1]);
        int iters = 100;
        if (args.length > 2) {
            iters = SentenceDetector.parseInt(args[2], logger);
        }
        int cut = 5;
        if (args.length > 3) {
            cut = SentenceDetector.parseInt(args[3], logger);
        }
        EndOfSentenceScannerImpl scanner = new EndOfSentenceScannerImpl();
        int numEosc = scanner.getEndOfSentenceCharacters().length;
        logger.info((Object)("Training new model from " + inFile.getAbsolutePath()));
        logger.info((Object)("Using " + numEosc + " end of sentence characters."));
        logger.error((Object)"----------------------------------------------------------------------------------");
        logger.error((Object)"Need to update yet for OpenNLP changes ");
        logger.error((Object)"Commented out code that no longer compiles due to OpenNLP API incompatible changes");
        logger.error((Object)"----------------------------------------------------------------------------------");
    }

    public static void usage(Logger log) {
        log.info((Object)("Usage: java " + SentenceDetector.class.getName() + " training_data_filename name_of_model_to_create <iters> <cut>"));
    }

    public static int parseInt(String s, Logger log) {
        try {
            return Integer.parseInt(s);
        }
        catch (NumberFormatException nfe) {
            log.error((Object)("Unable to parse '" + s + "' as an integer."));
            throw nfe;
        }
    }

    public static File getReadableFile(String fn) throws IOException {
        File f = new File(fn);
        if (!f.canRead()) {
            throw new IOException("Unable to read from file " + f.getAbsolutePath());
        }
        return f;
    }

    public static File getFileInExistingDir(String fn) throws IOException {
        File f = new File(fn);
        if (!f.getParentFile().isDirectory()) {
            throw new IOException("Directory not found: " + f.getParentFile().getAbsolutePath());
        }
        return f;
    }
}

