/*
 * Decompiled with CFR 0.152.
 */
package org.apache.ctakes.core.ae;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.ctakes.core.util.Pair;
import org.apache.ctakes.core.util.regex.RegexSpanFinder;
import org.apache.ctakes.core.util.regex.TimeoutMatcher;
import org.apache.ctakes.typesystem.type.textspan.Segment;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;

@PipeBitInfo(name="Regex Sectionizer (A)", description="Annotates Document Sections by detecting Section Headers using Regular Expressions.", products={PipeBitInfo.TypeProduct.SECTION})
public abstract class RegexSectionizer
extends JCasAnnotator_ImplBase {
    /** Logger for this annotator; the category name is the literal string "RegexSectionizer". */
    private static final Logger LOGGER = Logger.getLogger("RegexSectionizer");

    /** Name of the configuration parameter that toggles divider-line detection. */
    public static final String PARAM_TAG_DIVIDERS = "TagDividers";

    /** When true, {@link #process(JCas)} also treats divider lines as section boundaries. */
    @ConfigurationParameter(name=PARAM_TAG_DIVIDERS, description="True if lines of divider characters ____ , ---- , === should divide sections", defaultValue={"true"}, mandatory=false)
    private boolean _tagDividers = true;

    /** Id and preferred text given to segments that are not introduced by a header tag. */
    private static final String DEFAULT_SEGMENT_ID = "SIMPLE_SEGMENT";

    /** Name of the optional named capture group that supplies a section's display name. */
    private static final String SECTION_NAME_EX = "SECTION_NAME";

    /** Name and type name used for the shared divider-line tag. */
    public static final String DIVIDER_LINE_NAME = "DIVIDER_LINE";

    /**
     * Matches a line consisting of optional tabs/spaces around four or more of {@code _}, {@code -} or {@code =}.
     * MULTILINE is required so that {@code ^} and {@code $} anchor at every line of the document;
     * without it they anchor only at the start and end of the whole input, so dividers inside a
     * document are never found.
     */
    private static final Pattern DIVIDER_LINE_PATTERN = Pattern.compile("^[\\t ]*[_\\-=]{4,}[\\t ]*$", Pattern.MULTILINE);

    /** Single tag instance used for every divider line. */
    protected static final SectionTag LINE_DIVIDER_TAG = new SectionTag(DIVIDER_LINE_NAME, DIVIDER_LINE_NAME, TagType.DIVIDER);

    /** Monitor held while section types are loaded in {@link #initialize(UimaContext)}. */
    private static final Object SECTION_TYPE_LOCK = new Object();

    /** Registered section types keyed by section type name; shared by all instances. */
    private static final Map<String, SectionType> _sectionTypes = new HashMap<>();

    /** Set once {@link #loadSections()} has completed, so that loading happens a single time. */
    private static volatile boolean _sectionsLoaded = false;

    /**
     * Tells whether the section type registered under the given id is flagged for parsing.
     *
     * @param segmentId section type name to look up
     * @return the parse flag of the registered type, or that of the default type when the id is not registered
     */
    public static boolean shouldParseSegment(String segmentId) {
        return _sectionTypes.getOrDefault(segmentId, SectionType.DEFAULT_TYPE).__shouldParse;
    }

    /**
     * Registers a section type under its own name, replacing any type previously stored under that name.
     *
     * @param sectionType type to register
     */
    protected static void addSectionType(SectionType sectionType) {
        final String typeName = sectionType.__name;
        _sectionTypes.put(typeName, sectionType);
    }

    /**
     * @return a read-only view of the registered section types, keyed by type name
     */
    public static Map<String, SectionType> getSectionTypes() {
        final Map<String, SectionType> readOnlyView = Collections.unmodifiableMap(_sectionTypes);
        return readOnlyView;
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    /**
     * Initializes the annotator and, the first time any instance gets here, loads the section types.
     * Loading happens while holding {@code SECTION_TYPE_LOCK} and is skipped once {@code _sectionsLoaded} is set.
     *
     * @param context UIMA context handed to the superclass initializer
     * @throws ResourceInitializationException if the superclass initializer or {@link #loadSections()} fails
     */
    public void initialize(UimaContext context) throws ResourceInitializationException {
        LOGGER.info("Initializing ...");
        super.initialize(context);
        synchronized (SECTION_TYPE_LOCK) {
            if (_sectionsLoaded) {
                return;
            }
            loadSections();
            _sectionsLoaded = true;
        }
    }

    /**
     * Finds header, footer and (optionally) divider-line tags in the document text and creates
     * the corresponding segments. Does nothing when no section types are registered.
     *
     * @param jcas CAS holding the document text; receives the created segments
     * @throws AnalysisEngineProcessException declared by the annotator contract
     */
    public void process(JCas jcas) throws AnalysisEngineProcessException {
        LOGGER.info("Finding Sections ...");
        if (_sectionTypes.isEmpty()) {
            LOGGER.info("Finished processing, no section types defined");
            return;
        }
        final String text = jcas.getDocumentText();

        final Map<Pair<Integer>, SectionTag> headers = findHeaderTags(text);
        if (headers.isEmpty()) {
            LOGGER.debug("No section headers found");
        }
        // Drop header spans that lie entirely inside another header span.
        final Collection<Pair<Integer>> nestedHeaders = getSubsumedBounds(headers.keySet());
        headers.keySet().removeAll(nestedHeaders);

        final Map<Pair<Integer>, SectionTag> footers = findFooterTags(text);

        final Map<Pair<Integer>, SectionTag> dividers = new HashMap<>();
        if (_tagDividers) {
            dividers.putAll(findDividerLines(text));
        }

        createSegments(jcas, headers, footers, dividers);
    }

    /**
     * Collects every span that is fully contained in another span of the collection.
     * Containment is inclusive at both ends. Each unordered pair is examined once; when the
     * earlier-listed span contains the later one, only the later one is recorded.
     *
     * @param bounds spans as (begin, end) pairs
     * @return the spans found to be contained in some other span
     */
    private static Collection<Pair<Integer>> getSubsumedBounds(Collection<Pair<Integer>> bounds) {
        final List<Pair<Integer>> spans = new ArrayList<>(bounds);
        final Collection<Pair<Integer>> subsumed = new HashSet<>();
        final int count = spans.size();
        for (int outer = 0; outer < count - 1; outer++) {
            final Pair<Integer> first = spans.get(outer);
            for (int inner = outer + 1; inner < count; inner++) {
                final Pair<Integer> second = spans.get(inner);
                if (first.getValue1() <= second.getValue1() && second.getValue2() <= first.getValue2()) {
                    // first contains second
                    subsumed.add(second);
                } else if (second.getValue1() <= first.getValue1() && first.getValue2() <= second.getValue2()) {
                    // second contains first
                    subsumed.add(first);
                }
            }
        }
        return subsumed;
    }

    /**
     * Collects spans whose begin offset falls inside another span (inclusive at both ends).
     * For each unordered pair, the span that starts inside the other is recorded; when the
     * earlier-listed span covers the later one's begin, only the later one is recorded.
     *
     * @param bounds spans as (begin, end) pairs
     * @return the spans found to start inside some other span
     */
    private static Collection<Pair<Integer>> getOverlappedBounds(Collection<Pair<Integer>> bounds) {
        final List<Pair<Integer>> spans = new ArrayList<>(bounds);
        final Collection<Pair<Integer>> overlapped = new HashSet<>();
        final int count = spans.size();
        for (int outer = 0; outer < count - 1; outer++) {
            final Pair<Integer> first = spans.get(outer);
            for (int inner = outer + 1; inner < count; inner++) {
                final Pair<Integer> second = spans.get(inner);
                if (first.getValue1() <= second.getValue1() && second.getValue1() <= first.getValue2()) {
                    // second begins inside first
                    overlapped.add(second);
                } else if (second.getValue1() <= first.getValue1() && first.getValue1() <= second.getValue2()) {
                    // first begins inside second
                    overlapped.add(first);
                }
            }
        }
        return overlapped;
    }

    /**
     * Sorts the spans with {@link PairIntSorter} and removes every span whose begin offset is at
     * or before the end offset of a span sorted ahead of it.
     *
     * @param bounds spans as (begin, end) pairs
     * @return a new sorted list without the removed spans
     */
    private static List<Pair<Integer>> sortAndTrimBounds(Collection<Pair<Integer>> bounds) {
        final List<Pair<Integer>> sorted = new ArrayList<>(bounds);
        sorted.sort(new PairIntSorter());
        final Collection<Pair<Integer>> toRemove = new HashSet<>();
        for (int i = 0; i < sorted.size() - 1; i++) {
            final int currentEnd = sorted.get(i).getValue2();
            for (int j = i + 1; j < sorted.size(); j++) {
                final Pair<Integer> later = sorted.get(j);
                if (later.getValue1() > currentEnd) {
                    // The list is sorted by begin offset, so nothing further can touch the current span.
                    break;
                }
                toRemove.add(later);
            }
        }
        sorted.removeAll(toRemove);
        return sorted;
    }

    /**
     * Loads the section type definitions. Called from {@link #initialize(UimaContext)} while holding
     * the section-type lock, and only while the shared "sections loaded" flag is still false.
     * Presumably implementations register their types through {@code addSectionType} - confirm against subclasses.
     *
     * @throws ResourceInitializationException if the section definitions cannot be loaded
     */
    protected abstract void loadSections() throws ResourceInitializationException;

    /**
     * Runs the header pattern of every registered section type over the text.
     * Types without a header pattern are skipped.
     *
     * @param docText text to search
     * @return header tags keyed by the (begin, end) span of the match
     */
    private static Map<Pair<Integer>, SectionTag> findHeaderTags(String docText) {
        final Map<Pair<Integer>, SectionTag> found = new HashMap<>();
        for (SectionType type : _sectionTypes.values()) {
            final Pattern headerPattern = type.__headerPattern;
            if (headerPattern != null) {
                found.putAll(findSectionTags(docText, type.__name, headerPattern, TagType.HEADER));
            }
        }
        return found;
    }

    /**
     * Runs the footer pattern of every registered section type over the text.
     * Types without a footer pattern are skipped.
     *
     * @param docText text to search
     * @return footer tags keyed by the (begin, end) span of the match
     */
    private static Map<Pair<Integer>, SectionTag> findFooterTags(String docText) {
        final Map<Pair<Integer>, SectionTag> found = new HashMap<>();
        for (SectionType type : _sectionTypes.values()) {
            final Pattern footerPattern = type.__footerPattern;
            if (footerPattern != null) {
                found.putAll(findSectionTags(docText, type.__name, footerPattern, TagType.FOOTER));
            }
        }
        return found;
    }

    /**
     * Finds every match of a tag pattern in the text and turns each one into a {@link SectionTag}.
     * An {@link IllegalArgumentException} raised while matching is logged together with its stack
     * trace; the tags collected up to that point are still returned.
     *
     * @param docText    text to search
     * @param typeName   section type name stored on each tag, also used as the tag name when the match supplies none
     * @param tagPattern pattern to search for; may define a named group {@code SECTION_NAME}
     * @param tagType    kind of tag to create
     * @return tags keyed by the (begin, end) span of their match
     */
    static Map<Pair<Integer>, SectionTag> findSectionTags(String docText, String typeName, Pattern tagPattern, TagType tagType) {
        final Map<Pair<Integer>, SectionTag> sectionTags = new HashMap<>();
        try (TimeoutMatcher finder = new TimeoutMatcher(tagPattern, docText)) {
            for (Matcher tagMatcher = finder.nextMatch(); tagMatcher != null; tagMatcher = finder.nextMatch()) {
                final Pair<Integer> tagBounds = new Pair<Integer>(tagMatcher.start(), tagMatcher.end());
                final String name = resolveSectionName(tagMatcher, typeName);
                sectionTags.put(tagBounds, new SectionTag(name, typeName, tagType));
            }
        }
        catch (IllegalArgumentException iaE) {
            // Pass the throwable as well so the stack trace is not lost.
            LOGGER.error(iaE.getMessage(), iaE);
        }
        return sectionTags;
    }

    /**
     * Returns the text captured by the {@code SECTION_NAME} group of the current match, or the
     * fallback when the group is not defined in the pattern, did not participate, or is empty.
     */
    private static String resolveSectionName(Matcher tagMatcher, String fallback) {
        try {
            final String captured = tagMatcher.group(SECTION_NAME_EX);
            return captured == null || captured.isEmpty() ? fallback : captured;
        }
        catch (IllegalArgumentException noSuchGroup) {
            // Matcher.group(String) throws when the pattern has no group of that name.
            return fallback;
        }
    }

    /**
     * Creates and indexes the document's segments from the header, footer and divider tags.
     * <ul>
     * <li>With no tags at all, a single default segment covering the whole text is created.</li>
     * <li>Non-blank text before the first tag becomes a default segment.</li>
     * <li>Each remaining tag (after {@link #sortAndTrimBounds}) starts a segment that runs from the
     * end of the tag to the begin of the next tag, or to the end of the text for the last tag.
     * If that stretch is non-blank, its leading whitespace is skipped.</li>
     * <li>Segments started by a HEADER tag take the tag's type name as id, the tag's name as preferred
     * text and the text from the tag begin to the segment begin as tag text; all others are default segments.</li>
     * </ul>
     *
     * @param jcas         CAS holding the document text; receives the segments
     * @param headerTags   header tags keyed by span
     * @param footerTags   footer tags keyed by span
     * @param dividerLines divider-line tags keyed by span
     */
    private static void createSegments(JCas jcas, Map<Pair<Integer>, SectionTag> headerTags, Map<Pair<Integer>, SectionTag> footerTags, Map<Pair<Integer>, SectionTag> dividerLines) {
        final String docText = jcas.getDocumentText();
        final Map<Pair<Integer>, SectionTag> sectionTags = new HashMap<>(headerTags.size() + footerTags.size() + dividerLines.size());
        sectionTags.putAll(headerTags);
        sectionTags.putAll(footerTags);
        sectionTags.putAll(dividerLines);
        if (sectionTags.isEmpty()) {
            addDefaultSegment(jcas, 0, docText.length());
            return;
        }
        final List<Pair<Integer>> boundsList = sortAndTrimBounds(sectionTags.keySet());
        final int firstTagBegin = boundsList.get(0).getValue1();
        if (firstTagBegin > 0 && !docText.substring(0, firstTagBegin).trim().isEmpty()) {
            addDefaultSegment(jcas, 0, firstTagBegin);
        }
        final int length = boundsList.size();
        for (int i = 0; i < length; i++) {
            final Pair<Integer> tagBounds = boundsList.get(i);
            final int sectionEnd = i + 1 < length ? boundsList.get(i + 1).getValue1() : docText.length();
            int sectionBegin = tagBounds.getValue2();
            if (sectionEnd > sectionBegin && !docText.substring(sectionBegin, sectionEnd).trim().isEmpty()) {
                // trim() only strips chars <= U+0020 while Character.isWhitespace also accepts Unicode
                // spaces, so the skip must be bounded or it can run past sectionEnd (or off the text).
                while (sectionBegin < sectionEnd && Character.isWhitespace(docText.charAt(sectionBegin))) {
                    ++sectionBegin;
                }
            }
            final SectionTag leftTag = sectionTags.get(tagBounds);
            final Segment segment = new Segment(jcas, sectionBegin, sectionEnd);
            if (leftTag.__tagType == TagType.HEADER) {
                segment.setId(leftTag.__typeName);
                segment.setPreferredText(leftTag.__name);
                segment.setTagText(docText.substring(tagBounds.getValue1(), sectionBegin));
            } else {
                segment.setId(DEFAULT_SEGMENT_ID);
                segment.setPreferredText(DEFAULT_SEGMENT_ID);
            }
            segment.addToIndexes();
        }
    }

    /** Creates a segment over [begin, end) with the default id and preferred text and adds it to the indexes. */
    private static void addDefaultSegment(JCas jcas, int begin, int end) {
        final Segment segment = new Segment(jcas, begin, end);
        segment.setId(DEFAULT_SEGMENT_ID);
        segment.setPreferredText(DEFAULT_SEGMENT_ID);
        segment.addToIndexes();
    }

    /**
     * Sorts the spans by begin offset and removes spans that are covered by an overlapping neighbour.
     * For each span, the following spans that begin before it ends are examined in order:
     * a following span that ends at or before the current span's end is removed; if instead the
     * following span begins at the same (or an earlier) offset and ends later, the current span is removed.
     * Examination of the current span stops at the first removal.
     * <p>
     * The sort compares begin offset against begin offset. Comparing one span's begin with the
     * other's end is not a consistent ordering and can make {@code List.sort} fail or mis-order.
     * <p>
     * NOTE(review): no caller of this method is visible in this file.
     *
     * @param bounds spans as (begin, end) pairs
     * @return a new sorted list without the removed spans
     */
    private static List<Pair<Integer>> createBoundsList(Collection<Pair<Integer>> bounds) {
        final List<Pair<Integer>> boundsList = new ArrayList<>(bounds);
        boundsList.sort((p1, p2) -> Integer.compare(p1.getValue1(), p2.getValue1()));
        final Collection<Pair<Integer>> removalBounds = new HashSet<>();
        for (int i = 0; i < boundsList.size() - 1; i++) {
            final Pair<Integer> pairI = boundsList.get(i);
            for (int j = i + 1; j < boundsList.size(); j++) {
                final Pair<Integer> pairJ = boundsList.get(j);
                if (pairJ.getValue1() >= pairI.getValue2()) {
                    // pairJ begins at or after the end of pairI: no overlap, and none further on.
                    break;
                }
                if (pairI.getValue2() >= pairJ.getValue2()) {
                    // pairJ ends inside pairI.
                    removalBounds.add(pairJ);
                    break;
                }
                if (pairI.getValue1() >= pairJ.getValue1() && pairJ.getValue2() > pairI.getValue2()) {
                    // pairJ begins no later than pairI and ends after it.
                    removalBounds.add(pairI);
                    break;
                }
            }
        }
        boundsList.removeAll(removalBounds);
        return boundsList;
    }

    /**
     * Tells whether the text, ignoring surrounding whitespace and letter case, is "true" or "false".
     *
     * @param text text to test; may be null
     * @return true if the trimmed text equals "true" or "false" ignoring case; false otherwise, including for null
     */
    protected static boolean isBoolean(String text) {
        if (text == null) {
            return false;
        }
        // equalsIgnoreCase already ignores case, so no locale-sensitive toLowerCase() is needed.
        final String trimmed = text.trim();
        return "true".equalsIgnoreCase(trimmed) || "false".equalsIgnoreCase(trimmed);
    }

    /*
     * Enabled aggressive block sorting
     * Enabled unnecessary exception pruning
     * Enabled aggressive exception aggregation
     */
    /**
     * Finds the spans matched by {@code DIVIDER_LINE_PATTERN} and maps each one to the shared
     * {@code LINE_DIVIDER_TAG}.
     *
     * @param docText text to search
     * @return divider tags keyed by span; an empty map if an IllegalArgumentException is raised
     *         (that exception is not logged here)
     */
    private static Map<Pair<Integer>, SectionTag> findDividerLines(String docText) {
        try (RegexSpanFinder spanFinder = new RegexSpanFinder(DIVIDER_LINE_PATTERN)) {
            return spanFinder.findSpans(docText)
                    .stream()
                    .collect(Collectors.toMap(Function.identity(), span -> LINE_DIVIDER_TAG));
        }
        catch (IllegalArgumentException ignored) {
            return Collections.emptyMap();
        }
    }

    /**
     * Orders spans by ascending begin offset; spans with the same begin are ordered by descending
     * end offset, so the longer span comes first.
     */
    private static final class PairIntSorter
    implements Comparator<Pair<Integer>> {
        private PairIntSorter() {
        }

        /**
         * @param p1 first span as (begin, end)
         * @param p2 second span as (begin, end)
         * @return negative, zero or positive as p1 sorts before, with, or after p2
         */
        @Override
        public int compare(Pair<Integer> p1, Pair<Integer> p2) {
            // Integer.compare instead of subtraction: same sign, but cannot overflow.
            final int byBegin = Integer.compare(p1.getValue1(), p2.getValue1());
            if (byBegin != 0) {
                return byBegin;
            }
            return Integer.compare(p2.getValue2(), p1.getValue2());
        }
    }

    /**
     * Immutable description of one matched tag: its display name, the name of the section type
     * whose pattern matched, and the kind of tag.
     */
    static final class SectionTag {
        /** Display name; used as the preferred text of header-started segments. */
        private final String __name;
        /** Name of the section type; used as the id of header-started segments. */
        private final String __typeName;
        /** Kind of tag. */
        private final TagType __tagType;

        /**
         * @param name     display name of the tag
         * @param typeName name of the section type
         * @param tagType  kind of tag
         */
        private SectionTag(String name, String typeName, TagType tagType) {
            __name = name;
            __typeName = typeName;
            __tagType = tagType;
        }
    }

    /**
     * Immutable definition of a section type: its name, optional header and footer patterns,
     * and whether segments of this type should be parsed.
     */
    protected static final class SectionType {
        /** Fallback type named "SIMPLE_SEGMENT" with no patterns; it is flagged for parsing. */
        private static final SectionType DEFAULT_TYPE = new SectionType("SIMPLE_SEGMENT", null, null, true);
        private final String __name;
        private final Pattern __headerPattern;
        private final Pattern __footerPattern;
        private final boolean __shouldParse;

        /**
         * @param name        section type name
         * @param headerRegex regular expression for the section header, or null for none
         * @param footerRegex regular expression for the section footer, or null for none
         * @param shouldParse whether segments of this type should be parsed
         */
        public SectionType(String name, String headerRegex, String footerRegex, boolean shouldParse) {
            // Both patterns are case-insensitive and line-anchored (flag value 10).
            final int flags = Pattern.CASE_INSENSITIVE | Pattern.MULTILINE;
            __name = name;
            if (headerRegex != null) {
                __headerPattern = Pattern.compile(headerRegex, flags);
            } else {
                __headerPattern = null;
            }
            if (footerRegex != null) {
                __footerPattern = Pattern.compile(footerRegex, flags);
            } else {
                __footerPattern = null;
            }
            __shouldParse = shouldParse;
        }
    }

    /** Kind of tag that a matched span represents. */
    private static enum TagType {
        // Produced from a section type's header pattern; only these give a segment its own id and tag text.
        HEADER,
        // Produced from a section type's footer pattern.
        FOOTER,
        // Used by the shared divider-line tag.
        DIVIDER;

    }
}

