package org.apache.ctakes.temporal.ae;

import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.ctakes.temporal.utils.TimeRelationConstants;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.syntax.ContractionToken;
import org.apache.ctakes.typesystem.type.syntax.NewlineToken;
import org.apache.ctakes.typesystem.type.syntax.NumToken;
import org.apache.ctakes.typesystem.type.syntax.PunctuationToken;
import org.apache.ctakes.typesystem.type.syntax.SymbolToken;
import org.apache.ctakes.typesystem.type.syntax.TerminalTreebankNode;
import org.apache.ctakes.typesystem.type.syntax.TopTreebankNode;
import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
import org.apache.ctakes.typesystem.type.syntax.WordToken;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.util.FSCollectionFactory;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.FSArray;
import org.apache.uima.jcas.cas.StringArray;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.FileUtils;
import org.apache.uima.util.Level;
import org.cleartk.util.ViewUriUtil;
import org.cleartk.util.treebank.TreebankFormatParser;

/* loaded from: input_file:org/apache/ctakes/temporal/ae/THYMETreebankReader.class */
public class THYMETreebankReader extends JCasAnnotator_ImplBase {
    /** Name of the configuration parameter that is bound to {@link #treebankDirectory}. */
    public static final String TREEBANK_DIRECTORY = "treebankDirectory";

    /** Mandatory configuration value; its sub-directories are listed once in {@code initialize}. */
    @ConfigurationParameter(name = TREEBANK_DIRECTORY, mandatory = true)
    protected File treebankDirectory;
    /** Non-hidden sub-directories of {@link #treebankDirectory}; filled by {@code initialize} and searched by {@code process}. */
    File[] subdirs = null;
    /** log4j logger shared by all instances of this annotator. */
    public static Logger logger = Logger.getLogger(THYMETreebankReader.class);
    /**
     * Matches a bracketed marker that starts with {@code [meta }, {@code [start } or {@code [end }
     * and runs (non-greedily) up to the next {@code ]}. {@code process} overwrites every match with
     * spaces before the document text is aligned with the treebank file.
     */
    private static final Pattern headerPatt = Pattern.compile("\\[(meta|start|end) [^\\]]*?\\]");

    /*
     * Lookup table used by the switch statement in process(): it is indexed by TOKEN_TYPE.ordinal()
     * and holds the case number (1..6) assigned to that constant.
     * NOTE(review): this looks like the compiler-generated switch map for an enum switch, as
     * recovered by a decompiler -- confirm before editing by hand.
     */
    /* renamed from: org.apache.ctakes.temporal.ae.THYMETreebankReader$2, reason: invalid class name */
    /* loaded from: input_file:org/apache/ctakes/temporal/ae/THYMETreebankReader$2.class */
    static /* synthetic */ class AnonymousClass2 {
        // One slot per TOKEN_TYPE constant; a slot that is never assigned below stays 0.
        static final /* synthetic */ int[] $SwitchMap$org$apache$ctakes$temporal$ae$THYMETreebankReader$TOKEN_TYPE = new int[TOKEN_TYPE.values().length];

        static {
            // Each constant is registered in its own try block so that a NoSuchFieldError for one
            // constant does not prevent the remaining constants from being registered.
            try {
                $SwitchMap$org$apache$ctakes$temporal$ae$THYMETreebankReader$TOKEN_TYPE[TOKEN_TYPE.CONTRACTION.ordinal()] = 1;
            } catch (NoSuchFieldError e) {
                // Deliberately ignored: the slot keeps its default value 0.
            }
            try {
                $SwitchMap$org$apache$ctakes$temporal$ae$THYMETreebankReader$TOKEN_TYPE[TOKEN_TYPE.NEWLINE.ordinal()] = 2;
            } catch (NoSuchFieldError e2) {
                // Deliberately ignored: the slot keeps its default value 0.
            }
            try {
                $SwitchMap$org$apache$ctakes$temporal$ae$THYMETreebankReader$TOKEN_TYPE[TOKEN_TYPE.NUM.ordinal()] = 3;
            } catch (NoSuchFieldError e3) {
                // Deliberately ignored: the slot keeps its default value 0.
            }
            try {
                $SwitchMap$org$apache$ctakes$temporal$ae$THYMETreebankReader$TOKEN_TYPE[TOKEN_TYPE.PUNCT.ordinal()] = 4;
            } catch (NoSuchFieldError e4) {
                // Deliberately ignored: the slot keeps its default value 0.
            }
            try {
                $SwitchMap$org$apache$ctakes$temporal$ae$THYMETreebankReader$TOKEN_TYPE[TOKEN_TYPE.SYMBOL.ordinal()] = 5;
            } catch (NoSuchFieldError e5) {
                // Deliberately ignored: the slot keeps its default value 0.
            }
            try {
                $SwitchMap$org$apache$ctakes$temporal$ae$THYMETreebankReader$TOKEN_TYPE[TOKEN_TYPE.WORD.ordinal()] = 6;
            } catch (NoSuchFieldError e6) {
                // Deliberately ignored: the slot keeps its default value 0.
            }
        }
    }

    /* loaded from: input_file:org/apache/ctakes/temporal/ae/THYMETreebankReader$TOKEN_TYPE.class */
    /**
     * Records which concrete token class covered a span before {@code process} removed the
     * original tokens, so that a token of the same class can be recreated for the matching
     * treebank terminal. The declaration order determines the ordinals used by the switch map.
     */
    enum TOKEN_TYPE {
        /** The span was covered by a {@code WordToken}. */
        WORD,
        /** The span was covered by a {@code PunctuationToken}. */
        PUNCT,
        /** The span was covered by a {@code SymbolToken}. */
        SYMBOL,
        /** The span was covered by a {@code NumToken}. */
        NUM,
        /** The span was covered by a {@code NewlineToken}. */
        NEWLINE,
        /** The span was covered by a {@code ContractionToken}. */
        CONTRACTION
    }

    /**
     * Initializes the annotator and caches the non-hidden sub-directories of
     * {@link #treebankDirectory} in {@link #subdirs}.
     *
     * @param uimaContext the UIMA context passed on to the superclass initializer
     * @throws ResourceInitializationException if the superclass initialization fails, or if the
     *         configured treebank directory cannot be listed (it does not exist, is not a
     *         directory, or an I/O error occurred)
     */
    @Override
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);
        File[] listed = this.treebankDirectory.listFiles(new FileFilter() { // from class: org.apache.ctakes.temporal.ae.THYMETreebankReader.1
            @Override // java.io.FileFilter
            public boolean accept(File file) {
                return file.isDirectory() && !file.isHidden();
            }
        });
        // File.listFiles returns null instead of throwing when the path is not a listable
        // directory; fail here with a clear message rather than with a NullPointerException
        // on the first processed document.
        if (listed == null) {
            throw new ResourceInitializationException(
                    new IOException("Cannot list treebank directory: " + this.treebankDirectory));
        }
        this.subdirs = listed;
    }

    /**
     * Replaces the sentences and tokens of the document with ones derived from its treebank file.
     *
     * <p>The treebank file is named after the last path segment of the document URI plus
     * {@code ".xml.tree"} and is looked up in each directory of {@link #subdirs}. If no such file
     * exists a warning is logged and the CAS is left untouched. Otherwise the treebank is aligned
     * with the document text (header markers blanked out), all existing {@code Sentence} and
     * {@code BaseToken} annotations are removed, and for every parsed tree a treebank node
     * structure, a {@code Sentence} and one token per non-empty terminal are added. A new token
     * gets the same concrete class as the removed token with the identical span, or plain
     * {@code BaseToken} if there was none; its part of speech is the terminal's node type.
     *
     * <p>Any exception raised during parsing or annotation creation is logged as a warning and
     * the document is skipped; annotations changed before the failure are not rolled back.
     *
     * @param jCas the CAS of the document to annotate
     * @throws AnalysisEngineProcessException if the treebank file cannot be read
     */
    @Override
    public void process(JCas jCas) throws AnalysisEngineProcessException {
        URI uri = ViewUriUtil.getURI(jCas);
        logger.info("Document id is: " + uri.toString());
        String uriPath = uri.getPath();
        String treeFileName = uriPath.substring(uriPath.lastIndexOf('/') + 1) + ".xml.tree";

        File treeFile = findTreeFile(treeFileName);
        if (treeFile == null) {
            getContext().getLogger().log(Level.WARNING, "Could not find treeFile: " + treeFileName);
            return;
        }

        String treebankText;
        try {
            treebankText = FileUtils.file2String(treeFile);
        } catch (IOException e) {
            throw new AnalysisEngineProcessException(e);
        }
        String alignableText = blankOutHeaders(jCas.getDocumentText());

        try {
            List<org.cleartk.util.treebank.TopTreebankNode> trees =
                    TreebankFormatParser.parseDocument(treebankText, 0, alignableText);

            // Iterate over copies: removing from the index while iterating the live view is unsafe.
            for (Sentence sentence : new ArrayList<Sentence>(JCasUtil.select(jCas, Sentence.class))) {
                sentence.removeFromIndexes();
            }
            HashMap<String, TOKEN_TYPE> previousTypes = removeTokensRememberingTypes(jCas);

            for (org.cleartk.util.treebank.TopTreebankNode tree : trees) {
                TopTreebankNode root = convert(tree, jCas);
                new Sentence(jCas, root.getBegin(), root.getEnd()).addToIndexes();
                int terminalCount = root.getTerminals().size();
                for (int i = 0; i < terminalCount; i++) {
                    TerminalTreebankNode terminal = root.getTerminals(i);
                    if (terminal.getBegin() == terminal.getEnd()) {
                        // A zero-width terminal covers no text, so no token is created for it.
                        continue;
                    }
                    TOKEN_TYPE previousType = previousTypes.get(getAnnotationKey(terminal));
                    BaseToken token = newTokenOfType(jCas, previousType, terminal.getBegin(), terminal.getEnd());
                    token.setPartOfSpeech(terminal.getNodeType());
                    token.addToIndexes();
                }
            }
        } catch (Exception e) {
            // Best effort: a document whose text cannot be aligned is skipped, not fatal.
            getContext().getLogger().log(Level.WARNING, String.format("Skipping %s due to alignment problems", treeFileName), e);
        }
    }

    /** Returns the first existing file with the given name inside one of {@link #subdirs}, or {@code null}. */
    private File findTreeFile(String treeFileName) {
        if (this.subdirs == null) {
            return null;
        }
        for (File dir : this.subdirs) {
            File candidate = new File(dir, treeFileName);
            if (candidate.exists()) {
                return candidate;
            }
        }
        return null;
    }

    /** Returns a copy of the text in which every header marker is overwritten by spaces of equal length. */
    private static String blankOutHeaders(String documentText) {
        StringBuilder blanked = new StringBuilder(documentText);
        // Match against the immutable original; replacements keep the length, so offsets agree.
        Matcher headers = headerPatt.matcher(documentText);
        while (headers.find()) {
            blanked.replace(headers.start(), headers.end(), getWhitespaceString(headers.end() - headers.start()));
        }
        return blanked.toString();
    }

    /** Removes every {@code BaseToken} from the indexes and returns the token type seen for each span key. */
    private static HashMap<String, TOKEN_TYPE> removeTokensRememberingTypes(JCas jCas) {
        HashMap<String, TOKEN_TYPE> typesBySpan = new HashMap<String, TOKEN_TYPE>();
        for (BaseToken token : new ArrayList<BaseToken>(JCasUtil.select(jCas, BaseToken.class))) {
            String key = getAnnotationKey(token);
            if (token instanceof WordToken) {
                typesBySpan.put(key, TOKEN_TYPE.WORD);
            } else if (token instanceof PunctuationToken) {
                typesBySpan.put(key, TOKEN_TYPE.PUNCT);
            } else if (token instanceof SymbolToken) {
                typesBySpan.put(key, TOKEN_TYPE.SYMBOL);
            } else if (token instanceof NumToken) {
                typesBySpan.put(key, TOKEN_TYPE.NUM);
            } else if (token instanceof NewlineToken) {
                typesBySpan.put(key, TOKEN_TYPE.NEWLINE);
            } else if (token instanceof ContractionToken) {
                typesBySpan.put(key, TOKEN_TYPE.CONTRACTION);
            }
            token.removeFromIndexes();
        }
        return typesBySpan;
    }

    /** Creates (without indexing) a token of the class matching {@code type}; {@code null} yields a plain {@code BaseToken}. */
    private static BaseToken newTokenOfType(JCas jCas, TOKEN_TYPE type, int begin, int end) {
        if (type == null) {
            return new BaseToken(jCas, begin, end);
        }
        switch (type) {
            case CONTRACTION:
                return new ContractionToken(jCas, begin, end);
            case NEWLINE:
                return new NewlineToken(jCas, begin, end);
            case NUM:
                return new NumToken(jCas, begin, end);
            case PUNCT:
                return new PunctuationToken(jCas, begin, end);
            case SYMBOL:
                return new SymbolToken(jCas, begin, end);
            case WORD:
                return new WordToken(jCas, begin, end);
            default:
                return new BaseToken(jCas, begin, end);
        }
    }

    /**
     * Builds a string that consists of nothing but space characters.
     *
     * @param i the number of spaces wanted
     * @return a string of exactly {@code i} spaces
     */
    private static String getWhitespaceString(int i) {
        final char[] blanks = new char[i];
        for (int pos = 0; pos < blanks.length; pos++) {
            blanks[pos] = ' ';
        }
        return String.valueOf(blanks);
    }

    /**
     * Creates the CAS counterpart of a parsed ClearTK tree: a root node spanning the tree's text,
     * its converted descendants and its terminal array. The root is added to the indexes last.
     *
     * @param topTreebankNode the parsed ClearTK tree
     * @param jCas the CAS in which the nodes are created
     * @return the newly created and indexed root node
     */
    private static TopTreebankNode convert(org.cleartk.util.treebank.TopTreebankNode topTreebankNode, JCas jCas) {
        final int spanBegin = topTreebankNode.getTextBegin();
        final int spanEnd = topTreebankNode.getTextEnd();
        final TopTreebankNode casRoot = new TopTreebankNode(jCas, spanBegin, spanEnd);
        casRoot.setTreebankParse(topTreebankNode.getTreebankParse());
        // The root has no parent, hence the null argument.
        convert(topTreebankNode, jCas, casRoot, null);
        initTerminalNodes(casRoot, jCas);
        casRoot.addToIndexes();
        return casRoot;
    }

    /**
     * Collects the terminal nodes below the given root, numbers them from zero in the order they
     * were collected, and stores them as the root's terminal array.
     *
     * @param topTreebankNode the root whose terminals are to be set
     * @param jCas the CAS in which the terminal array is allocated
     */
    public static void initTerminalNodes(TopTreebankNode topTreebankNode, JCas jCas) {
        List<TerminalTreebankNode> terminals = new ArrayList<TerminalTreebankNode>();
        _initTerminalNodes(topTreebankNode, terminals);
        for (int i = 0; i < terminals.size(); i++) {
            terminals.get(i).setIndex(i);
        }
        FSArray terminalArray = new FSArray(jCas, terminals.size());
        terminalArray.copyFromArray(terminals.toArray(new FeatureStructure[terminals.size()]), 0, 0, terminals.size());
        topTreebankNode.setTerminals(terminalArray);
    }

    /**
     * Appends the terminal nodes found below {@code treebankNode} to {@code list}, visiting
     * children in array order and descending into every non-terminal child.
     *
     * @param treebankNode the node whose subtree is searched
     * @param list receives the terminals in the order they are encountered
     */
    private static void _initTerminalNodes(TreebankNode treebankNode, List<TerminalTreebankNode> list) {
        FSArray children = treebankNode.getChildren();
        if (children == null) {
            // A node whose children array was never set contributes no terminals.
            return;
        }
        for (int i = 0; i < children.size(); i++) {
            // FSArray.get yields a FeatureStructure, so the element must be narrowed explicitly.
            FeatureStructure child = children.get(i);
            if (child instanceof TerminalTreebankNode) {
                list.add((TerminalTreebankNode) child);
            } else {
                _initTerminalNodes((TreebankNode) child, list);
            }
        }
    }

    /**
     * Copies a parsed ClearTK node into an already allocated CAS node and recursively creates,
     * converts and indexes CAS nodes for all of its children.
     *
     * <p>Type, tags, value, leaf flag and parent are copied onto {@code treebankNode2}. Each leaf
     * child becomes a {@code TerminalTreebankNode}, every other child a {@code TreebankNode}.
     * {@code treebankNode2} itself is not added to the indexes here.
     *
     * @param treebankNode the ClearTK node to read from
     * @param jCas the CAS in which child nodes and arrays are created
     * @param treebankNode2 the CAS node to fill in
     * @param treebankNode3 the CAS parent of {@code treebankNode2}, or {@code null} for the root
     * @return {@code treebankNode2}
     */
    public static TreebankNode convert(org.cleartk.util.treebank.TreebankNode treebankNode, JCas jCas, TreebankNode treebankNode2, TreebankNode treebankNode3) {
        treebankNode2.setNodeType(treebankNode.getType());
        treebankNode2.setNodeTags(new StringArray(jCas, treebankNode.getTags().length));
        FSCollectionFactory.fillArrayFS(treebankNode2.getNodeTags(), treebankNode.getTags());
        treebankNode2.setNodeValue(treebankNode.getValue());
        treebankNode2.setLeaf(treebankNode.isLeaf());
        treebankNode2.setParent(treebankNode3);

        List<TreebankNode> casChildren = new ArrayList<TreebankNode>();
        for (org.cleartk.util.treebank.TreebankNode child : treebankNode.getChildren()) {
            // Declared as the common supertype: the node is terminal only for leaf children.
            TreebankNode casChild;
            if (child.isLeaf()) {
                casChild = new TerminalTreebankNode(jCas, child.getTextBegin(), child.getTextEnd());
            } else {
                casChild = new TreebankNode(jCas, child.getTextBegin(), child.getTextEnd());
            }
            casChildren.add(convert(child, jCas, casChild, treebankNode2));
            casChild.addToIndexes();
        }

        FSArray childArray = new FSArray(jCas, casChildren.size());
        childArray.copyFromArray(casChildren.toArray(new FeatureStructure[casChildren.size()]), 0, 0, casChildren.size());
        treebankNode2.setChildren(childArray);
        return treebankNode2;
    }

    /**
     * Creates an engine description for this annotator with the treebank directory parameter set.
     *
     * @param file the value for the {@code treebankDirectory} configuration parameter
     * @return the engine description produced by {@code AnalysisEngineFactory}
     * @throws ResourceInitializationException if the description cannot be created
     */
    public static AnalysisEngineDescription getDescription(File file) throws ResourceInitializationException {
        final Class<THYMETreebankReader> annotatorClass = THYMETreebankReader.class;
        // Name/value pair handed to the factory as its variable-length configuration arguments.
        return AnalysisEngineFactory.createEngineDescription(annotatorClass, TREEBANK_DIRECTORY, file);
    }

    /**
     * Manual check of the header pattern: prints the offsets of every header marker found in a
     * fixed sample note.
     *
     * @param strArr ignored
     */
    public static void main(String[] strArr) {
        final String sampleNote = "[meta rev_date=\"02/20/2010\" start_date=\"02/20/2010\" rev=\"0002\"]\n\n[start section id=\"20112\"]\n\n#1 Dilated esophagus on CT-scan\n#2 Adenocarcinoma right colon\n#3 Symptomatic anemia\n#4 Hypothyroidism";
        for (Matcher headers = headerPatt.matcher(sampleNote); headers.find(); ) {
            final int from = headers.start();
            final int to = headers.end();
            System.out.println("FOund match at: " + from + "-" + to);
        }
    }

    /**
     * Builds the span key of an annotation in the form {@code begin-end}.
     *
     * @param annotation the annotation whose offsets are used
     * @return the begin offset, a hyphen, and the end offset
     */
    public static final String getAnnotationKey(Annotation annotation) {
        final StringBuilder spanKey = new StringBuilder();
        spanKey.append(annotation.getBegin());
        spanKey.append('-');
        spanKey.append(annotation.getEnd());
        return spanKey.toString();
    }
}
