/*
 * Decompiled with CFR 0.152.
 */
package org.apache.ctakes.ytex.tools;

import gov.nih.nlm.nls.lvg.Api.LvgCmdApi;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.ctakes.core.nlp.tokenizer.Token;
import org.apache.ctakes.core.nlp.tokenizer.TokenizerPTB;
import org.apache.ctakes.ytex.kernel.KernelContextHolder;
import org.apache.ctakes.ytex.umls.dao.UMLSDao;
import org.apache.ctakes.ytex.umls.model.UmlsAuiFirstWord;
import org.springframework.transaction.PlatformTransactionManager;
import org.springframework.transaction.support.TransactionTemplate;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/**
 * Command-line tool that reads (AUI, string) pairs from the {@link UMLSDao},
 * tokenizes each string, and inserts one {@link UmlsAuiFirstWord} row per AUI
 * holding the lower-cased first token, the space-joined tokenized string and,
 * when LVG could be initialized, the stemmed equivalents of both.
 */
public class SetupAuiFirstWord {
    private static final Log log = LogFactory.getLog(SetupAuiFirstWord.class);

    /** Classpath location of the LVG configuration file. */
    private static final String LVG_PROPERTIES = "org/apache/ctakes/lvg/data/config/lvg.properties";
    /** Location of the LVG annotator descriptor that carries the "ExclusionSet" parameter. */
    private static final String LVG_ANNOTATOR_DESC = "ctakes-lvg/desc/analysis_engine/LvgAnnotator.xml";

    // Length limits applied while building rows.
    // NOTE(review): presumably these mirror database column widths - confirm against the schema.
    /** Source strings of this length or longer are skipped without being tokenized. */
    private static final int MAX_STR_LENGTH = 200;
    /** Rows whose first word is longer than this are skipped. */
    private static final int MAX_FWORD_LENGTH = 70;
    /** Rows whose tokenized string is longer than this are skipped. */
    private static final int MAX_TOKENIZED_LENGTH = 250;

    private TokenizerPTB tokenizer;
    /** Stemmer; stays {@code null} when LVG could not be initialized, which disables stemming. */
    private LvgCmdApi lvgCmd;
    /** Words that are never sent to LVG; empty when the annotator descriptor is unavailable. */
    private Set<String> exclusionSet = null;

    /**
     * Creates the tokenizer, loads the exclusion set and tries to initialize LVG.
     *
     * @throws Exception if the tokenizer or the exclusion set cannot be initialized;
     *                   an LVG failure is only logged (see {@link #initLvg()})
     */
    public SetupAuiFirstWord() throws Exception {
        this.initTokenizer();
        this.initExclusionSet();
        this.initLvg();
    }

    /**
     * Best-effort LVG initialization. Any failure is logged as a warning and leaves
     * {@link #lvgCmd} {@code null}, in which case no stems are produced.
     */
    private void initLvg() {
        try {
            URL uri = this.getClass().getClassLoader().getResource(LVG_PROPERTIES);
            if (uri == null) {
                // Report the real cause instead of relying on a NullPointerException below.
                throw new FileNotFoundException("classpath:" + LVG_PROPERTIES + " not found");
            }
            if (log.isInfoEnabled()) {
                log.info("loading lvg.properties from:" + uri.getPath());
            }
            File f = toFile(uri);
            String configDir = f.getParentFile().getAbsolutePath();
            // Strip the trailing "data/config" to obtain the LVG root directory.
            String lvgDir = configDir.substring(0, configDir.length() - "data/config".length());
            // NOTE(review): presumably LVG resolves its data files relative to user.dir - confirm.
            System.setProperty("user.dir", lvgDir);
            this.lvgCmd = new LvgCmdApi("-f:l:b", f.getAbsolutePath());
        }
        catch (Exception e) {
            log.warn("could not initialize lvg - will not create a stemmed dictionary.", e);
        }
    }

    /**
     * Converts a resource URL to a {@link File}. {@code file:} URLs go through
     * {@link URL#toURI()} so that URL escapes (e.g. {@code %20} for a space) are decoded;
     * anything else, or a URL that cannot be converted, falls back to the raw URL path.
     */
    private static File toFile(URL url) {
        if ("file".equalsIgnoreCase(url.getProtocol())) {
            try {
                return new File(url.toURI());
            }
            catch (java.net.URISyntaxException | IllegalArgumentException ignored) {
                // Not a well-formed hierarchical file URI: use the raw path below.
            }
        }
        return new File(url.getPath());
    }

    /**
     * Loads the LVG exclusion set from the {@code ExclusionSet} name/value pair of the
     * LvgAnnotator descriptor. If the descriptor is found neither on the classpath nor
     * on the file system the exclusion set stays empty.
     *
     * @throws ParserConfigurationException if no XML parser can be created
     * @throws SAXException                 if the descriptor is not well-formed XML
     * @throws IOException                  if the descriptor cannot be read
     */
    private void initExclusionSet() throws ParserConfigurationException, SAXException, IOException {
        this.exclusionSet = new HashSet<String>();
        // A null resource is permitted by try-with-resources; close() is simply skipped.
        try (InputStream isLvgAnno = this.openLvgAnnotatorDescriptor()) {
            if (isLvgAnno == null) {
                log.warn(LVG_ANNOTATOR_DESC + " not available, using empty exclusion set");
                return;
            }
            DocumentBuilder dBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            Document doc = dBuilder.parse(isLvgAnno);
            NodeList pairs = doc.getElementsByTagName("nameValuePair");
            for (int i = 0; i < pairs.getLength(); ++i) {
                Element pair = (Element) pairs.item(i);
                String name = firstChildValue(pair.getElementsByTagName("name").item(0));
                if (!"ExclusionSet".equals(name)) {
                    continue;
                }
                NodeList words = pair.getElementsByTagName("string");
                for (int j = 0; j < words.getLength(); ++j) {
                    String word = firstChildValue(words.item(j));
                    if (word != null) {
                        this.exclusionSet.add(word);
                    }
                }
            }
        }
    }

    /**
     * Opens the LvgAnnotator descriptor, trying the classpath first and then the
     * relative path {@code ../ctakes-lvg/desc/analysis_engine/LvgAnnotator.xml}.
     *
     * @return an open stream the caller must close, or {@code null} if neither location exists
     * @throws FileNotFoundException if the file-system copy exists but cannot be opened
     */
    private InputStream openLvgAnnotatorDescriptor() throws FileNotFoundException {
        InputStream fromClasspath = this.getClass().getClassLoader().getResourceAsStream(LVG_ANNOTATOR_DESC);
        if (fromClasspath != null) {
            return fromClasspath;
        }
        log.warn("classpath:" + LVG_ANNOTATOR_DESC + " not available, attempting to load from file system");
        File f = new File("../" + LVG_ANNOTATOR_DESC);
        if (f.exists()) {
            return new BufferedInputStream(new FileInputStream(f));
        }
        return null;
    }

    /**
     * Returns the node value of the first child of {@code node}, or {@code null} when
     * the node is missing or has no children (e.g. an empty element).
     */
    private static String firstChildValue(org.w3c.dom.Node node) {
        if (node == null || node.getFirstChild() == null) {
            return null;
        }
        return node.getFirstChild().getNodeValue();
    }

    private void initTokenizer() throws FileNotFoundException, IOException {
        this.tokenizer = new TokenizerPTB();
    }

    /**
     * Entry point: builds the tool and runs {@link #setupAuiFirstWord()}.
     *
     * @param args ignored
     * @throws Exception if initialization fails
     */
    public static void main(String[] args) throws Exception {
        SetupAuiFirstWord setupFword = new SetupAuiFirstWord();
        setupFword.setupAuiFirstWord();
    }

    /**
     * Processes AUIs in batches: starting after the value returned by
     * {@code UMLSDao.getLastAui()}, repeatedly fetches the next batch of (AUI, string)
     * pairs, converts them to {@link UmlsAuiFirstWord} rows and inserts the rows,
     * until the DAO returns an empty batch.
     */
    public void setupAuiFirstWord() {
        UMLSDao umlsDao = (UMLSDao)KernelContextHolder.getApplicationContext().getBean(UMLSDao.class);
        // NOTE(review): this template is configured but never used to execute anything here;
        // it is kept so that a missing PlatformTransactionManager bean still fails fast.
        TransactionTemplate t = new TransactionTemplate((PlatformTransactionManager)KernelContextHolder.getApplicationContext().getBean(PlatformTransactionManager.class));
        t.setPropagationBehavior(TransactionTemplate.PROPAGATION_REQUIRES_NEW);
        String lastAui = umlsDao.getLastAui();
        List<Object[]> listAuiStr;
        do {
            listAuiStr = umlsDao.getAllAuiStr(lastAui);
            List<UmlsAuiFirstWord> listFword = new ArrayList<UmlsAuiFirstWord>(1000);
            for (Object[] auiStr : listAuiStr) {
                String aui = (String)auiStr[0];
                String str = (String)auiStr[1];
                // Always advance the cursor, even for skipped rows, so the next batch moves on.
                lastAui = aui;
                UmlsAuiFirstWord fw = this.buildFirstWord(aui, str);
                if (fw != null) {
                    listFword.add(fw);
                }
            }
            if (!listFword.isEmpty()) {
                umlsDao.insertAuiFirstWord(listFword);
                log.info("inserted " + listFword.size() + " rows");
            }
        } while (!listAuiStr.isEmpty());
    }

    /**
     * Tokenizes one string and applies the length limits.
     *
     * @return the row to insert, or {@code null} if the string was skipped or
     *         tokenization failed (the reason is logged)
     */
    private UmlsAuiFirstWord buildFirstWord(String aui, String str) {
        if (str.length() >= MAX_STR_LENGTH) {
            log.debug("Skipping aui because str to long: aui=" + aui + ", str=" + str);
            return null;
        }
        try {
            UmlsAuiFirstWord fw = this.tokenizeStr(aui, str);
            if (fw == null) {
                log.error("Error tokenizing aui=" + aui + ", str=" + str);
                return null;
            }
            if (fw.getFword().length() > MAX_FWORD_LENGTH) {
                log.debug("fword too long: aui=" + aui + ", str=" + fw.getFword());
                return null;
            }
            if (fw.getTokenizedStr().length() > MAX_TOKENIZED_LENGTH) {
                log.debug("string too long: aui=" + aui + ", str=" + str);
                return null;
            }
            if (log.isDebugEnabled()) {
                log.debug("aui=" + aui + ", fw=" + fw);
            }
            return fw;
        }
        catch (Exception e) {
            // One bad string must not abort the whole batch.
            log.error("Error tokenizing aui=" + aui + ", str=" + str, e);
            return null;
        }
    }

    /**
     * Tokenizes {@code str} and builds the first-word record for {@code aui}.
     * The tokenized string is the token texts joined by single spaces; the first word
     * is the first token's text lower-cased with {@link Locale#ENGLISH}. The stem fields
     * are only set when LVG is available. A string that yields no tokens produces a
     * record with empty strings.
     *
     * @param aui identifier stored on the returned record
     * @param str text to tokenize
     * @return the populated record (never {@code null})
     * @throws Exception if tokenizing or stemming fails
     */
    public UmlsAuiFirstWord tokenizeStr(String aui, String str) throws Exception {
        List<?> tokens = this.tokenizer.tokenize(str);
        boolean stemming = this.lvgCmd != null;
        String firstTokenText = "";
        String firstTokenStem = "";
        StringBuilder tokenizedDesc = new StringBuilder();
        StringBuilder stemmedDesc = new StringBuilder();
        boolean first = true;
        for (Object o : tokens) {
            Token t = (Token)o;
            if (first) {
                first = false;
                firstTokenText = t.getText();
                tokenizedDesc.append(firstTokenText);
                if (stemming) {
                    firstTokenStem = this.stemToken(t);
                    stemmedDesc.append(firstTokenStem);
                }
            } else {
                tokenizedDesc.append(" ").append(t.getText());
                if (stemming && firstTokenStem != null) {
                    stemmedDesc.append(" ").append(this.stemToken(t));
                }
            }
        }
        UmlsAuiFirstWord fw = new UmlsAuiFirstWord();
        fw.setAui(aui);
        fw.setFword(firstTokenText.toLowerCase(Locale.ENGLISH));
        fw.setTokenizedStr(tokenizedDesc.toString());
        if (stemming) {
            fw.setFstem(firstTokenStem.toLowerCase(Locale.ENGLISH));
            fw.setStemmedStr(stemmedDesc.toString());
        }
        return fw;
    }

    /**
     * Returns the stem of a token: the canonical form from LVG for tokens of type 0 or 1,
     * falling back to the token's own text when LVG yields nothing; tokens of any other
     * type are returned unchanged.
     */
    private String stemToken(Token t) throws Exception {
        String text = t.getText();
        int type = t.getType();
        // NOTE(review): 0 and 1 are presumably the "unknown" and "word" token types - confirm against Token.
        if (type != 1 && type != 0) {
            return text;
        }
        String canonical = this.getCanonicalForm(text);
        if (canonical == null || canonical.length() == 0) {
            return text;
        }
        return canonical;
    }

    /**
     * Asks LVG for the canonical form of {@code word}.
     *
     * @return the second pipe-delimited field of LVG's output, or {@code null} when LVG
     *         is unavailable, the word is in the exclusion set, LVG returns {@code null},
     *         the output has fewer than two fields, or the field is "No Output"
     * @throws Exception if the LVG call fails
     */
    private String getCanonicalForm(String word) throws Exception {
        if (this.lvgCmd == null || this.exclusionSet.contains(word)) {
            return null;
        }
        String out = this.lvgCmd.MutateToString(word);
        if (out == null) {
            log.warn("mutateToString returned null for: " + word);
            return null;
        }
        String[] output = out.split("\\|");
        if (output.length >= 2 && !"No Output".equals(output[1])) {
            return output[1];
        }
        return null;
    }
}

