package org.apache.ctakes.dictionary.assertion;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import org.apache.ctakes.core.nlp.tokenizer.OffsetComparator;
import org.apache.ctakes.core.nlp.tokenizer.Token;
import org.apache.ctakes.core.nlp.tokenizer.TokenizerPTB;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/* loaded from: input_file:org/apache/ctakes/dictionary/assertion/CreateAssertionLuceneIndexFromDelimitedFile.class */
/**
 * Command-line utility that builds a Lucene index of assertion cue phrases from a directory of
 * pipe-delimited text files.
 *
 * <p>Each input line is expected to look like {@code cuePhrase|category|family}. Missing trailing
 * fields fall back to {@code default_category} / {@code default_family}. The index is written to a
 * directory named {@code assertion_cue_phrase_index} next to the input directory, and every indexed
 * entry is also appended to {@code sample.txt} in the working directory.
 *
 * <p>The input directory and the tokenizer are held in static fields that {@link #main(String[])}
 * sets before constructing an instance; the constructor performs the whole indexing run.
 */
public class CreateAssertionLuceneIndexFromDelimitedFile {
    private static TokenizerPTB tokenizer = new TokenizerPTB();
    private static String directoryOfDelimitedFiles = null;
    private IndexWriter iwriter;
    private int idCount = 0;
    // The following private field-name constants are not referenced anywhere in this class.
    private final String ID = "UNIQUE_DOCUMENT_IDENTIFIER_FIELD";
    private final String rxNormCode = "codeRxNorm";
    private final String Code = "code";
    private final String CodeToken = "codeTokenized";
    private final String FirstWord = "first_word";
    private final String OtherDesig = "other_designation";
    private final String PreferDesig = "preferred_designation";
    public static final String CUE_PHRASE_FIELD_NAME = "cuePhrase";
    public static final String CUE_PHRASE_CATEGORY_FIELD_NAME = "cuePhraseCategory";
    public static final String CUE_PHRASE_FAMILY_FIELD_NAME = "cuePhraseFamily";
    public static final String CUE_PHRASE_FIRST_WORD_FIELD_NAME = "cuePhraseFirstWord";

    /**
     * Creates the index writer and indexes every file found in the configured input directory.
     *
     * <p>An {@link IOException} raised while opening the index or reading input is reported on the
     * console and not rethrown; any other exception propagates after the index writer is closed.
     *
     * @param tokenizerPTB not used by this constructor; tokenization goes through the static
     *     tokenizer field that {@link #main(String[])} assigns
     * @throws Exception if a non-I/O failure occurs (for example when no input directory was set)
     */
    public CreateAssertionLuceneIndexFromDelimitedFile(TokenizerPTB tokenizerPTB) throws Exception {
        this.iwriter = null;
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
        File inputDir = new File(new File(directoryOfDelimitedFiles).getAbsolutePath());
        long entryCount = 0;
        boolean completed = false;
        try {
            File indexDir = new File(inputDir.getParent() + "/assertion_cue_phrase_index");
            this.iwriter = new IndexWriter(
                    FSDirectory.open(indexDir), new IndexWriterConfig(Version.LUCENE_40, analyzer));
            if (inputDir.isDirectory()) {
                // File.list() returns null when the directory cannot be read.
                String[] fileNames = inputDir.list();
                if (fileNames != null) {
                    for (String fileName : fileNames) {
                        System.out.println("Process Each File in " + inputDir.getName() + "...");
                        entryCount += indexDelimitedFile(new File(inputDir, fileName));
                    }
                }
            }
            completed = true;
        } catch (IOException e) {
            System.out.println("IO exception caught");
            e.printStackTrace();
        } finally {
            closeIndexWriter(completed, entryCount);
        }
    }

    /**
     * Indexes every line of one pipe-delimited file.
     *
     * @param delimitedFile the file to read
     * @return the number of lines handed to {@link #writeToFormatLucene(String, String, String)}
     * @throws IOException if the file cannot be opened or read
     */
    private long indexDelimitedFile(File delimitedFile) throws IOException {
        long count = 0;
        BufferedReader reader = new BufferedReader(new FileReader(delimitedFile));
        try {
            String line;
            // Stop at end of file; readLine() returns null there.
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split("\\|");
                if (fields.length == 0) {
                    // A line made only of delimiters splits to nothing; skip it.
                    continue;
                }
                String cuePhrase = fields[0];
                String category = "default_category";
                String family = "default_family";
                // Only read the family column when it is actually present.
                if (fields.length >= 3) {
                    family = fields[2];
                }
                if (fields.length >= 2) {
                    category = fields[1];
                    if (category == null || category.isEmpty()) {
                        category = "category__" + family;
                    }
                }
                writeToFormatLucene(cuePhrase, category, family);
                count++;
            }
        } finally {
            reader.close();
        }
        return count;
    }

    /**
     * Merges and closes the index writer, if one was created, and reports the entry count when the
     * indexing run finished without an exception.
     *
     * @param completed whether the indexing run reached its end normally
     * @param entryCount number of entries processed, used in the success message
     */
    private void closeIndexWriter(boolean completed, long entryCount) {
        if (this.iwriter == null) {
            // The writer could not be opened, so there is nothing to merge or close.
            return;
        }
        try {
            try {
                this.iwriter.maybeMerge();
            } finally {
                this.iwriter.close();
            }
            if (completed) {
                System.out.println("Index created with " + entryCount + " entries.");
            }
        } catch (IOException e) {
            System.out.println("IO exception caught");
            e.printStackTrace();
        }
    }

    /**
     * Entry point. Accepts either one or three arguments; only the first (the input directory) is
     * used. Any other argument count prints the usage text.
     *
     * @param strArr command-line arguments
     */
    public static void main(String[] strArr) {
        if (strArr.length != 1 && strArr.length != 3) {
            System.out.println(getUsage());
            return;
        }
        try {
            directoryOfDelimitedFiles = strArr[0];
            tokenizer = new TokenizerPTB();
            new CreateAssertionLuceneIndexFromDelimitedFile(tokenizer);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads a whole text file into a string, terminating every line with {@code "\n"}.
     *
     * @param str path of the file to read
     * @return the file contents, one {@code "\n"} after each line (empty string for an empty file)
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException if reading fails
     */
    public static String load(String str) throws FileNotFoundException, IOException {
        StringBuilder contents = new StringBuilder();
        BufferedReader reader = new BufferedReader(new FileReader(new File(str)));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                contents.append(line).append("\n");
            }
        } finally {
            reader.close();
        }
        return contents.toString();
    }

    /**
     * Loads a {@code text|number} file into a map from lower-cased text to {@link Integer}.
     *
     * <p>Lines that do not contain exactly two {@code |}-separated tokens are reported on the
     * console and skipped.
     *
     * @param str path of the file to read
     * @return map of lower-cased first token to the parsed second token
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException if reading fails
     * @throws NumberFormatException if the second token of a line is not an integer
     */
    public static Map loadHyphMap(String str) throws FileNotFoundException, IOException {
        Map<String, Integer> hyphMap = new HashMap<String, Integer>();
        BufferedReader reader = new BufferedReader(new FileReader(new File(str)));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                StringTokenizer fields = new StringTokenizer(line, "|");
                if (fields.countTokens() == 2) {
                    String key = fields.nextToken().toLowerCase();
                    hyphMap.put(key, Integer.valueOf(fields.nextToken()));
                } else {
                    System.out.println("Invalid hyphen file line: " + line);
                }
            }
        } finally {
            reader.close();
        }
        return hyphMap;
    }

    /**
     * Prints the text followed by one diagnostic line per token.
     *
     * @param str the text the tokens were produced from; token offsets index into it
     * @param list the tokens to print; every element must be a {@link Token}
     */
    public static void printResults(String str, List list) {
        System.out.println("Text: " + str);
        for (int i = 0; i < list.size(); i++) {
            Token token = (Token) list.get(i);
            String typeLabel = typeLabel(token.getType());
            String capsLabel = capsLabel(token.getCaps());
            String numPosLabel = numPositionLabel(token.getNumPosition());
            System.out.println("Token: type=[" + typeLabel + "] caps=[" + capsLabel + "] npos=[" + numPosLabel + "] int=[" + (token.isInteger() ? "Y" : "N") + "] offsets=[" + token.getStartOffset() + "," + token.getEndOffset() + "]\t\ttext=[" + str.substring(token.getStartOffset(), token.getEndOffset()) + "]");
        }
    }

    /** Maps a token type code to the fixed-width label printed by {@link #printResults}. */
    private static String typeLabel(int type) {
        switch (type) {
            case 1:
                return "word       ";
            case 2:
                return "number     ";
            case 3:
                return "punctuation";
            case 4:
                return "end of line";
            case 5:
                return "contraction";
            case 6:
                return "symbol     ";
            default:
                return "unknown    ";
        }
    }

    /** Maps a token capitalization code to the one-letter label printed by {@link #printResults}. */
    private static String capsLabel(int caps) {
        switch (caps) {
            case 1:
                return "N";
            case 2:
                return "M";
            case 3:
                return "F";
            case 4:
                return "A";
            default:
                return "?";
        }
    }

    /** Maps a token number-position code to the one-letter label printed by {@link #printResults}. */
    private static String numPositionLabel(int numPosition) {
        switch (numPosition) {
            case 0:
                return "N";
            case 1:
                return "F";
            case 2:
                return "M";
            case 3:
                return "L";
            default:
                return "?";
        }
    }

    /**
     * Returns the command-line usage text printed by {@link #main(String[])}.
     *
     * @return the usage line
     */
    public static String getUsage() {
        return "java CreateAssertionLuceneIndexFromDelimitedFile <dir-containing-textfile(s)> [hyphenfile] [freqcutoff]";
    }

    /**
     * Builds one Lucene document for a cue phrase, adds it to the index, and appends a
     * {@code phrase|category|tokenized phrase} line to the side file.
     *
     * <p>Failures are reported on the console and do not propagate, so one bad entry does not stop
     * the run.
     *
     * @param str the cue phrase text
     * @param str2 the cue phrase category
     * @param str3 the cue phrase family
     */
    protected void writeToFormatLucene(String str, String str2, String str3) {
        Document document = new Document();
        try {
            this.idCount++;
            System.out.println(" " + this.idCount + " processed so far out of total");
            document.add(new TextField(CUE_PHRASE_FIELD_NAME, str, Field.Store.YES));
            document.add(new StringField(CUE_PHRASE_CATEGORY_FIELD_NAME, str2, Field.Store.YES));
            document.add(new StringField(CUE_PHRASE_FAMILY_FIELD_NAME, str3, Field.Store.YES));
            List<Token> tokens = tokenizer.tokenize(str);
            Collections.sort(tokens, new OffsetComparator());
            // Remember the first token and rebuild the phrase as space-separated token texts.
            String firstWord = "";
            StringBuilder tokenizedPhrase = new StringBuilder();
            boolean first = true;
            for (Token token : tokens) {
                if (first) {
                    firstWord = token.getText();
                    first = false;
                } else {
                    tokenizedPhrase.append(" ");
                }
                tokenizedPhrase.append(token.getText());
            }
            document.add(new StringField(CUE_PHRASE_FIRST_WORD_FIELD_NAME, firstWord, Field.Store.YES));
            this.iwriter.addDocument(document);
            writeToFile(str + "|" + str2 + "|" + tokenizedPhrase + '\n');
        } catch (IOException e) {
            System.out.println("IOException in document : io " + e.getLocalizedMessage());
        } catch (Exception e2) {
            System.out.println("Exception in document : exc " + e2.getLocalizedMessage());
        }
    }

    /**
     * Appends the given text to {@code sample.txt} in the current working directory.
     *
     * @param str the text to append
     */
    public void writeToFile(String str) {
        try {
            BufferedWriter writer = new BufferedWriter(new FileWriter("sample.txt", true));
            try {
                writer.write(str);
            } finally {
                // Close even when the write fails so the file handle is not leaked.
                writer.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
