/*
 * Decompiled with CFR 0.152.
 */
package kaist.cilab.tripleextractor.indexer;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.Date;
import java.util.Hashtable;
import java.util.LinkedList;
import kaist.cilab.tripleextractor.hannanumwrapper.HanNanumMorphAnalWrapper;
import kaist.cilab.tripleextractor.util.Configuration;
import kaist.cilab.tripleextractor.util.DBAccessor;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Prepares sentence data and builds the Lucene sentence index.
 *
 * <p>Typical flow, as far as this class shows: {@link #readSentencesFromFile}
 * converts two comma-separated dumps into a three-lines-per-record text file,
 * and {@link #doIndex} reads that file and writes one Lucene document per
 * record into the directory named by {@code Configuration.indexDir}.
 */
public class Indexer {
    /** Character encoding used for every text file this class reads or writes. */
    private static final String ENCODING = "UTF-8";

    // NOTE(review): not referenced anywhere in this class; kept in case other
    // tooling relies on its presence.
    private LinkedList<String> indexTarget = new LinkedList<String>();

    /**
     * Joins a sentence dump with a document dump and writes the cleaned result.
     *
     * <p>Both input files are expected to start with a header row (it is read
     * and discarded) followed by rows shaped like {@code id,"text",tail}. For
     * the sentence file the tail is the numeric document id. For every sentence
     * that survives filtering, three lines are written to the output: the
     * sentence id, the document title (the literal text {@code null} when the
     * document id is unknown) and the cleaned sentence.
     *
     * <p>Rows that do not match the expected shape are reported on
     * {@code System.err} and skipped instead of aborting the whole run. Any
     * I/O failure is printed via {@code printStackTrace()} and swallowed, so
     * callers never see an exception. All files are closed on every path.
     *
     * @param fileName        path of the sentence dump
     * @param docName         path of the document (id/title) dump
     * @param indexTargetFile path of the file to create or overwrite
     */
    public void readSentencesFromFile(String fileName, String docName, String indexTargetFile) {
        try {
            Hashtable<Integer, String> docTable = loadDocTitles(docName);
            BufferedReader in = openReader(fileName);
            try {
                PrintWriter pw = openWriter(indexTargetFile);
                try {
                    in.readLine(); // discard the header row
                    String line;
                    while ((line = in.readLine()) != null) {
                        int firstComma = line.indexOf(',');
                        int lastComma = line.lastIndexOf(',');
                        // Between the first and last comma there must be at least
                        // the opening and closing quote character of the text.
                        if (firstComma < 0 || lastComma < firstComma + 3) {
                            reportMalformed(fileName, line);
                            continue;
                        }
                        int senID;
                        Integer docID;
                        try {
                            senID = Integer.parseInt(line.substring(0, firstComma));
                            docID = Integer.valueOf(line.substring(lastComma + 1));
                        } catch (NumberFormatException e) {
                            reportMalformed(fileName, line);
                            continue;
                        }
                        String docTitle = docTable.get(docID);

                        // Drop the surrounding quote characters.
                        String sentence = line.substring(firstComma + 2, lastComma - 1);
                        if (isMarkupLine(sentence)) {
                            continue;
                        }
                        // Strip a leading list bullet, with or without a space.
                        if (sentence.startsWith("* ")) {
                            sentence = sentence.substring(2).trim();
                        }
                        if (sentence.startsWith("*")) {
                            sentence = sentence.substring(1).trim();
                        }
                        if (sentence.length() == 0) {
                            continue;
                        }
                        pw.println(senID);
                        pw.println(docTitle);
                        pw.println(sentence);
                    }
                } finally {
                    pw.close();
                }
            } finally {
                in.close();
            }
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Builds a fresh Lucene index from a file produced by
     * {@link #readSentencesFromFile}.
     *
     * <p>The input is consumed three lines at a time: sentence id, document
     * title, sentence text. Each record becomes one document with the fields
     * {@code sentence} (morphologically spaced text, analyzed),
     * {@code Sentence_Orig}, {@code Sentence_ID} and {@code Title}. A record
     * whose document cannot be built or added is reported with a
     * {@code PASS:} line and skipped. Progress is printed every 100 documents
     * and the elapsed time at the end.
     *
     * <p>The index is opened in {@code CREATE} mode. If indexing finishes, the
     * writer is closed (committing the index); if an exception escapes, the
     * writer is rolled back so that the write lock is released and no partial
     * index is committed. The input reader is closed on every path. A final
     * record that is missing its title or text line is reported and ignored.
     *
     * @param indexFile path of the three-lines-per-record input file
     * @throws Exception if the input cannot be read, a sentence id is not an
     *                   integer, or the index cannot be opened or closed
     */
    public void doIndex(String indexFile) throws Exception {
        BufferedReader in = openReader(indexFile);
        try {
            long startMillis = System.currentTimeMillis();
            StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
            HanNanumMorphAnalWrapper maw = HanNanumMorphAnalWrapper.getInstance();
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31, analyzer);
            iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
            FSDirectory indexDir = FSDirectory.open(new File(Configuration.indexDir));
            IndexWriter writer = new IndexWriter(indexDir, iwc);
            boolean completed = false;
            try {
                int cnt = 0;
                String idLine;
                while ((idLine = in.readLine()) != null) {
                    int senID = Integer.parseInt(idLine);
                    String docTitle = in.readLine();
                    String text = in.readLine();
                    if (docTitle == null || text == null) {
                        System.err.println("Ignoring truncated record for sentence " + senID);
                        break;
                    }
                    // Underscores become spaces and every double quote gets a
                    // leading backslash.
                    // NOTE(review): escapeSpecialCharacters below escapes both
                    // the quote and that backslash again - confirm the double
                    // escaping is intended.
                    docTitle = docTitle.trim().replace('_', ' ').replace("\"", "\\\"");
                    docTitle = this.removeParenthesis(docTitle);
                    docTitle = this.escapeSpecialCharacters(docTitle);
                    try {
                        Document luceneDocument = new Document();
                        luceneDocument.add(new Field("sentence", maw.getSpacedresult(text), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
                        luceneDocument.add(new Field("Sentence_Orig", text, Field.Store.YES, Field.Index.NO, Field.TermVector.NO));
                        luceneDocument.add(new Field("Sentence_ID", String.valueOf(senID), Field.Store.YES, Field.Index.NO, Field.TermVector.NO));
                        luceneDocument.add(new Field("Title", docTitle, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
                        writer.addDocument(luceneDocument);
                        if (++cnt % 100 == 0) {
                            System.out.println(cnt);
                        }
                    }
                    catch (Exception e) {
                        // Best effort: one bad sentence must not stop the run,
                        // but the reason should not be lost either.
                        System.out.println("PASS: " + text);
                        System.err.println("  cause: " + e);
                    }
                }
                completed = true;
            } finally {
                if (completed) {
                    writer.close();
                } else {
                    writer.rollback();
                }
            }
            System.out.println((System.currentTimeMillis() - startMillis) + " total milliseconds");
        } finally {
            in.close();
        }
    }

    /**
     * Dumps every sentence returned by
     * {@code DBAccessor.getAllDBpediaSentence()} to {@code tmp.txt} in the
     * working directory, one per line, encoded as UTF-8.
     *
     * <p>Failures are printed via {@code printStackTrace()} and swallowed. The
     * output file is closed on every path.
     */
    public void getAllSentenceFromDB() {
        try {
            LinkedList<String> sentences = DBAccessor.getAllDBpediaSentence();
            PrintWriter pw = openWriter("tmp.txt");
            try {
                for (String sentence : sentences) {
                    pw.println(sentence);
                }
            } finally {
                pw.close();
            }
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Smoke test: runs the morphological analyzer wrapper on one fixed Korean
     * sentence and prints the spaced result.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            HanNanumMorphAnalWrapper maw = HanNanumMorphAnalWrapper.getInstance();
            System.out.println(maw.getSpacedresult("\ub9db\uc788\ub294 \uac10\uc790\ud0d5 1\uadf8\ub987\uc744 \uba39\uc744\uacbd\uc6b0 177Kcal\ub97c \uc12d\ucde8\ud558\uac8c \ub41c\ub2e4\uace0 \ud569\ub2c8\ub2e4."));
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Prefixes Lucene query-syntax characters with a backslash.
     *
     * <p>Escaped characters: {@code + - ! ( ) { } [ ] ^ " ~ * ? :} and the
     * backslash itself. The two-character operators {@code &&} and {@code ||}
     * are escaped as a pair; a single {@code &} or {@code |} is copied
     * unchanged. All other characters are copied unchanged.
     *
     * @param arg1 text to escape; must not be {@code null}
     * @return the escaped text
     */
    public String escapeSpecialCharacters(String arg1) {
        int length = arg1.length();
        StringBuilder escaped = new StringBuilder(length + 8);
        for (int i = 0; i < length; i++) {
            char c = arg1.charAt(i);
            switch (c) {
                case '&':
                case '|':
                    // Only the doubled form is an operator.
                    if (i + 1 < length && arg1.charAt(i + 1) == c) {
                        escaped.append('\\').append(c).append('\\').append(c);
                        i++; // the second character of the pair is consumed too
                    } else {
                        escaped.append(c);
                    }
                    break;
                case '+':
                case '-':
                case '!':
                case '(':
                case ')':
                case '{':
                case '}':
                case '[':
                case ']':
                case '^':
                case '"':
                case '~':
                case '*':
                case '?':
                case ':':
                case '\\':
                    escaped.append('\\').append(c);
                    break;
                default:
                    escaped.append(c);
                    break;
            }
        }
        return escaped.toString();
    }

    /**
     * Removes parenthesized groups, including both parentheses, from a string.
     *
     * <p>Repeatedly cuts from the first {@code '('} through the next
     * {@code ')'} that follows it, e.g. {@code "Seoul (city)"} becomes
     * {@code "Seoul "}. Nothing is removed when the first {@code '('} is the
     * very first character (behaviour kept from the previous implementation;
     * presumably so that a title is never reduced to nothing - TODO confirm),
     * or when the first {@code '('} has no closing partner. Nested groups are
     * not balanced: the cut ends at the first closing parenthesis.
     *
     * @param arg1 text to clean; must not be {@code null}
     * @return the text without the removed groups
     */
    public String removeParenthesis(String arg1) {
        String result = arg1;
        int open = result.indexOf('(');
        while (open > 0) {
            // Look for the partner after the opening parenthesis so that a
            // stray ')' earlier in the text cannot block the removal.
            int close = result.indexOf(')', open + 1);
            if (close < 0) {
                break;
            }
            // close + 1: the closing parenthesis itself is dropped as well.
            result = result.substring(0, open) + result.substring(close + 1);
            open = result.indexOf('(');
        }
        return result;
    }

    /** Opens a buffered UTF-8 reader on the given path. */
    private static BufferedReader openReader(String path) throws java.io.IOException {
        return new BufferedReader(new InputStreamReader(new FileInputStream(path), ENCODING));
    }

    /** Opens a UTF-8 print writer that creates or overwrites the given path. */
    private static PrintWriter openWriter(String path) throws java.io.IOException {
        return new PrintWriter(new OutputStreamWriter(new FileOutputStream(path), ENCODING));
    }

    /** Reads the document dump into an id-to-title table, skipping malformed rows. */
    private static Hashtable<Integer, String> loadDocTitles(String docName) throws java.io.IOException {
        Hashtable<Integer, String> titles = new Hashtable<Integer, String>();
        BufferedReader in = openReader(docName);
        try {
            in.readLine(); // discard the header row
            String line;
            while ((line = in.readLine()) != null) {
                int firstComma = line.indexOf(',');
                int lastComma = line.lastIndexOf(',');
                if (firstComma < 0 || lastComma < firstComma + 3) {
                    reportMalformed(docName, line);
                    continue;
                }
                try {
                    Integer docID = Integer.valueOf(line.substring(0, firstComma));
                    // +2 / -1 drop the quote characters around the title.
                    titles.put(docID, line.substring(firstComma + 2, lastComma - 1));
                } catch (NumberFormatException e) {
                    reportMalformed(docName, line);
                }
            }
        } finally {
            in.close();
        }
        return titles;
    }

    /**
     * Tells whether a sentence is a line that must not be indexed: it starts
     * with {@code #}, {@code |} or {@code thumb}, starts with the Korean
     * namespace prefix checked below (presumably the wiki "Category:" prefix),
     * is wrapped in square brackets, or starts with two lowercase ASCII
     * letters followed by a colon (presumably an interlanguage link).
     */
    private static boolean isMarkupLine(String s) {
        boolean twoLetterPrefix = s.length() > 3
                && isLowerAscii(s.charAt(0))
                && isLowerAscii(s.charAt(1))
                && s.charAt(2) == ':';
        return s.startsWith("#")
                || s.startsWith("\ubd84\ub958:")
                || (s.startsWith("[") && s.endsWith("]"))
                || twoLetterPrefix
                || s.startsWith("thumb")
                || s.startsWith("|");
    }

    /** Tells whether the character is one of the ASCII letters a-z. */
    private static boolean isLowerAscii(char c) {
        return c >= 'a' && c <= 'z';
    }

    /** Reports a row that does not match the expected shape. */
    private static void reportMalformed(String file, String line) {
        System.err.println("Skipping malformed line in " + file + ": " + line);
    }
}

