/*
 * Decompiled with CFR 0.152.
 */
package kaist.cilab.tripleextractor.dbpedia;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Statement;
import kaist.cilab.tripleextractor.dbpedia.Triple;
import kaist.cilab.tripleextractor.util.Configuration;
import kaist.cilab.tripleextractor.util.DBMgr;

/**
 * Scans the tab-separated DBpedia dump named by {@code Configuration.dbPedia},
 * normalises each (subject, predicate, object) row and reports progress and
 * the longest subject seen.
 *
 * <p>A database connection is opened and an {@code INSERT} statement for the
 * {@code kc_triple} table is prepared, and the slice file
 * {@code ./Infobox_Slice/1.txt} is created (or truncated), but in the current
 * code nothing is inserted or written: the run only computes statistics.
 */
public class DBpediaSlicer {
    /** Insert statement prepared against the connection; not executed by the current code. */
    private static final String INSERT_TRIPLE_SQL =
            "INSERT INTO kc_triple (triple_id, subject, predicate, object) VALUES (?, ?, ?, ?)";

    /** Session settings applied to the connection, in this order, before the dump is read. */
    private static final String[] UTF8_SESSION_SETTINGS = {
        "set character_set_connection=utf8;",
        "set character_set_server=utf8;",
        "set character_set_client=utf8;",
        "set character_set_database=utf8;",
    };

    /** Output file that is created/truncated on every run. */
    private static final String SLICE_FILE = "./Infobox_Slice/1.txt";

    /** Character encoding used for the dump, the slice file and URL decoding. */
    private static final String ENCODING = "UTF-8";

    /**
     * Triples whose cleaned object is longer than this are skipped and not counted.
     * NOTE(review): presumably tied to a column width in kc_triple -- confirm against the schema.
     */
    private static final int MAX_OBJECT_LENGTH = 510;

    /** The running count of accepted triples is printed every time it reaches a multiple of this. */
    private static final int PROGRESS_INTERVAL = 10000;

    /**
     * Runs one pass over the DBpedia dump.
     *
     * <p>Prints the accepted-triple count at every {@code PROGRESS_INTERVAL} and, once all
     * resources have been closed, {@code "SUBJECT MAX: <n>"}. Any exception is caught and its
     * stack trace printed; nothing is propagated to the caller. The reader, writer, prepared
     * statement and connection are closed on every exit path.
     */
    public void sliceDBpedia() {
        try {
            DBMgr.openConnection(Configuration.database, Configuration.dbSchema, Configuration.dbID, Configuration.dbPasswd);
            int subjMaxL = 0;
            // insertTriple and pw are never used in the body; they are kept as resources so the
            // statement is still prepared and the slice file still created, and both get closed.
            try (Connection conn = DBMgr.getConnection();
                 PreparedStatement insertTriple = conn.prepareStatement(INSERT_TRIPLE_SQL)) {
                applyUtf8SessionSettings(conn);
                try (BufferedReader in = new BufferedReader(
                             new InputStreamReader(new FileInputStream(Configuration.dbPedia), ENCODING));
                     PrintWriter pw = new PrintWriter(
                             new OutputStreamWriter(new FileOutputStream(SLICE_FILE), ENCODING))) {
                    subjMaxL = scanTriples(in);
                }
            }
            // Printed only after every resource closed without error.
            System.out.println("SUBJECT MAX: " + subjMaxL);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Executes each of the {@code UTF8_SESSION_SETTINGS} statements on the connection,
     * using a single statement object that is closed afterwards.
     *
     * @param conn open connection to configure
     * @throws SQLException if creating the statement or executing a setting fails
     */
    private static void applyUtf8SessionSettings(Connection conn) throws SQLException {
        try (Statement stmt = conn.createStatement()) {
            for (String setting : UTF8_SESSION_SETTINGS) {
                stmt.execute(setting);
            }
        }
    }

    /**
     * Reads the dump line by line, cleans each triple and tracks the longest subject.
     *
     * <p>Lines with fewer than three tab-separated fields are ignored. The subject length is
     * recorded before the object-length filter, so over-long triples still contribute to it.
     *
     * @param in reader positioned at the start of the dump
     * @return length of the longest cleaned subject, or 0 if no line had three fields
     * @throws IOException if reading from {@code in} fails
     */
    private static int scanTriples(BufferedReader in) throws IOException {
        int subjMaxL = 0;
        int cnt = 0;
        String line;
        while ((line = in.readLine()) != null) {
            String[] data = line.split("\t");
            if (data.length < 3) {
                continue;
            }
            Triple tp = new Triple();
            tp.subject = cleanField(data[0]);
            tp.predicate = cleanField(data[1]);
            tp.object = cleanField(data[2]);
            subjMaxL = Math.max(subjMaxL, tp.subject.length());
            if (tp.object.length() > MAX_OBJECT_LENGTH) {
                continue;
            }
            tp.tid = ++cnt;
            if (cnt % PROGRESS_INTERVAL == 0) {
                System.out.println(cnt);
            }
        }
        return subjMaxL;
    }

    /**
     * Normalises one raw field of the dump.
     *
     * <p>The field is URL-decoded, underscores become spaces, surrounding whitespace is trimmed
     * and every double quote is prefixed with a backslash. If the field cannot be URL-decoded
     * (for example a stray {@code %}), the raw text is used instead with only the underscore
     * replacement and trim applied -- no quote escaping on that path. The same fallback is used
     * for subject, predicate and object, so one malformed subject no longer aborts the scan.
     *
     * @param raw field text exactly as read from the dump
     * @return the cleaned field
     */
    private static String cleanField(String raw) {
        try {
            return URLDecoder.decode(raw, ENCODING).replace('_', ' ').trim().replace("\"", "\\\"");
        } catch (IllegalArgumentException | UnsupportedEncodingException e) {
            return raw.replace('_', ' ').trim();
        }
    }

    /**
     * Command-line entry point: runs a single slicing pass.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        DBpediaSlicer ds = new DBpediaSlicer();
        ds.sliceDBpedia();
    }
}

