package net.java.sen.compiler;

import java.io.BufferedOutputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.MappedByteBuffer;
import java.nio.ShortBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Vector;
import net.java.sen.dictionary.CToken;
import net.java.sen.dictionary.DictionaryUtil;
import net.java.sen.trie.TrieBuilder;
import net.java.sen.util.CSVData;
import net.java.sen.util.CSVParser;

/* loaded from: input_file:net/java/sen/compiler/DictionaryBuilder.class */
/**
 * Compiles the CSV dictionary sources into the binary {@code *.sen} files.
 *
 * <p>Constructing an instance performs the whole build: it reads
 * {@code connection.csv} and {@code dictionary.csv} (plus any additional
 * dictionary CSV files passed to the constructor) and writes the connection
 * cost matrix, the part-of-speech data and index files, the token file, the
 * trie file and a header file. All file names are relative paths.
 */
public class DictionaryBuilder {
    /** Primary dictionary CSV; always read before any additional dictionaries. */
    private static final String DICTIONARY_CSV_FILENAME = "dictionary.csv";
    /** CSV holding the connection cost rules. */
    private static final String CONNECTION_CSV_FILENAME = "connection.csv";
    /** Output: connection cost matrix. */
    private static final String CONNECTION_COST_DATA_FILENAME = "connectionCost.sen";
    /** Output: per-entry part-of-speech records. */
    private static final String PART_OF_SPEECH_DATA_FILENAME = "partOfSpeech.sen";
    /** Output: string tables referenced by the part-of-speech records. */
    private static final String PART_OF_SPEECH_INDEX_FILENAME = "posIndex.sen";
    /** Output: serialized {@link CToken}s. */
    private static final String TOKEN_DATA_FILENAME = "token.sen";
    /** Output: trie mapping surface strings to token ranges. */
    private static final String TRIE_DATA_FILENAME = "trie.sen";
    /** Output: sizes of the other data files. */
    private static final String HEADER_DATA_FILENAME = "header.sen";
    /** Cost stored for every matrix cell that no connection rule sets explicitly. */
    private static final short DEFAULT_CONNECTION_COST = 10000;
    /** Column at which the part-of-speech fields start in a dictionary CSV row. */
    private static final int PART_OF_SPEECH_START = 2;
    /** Number of columns (from the start column) that form the connection-matrix key. */
    private static final int PART_OF_SPEECH_SIZE = 7;
    private static final String BOS_PART_OF_SPEECH = "文頭,*,*,*,*,*,*";
    private static final String EOS_PART_OF_SPEECH = "文末,*,*,*,*,*,*";
    private static final String UNKNOWN_PART_OF_SPEECH = "名詞,サ変接続,*,*,*,*,*";

    /** Character set used to read every CSV input. */
    private static final String CSV_CHARSET = "UTF-8";
    /** Column holding the surface string of a dictionary entry. */
    private static final int SURFACE_COLUMN = 0;
    /** Column holding the cost of a dictionary entry. */
    private static final int COST_COLUMN = 1;

    // Offsets below are relative to the part-of-speech start column. The field
    // names follow the IPADIC-style column layout this code appears to assume
    // (NOTE(review): confirm against the dictionary format documentation).
    /** Number of leading fields joined with '-' to form the part-of-speech name. */
    private static final int PART_OF_SPEECH_NAME_FIELDS = 4;
    private static final int CONJUGATION_TYPE_OFFSET = 4;
    private static final int CONJUGATION_FORM_OFFSET = 5;
    private static final int BASE_FORM_OFFSET = 6;
    private static final int READING_OFFSET = 7;
    private static final int PRONUNCIATION_OFFSET = 8;

    /** First and last code points accepted by the compact katakana encoding check. */
    private static final int KATAKANA_RANGE_START = 0x30A0;
    private static final int KATAKANA_RANGE_END = 0x30FF;

    /** Indices of the three cost matrix builders and the token attribute each one feeds. */
    private static final int RC_ATTR2_MATRIX = 0;
    private static final int RC_ATTR1_MATRIX = 1;
    private static final int LC_ATTR_MATRIX = 2;

    /** Indices of the three standard tokens written at the start of the token file. */
    private static final int BOS_TOKEN = 0;
    private static final int EOS_TOKEN = 1;
    private static final int UNKNOWN_TOKEN = 2;

    /** Initial capacity of the connection cost buffer. */
    private static final int INITIAL_COST_CAPACITY = 30000;
    /** Bytes occupied by the three dimension shorts at the start of the cost file. */
    private static final int COST_FILE_HEADER_BYTES = 6;

    /**
     * Keys and values handed to the {@link TrieBuilder}. Only the first
     * {@link #size} elements of each array are populated.
     */
    public static class TrieData {
        /** Distinct surface strings, in the order they appear in the sorted tuple list. */
        public String[] keys;
        /** For each key: duplicate count plus (index of the first tuple shifted left by 8). */
        public int[] values;
        /** Number of valid entries in {@link #keys} and {@link #values}. */
        public int size;

        private TrieData() {
        }
    }

    /**
     * Returns a copy of {@code sArr} grown by a factor of 1.5.
     *
     * <p>The result is always at least one element longer than the input, so
     * that arrays of length 0 or 1 (for which {@code length * 1.5} truncates
     * back to the same length) still make progress.
     *
     * @param sArr the array to grow
     * @return a longer array whose leading elements equal those of {@code sArr}
     */
    private static short[] resize(short[] sArr) {
        int grownLength = Math.max(sArr.length + 1, (int) (sArr.length * 1.5d));
        return Arrays.copyOf(sArr, grownLength);
    }

    /**
     * Expands a field written in the compound form {@code {a/b}suffix} into
     * its alternatives ({@code asuffix}, {@code bsuffix}).
     *
     * <p>A field that does not start with '{' or contains no '}' is returned
     * unchanged as a single-element list.
     *
     * @param str the raw CSV field
     * @return the expanded alternatives, in source order
     */
    private List<String> splitCompoundField(String str) {
        boolean compound = str.length() != 0 && str.charAt(0) == '{' && str.indexOf('}') > 0;
        if (!compound) {
            List<String> single = new ArrayList<>(1);
            single.add(str);
            return single;
        }

        String[] parts = str.split("[{}]");
        // String.split drops trailing empty strings, so brace-only input such
        // as "{}" yields fewer than two parts; treat the alternatives as empty.
        String alternatives = parts.length > 1 ? parts[1] : "";
        String suffix = parts.length == 3 ? parts[2] : "";

        List<String> expanded = new ArrayList<>(4);
        for (String alternative : alternatives.split("/")) {
            expanded.add(alternative + suffix);
        }
        return expanded;
    }

    /**
     * Returns the index of {@code value} in {@code table}, appending it first
     * if it is not yet present.
     */
    private static int internString(List<String> table, String value) {
        int index = table.indexOf(value);
        if (index < 0) {
            index = table.size();
            table.add(value);
        }
        return index;
    }

    /**
     * Returns {@code true} if any character of any string lies outside the
     * range {@code U+30A0..U+30FF}.
     */
    private static boolean containsNonKatakana(List<String> values) {
        for (String value : values) {
            for (int i = 0; i < value.length(); i++) {
                char c = value.charAt(i);
                if (c < KATAKANA_RANGE_START || c > KATAKANA_RANGE_END) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Writes {@code text} either as raw UTF-16 chars or through
     * {@link DictionaryUtil#writeKatakana}.
     */
    private static void writeText(DataOutputStream out, String text, boolean rawChars) throws IOException {
        if (rawChars) {
            out.writeChars(text);
        } else {
            DictionaryUtil.writeKatakana(out, text);
        }
    }

    /** Writes a string table as a char-sized count followed by each entry via {@code writeUTF}. */
    private static void writeStringTable(DataOutputStream out, List<String> table) throws IOException {
        out.writeChar(table.size());
        for (String entry : table) {
            out.writeUTF(entry);
        }
    }

    /** Creates a token whose three connection attributes are looked up for {@code partOfSpeech}. */
    private static CToken createStandardToken(CostMatrixBuilder[] matrixBuilders, String partOfSpeech) {
        CToken token = new CToken();
        token.rcAttr2 = (short) matrixBuilders[RC_ATTR2_MATRIX].getDicId(partOfSpeech);
        token.rcAttr1 = (short) matrixBuilders[RC_ATTR1_MATRIX].getDicId(partOfSpeech);
        token.lcAttr = (short) matrixBuilders[LC_ATTR_MATRIX].getDicId(partOfSpeech);
        return token;
    }

    /**
     * Joins the non-"*" leading part-of-speech fields of a row with '-'.
     *
     * @return the joined name, or {@code null} if every field was "*"
     */
    private static String buildPartOfSpeechName(String[] row, int partOfSpeechStart) {
        StringBuilder name = new StringBuilder();
        for (int column = partOfSpeechStart; column < partOfSpeechStart + PART_OF_SPEECH_NAME_FIELDS; column++) {
            if (!row[column].equals("*")) {
                name.append(row[column]);
                name.append("-");
            }
        }
        if (name.length() == 0) {
            return null;
        }
        // Drop the trailing separator.
        return name.substring(0, name.length() - 1);
    }

    /**
     * Reads every dictionary CSV, writes the part-of-speech data and index
     * files, collects one {@link CToken} per row into {@code tuples} (sorted
     * on return) and fills {@code standardTokens} with the BOS, EOS and
     * unknown-word tokens.
     *
     * @param csvFilenames dictionary CSV files to read, in order
     * @param partOfSpeechDataFilename output file for the per-entry records
     * @param partOfSpeechIndexFilename output file for the three string tables
     * @param matrixBuilders the three built cost matrix builders
     * @param partOfSpeechStart column at which the part-of-speech fields start
     * @param partOfSpeechSize number of columns forming the connection-matrix key
     * @param charset character set of the CSV files
     * @param bosPartOfSpeech connection key of the beginning-of-sentence token
     * @param eosPartOfSpeech connection key of the end-of-sentence token
     * @param unknownPartOfSpeech connection key of the unknown-word token
     * @param tuples receives one (surface, token) pair per row
     * @param standardTokens receives the three standard tokens at indices 0..2
     * @throws IOException if a file cannot be read or written
     * @throws RuntimeException with a "format error:" message if a row is malformed
     */
    private void createPartOfSpeechDataFile(List<String> csvFilenames, String partOfSpeechDataFilename,
            String partOfSpeechIndexFilename, CostMatrixBuilder[] matrixBuilders, int partOfSpeechStart,
            int partOfSpeechSize, String charset, String bosPartOfSpeech, String eosPartOfSpeech,
            String unknownPartOfSpeech, VirtualTupleList tuples, CToken[] standardTokens) throws IOException {
        List<String> partOfSpeechNames = new ArrayList<>();
        List<String> conjugationTypes = new ArrayList<>();
        List<String> conjugationForms = new ArrayList<>();
        CSVData connectionKey = new CSVData();

        // Every column up to the pronunciation field is read below, so a row
        // must contain all of them in addition to the connection-key columns.
        int requiredColumns = partOfSpeechStart + Math.max(partOfSpeechSize, PRONUNCIATION_OFFSET + 1);

        try (DataOutputStream out = new DataOutputStream(
                new BufferedOutputStream(new FileOutputStream(partOfSpeechDataFilename)))) {
            for (String csvFilename : csvFilenames) {
                try (FileInputStream in = new FileInputStream(csvFilename)) {
                    CSVParser parser = new CSVParser(in, charset);
                    String[] row;
                    while ((row = parser.nextTokens()) != null) {
                        if (row.length < requiredColumns) {
                            throw new RuntimeException("format error:" + parser.currentLine());
                        }
                        String partOfSpeechName = buildPartOfSpeechName(row, partOfSpeechStart);
                        if (partOfSpeechName == null) {
                            throw new RuntimeException("format error:" + parser.currentLine());
                        }
                        String surface = row[SURFACE_COLUMN];
                        String conjugationType = row[partOfSpeechStart + CONJUGATION_TYPE_OFFSET];
                        String conjugationForm = row[partOfSpeechStart + CONJUGATION_FORM_OFFSET];
                        String baseForm = row[partOfSpeechStart + BASE_FORM_OFFSET];
                        List<String> readings = splitCompoundField(row[partOfSpeechStart + READING_OFFSET]);
                        List<String> pronunciations =
                                splitCompoundField(row[partOfSpeechStart + PRONUNCIATION_OFFSET]);
                        if (pronunciations.size() < readings.size()) {
                            // Each reading is paired with the pronunciation at the same index.
                            throw new RuntimeException("format error:" + parser.currentLine());
                        }

                        connectionKey.clear();
                        for (int column = partOfSpeechStart; column < partOfSpeechStart + partOfSpeechSize; column++) {
                            connectionKey.append(row[column]);
                        }
                        String connectionKeyText = connectionKey.toString();

                        CToken token = createStandardToken(matrixBuilders, connectionKeyText);
                        // Byte offset of this entry's record within the data file.
                        token.partOfSpeechIndex = out.size();
                        token.length = (short) surface.length();
                        token.cost = (short) Integer.parseInt(row[COST_COLUMN]);
                        tuples.add(surface, token);

                        DictionaryUtil.writeVInt(out, internString(partOfSpeechNames, partOfSpeechName));
                        DictionaryUtil.writeVInt(out, internString(conjugationTypes, conjugationType));
                        DictionaryUtil.writeVInt(out, internString(conjugationForms, conjugationForm));

                        // A base form equal to the surface is stored as length 0.
                        if (baseForm.equals(surface)) {
                            DictionaryUtil.writeVInt(out, 0);
                        } else {
                            DictionaryUtil.writeVInt(out, baseForm.length());
                            out.writeChars(baseForm);
                        }

                        // Low bit of the count: 1 = strings are raw chars, 0 = katakana encoding.
                        boolean rawChars = containsNonKatakana(readings) || containsNonKatakana(pronunciations);
                        DictionaryUtil.writeVInt(out, (readings.size() << 1) | (rawChars ? 1 : 0));
                        for (int r = 0; r < readings.size(); r++) {
                            String reading = readings.get(r);
                            String pronunciation = pronunciations.get(r);
                            boolean pronunciationDiffers = !pronunciation.equals(reading);
                            // Low bit of the length: 1 = a separate pronunciation follows.
                            DictionaryUtil.writeVInt(out, (reading.length() << 1) | (pronunciationDiffers ? 1 : 0));
                            writeText(out, reading, rawChars);
                            if (pronunciationDiffers) {
                                DictionaryUtil.writeVInt(out, pronunciation.length());
                                writeText(out, pronunciation, rawChars);
                            }
                        }
                    }
                }
            }
        }

        try (DataOutputStream indexOut = new DataOutputStream(
                new BufferedOutputStream(new FileOutputStream(partOfSpeechIndexFilename)))) {
            writeStringTable(indexOut, partOfSpeechNames);
            writeStringTable(indexOut, conjugationTypes);
            writeStringTable(indexOut, conjugationForms);
        }

        tuples.sort();

        standardTokens[BOS_TOKEN] = createStandardToken(matrixBuilders, bosPartOfSpeech);
        standardTokens[EOS_TOKEN] = createStandardToken(matrixBuilders, eosPartOfSpeech);
        CToken unknownToken = createStandardToken(matrixBuilders, unknownPartOfSpeech);
        // The unknown-word token has no record in the part-of-speech data file.
        unknownToken.partOfSpeechIndex = -1;
        standardTokens[UNKNOWN_TOKEN] = unknownToken;
    }

    /**
     * Reads the connection rule CSV and writes the three-dimensional
     * connection cost matrix.
     *
     * <p>The output file starts with the three matrix dimensions as shorts,
     * followed by one short per cell. Cells not covered by any rule hold
     * {@code s}; later rules overwrite earlier ones for the cells they share.
     *
     * @param str path of the connection rule CSV
     * @param str2 path of the matrix file to write
     * @param s default cost for cells without a rule
     * @param str3 character set of the CSV
     * @return the three built cost matrix builders (one per matrix axis)
     * @throws IOException if the CSV is malformed or a file cannot be read or written
     */
    private CostMatrixBuilder[] createConnectionCostFile(String str, String str2, short s, String str3)
            throws IOException {
        CostMatrixBuilder[] matrixBuilders = {new CostMatrixBuilder(), new CostMatrixBuilder(), new CostMatrixBuilder()};
        List<String> firstAxisRules = new ArrayList<>();
        List<String> secondAxisRules = new ArrayList<>();
        List<String> thirdAxisRules = new ArrayList<>();
        short[] ruleCosts = new short[INITIAL_COST_CAPACITY];
        int ruleCount = 0;

        try (FileInputStream in = new FileInputStream(str)) {
            CSVParser parser = new CSVParser(in, str3);
            String[] row;
            while ((row = parser.nextTokens()) != null) {
                if (row.length < 4) {
                    throw new IOException("Connection cost CSV format error");
                }
                matrixBuilders[0].add(row[0]);
                firstAxisRules.add(row[0]);
                matrixBuilders[1].add(row[1]);
                secondAxisRules.add(row[1]);
                matrixBuilders[2].add(row[2]);
                thirdAxisRules.add(row[2]);
                if (ruleCount == ruleCosts.length) {
                    ruleCosts = resize(ruleCosts);
                }
                ruleCosts[ruleCount++] = (short) Integer.parseInt(row[3]);
            }
        }

        matrixBuilders[0].build();
        matrixBuilders[1].build();
        matrixBuilders[2].build();
        int firstSize = matrixBuilders[0].size();
        int secondSize = matrixBuilders[1].size();
        int thirdSize = matrixBuilders[2].size();
        int cellCount = firstSize * secondSize * thirdSize;
        int matrixBytes = cellCount * 2;

        try (RandomAccessFile file = new RandomAccessFile(str2, "rw");
                FileChannel channel = file.getChannel()) {
            // Truncate any previous content, write the header, then extend the
            // file so the matrix region can be memory-mapped.
            file.setLength(0L);
            file.writeShort(firstSize);
            file.writeShort(secondSize);
            file.writeShort(thirdSize);
            file.setLength(COST_FILE_HEADER_BYTES + matrixBytes);

            MappedByteBuffer mapped = channel.map(FileChannel.MapMode.READ_WRITE, COST_FILE_HEADER_BYTES, matrixBytes);
            ShortBuffer matrix = mapped.asShortBuffer();

            for (int cell = 0; cell < cellCount; cell++) {
                matrix.put(cell, s);
            }

            for (int rule = 0; rule < firstAxisRules.size(); rule++) {
                List<Integer> firstIds = matrixBuilders[0].getRuleIdList(firstAxisRules.get(rule));
                List<Integer> secondIds = matrixBuilders[1].getRuleIdList(secondAxisRules.get(rule));
                List<Integer> thirdIds = matrixBuilders[2].getRuleIdList(thirdAxisRules.get(rule));
                short cost = ruleCosts[rule];
                for (int firstId : firstIds) {
                    for (int secondId : secondIds) {
                        int planeOffset = thirdSize * ((secondSize * firstId) + secondId);
                        for (int thirdId : thirdIds) {
                            matrix.put(planeOffset + thirdId, cost);
                        }
                    }
                }
            }
            mapped.force();
        }
        return matrixBuilders;
    }

    /**
     * Writes the token file and builds the key/value arrays for the trie.
     *
     * <p>The three standard tokens are written first, followed by every token
     * of {@code virtualTupleList} in list order. Consecutive tuples with the
     * same key form one trie entry.
     *
     * @param str path of the token file to write
     * @param cTokenArr the three standard tokens (indices 0..2)
     * @param virtualTupleList the sorted (surface, token) pairs
     * @return the trie keys and values
     * @throws IOException if the token file cannot be written
     */
    private TrieData createTokenFile(String str, CToken[] cTokenArr, VirtualTupleList virtualTupleList)
            throws IOException {
        TrieData trieData = new TrieData();
        trieData.values = new int[virtualTupleList.size()];
        trieData.keys = new String[virtualTupleList.size()];
        trieData.size = 0;

        int groupStart = 0;
        int groupCount = 0;
        String previousKey = "";

        try (DataOutputStream out = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(str)))) {
            CToken.write(out, cTokenArr[BOS_TOKEN]);
            CToken.write(out, cTokenArr[EOS_TOKEN]);
            CToken.write(out, cTokenArr[UNKNOWN_TOKEN]);

            for (int index = 0; index < trieData.keys.length; index++) {
                StringCTokenTuple tuple = virtualTupleList.get(index);
                if (previousKey.equals(tuple.key) || index == 0) {
                    groupCount++;
                } else {
                    // Key changed: emit the finished group and start a new one.
                    // The value packs the count into the low 8 bits, so a count
                    // above 255 would spill into the start-index bits.
                    trieData.keys[trieData.size] = virtualTupleList.get(groupStart).key;
                    trieData.values[trieData.size] = groupCount + (groupStart << 8);
                    trieData.size++;
                    groupCount = 1;
                    groupStart = index;
                }
                previousKey = tuple.key;
                CToken.write(out, tuple.value);
            }
            out.flush();
        }

        // Emit the final group, which the loop above never closes.
        trieData.keys[trieData.size] = virtualTupleList.get(groupStart).key;
        trieData.values[trieData.size] = groupCount + (groupStart << 8);
        trieData.size++;
        return trieData;
    }

    /**
     * Builds the trie file from the collected keys and values.
     *
     * @param str path of the trie file to write
     * @param trieData keys and values produced by {@link #createTokenFile}
     * @throws IOException if the trie file cannot be written
     */
    private void createTrieFile(String str, TrieData trieData) throws IOException {
        TrieBuilder builder = new TrieBuilder(trieData.keys, trieData.values, trieData.size);
        builder.build(str);
    }

    /**
     * Writes the header file: the lengths, as ints, of the connection cost,
     * part-of-speech data, token and trie files, in that order.
     *
     * @param str path of the header file to write
     * @throws IOException if the header file cannot be written
     */
    private void createHeaderFile(String str) throws IOException {
        String[] dataFilenames = {
            CONNECTION_COST_DATA_FILENAME,
            PART_OF_SPEECH_DATA_FILENAME,
            TOKEN_DATA_FILENAME,
            TRIE_DATA_FILENAME
        };
        try (DataOutputStream out = new DataOutputStream(new FileOutputStream(str))) {
            for (String dataFilename : dataFilenames) {
                out.writeInt((int) new File(dataFilename).length());
            }
        }
    }

    /**
     * Builds all dictionary data files.
     *
     * @param strArr additional dictionary CSV files, read after {@code dictionary.csv}
     * @throws IOException if any input cannot be read or any output cannot be written
     */
    public DictionaryBuilder(String[] strArr) throws IOException {
        List<String> dictionaryFilenames = new ArrayList<>();
        dictionaryFilenames.add(DICTIONARY_CSV_FILENAME);
        dictionaryFilenames.addAll(Arrays.asList(strArr));

        CostMatrixBuilder[] matrixBuilders = createConnectionCostFile(
                CONNECTION_CSV_FILENAME, CONNECTION_COST_DATA_FILENAME, DEFAULT_CONNECTION_COST, CSV_CHARSET);

        VirtualTupleList tuples = new VirtualTupleList();
        CToken[] standardTokens = new CToken[3];
        createPartOfSpeechDataFile(dictionaryFilenames, PART_OF_SPEECH_DATA_FILENAME, PART_OF_SPEECH_INDEX_FILENAME,
                matrixBuilders, PART_OF_SPEECH_START, PART_OF_SPEECH_SIZE, CSV_CHARSET, BOS_PART_OF_SPEECH,
                EOS_PART_OF_SPEECH, UNKNOWN_PART_OF_SPEECH, tuples, standardTokens);

        TrieData trieData = createTokenFile(TOKEN_DATA_FILENAME, standardTokens, tuples);
        createTrieFile(TRIE_DATA_FILENAME, trieData);
        createHeaderFile(HEADER_DATA_FILENAME);
    }
}
