package edu.stanford.nlp.parser.lexparser;

import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.math.ArrayMath;
import edu.stanford.nlp.process.WordSegmenter;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Distribution;
import edu.stanford.nlp.stats.GeneralizedCounter;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.Treebank;
import edu.stanford.nlp.util.Index;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

/* loaded from: input_file:edu/stanford/nlp/parser/lexparser/ChineseMarkovWordSegmenter.class */
/**
 * Word segmenter that combines a character-based lexicon with a first-order
 * Markov model over part-of-speech tags.
 *
 * <p>Training counts, per tagged sentence, the tag of the first word and every
 * (previous tag, tag) pair; {@link #finishTraining()} turns those counts into
 * smoothed distributions. Segmentation fills a chart over character spans and
 * reads the best-scoring sequence of words back out of it.
 *
 * <p>Lifecycle: {@link #initializeTraining(double)}, one or more {@code train}
 * calls, {@link #finishTraining()}, then {@link #segment(String)}.
 */
public class ChineseMarkovWordSegmenter implements WordSegmenter {
    /** Longest candidate word, in {@code char}s, that the chart considers. */
    private static final int MAX_WORD_LENGTH = 10;

    /** Lambda passed to the Laplace smoothing of both tag distributions. */
    private static final double SMOOTHING_LAMBDA = 0.5d;

    /** Smoothed distribution of the tag of a sentence's first word. */
    private Distribution<String> initialPOSDist;
    /** Smoothed next-tag distribution, keyed by the preceding tag. */
    private Map<String, Distribution<String>> markovPOSDists;
    private ChineseCharacterBasedLexicon lex;
    /** Snapshot of the tag index contents taken in {@link #finishTraining()}. */
    private Set<String> POSes;
    private final Index<String> wordIndex;
    private final Index<String> tagIndex;
    /** Training-time counts of sentence-initial tags. */
    private transient ClassicCounter<String> initial;
    /** Training-time two-level counts of (previous tag, tag). */
    private transient GeneralizedCounter ruleCounter;
    private static final long serialVersionUID = 1559606198270645508L;

    /**
     * Creates a segmenter around an existing lexicon.
     *
     * @param chineseCharacterBasedLexicon lexicon used to score candidate words
     * @param index word index handed to every {@code IntTaggedWord} built here
     * @param index2 tag index; training adds each observed tag to it
     */
    public ChineseMarkovWordSegmenter(ChineseCharacterBasedLexicon chineseCharacterBasedLexicon, Index<String> index, Index<String> index2) {
        this.lex = chineseCharacterBasedLexicon;
        this.wordIndex = index;
        this.tagIndex = index2;
    }

    /**
     * Creates a segmenter with a new lexicon built from the given parameters.
     *
     * @param chineseTreebankParserParams parameters passed to the lexicon constructor
     * @param index word index shared with the new lexicon
     * @param index2 tag index shared with the new lexicon
     */
    public ChineseMarkovWordSegmenter(ChineseTreebankParserParams chineseTreebankParserParams, Index<String> index, Index<String> index2) {
        this.lex = new ChineseCharacterBasedLexicon(chineseTreebankParserParams, index, index2);
        this.wordIndex = index;
        this.tagIndex = index2;
    }

    /**
     * Resets the tag counters and forwards to the lexicon's own initialization.
     *
     * @param d value passed unchanged to {@code lex.initializeTraining}
     */
    @Override // edu.stanford.nlp.process.WordSegmenter
    public void initializeTraining(double d) {
        lex.initializeTraining(d);
        initial = new ClassicCounter<>();
        ruleCounter = new GeneralizedCounter(2);
    }

    /**
     * Trains on every tree in the collection, in iteration order.
     *
     * @param collection trees to train on
     */
    @Override // edu.stanford.nlp.process.WordSegmenter
    public void train(Collection<Tree> collection) {
        for (Tree tree : collection) {
            train(tree);
        }
    }

    /**
     * Trains on the tagged yield of a single tree.
     *
     * @param tree tree whose tagged words form one training sentence
     */
    @Override // edu.stanford.nlp.process.WordSegmenter
    public void train(Tree tree) {
        train((List<TaggedWord>) tree.taggedYield());
    }

    /**
     * Trains on one tagged sentence: updates the lexicon, registers every tag
     * in the tag index, and counts the initial tag and the tag bigrams.
     *
     * @param list the tagged words of one sentence, in order
     */
    @Override // edu.stanford.nlp.process.WordSegmenter
    public void train(List<TaggedWord> list) {
        lex.train(list, 1.0d);
        String previousTag = null;
        for (TaggedWord taggedWord : list) {
            String tag = taggedWord.tag();
            tagIndex.add(tag);
            if (previousTag == null) {
                initial.incrementCount(tag);
            } else {
                ruleCounter.incrementCount2D(previousTag, tag);
            }
            previousTag = tag;
        }
    }

    /**
     * Finishes lexicon training and converts the collected tag counts into
     * Laplace-smoothed distributions sized by the current tag index.
     */
    @Override // edu.stanford.nlp.process.WordSegmenter
    @SuppressWarnings("unchecked")
    public void finishTraining() {
        lex.finishTraining();
        int numTags = tagIndex.size();
        POSes = new HashSet<>(tagIndex.objectsList());
        initialPOSDist = Distribution.laplaceSmoothedDistribution(initial, numTags, SMOOTHING_LAMBDA);
        markovPOSDists = new HashMap<>();
        // ruleCounter is a raw type, so entries arrive untyped; cast explicitly.
        // Each entry is read as a key list whose first element is the previous
        // tag, and a counter over the tags that followed it.
        for (Object rawEntry : ruleCounter.lowestLevelCounterEntrySet()) {
            Map.Entry<?, ?> entry = (Map.Entry<?, ?>) rawEntry;
            String previousTag = (String) ((List<?>) entry.getKey()).get(0);
            ClassicCounter<String> nextTagCounts = (ClassicCounter<String>) entry.getValue();
            markovPOSDists.put(previousTag,
                    Distribution.laplaceSmoothedDistribution(nextTagCounts, numTags, SMOOTHING_LAMBDA));
        }
    }

    /**
     * Segments a string into words using the lexicon and the tag Markov model.
     *
     * @param str text to segment; an empty string yields an empty list
     * @return the words in order; their concatenation equals {@code str}
     */
    @Override // edu.stanford.nlp.process.WordSegmenter
    public List<HasWord> segment(String str) {
        return segmentWordsWithMarkov(str);
    }

    /**
     * Segments and tags without the Markov model: each candidate word is scored
     * by the lexicon plus the log probability of its tag under the lexicon's
     * POS distribution, and only the best tag per span is kept.
     *
     * <p>Not called from within this class.
     *
     * @param str text to segment
     * @return tagged words in order; if the chart holds no analysis for the
     *         remaining text, that remainder is returned as one untagged word
     */
    private ArrayList<TaggedWord> basicSegmentWords(String str) {
        int length = str.length();
        // scores[s][e]: best score for span [s, e)
        double[][] scores = new double[length][length + 1];
        // splits[s][e]: end of the first word of the best analysis of [s, e)
        int[][] splits = new int[length][length + 1];
        // bestTags[s][e]: tag id of the best single-word reading of [s, e)
        int[][] bestTags = new int[length][length + 1];
        for (double[] row : scores) {
            Arrays.fill(row, Double.NEGATIVE_INFINITY);
        }

        // Single-word spans.
        for (int span = 1; span <= MAX_WORD_LENGTH; span++) {
            for (int start = 0; start + span <= length; start++) {
                int end = start + span;
                String word = str.substring(start, end);
                for (String tag : POSes) {
                    IntTaggedWord itw = new IntTaggedWord(word, tag, wordIndex, tagIndex);
                    double score = lex.score(itw, 0, word, null)
                            + Math.log(lex.getPOSDistribution().probabilityOf(tag));
                    if (score > scores[start][end]) {
                        scores[start][end] = score;
                        splits[start][end] = end;
                        bestTags[start][end] = itw.tag();
                    }
                }
            }
        }

        // Multi-word spans: a single word on the left, any analysis on the right.
        for (int span = 2; span <= length; span++) {
            for (int start = 0; start + span <= length; start++) {
                int end = start + span;
                for (int split = start + 1; split < end && split - start <= MAX_WORD_LENGTH; split++) {
                    if (splits[start][split] != split) {
                        continue; // left part is not a single word
                    }
                    double score = scores[start][split] + scores[split][end];
                    if (score > scores[start][end]) {
                        scores[start][end] = score;
                        splits[start][end] = split;
                    }
                }
            }
        }

        ArrayList<TaggedWord> words = new ArrayList<>();
        int start = 0;
        while (start < length) {
            int end = splits[start][length];
            if (end <= start) {
                // The cell was never filled, so following it would never
                // advance. Keep the text and stop instead of looping forever.
                String noTag = null;
                words.add(new TaggedWord(str.substring(start), noTag));
                break;
            }
            words.add(new TaggedWord(str.substring(start, end), tagIndex.get(bestTags[start][end])));
            start = end;
        }
        return words;
    }

    /**
     * Chart segmentation in which every cell is additionally indexed by the tag
     * of the span's first word, so that adjacent words can be scored with the
     * tag transition distributions.
     *
     * @param str text to segment
     * @return words in order; empty for an empty string. If the chart holds no
     *         analysis for the remaining text, that remainder is returned as a
     *         single word
     */
    private ArrayList<HasWord> segmentWordsWithMarkov(String str) {
        int length = str.length();
        ArrayList<HasWord> words = new ArrayList<>();
        if (length == 0) {
            // A zero-length chart has no row 0 to read the result from.
            return words;
        }
        int numTags = POSes.size();
        // scores[s][e][t]: best score for span [s, e) whose first word has tag t
        double[][][] scores = new double[length][length + 1][numTags];
        // splits[s][e][t]: end of the first word in that best analysis
        int[][][] splits = new int[length][length + 1][numTags];
        // nextTags[s][e][t]: tag id of the word following the first word
        int[][][] nextTags = new int[length][length + 1][numTags];
        for (double[][] plane : scores) {
            for (double[] cell : plane) {
                Arrays.fill(cell, Double.NEGATIVE_INFINITY);
            }
        }

        // Single-word spans; the initial-tag distribution applies only at position 0.
        for (int span = 1; span <= MAX_WORD_LENGTH; span++) {
            for (int start = 0; start + span <= length; start++) {
                int end = start + span;
                String word = str.substring(start, end);
                for (String tag : POSes) {
                    IntTaggedWord itw = new IntTaggedWord(word, tag, wordIndex, tagIndex);
                    double score = lex.score(itw, 0, word, null);
                    if (start == 0) {
                        score += Math.log(initialPOSDist.probabilityOf(tag));
                    }
                    scores[start][end][itw.tag()] = score;
                    splits[start][end][itw.tag()] = end;
                }
            }
        }

        // Resolve each tag's id once instead of inside the innermost loops.
        // Array order follows the set's iteration order, so ties between
        // equally scored analyses resolve as they would when iterating the set.
        String[] tags = POSes.toArray(new String[0]);
        int[] tagIds = new int[tags.length];
        for (int t = 0; t < tags.length; t++) {
            tagIds[t] = tagIndex.indexOf(tags[t], true);
        }

        // Multi-word spans: a single word on the left, any analysis on the right.
        for (int span = 2; span <= length; span++) {
            for (int start = 0; start + span <= length; start++) {
                int end = start + span;
                for (int split = start + 1; split < end && split - start <= MAX_WORD_LENGTH; split++) {
                    for (int left = 0; left < tags.length; left++) {
                        int leftId = tagIds[left];
                        if (splits[start][split][leftId] != split) {
                            continue; // left part is not a single word with this tag
                        }
                        Distribution<String> transitions = markovPOSDists.get(tags[left]);
                        if (transitions == null) {
                            continue; // this tag was never followed by another in training
                        }
                        for (int right = 0; right < tags.length; right++) {
                            int rightId = tagIds[right];
                            double score = scores[start][split][leftId]
                                    + scores[split][end][rightId]
                                    + Math.log(transitions.probabilityOf(tags[right]));
                            if (score > scores[start][end][leftId]) {
                                scores[start][end][leftId] = score;
                                splits[start][end][leftId] = split;
                                nextTags[start][end][leftId] = rightId;
                            }
                        }
                    }
                }
            }
        }

        // Read the best analysis back, following the chosen tag from word to word.
        int tagId = ArrayMath.argmax(scores[0][length]);
        int start = 0;
        while (start < length) {
            int end = splits[start][length][tagId];
            if (end <= start) {
                // The cell was never filled, so following it would never
                // advance. Keep the text and stop instead of looping forever.
                words.add(new Word(str.substring(start)));
                break;
            }
            words.add(new Word(str.substring(start, end)));
            if (end < length) {
                tagId = nextTags[start][length][tagId];
            }
            start = end;
        }
        return words;
    }

    /**
     * Re-segments the concatenated yield of every tree in a treebank and
     * returns the distribution of the resulting word lengths.
     *
     * <p>Not called from within this class.
     *
     * @param treebank trees whose leaves are expected to be {@code Word}s
     * @return distribution over segmented word lengths, measured in {@code char}s
     */
    private Distribution<Integer> getSegmentedWordLengthDistribution(Treebank treebank) {
        ClassicCounter<Integer> lengthCounts = new ClassicCounter<>();
        for (Tree tree : treebank) {
            StringBuilder text = new StringBuilder();
            for (Object leaf : tree.yield()) {
                text.append((Word) leaf);
            }
            for (HasWord word : segment(text.toString())) {
                lengthCounts.incrementCount(Integer.valueOf(word.word().length()));
            }
        }
        return Distribution.getDistribution(lengthCounts);
    }

    /**
     * Loading a serialized segmenter is not supported by this implementation.
     *
     * @param str ignored
     * @throws UnsupportedOperationException always
     */
    @Override // edu.stanford.nlp.process.WordSegmenter
    public void loadSegmenter(String str) {
        throw new UnsupportedOperationException();
    }
}
