package org.ansj.splitWord.analysis;

import java.io.Reader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.ansj.app.crf.SplitWord;
import org.ansj.dic.LearnTool;
import org.ansj.domain.Nature;
import org.ansj.domain.NewWord;
import org.ansj.domain.Result;
import org.ansj.domain.Term;
import org.ansj.domain.TermNatures;
import org.ansj.library.CrfLibrary;
import org.ansj.recognition.arrimpl.NewWordRecognition;
import org.ansj.recognition.arrimpl.NumRecognition;
import org.ansj.recognition.arrimpl.PersonRecognition;
import org.ansj.recognition.arrimpl.UserDefineRecognition;
import org.ansj.recognition.impl.NatureRecognition;
import org.ansj.splitWord.Analysis;
import org.ansj.util.AnsjReader;
import org.ansj.util.Graph;
import org.ansj.util.TermUtil;
import org.nlpcn.commons.lang.tire.domain.Forest;
import org.nlpcn.commons.lang.util.MapCount;
import org.nlpcn.commons.lang.util.WordAlert;
import org.nlpcn.commons.lang.util.logging.Log;
import org.nlpcn.commons.lang.util.logging.LogFactory;

/* loaded from: input_file:org/ansj/splitWord/analysis/NlpAnalysis.class */
/**
 * {@link Analysis} implementation that combines the dictionary word graph with the
 * segmentation proposed by a CRF {@link SplitWord} model and a {@link LearnTool}
 * that collects new words.
 *
 * <p>The CRF model defaults to {@code CrfLibrary.get()} and the learn tool is created
 * lazily on first use; both can be replaced through {@link #setCrfModel(SplitWord)}
 * and {@link #setLearnTool(LearnTool)}.
 */
public class NlpAnalysis extends Analysis {
    /** Separator placed between the two words of a relation key. */
    private static final String TAB = "\t";

    /** Amount added to the relation map for every word pair proposed by the CRF cut. */
    private static final int CRF_WEIGHT = 6;

    /** U+00B7 MIDDLE DOT: never makes a word a "rule word" on its own (see {@link #isRuleWord}). */
    private static final char MIDDLE_DOT = '\u00B7';

    private static final Log LOG = LogFactory.getLog(NlpAnalysis.class);

    /** Punctuation and whitespace characters that mark a CRF word as a "rule word". */
    private static final Set<Character> filter = new HashSet<Character>();

    static {
        // ASCII punctuation and whitespace.
        filter.add(':');
        filter.add(' ');
        filter.add(',');
        filter.add('.');
        filter.add('\\');
        filter.add(';');
        filter.add('?');
        filter.add('!');
        filter.add('\"');
        filter.add('(');
        filter.add(')');
        filter.add('-');
        // General punctuation.
        filter.add('\u2014'); // em dash
        filter.add('\u201C'); // left double quotation mark
        filter.add('\u201D'); // right double quotation mark
        filter.add('\u2026'); // horizontal ellipsis
        // CJK symbols and punctuation.
        filter.add('\u3000'); // ideographic space
        filter.add('\u3001'); // ideographic comma
        filter.add('\u3002'); // ideographic full stop
        filter.add('\u300A'); // left double angle bracket
        filter.add('\u300B'); // right double angle bracket
        // Fullwidth forms.
        filter.add('\uFF01'); // fullwidth exclamation mark
        filter.add('\uFF08'); // fullwidth left parenthesis
        filter.add('\uFF09'); // fullwidth right parenthesis
        filter.add('\uFF0C'); // fullwidth comma
        filter.add('\uFF0D'); // fullwidth hyphen-minus
        filter.add('\uFF1A'); // fullwidth colon
        filter.add('\uFF1B'); // fullwidth semicolon
        filter.add('\uFF1F'); // fullwidth question mark
    }

    /** Learn tool receiving new words; created on demand by the merger when still {@code null}. */
    private LearnTool learn = null;

    /** CRF model used for the additional cut; when {@code null} the CRF step is skipped with a warning. */
    private SplitWord splitWord = CrfLibrary.get();

    /** Creates an analysis without any content attached. */
    public NlpAnalysis() {
    }

    /**
     * Creates an analysis reading its content from the given reader.
     *
     * @param reader source of the text, wrapped in an {@link AnsjReader}
     */
    public NlpAnalysis(Reader reader) {
        super.resetContent(new AnsjReader(reader));
    }

    /**
     * Segments {@code str} with a fresh {@code NlpAnalysis}.
     *
     * @param str text to segment
     * @return the result produced by {@code parseStr}
     */
    public static Result parse(String str) {
        return new NlpAnalysis().parseStr(str);
    }

    /**
     * Segments {@code str} with a fresh {@code NlpAnalysis} configured with the given forests.
     *
     * @param str       text to segment
     * @param forestArr forests handed to {@code setForests}
     * @return the result produced by {@code parseStr}
     */
    public static Result parse(String str, Forest... forestArr) {
        return new NlpAnalysis().setForests(forestArr).parseStr(str);
    }

    /**
     * Replaces the CRF model used by this analysis.
     *
     * @param splitWord the model to use; {@code null} disables the CRF step
     * @return this instance, for chaining
     */
    public NlpAnalysis setCrfModel(SplitWord splitWord) {
        this.splitWord = splitWord;
        return this;
    }

    /**
     * Replaces the learn tool used by this analysis.
     *
     * @param learnTool the learn tool to use; when {@code null} a new one is created on the next run
     * @return this instance, for chaining
     */
    public NlpAnalysis setLearnTool(LearnTool learnTool) {
        this.learn = learnTool;
        return this;
    }

    /**
     * Tells whether a word contains a character that excludes it from CRF relation building.
     *
     * <p>Each character other than the middle dot (U+00B7) is normalized with
     * {@code WordAlert.CharCover}; the word is a rule word as soon as one normalized
     * character is below 256 or is contained in the punctuation filter.
     *
     * @param str the word to inspect; must not be {@code null}
     * @return {@code true} if at least one character matches, {@code false} otherwise
     *         (including for the empty string)
     */
    public static boolean isRuleWord(String str) {
        for (int i = 0; i < str.length(); i++) {
            char raw = str.charAt(i);
            if (raw == MIDDLE_DOT) {
                continue;
            }
            char normalized = WordAlert.CharCover(raw);
            if (normalized < 256 || filter.contains(normalized)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Runs the recognition pipeline on {@code graph} and returns the selected terms.
     *
     * @param graph word graph of the sentence being analysed
     * @return the terms of the final path, in graph order
     */
    @Override
    protected List<Term> getResult(final Graph graph) {
        return new Analysis.Merger() {
            @Override
            public List<Term> merger() {
                if (NlpAnalysis.this.learn == null) {
                    NlpAnalysis.this.learn = new LearnTool();
                }

                graph.walkPath();
                NlpAnalysis.this.learn.learn(graph, NlpAnalysis.this.splitWord, NlpAnalysis.this.forests);

                if (graph.hasPerson && NlpAnalysis.this.isNameRecognition) {
                    new PersonRecognition().recognition(graph);
                }

                if (NlpAnalysis.this.splitWord != null) {
                    walkPathWithCrfRelations();
                } else {
                    NlpAnalysis.LOG.warn("not find any crf model, make sure your config right? ");
                }

                if (NlpAnalysis.this.isNumRecognition) {
                    new NumRecognition(NlpAnalysis.this.isQuantifierRecognition).recognition(graph);
                }

                // The returned list is not needed here; the call is kept because
                // getResult() invokes setIsNewWord on every term currently in the graph.
                getResult();

                new UserDefineRecognition(TermUtil.InsertTermType.SCORE_ADD_SORT, NlpAnalysis.this.forests)
                        .recognition(graph);
                graph.rmLittlePath();
                graph.walkPathByScore();

                new NewWordRecognition(NlpAnalysis.this.learn).recognition(graph);
                graph.walkPathByScore();

                List<Term> result = getResult();
                for (Term term : result) {
                    if (!term.isNewWord()) {
                        continue;
                    }
                    // Replace the generic "nw" nature when a more specific one can be guessed.
                    if ("nw".equals(term.getNatureStr())) {
                        TermNatures guessed = NatureRecognition.guessNature(term.getName());
                        if (!"nw".equals(guessed.nature.natureStr)) {
                            term.setNature(guessed.nature);
                        }
                    }
                    NlpAnalysis.this.learn.active(term.getName());
                }

                NlpAnalysis.this.setRealName(graph, result);
                return result;
            }

            /**
             * Cuts the sentence with the CRF model, inserts the resulting terms into the
             * graph and re-walks the path using a relation map built from adjacent words.
             */
            private void walkPathWithCrfRelations() {
                MapCount<String> relations = new MapCount<String>();
                List<String> words = NlpAnalysis.this.splitWord.cut(graph.chars);

                // Relation between the start marker and the first word.
                if (!words.isEmpty()) {
                    String first = words.get(0);
                    if (!NlpAnalysis.isRuleWord(first)) {
                        relations.add("始##始" + NlpAnalysis.TAB + first, NlpAnalysis.CRF_WEIGHT);
                    }
                }

                Term previous = null;
                int offset = 0;
                for (String word : words) {
                    TermNatures natures = new NatureRecognition(NlpAnalysis.this.forests).getTermNatures(word);
                    // Words without known natures are created with the NW natures.
                    Term current = natures != TermNatures.NULL
                            ? new Term(word, offset, natures)
                            : new Term(word, offset, TermNatures.NW);
                    offset += word.length();

                    if (NlpAnalysis.isRuleWord(word)) {
                        // Rule words are not inserted and break the chain of adjacent pairs.
                        previous = null;
                        continue;
                    }

                    TermUtil.insertTerm(graph.terms, current, TermUtil.InsertTermType.SCORE_ADD_SORT);
                    if (previous != null && !previous.isNewWord() && !current.isNewWord()) {
                        relations.add(previous.getName() + NlpAnalysis.TAB + word, NlpAnalysis.CRF_WEIGHT);
                    }
                    previous = current;
                    if (current.isNewWord()) {
                        NlpAnalysis.this.learn.addTerm(new NewWord(word, Nature.NW));
                    }
                }

                // Relation between the last kept word and the end marker.
                if (previous != null && !previous.isNewWord()) {
                    relations.add(previous.getName() + NlpAnalysis.TAB + "末##末", NlpAnalysis.CRF_WEIGHT);
                }
                graph.walkPath(relations.get());
            }

            /**
             * Collects the non-null terms of the graph, flagging each through
             * {@code setIsNewWord}. The last slot of {@code graph.terms} is never visited.
             */
            private List<Term> getResult() {
                List<Term> path = new ArrayList<Term>();
                int length = graph.terms.length - 1;
                for (int i = 0; i < length; i++) {
                    Term term = graph.terms[i];
                    if (term != null) {
                        NlpAnalysis.this.setIsNewWord(term);
                        path.add(term);
                    }
                }
                return path;
            }
        }.merger();
    }
}
