package org.apdplat.qa.questiontypeanalysis;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.apdplat.qa.questiontypeanalysis.patternbased.MainPartExtracter;
import org.apdplat.qa.questiontypeanalysis.patternbased.QuestionStructure;
import org.apdplat.qa.util.Tools;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apdplat/qa/questiontypeanalysis/ValidateMainPartExtracter.class */
/**
 * Checks the output of {@link MainPartExtracter} against a corpus of questions whose
 * subject/predicate/object ("main part") has been annotated by hand, and logs a report.
 *
 * <p>Each corpus line is expected to look like {@code question:subject predicate object}.
 * Lines without an annotation are still loaded (question only) so that the extractor can
 * be run on them.
 */
public class ValidateMainPartExtracter {
    private static final Logger LOG = LoggerFactory.getLogger(ValidateMainPartExtracter.class);

    /** Classpath resource holding the annotated questions. */
    private static final String QUESTIONS_FILE = "/org/apdplat.qa/questiontypeanalysis/AllTestQuestionsWithMainPart.txt";

    /**
     * Parses annotated corpus lines into {@link QuestionStructure} objects.
     *
     * <ul>
     *   <li>{@code question} (no colon, or nothing after the colon): added with the question only.</li>
     *   <li>{@code question:a b c}: added with the question and the main part {@code "a b c"}.</li>
     *   <li>Anything else (blank or malformed annotation, more than one colon): logged and skipped.</li>
     * </ul>
     *
     * @param set raw corpus lines
     * @return the structures that could be parsed, in the iteration order of {@code set}
     */
    public static List<QuestionStructure> parseQuestions(Set<String> set) {
        LOG.info("解析预先标注的语料的主谓宾");
        List<QuestionStructure> parsed = new ArrayList<QuestionStructure>();
        for (String raw : set) {
            String line = raw.trim();
            // String.split never returns null; it may return an empty array (e.g. for ":").
            String[] fields = line.split(":");
            if (fields.length == 1) {
                LOG.info("问句未标注主谓宾：" + line);
                QuestionStructure unannotated = new QuestionStructure();
                unannotated.setQuestion(fields[0].trim());
                parsed.add(unannotated);
            } else if (fields.length == 2) {
                // Trim before splitting: "question: a b c" would otherwise produce a leading
                // empty token and be rejected although the annotation is well formed.
                String annotation = fields[1].trim();
                String[] parts = annotation.isEmpty() ? new String[0] : annotation.split("\\s+");
                if (parts.length != 3) {
                    LOG.info("问句未标注主谓宾：" + line);
                } else {
                    QuestionStructure annotated = new QuestionStructure();
                    // Trimmed for consistency with the single-field branch above.
                    annotated.setQuestion(fields[0].trim());
                    annotated.setMainPart(parts[0] + " " + parts[1] + " " + parts[2]);
                    parsed.add(annotated);
                }
            } else {
                LOG.info("问句未标注主谓宾：" + line);
            }
        }
        return parsed;
    }

    /**
     * Loads the annotated corpus, runs the extractor on every parsed question, sorts the
     * results into four groups (not extracted, extracted but not annotated, extracted and
     * different from the annotation, extracted and equal to the annotation) and logs each
     * group followed by summary statistics.
     */
    public static void validate() {
        MainPartExtracter extracter = new MainPartExtracter();
        Set<String> questions = Tools.getQuestions(QUESTIONS_FILE);
        LOG.info("从文件中加载" + questions.size() + "个问题：" + QUESTIONS_FILE);
        List<QuestionStructure> annotatedQuestions = parseQuestions(questions);
        LOG.info("从标注的问句语料库中加载" + questions.size() + "条记录");
        LOG.info("成功解析" + annotatedQuestions.size() + "个问句");

        List<QuestionStructure> notExtracted = new ArrayList<QuestionStructure>();
        List<QuestionStructure> notAnnotated = new ArrayList<QuestionStructure>();
        HashMap<QuestionStructure, String> mismatched = new HashMap<QuestionStructure, String>();
        List<QuestionStructure> matched = new ArrayList<QuestionStructure>();
        int annotatedCount = 0;

        for (QuestionStructure annotated : annotatedQuestions) {
            QuestionStructure extracted = extracter.getMainPart(annotated.getQuestion());
            if (extracted == null) {
                // Keep the question text so the report can list it; storing null here
                // would fail later when the group is logged.
                QuestionStructure placeholder = new QuestionStructure();
                placeholder.setQuestion(annotated.getQuestion());
                notExtracted.add(placeholder);
            } else if (extracted.getMainPart() == null) {
                notExtracted.add(extracted);
            } else if (annotated.getMainPart() == null) {
                notAnnotated.add(extracted);
            } else {
                annotatedCount++;
                if (extracted.getMainPart().equals(annotated.getMainPart())) {
                    matched.add(extracted);
                } else {
                    mismatched.put(extracted, describeMismatch(extracted.getMainPart(), annotated.getMainPart()));
                }
            }
        }

        // Number of reported structures for which perfect() is true.
        int perfectCount = 0;

        LOG.info("");
        LOG.info("能提取主谓宾但未标注（" + notAnnotated.size() + "）：");
        int index = 1;
        for (QuestionStructure structure : notAnnotated) {
            if (structure.perfect()) {
                perfectCount++;
            }
            LOG.info(index++ + " " + structure.getQuestion() + " : " + structure.getMainPart());
            logDependencies(structure);
        }

        LOG.info("");
        LOG.info("不能提取主谓宾数（" + notExtracted.size() + "）：");
        index = 1;
        for (QuestionStructure structure : notExtracted) {
            LOG.info(index++ + " " + structure.getQuestion());
            logDependencies(structure);
        }

        LOG.info("");
        LOG.info("能提取主谓宾但和标注【不一致】数（" + mismatched.size() + "）：");
        index = 1;
        for (QuestionStructure structure : mismatched.keySet()) {
            if (structure.perfect()) {
                perfectCount++;
            }
            LOG.info(index++ + " " + structure.getQuestion() + " " + structure.getMainPart());
            logDependencies(structure);
            LOG.info("\t" + mismatched.get(structure));
        }

        LOG.info("");
        LOG.info("能提取主谓宾且和标注【一致】数（" + matched.size() + "）：");
        index = 1;
        for (QuestionStructure structure : matched) {
            if (structure.perfect()) {
                perfectCount++;
            }
            LOG.info(index++ + " " + structure.getQuestion() + " : " + structure.getMainPart());
            logDependencies(structure);
        }

        int total = matched.size() + mismatched.size() + notAnnotated.size() + notExtracted.size();
        int recognized = total - notExtracted.size();
        LOG.info("主谓宾提取统计");
        LOG.info("两种提取模式结果一致数: " + perfectCount);
        LOG.info("两种提取模式结果一致率: " + percent(perfectCount, total) + "%");
        LOG.info("问题总数: " + total);
        LOG.info("识别数: " + recognized);
        LOG.info("识别率: " + percent(recognized, total) + "%");
        LOG.info("未识别数: " + notExtracted.size());
        LOG.info("未识别率: " + percent(notExtracted.size(), total) + "%");
        LOG.info("人工标注数: " + annotatedCount);
        LOG.info("人工标注率: " + percent(annotatedCount, total) + "%");
        LOG.info("识别准确数(人工标注): " + matched.size());
        LOG.info("识别准确率(人工标注): " + percent(matched.size(), annotatedCount) + "%");
        LOG.info("识别不准确数(人工标注): " + mismatched.size());
        LOG.info("识别不准确率(人工标注): " + percent(mismatched.size(), annotatedCount) + "%");
    }

    /**
     * Explains why an extracted main part differs from the annotated one.
     *
     * @param extractedMainPart main part produced by the extractor
     * @param annotatedMainPart main part taken from the corpus annotation
     * @return a message naming the malformed side, or the differing components followed by
     *         the annotated main part
     */
    private static String describeMismatch(String extractedMainPart, String annotatedMainPart) {
        String[] extracted = extractedMainPart.split("\\s+");
        if (extracted.length != 3) {
            return "主谓宾提取错误";
        }
        String[] annotated = annotatedMainPart.split("\\s+");
        if (annotated.length != 3) {
            return "主谓宾标注错误";
        }
        StringBuilder reason = new StringBuilder();
        if (!extracted[0].trim().equals(annotated[0].trim())) {
            reason.append(" 主语提取错误 ");
        }
        if (!extracted[1].trim().equals(annotated[1].trim())) {
            reason.append(" 谓语提取错误 ");
        }
        if (!extracted[2].trim().equals(annotated[2].trim())) {
            reason.append(" 宾语提取错误 ");
        }
        reason.append(" 正确的主谓宾应该为：").append(annotatedMainPart);
        return reason.toString().trim();
    }

    /**
     * Logs every dependency of {@code structure} on its own tab-indented line.
     * A structure without dependencies (for example a placeholder created when the
     * extractor returned nothing) logs nothing.
     */
    private static void logDependencies(QuestionStructure structure) {
        Iterable<String> dependencies = structure.getDependencies();
        if (dependencies == null) {
            return;
        }
        for (String dependency : dependencies) {
            LOG.info("\t" + dependency);
        }
    }

    /**
     * Returns {@code part} as a percentage of {@code total}, computed in floating point.
     * Dividing the two ints directly would truncate the ratio to 0 or 1 and would throw
     * on an empty denominator, so 0.0 is returned when {@code total} is 0.
     */
    private static double percent(int part, int total) {
        if (total == 0) {
            return 0.0d;
        }
        return ((double) part / total) * 100.0d;
    }

    /**
     * Command-line entry point; runs {@link #validate()}.
     *
     * @param strArr ignored
     */
    public static void main(String[] strArr) {
        validate();
    }
}
