package org.apdplat.qa.datasource;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apdplat.qa.files.FilesConfig;
import org.apdplat.qa.model.Evidence;
import org.apdplat.qa.model.Question;
import org.apdplat.qa.system.QuestionAnsweringSystem;
import org.apdplat.qa.util.MySQLUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apdplat/qa/datasource/BaiduDataSource.class */
/**
 * {@link DataSource} that builds {@link Question} objects from Baidu web search results.
 *
 * <p>For a question text, the MySQL store is consulted first (via {@link MySQLUtils}); on a
 * miss, the Baidu result page is fetched with jsoup and every result that has exactly one
 * title link and exactly one {@code div.c-abstract} snippet becomes an {@link Evidence}.
 *
 * <p>Question files are classpath resources read as UTF-8, one entry per line, in the form
 * {@code question} or {@code question:expectedAnswer}.
 */
public class BaiduDataSource implements DataSource {
    private static final Logger LOG = LoggerFactory.getLogger(BaiduDataSource.class);

    // HTTP request headers sent with every search request.
    private static final String ACCEPT = "text/html, */*; q=0.01";
    private static final String ENCODING = "gzip, deflate";
    private static final String LANGUAGE = "zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3";
    private static final String CONNECTION = "keep-alive";
    private static final String HOST = "www.baidu.com";
    private static final String USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:31.0) Gecko/20100101 Firefox/31.0";

    /** Number of result pages fetched per question. */
    private static final int PAGE = 1;
    /** Step of the {@code pn} (result offset) URL parameter between consecutive pages. */
    private static final int PAGESIZE = 10;
    // Not referenced anywhere in this class.
    // NOTE(review): presumably a compile-time switch that was inlined - confirm before removing.
    private static final boolean SUMMARY = true;

    /** Referer sent with the first page request; later pages use the previous page's URL. */
    private static final String BAIDU_HOME = "http://www.baidu.com/";
    /** Prefix passed to {@link MySQLUtils} when reading and writing questions. */
    private static final String DATABASE_PREFIX = "baidu:";
    /** A question is stored in MySQL only when it has MORE than this many evidences. */
    private static final int CACHE_THRESHOLD = 7;
    /** Pause before each question loaded from a file is looked up. */
    private static final long REQUEST_INTERVAL_MILLIS = 3000L;
    /**
     * Separator between question and expected answer. Note that, being a character class,
     * it matches an ASCII colon, a full-width colon and also a literal {@code '|'}.
     */
    private static final String QUESTION_ANSWER_SEPARATOR = "[:|：]";

    /** Classpath locations of the question files read by {@link #getAndAnswerQuestions}. */
    private final List<String> files = new ArrayList<>();

    /** Creates a data source without any question file. */
    public BaiduDataSource() {
    }

    /**
     * Creates a data source backed by a single question file.
     *
     * @param str classpath location of the question file; {@code null} is ignored
     */
    public BaiduDataSource(String str) {
        if (str != null) {
            this.files.add(str);
        }
    }

    /**
     * Creates a data source backed by several question files.
     *
     * @param list classpath locations of the question files; a {@code null} list and
     *             {@code null} elements are ignored
     */
    public BaiduDataSource(List<String> list) {
        if (list != null) {
            for (String file : list) {
                if (file != null) {
                    this.files.add(file);
                }
            }
        }
    }

    /**
     * Looks up a single question without answering it.
     *
     * @param str the question text
     * @return the question with its evidences, or {@code null} when none could be found
     */
    @Override // org.apdplat.qa.datasource.DataSource
    public Question getQuestion(String str) {
        return getAndAnswerQuestion(str, null);
    }

    /**
     * Loads every question of the configured files without answering them.
     *
     * @return the questions for which evidences were found, in file order
     */
    @Override // org.apdplat.qa.datasource.DataSource
    public List<Question> getQuestions() {
        return getAndAnswerQuestions(null);
    }

    /**
     * Loads every question of the configured files and, when a system is given, answers each.
     *
     * <p>If the calling thread is interrupted while pausing between requests, the interrupt
     * flag is restored and the questions collected so far are returned.
     *
     * @param questionAnsweringSystem system used to answer each question; may be {@code null}
     * @return the questions for which evidences were found, in file order
     */
    @Override // org.apdplat.qa.datasource.DataSource
    public List<Question> getAndAnswerQuestions(QuestionAnsweringSystem questionAnsweringSystem) {
        List<Question> questions = new ArrayList<>();
        for (String file : this.files) {
            boolean completed = loadQuestions(file, questionAnsweringSystem, questions);
            LOG.info("从Question文件" + file + "中加载Question，从baidu中检索到了 " + questions.size() + " 个Question");
            if (!completed) {
                break;
            }
        }
        return questions;
    }

    /**
     * Reads one question file and appends every question that could be looked up to
     * {@code questions}. I/O failures are logged and end the processing of this file only.
     *
     * @return {@code false} when the thread was interrupted and loading must stop
     */
    private boolean loadQuestions(String file, QuestionAnsweringSystem questionAnsweringSystem, List<Question> questions) {
        try (BufferedReader reader = openQuestionFile(file)) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (isIgnoredLine(line)) {
                    continue;
                }
                String entry = line.trim();
                LOG.info("从类路径的 " + file + " 中加载Question:" + entry);
                if (!pauseBetweenRequests()) {
                    return false;
                }
                // Limit 2: only the first separator splits, so an expected answer may itself
                // contain a separator character.
                String[] parts = entry.split(QUESTION_ANSWER_SEPARATOR, 2);
                String questionText = parts[0];
                String expectAnswer = parts.length == 2 && !parts[1].isEmpty() ? parts[1] : null;
                if (questionText.trim().isEmpty()) {
                    LOG.warn("Skipping entry without question text in {}: {}", file, entry);
                    continue;
                }
                LOG.info("Question:" + questionText);
                LOG.info("ExpectAnswer:" + expectAnswer);
                Question question = getQuestion(questionText);
                if (question == null) {
                    continue;
                }
                question.setExpectAnswer(expectAnswer);
                questions.add(question);
                if (questionAnsweringSystem != null) {
                    questionAnsweringSystem.answerQuestion(question);
                }
            }
        } catch (IOException e) {
            LOG.error("Failed to read question file {}", file, e);
        }
        return true;
    }

    /**
     * Opens a question file from the classpath as UTF-8 text.
     *
     * @throws FileNotFoundException when the resource does not exist
     */
    private BufferedReader openQuestionFile(String file) throws IOException {
        InputStream in = getClass().getResourceAsStream(file);
        if (in == null) {
            throw new FileNotFoundException("Question file not found on classpath: " + file);
        }
        return new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));
    }

    /** Tells whether a line of a question file is blank, a comment, or too short to be an entry. */
    private static boolean isIgnoredLine(String line) {
        String trimmed = line.trim();
        return trimmed.isEmpty()
                || trimmed.startsWith("#")
                // '#' as the second character of the raw line.
                // NOTE(review): presumably a comment line preceded by one stray character
                // (e.g. a byte order mark) - confirm.
                || line.indexOf("#") == 1
                || line.length() < 3;
    }

    /**
     * Sleeps for {@link #REQUEST_INTERVAL_MILLIS}.
     *
     * @return {@code false} when interrupted; the interrupt flag is restored for the caller
     */
    private static boolean pauseBetweenRequests() {
        try {
            Thread.sleep(REQUEST_INTERVAL_MILLIS);
            return true;
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            LOG.warn("Interrupted while pausing between requests; stopping question loading");
            return false;
        }
    }

    /**
     * Looks up a question - first in MySQL, then on Baidu - and optionally answers it.
     *
     * <p>A question fetched from Baidu is stored in MySQL when it has more than
     * {@link #CACHE_THRESHOLD} evidences.
     *
     * @param str the question text
     * @param questionAnsweringSystem system used to answer the question; may be {@code null}
     * @return the question with its evidences, or {@code null} when the text is {@code null}
     *         or blank, no evidence was found, or the search URL could not be built
     */
    @Override // org.apdplat.qa.datasource.DataSource
    public Question getAndAnswerQuestion(String str, QuestionAnsweringSystem questionAnsweringSystem) {
        if (str == null || str.trim().isEmpty()) {
            LOG.error("Cannot search for a null or blank question");
            return null;
        }
        Question cached = MySQLUtils.getQuestionFromDatabase(DATABASE_PREFIX, str);
        if (cached != null) {
            LOG.info("从数据库中查询到Question：" + cached.getQuestion());
            if (questionAnsweringSystem != null) {
                questionAnsweringSystem.answerQuestion(cached);
            }
            return cached;
        }

        Question question = new Question();
        question.setQuestion(str);
        String keyword;
        try {
            keyword = URLEncoder.encode(question.getQuestion(), "UTF-8");
        } catch (UnsupportedEncodingException e) {
            LOG.error("url构造失败", e);
            return null;
        }

        String referer = BAIDU_HOME;
        for (int page = 0; page < PAGE; page++) {
            String url = "http://www.baidu.com/s?tn=monline_5_dg&ie=utf-8&wd=" + keyword + "&oq=" + keyword + "&usm=3&f=8&bs=" + keyword + "&rsv_bp=1&rsv_sug3=1&rsv_sug4=141&rsv_sug1=1&rsv_sug=1&pn=" + (page * PAGESIZE);
            LOG.debug(url);
            List<Evidence> evidences = searchBaidu(url, referer);
            // The page just requested is the referer of the next page request.
            referer = url;
            if (evidences.isEmpty()) {
                LOG.error("结果页 " + (page + 1) + " 没有搜索到结果");
                break;
            }
            question.addEvidences(evidences);
        }

        LOG.info("Question：" + question.getQuestion() + " 搜索到Evidence " + question.getEvidences().size() + " 条");
        if (question.getEvidences().isEmpty()) {
            return null;
        }
        if (question.getEvidences().size() > CACHE_THRESHOLD) {
            LOG.info("将Question：" + question.getQuestion() + " 加入MySQL数据库");
            MySQLUtils.saveQuestionToDatabase(DATABASE_PREFIX, question);
        }
        if (questionAnsweringSystem != null) {
            questionAnsweringSystem.answerQuestion(question);
        }
        return question;
    }

    /**
     * Fetches one Baidu result page and extracts an evidence from every result that has
     * exactly one non-blank title link and exactly one non-blank abstract.
     *
     * @param str  URL of the result page
     * @param str2 value sent as the {@code Referer} header
     * @return the extracted evidences; empty (never {@code null}) when nothing was found or
     *         the request failed
     */
    private List<Evidence> searchBaidu(String str, String str2) {
        List<Evidence> evidences = new ArrayList<>();
        try {
            Elements results = Jsoup.connect(str)
                    .header("Accept", ACCEPT)
                    .header("Accept-Encoding", ENCODING)
                    .header("Accept-Language", LANGUAGE)
                    .header("Connection", CONNECTION)
                    .header("User-Agent", USER_AGENT)
                    .header("Host", HOST)
                    .header("Referer", str2)
                    .get()
                    .select("html > body > div > div > div > div > div");
            for (Element result : results) {
                Elements titleLinks = result.select("h3 > a");
                if (titleLinks.size() != 1) {
                    LOG.debug("没有找到标题");
                    continue;
                }
                String title = titleLinks.get(0).text();
                if (title == null || "".equals(title.trim())) {
                    LOG.debug("标题为空");
                    continue;
                }
                Elements abstracts = result.select("div.c-abstract");
                if (abstracts.size() != 1) {
                    LOG.debug("没有找到摘要");
                    continue;
                }
                String snippet = abstracts.get(0).text();
                if (snippet == null || "".equals(snippet.trim())) {
                    LOG.debug("摘要为空");
                    continue;
                }
                Evidence evidence = new Evidence();
                evidence.setTitle(title);
                evidence.setSnippet(snippet);
                evidences.add(evidence);
            }
        } catch (Exception e) {
            // Best effort: any failure of the request or the parsing yields the evidences
            // collected so far instead of propagating to the caller.
            LOG.error("搜索出错", e);
        }
        return evidences;
    }

    /**
     * Manual smoke test: looks up one sample question and logs the result.
     *
     * @param strArr ignored
     */
    public static void main(String[] strArr) {
        Question question = new BaiduDataSource(FilesConfig.personNameQuestions).getQuestion("APDPlat的创始人是谁？");
        if (question == null) {
            LOG.error("No evidence found for the sample question");
            return;
        }
        LOG.info(question.toString());
    }
}
