package edu.stanford.nlp.quoteattribution;

import edu.stanford.nlp.ie.NERClassifierCombiner;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.tokensregex.types.Expressions;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.Annotator;
import edu.stanford.nlp.pipeline.CleanXmlAnnotator;
import edu.stanford.nlp.pipeline.ProtobufAnnotationSerializer;
import edu.stanford.nlp.pipeline.QuoteAnnotator;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.quoteattribution.Sieves.Sieve;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.XMLUtils;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/* loaded from: input_file:edu/stanford/nlp/quoteattribution/XMLToAnnotation.class */
public class XMLToAnnotation {

    /* loaded from: input_file:edu/stanford/nlp/quoteattribution/XMLToAnnotation$Data.class */
    /**
     * Simple mutable holder bundling the three results of parsing one gold XML file:
     * the gold quote records, the character list and the annotated document.
     */
    public static class Data {
        /** Gold quote records passed to the constructor. */
        public List<GoldQuoteInfo> goldList;
        /** Characters (persons) passed to the constructor. */
        public List<Person> personList;
        /** Annotated document passed to the constructor. */
        public Annotation doc;

        /**
         * Stores the given references as-is; no copies are made.
         *
         * @param goldList gold quote records
         * @param personList character list
         * @param doc annotated document
         */
        public Data(List<GoldQuoteInfo> goldList, List<Person> personList, Annotation doc) {
            this.doc = doc;
            this.personList = personList;
            this.goldList = goldList;
        }
    }

    /* loaded from: input_file:edu/stanford/nlp/quoteattribution/XMLToAnnotation$GoldQuoteInfo.class */
    /**
     * Gold attribution for a single quote: the token span and text of the linked mention
     * together with the speaker name. {@code readXMLFormat} uses {@code -1}/{@code -1}/{@code null}
     * for the span and mention text when a quote has no resolvable mention.
     */
    public static class GoldQuoteInfo {
        /** Token index at which the linked mention starts. */
        public int mentionStartTokenIndex;
        /** Token index at which the linked mention ends. */
        public int mentionEndTokenIndex;
        /** Speaker name attached to the quote. */
        public String speaker;
        /** Text of the linked mention; may be {@code null}. */
        public String mention;

        /**
         * @param mentionStartTokenIndex first token index of the mention
         * @param mentionEndTokenIndex last token index of the mention
         * @param speaker speaker name
         * @param mention mention text, possibly {@code null}
         */
        public GoldQuoteInfo(int mentionStartTokenIndex, int mentionEndTokenIndex, String speaker, String mention) {
            this.speaker = speaker;
            this.mention = mention;
            this.mentionStartTokenIndex = mentionStartTokenIndex;
            this.mentionEndTokenIndex = mentionEndTokenIndex;
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /* loaded from: input_file:edu/stanford/nlp/quoteattribution/XMLToAnnotation$Mention.class */
    /**
     * A mention found in the XML: its raw text plus the token span it was aligned to.
     * Fields are package-private and mutable.
     */
    public static class Mention {
        /** Mention text exactly as read from the XML node. */
        String text;
        /** Token index at which the mention starts. */
        int begin;
        /** Token index at which the mention ends. */
        int end;

        /**
         * @param text mention text
         * @param begin first token index
         * @param end last token index
         */
        public Mention(String text, int begin, int end) {
            this.begin = begin;
            this.end = end;
            this.text = text;
        }
    }

    /**
     * Concatenates the text content of every direct child of {@code node}.
     *
     * <p>For each child, a newline that is not immediately followed by another newline is
     * replaced by a space, every match of the regex {@code Expressions.VAR_SELF} is removed,
     * and a single space is appended after the child's text.
     *
     * @param node parent node whose children are flattened
     * @return the concatenated, cleaned text (ends with a space when there is at least one child)
     */
    public static String getJustText(Node node) {
        NodeList children = node.getChildNodes();
        StringBuilder flattened = new StringBuilder();
        for (int idx = 0; idx < children.getLength(); idx++) {
            String raw = children.item(idx).getTextContent();
            // Single line breaks are soft wraps; the lookahead leaves the second newline of a blank line alone.
            String unwrapped = raw.replaceAll("\n(?!\n)", " ");
            String cleaned = unwrapped.replaceAll(Expressions.VAR_SELF, "");
            flattened.append(cleaned).append(" ");
        }
        return flattened.toString();
    }

    /**
     * Builds the fixed pipeline configuration used when a document has to be annotated
     * before being cached.
     *
     * @return a fresh {@link Properties} instance containing the seven settings listed below
     */
    public static Properties getProcessedCoreNLPProperties() {
        String[][] settings = {
            {"annotators", "tokenize, ssplit, pos, lemma, ner, depparse, quote"},
            {"ner.useSUTime", "false"},
            {NERClassifierCombiner.APPLY_NUMERIC_CLASSIFIERS_PROPERTY, "false"},
            {StanfordCoreNLP.NEWLINE_IS_SENTENCE_BREAK_PROPERTY, "always"},
            {"outputFormat", "serialized"},
            {"serializer", "edu.stanford.nlp.pipeline.ProtobufAnnotationSerializer"},
            {"threads", "1"},
        };
        Properties config = new Properties();
        for (String[] setting : settings) {
            config.setProperty(setting[0], setting[1]);
        }
        return config;
    }

    /**
     * Annotates {@code str} with a pipeline built from {@code properties} and serializes the
     * result to {@code file}, unless {@code file} already exists (in which case nothing happens).
     *
     * <p>The output stream is now always closed, and a file left behind by a failed run is
     * removed so that the {@code exists()} check cannot later mistake it for a finished cache.
     * As before, an {@link IOException} is reported with {@code printStackTrace()} and not rethrown.
     *
     * @param file cache file to create
     * @param properties pipeline configuration
     * @param str text to annotate
     */
    public static void processCoreNLPIfDoesNotExist(File file, Properties properties, String str) {
        if (file.exists()) {
            return;
        }
        boolean written = false;
        try {
            ProtobufAnnotationSerializer serializer = new ProtobufAnnotationSerializer(true);
            // Build the pipeline before touching the file so a construction failure creates nothing on disk.
            StanfordCoreNLP pipeline = new StanfordCoreNLP(properties);
            try (BufferedOutputStream out = new BufferedOutputStream(new FileOutputStream(file.getAbsolutePath()))) {
                serializer.write(pipeline.process(str), out);
                written = true;
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Covers both I/O failures and runtime failures while annotating.
            if (!written && file.exists() && !file.delete()) {
                System.err.println("Could not remove incomplete cache file " + file.getAbsolutePath());
            }
        }
    }

    /**
     * Returns the annotation for {@code str}, using {@code str2 + ".ser.gz"} as an on-disk cache.
     *
     * <p>The cache file is created via {@code processCoreNLPIfDoesNotExist} when missing, then
     * read back, and a {@link QuoteAnnotator} configured with empty properties is run on the
     * loaded annotation before it is returned.
     *
     * @param str text to annotate when no cache file exists
     * @param str2 path prefix of the cache file
     * @param properties pipeline configuration used when the cache has to be built
     * @return the loaded annotation after quote annotation
     * @throws IOException if the cache file cannot be read
     */
    public static Annotation getAnnotatedFile(String str, String str2, Properties properties) throws IOException {
        File cacheFile = new File(str2 + ".ser.gz");
        processCoreNLPIfDoesNotExist(cacheFile, properties, str);

        Annotation loaded = ExtractQuotesUtil.readSerializedProtobufFile(cacheFile);
        QuoteAnnotator quoteAnnotator = new QuoteAnnotator(new Properties());
        quoteAnnotator.annotate(loaded);
        return loaded;
    }

    /**
     * Parses a comma-separated list of prefixed ids into their numeric parts.
     *
     * <p>Each entry has its first character dropped and the remainder parsed as an integer,
     * e.g. {@code "Q12,M3"} yields {@code [12, 3]}. Surrounding whitespace is ignored and blank
     * entries are skipped; {@code null} and the empty string both yield an empty list.
     *
     * @param str the raw attribute value, may be {@code null} or empty
     * @return a new mutable list of the parsed ids, in input order
     * @throws NumberFormatException if the text after an entry's first character is not an integer
     */
    public static List<Integer> readConnection(String str) {
        List<Integer> ids = new ArrayList<>();
        if (str == null) {
            return ids;
        }
        for (String entry : str.split(",")) {
            String prefixedId = entry.trim();
            if (prefixedId.isEmpty()) {
                continue;
            }
            // The leading character is a one-letter tag; only the number after it is kept.
            ids.add(Integer.valueOf(prefixedId.substring(1)));
        }
        return ids;
    }

    /**
     * Finds the token at which a piece of text starting at token {@code i} ends, by comparing
     * character spans with the length of the trimmed text.
     *
     * <p>The span is measured from the begin offset of token {@code i} to the end offset of each
     * following token. The first token whose span equals the trimmed length is returned; if a
     * token overshoots that length, the index before it is returned (which is {@code i - 1} when
     * the very first token already overshoots); if the tokens run out, the last index is returned.
     *
     * <p>A former {@code indexOf} call on the token's original text was removed: its result was
     * never used and it could only fail on tokens lacking that annotation.
     *
     * @param i index of the first token of the text
     * @param list document tokens carrying character offset annotations
     * @param str the text to align; it is trimmed before its length is taken
     * @return index of the last token belonging to the text, as described above
     * @throws IndexOutOfBoundsException if {@code i} is not a valid index into {@code list}
     */
    public static int getEndIndex(int i, List<CoreLabel> list, String str) {
        int targetLength = str.trim().length();
        int spanStart = (Integer) list.get(i).get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
        for (int idx = i; idx < list.size(); idx++) {
            int tokenEnd = (Integer) list.get(idx).get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
            int covered = tokenEnd - spanStart;
            if (covered == targetLength) {
                return idx;
            }
            if (covered > targetLength) {
                // This token reaches past the text, so the text ended on the previous one.
                return idx - 1;
            }
        }
        return list.size() - 1;
    }

    /**
     * Reads the character list from the first {@code characters} element of the document.
     *
     * <p>Every child element named {@code character} becomes one {@code Person} built from its
     * name attribute ({@code Sieve.NAME}), its optional gender attribute
     * ({@code Annotator.STANFORD_GENDER}, empty string when absent) and its {@code aliases}
     * attribute split on {@code ';'}.
     *
     * <p>The name is passed through unchanged, as it always was. Code that built a capitalised
     * copy and then discarded it has been removed; its only effect was to throw on an empty name.
     *
     * @param document parsed XML document
     * @return a new list of persons in document order
     * @throws NullPointerException if the {@code characters} element, or a character's name or
     *         {@code aliases} attribute, is missing
     */
    public static List<Person> readXMLCharacterList(Document document) {
        List<Person> people = new ArrayList<>();
        NodeList entries = document.getDocumentElement().getElementsByTagName("characters").item(0).getChildNodes();
        for (int i = 0; i < entries.getLength(); i++) {
            Node entry = entries.item(i);
            if (!entry.getNodeName().equals("character")) {
                continue;
            }
            Node nameAttr = entry.getAttributes().getNamedItem(Sieve.NAME);
            Node genderAttr = entry.getAttributes().getNamedItem(Annotator.STANFORD_GENDER);
            Node aliasesAttr = entry.getAttributes().getNamedItem("aliases");

            String name = nameAttr.getNodeValue();
            String gender = genderAttr == null ? "" : genderAttr.getNodeValue();
            List<String> aliases = Arrays.asList(aliasesAttr.getNodeValue().split(";"));
            people.add(new Person(name, gender, aliases));
        }
        return people;
    }

    /* JADX WARN: Failed to find 'out' block for switch in B:5:0x0033. Please report as an issue. */
    /**
     * Writes one line per person to the file at {@code str}, in the form
     * {@code name;gender;alias1;alias2...}, where gender is {@code M}, {@code F} or empty.
     *
     * <p>The writer is closed even if writing fails, and a write error (which
     * {@link PrintWriter} would otherwise swallow) is reported as an {@link IOException}.
     *
     * @param str path of the file to write
     * @param list persons to write, in output order
     * @throws IOException if the file cannot be opened or written
     */
    public static void writeCharacterList(String str, List<Person> list) throws IOException {
        StringBuilder lines = new StringBuilder();
        for (Person person : list) {
            String genderCode;
            switch (person.gender) {
                case MALE:
                    genderCode = "M";
                    break;
                case FEMALE:
                    genderCode = "F";
                    break;
                default:
                    // UNK and anything else is written as an empty field.
                    genderCode = "";
                    break;
            }
            lines.append(person.name).append(";").append(genderCode);
            for (String alias : person.aliases) {
                lines.append(";").append(alias);
            }
            lines.append("\n");
        }
        try (PrintWriter writer = IOUtils.getPrintWriter(str)) {
            writer.print(lines);
            // PrintWriter never throws on write; checkError() flushes and exposes any failure.
            if (writer.checkError()) {
                throw new IOException("Failed to write character list to " + str);
            }
        }
    }

    /* JADX WARN: Multi-variable type inference failed */
    /**
     * Parses a gold-annotated XML file into a {@link Data} bundle.
     *
     * <p>The text of the first {@code text} element is flattened with {@code getJustText} and
     * annotated through {@code getAnnotatedFile}, which caches the result next to the input as
     * {@code str + ".ser.gz"}. The XML is then walked in document order while two cursors are
     * kept in step with the annotation: a token cursor, advanced with {@code getEndIndex} for
     * every node inside a {@code chapter}, and a quote cursor into the annotated quotations.
     * Mentions are recorded by numeric id with their token span; each annotated quotation gets
     * one gold record holding the speaker attribute and the span of the first connected mention.
     *
     * @param str path of the XML file; also used as the prefix of the cache file
     * @return gold quote records (one per annotated quotation), the character list and the annotation
     * @throws RuntimeException if the number of gold records differs from the number of annotated quotations
     * @throws Exception on XML reading or annotation loading failures
     */
    public static Data readXMLFormat(String str) throws Exception {
        int endIndex;
        Document readDocumentFromFile = XMLUtils.readDocumentFromFile(str);
        Node item = readDocumentFromFile.getDocumentElement().getElementsByTagName("text").item(0);
        Annotation annotatedFile = getAnnotatedFile(getJustText(item), str, getProcessedCoreNLPProperties());
        // list: annotated quotations; list2: document tokens (raw types left by the decompiler).
        List list = (List) annotatedFile.get(CoreAnnotations.QuotationsAnnotation.class);
        List list2 = (List) annotatedFile.get(CoreAnnotations.TokensAnnotation.class);
        // arrayList: resulting GoldQuoteInfo records; hashMap: numeric mention id -> Mention.
        ArrayList arrayList = new ArrayList();
        HashMap hashMap = new HashMap();
        List<Person> readXMLCharacterList = readXMLCharacterList(readDocumentFromFile);
        // NOTE(review): the return value is discarded; confirm readPersonMap has no needed side effect before removing.
        QuoteAttributionUtils.readPersonMap(readXMLCharacterList);
        // arrayList2: one (first connected mention id or null, speaker attribute) pair per annotated quotation.
        ArrayList<Pair> arrayList2 = new ArrayList();
        // i: index of the next annotated quotation to match against the XML.
        int i = 0;
        NodeList childNodes = item.getChildNodes();
        // i2: index of the next token not yet consumed by an XML node.
        int i2 = 0;
        for (int i3 = 0; i3 < childNodes.getLength(); i3++) {
            Node item2 = childNodes.item(i3);
            // Only "chapter" children of the text element are processed; everything else is skipped.
            if (item2.getNodeName().equals("chapter")) {
                NodeList childNodes2 = item2.getChildNodes();
                for (int i4 = 0; i4 < childNodes2.getLength(); i4++) {
                    Node item3 = childNodes2.item(i4);
                    // Quote element (node name taken from Annotator.STANFORD_QUOTE).
                    if (item3.getNodeName().equals(Annotator.STANFORD_QUOTE)) {
                        // First walk the quote's children so the token cursor passes over the quote
                        // and any mention nested inside it is recorded.
                        NodeList childNodes3 = item3.getChildNodes();
                        for (int i5 = 0; i5 < childNodes3.getLength(); i5++) {
                            Node item4 = childNodes3.item(i5);
                            if (item4.getNodeName().equals("mention")) {
                                String textContent = item4.getTextContent();
                                // Ids carry a one-character prefix that is dropped before parsing.
                                int parseInt = Integer.parseInt(item4.getAttributes().getNamedItem("id").getTextContent().substring(1));
                                // NOTE(review): result discarded; the call only fails on a missing or malformed attribute.
                                readConnection(item4.getAttributes().getNamedItem("connection").getNodeValue());
                                int endIndex2 = getEndIndex(i2, list2, textContent);
                                hashMap.put(Integer.valueOf(parseInt), new Mention(textContent, i2, endIndex2));
                                endIndex = endIndex2;
                            } else {
                                // Same text clean-up as getJustText so lengths line up with the annotated text.
                                endIndex = getEndIndex(i2, list2, item4.getTextContent().replaceAll("\n(?!\n)", " ").replaceAll(Expressions.VAR_SELF, ""));
                            }
                            // NOTE(review): unlike the plain-text branch further down, i2 is not checked
                            // against list2.size() before getEndIndex is called in this loop.
                            i2 = endIndex + 1;
                        }
                        String replaceAll = item3.getTextContent().replaceAll("\n(?!\n)", " ").replaceAll(Expressions.VAR_SELF, "");
                        // NOTE(review): empty branch with no effect.
                        if (replaceAll.startsWith("``")) {
                        }
                        List<Integer> readConnection = readConnection(item3.getAttributes().getNamedItem("connection").getTextContent());
                        int parseInt2 = Integer.parseInt(item3.getAttributes().getNamedItem("id").getTextContent().substring(1));
                        // Only the first connected mention id is used; null when the quote has none.
                        Integer num = null;
                        if (readConnection.size() > 0) {
                            num = readConnection.get(0);
                        } else {
                            System.out.println("quote w/ no mention. ID: " + parseInt2);
                        }
                        // One pair for the annotated quotation currently under the quote cursor.
                        arrayList2.add(new Pair(num, item3.getAttributes().getNamedItem(CleanXmlAnnotator.DEFAULT_SPEAKER_TAGS).getTextContent()));
                        String str2 = (String) ((CoreMap) list.get(i)).get(CoreAnnotations.TextAnnotation.class);
                        // While the XML quote does not end with the current annotated quotation, move to the
                        // next annotated quotation and give it the same mention id and speaker.
                        // NOTE(review): list.get(i) throws IndexOutOfBoundsException if no quotation ever matches.
                        while (!replaceAll.endsWith(str2)) {
                            i++;
                            str2 = (String) ((CoreMap) list.get(i)).get(CoreAnnotations.TextAnnotation.class);
                            arrayList2.add(new Pair(num, item3.getAttributes().getNamedItem(CleanXmlAnnotator.DEFAULT_SPEAKER_TAGS).getTextContent()));
                        }
                        i++;
                    } else if (item3.getNodeName().equals("mention")) {
                        // Mention outside any quote: recorded the same way as a nested one.
                        String textContent2 = item3.getTextContent();
                        int parseInt3 = Integer.parseInt(item3.getAttributes().getNamedItem("id").getTextContent().substring(1));
                        // NOTE(review): result discarded, as for nested mentions.
                        readConnection(item3.getAttributes().getNamedItem("connection").getNodeValue());
                        int endIndex3 = getEndIndex(i2, list2, textContent2);
                        hashMap.put(Integer.valueOf(parseInt3), new Mention(textContent2, i2, endIndex3));
                        i2 = endIndex3 + 1;
                    } else {
                        // Any other node only advances the token cursor, and only while tokens remain.
                        String replaceAll2 = item3.getTextContent().replaceAll("\n(?!\n)", " ").replaceAll(Expressions.VAR_SELF, "");
                        if (i2 < list2.size()) {
                            i2 = getEndIndex(i2, list2, replaceAll2) + 1;
                        }
                    }
                }
            }
        }
        // Resolve each pair's mention id; unknown or null ids become a record with span -1/-1 and null text.
        for (Pair pair : arrayList2) {
            Mention mention = (Mention) hashMap.get(pair.first);
            if (mention == null) {
                arrayList.add(new GoldQuoteInfo(-1, -1, (String) pair.second, null));
            } else {
                arrayList.add(new GoldQuoteInfo(mention.begin, mention.end, (String) pair.second, mention.text));
            }
        }
        // Sanity check: exactly one gold record per annotated quotation.
        if (((List) annotatedFile.get(CoreAnnotations.QuotationsAnnotation.class)).size() != arrayList.size()) {
            throw new RuntimeException("Quotes size and gold size don't match!");
        }
        return new Data(arrayList, readXMLCharacterList, annotatedFile);
    }
}
