package edu.stanford.nlp.trees.international.french;

import edu.stanford.nlp.io.ReaderInputStream;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasContext;
import edu.stanford.nlp.ling.HasIndex;
import edu.stanford.nlp.ling.HasTag;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.trees.LabeledScoredTreeFactory;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeFactory;
import edu.stanford.nlp.trees.TreeNormalizer;
import edu.stanford.nlp.trees.TreeReader;
import edu.stanford.nlp.util.XMLUtils;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/* loaded from: input_file:edu/stanford/nlp/trees/international/french/FrenchTreeReader.class */
/**
 * A {@link TreeReader} that builds trees from an XML document in which each sentence is a
 * {@code SENT} element and each token is a {@code w} element.
 *
 * <p>The whole document is parsed eagerly in the constructor; {@link #readTree()} then walks the
 * list of {@code SENT} elements one at a time. A {@code w} element that itself contains
 * {@code w} elements is treated as a multi-word expression and becomes a phrasal node.
 */
public class FrenchTreeReader implements TreeReader {
    /** Byte stream wrapped around the caller's reader; set to {@code null} once closed. */
    private InputStream stream;
    private final TreeNormalizer treeNormalizer;
    private final TreeFactory treeFactory;

    // XML element names.
    private static final String NODE_SENT = "SENT";
    private static final String NODE_WORD = "w";

    // XML attribute names.
    private static final String ATTR_NUMBER = "nb";
    private static final String ATTR_POS = "cat";
    private static final String ATTR_POS_MWE = "catint";
    private static final String ATTR_LEMMA = "lemma";
    private static final String ATTR_MORPH = "mph";
    private static final String ATTR_EE = "ee";

    /** Prefix added to the label of a multi-word-expression node. */
    private static final String MWE_PHRASAL = "MW";

    /** Leaf value used when a word element has no text. */
    public static final String EMPTY_LEAF = "-NONE-";
    /** Phrasal label used for a word element whose text splits into several tokens. */
    public static final String MISSING_PHRASAL = "DUMMYP";
    /** POS tag used when neither {@code cat} nor {@code catint} supplies one. */
    public static final String MISSING_POS = "DUMMY";

    /** All {@code SENT} elements of the document, or {@code null} if parsing failed. */
    private NodeList sentences;
    /** Index of the next sentence to be returned by {@link #readTree()}. */
    private int sentIdx;

    /**
     * Creates a reader that uses a {@link LabeledScoredTreeFactory} and a
     * {@link FrenchTreeNormalizer}.
     *
     * @param reader source of the XML document
     */
    public FrenchTreeReader(Reader reader) {
        this(reader, new LabeledScoredTreeFactory(), new FrenchTreeNormalizer());
    }

    /**
     * Creates a reader and immediately parses the whole XML document.
     *
     * <p>Parse failures are reported on standard error and otherwise tolerated: the sentence
     * list stays {@code null} and {@link #readTree()} will simply return {@code null}.
     *
     * @param reader source of the XML document; re-encoded with the encoding reported by
     *     {@link FrenchTreebankLanguagePack}
     * @param treeFactory factory used for every node and leaf
     * @param treeNormalizer normalizer applied to labels, terminals and whole trees
     */
    public FrenchTreeReader(Reader reader, TreeFactory treeFactory, TreeNormalizer treeNormalizer) {
        this.stream = new ReaderInputStream(reader, new FrenchTreebankLanguagePack().getEncoding());
        this.treeFactory = treeFactory;
        this.treeNormalizer = treeNormalizer;
        try {
            // NOTE(review): parser hardening (DTD / external entities) is configured inside
            // XMLUtils, which is not visible here -- confirm before feeding untrusted XML.
            this.sentences = XMLUtils.getXmlParser().parse(this.stream).getDocumentElement().getElementsByTagName(NODE_SENT);
            this.sentIdx = 0;
        } catch (IOException e) {
            e.printStackTrace();
        } catch (SAXException e) {
            e.printStackTrace();
        }
    }

    /**
     * Closes the underlying stream. Safe to call more than once; an {@link IOException} raised
     * while closing is deliberately ignored.
     */
    @Override // edu.stanford.nlp.trees.TreeReader
    public void close() {
        try {
            if (this.stream != null) {
                this.stream.close();
                this.stream = null;
            }
        } catch (IOException ignored) {
            // Best effort: the document was fully parsed in the constructor, so there is
            // nothing useful a caller could do about a failed close.
        }
    }

    /**
     * Returns the next non-empty tree of the document.
     *
     * <p>Sentences that produce no tree (before or after normalization) are skipped. When the
     * root label is a {@link CoreLabel}, its doc id is set to the sentence's {@code nb}
     * attribute.
     *
     * @return the next tree, or {@code null} when no sentences remain or parsing failed
     */
    @Override // edu.stanford.nlp.trees.TreeReader
    public Tree readTree() {
        Tree tree = null;
        while (tree == null && this.sentences != null && this.sentIdx < this.sentences.getLength()) {
            Node sentenceRoot = this.sentences.item(this.sentIdx++);
            tree = getTreeFromXML(sentenceRoot);
            if (tree != null) {
                tree = this.treeNormalizer.normalizeWholeTree(tree, this.treeFactory);
            }
            // Re-check for null: if the normalizer hands back no tree, move on to the next
            // sentence instead of dereferencing it.
            if (tree != null && tree.label() instanceof CoreLabel) {
                ((HasIndex) tree.label()).setDocID(((Element) sentenceRoot).getAttribute(ATTR_NUMBER));
            }
        }
        return tree;
    }

    /**
     * Picks the POS tag of a word element: the trimmed {@code cat} attribute if non-empty,
     * otherwise the trimmed {@code catint} attribute if non-empty, otherwise
     * {@link #MISSING_POS}.
     */
    private String getPOS(Element element) {
        String pos = element.hasAttribute(ATTR_POS) ? element.getAttribute(ATTR_POS).trim() : "";
        if (!pos.isEmpty()) {
            return pos;
        }
        String mwePos = element.hasAttribute(ATTR_POS_MWE) ? element.getAttribute(ATTR_POS_MWE).trim() : "";
        // Content comparison, not reference comparison: a present-but-blank attribute must
        // fall through to the default.
        return !mwePos.isEmpty() ? mwePos : MISSING_POS;
    }

    /**
     * Tokenizes the {@code lemma} attribute the same way as the word text.
     *
     * @return the lemma tokens, or {@code null} when the attribute is absent or empty
     */
    private List<String> getLemma(Element element) {
        String lemma = element.getAttribute(ATTR_LEMMA);
        if (lemma == null || lemma.isEmpty()) {
            return null;
        }
        return getWordString(lemma);
    }

    /**
     * Returns the morphological annotation of a word element: the {@code ee} attribute, or the
     * {@code mph} attribute if {@code getAttribute("ee")} yields {@code null}.
     */
    private String getMorph(Element element) {
        String morph = element.getAttribute(ATTR_MORPH);
        if (morph == null) {
            morph = "";
        }
        String ee = element.getAttribute(ATTR_EE);
        // NOTE(review): the DOM specification says getAttribute returns "" (not null) for a
        // missing attribute, so with a conforming parser the mph fallback is never taken.
        // Left unchanged because consumers of originalText are not visible here -- confirm
        // whether a missing/empty "ee" should fall back to "mph".
        return ee != null ? ee : morph;
    }

    /**
     * Splits raw element text into tokens.
     *
     * <ul>
     *   <li>{@code null} or empty text yields the single token {@link #EMPTY_LEAF};</li>
     *   <li>parentheses are stripped from any text longer than one character;</li>
     *   <li>text that is all digits or all punctuation once whitespace is removed is kept as a
     *       single, whitespace-free token;</li>
     *   <li>anything else is split on runs of whitespace.</li>
     * </ul>
     *
     * @throws RuntimeException if the split produces no tokens at all
     */
    private List<String> getWordString(String str) {
        List<String> tokens = new ArrayList<String>();
        if (str == null || str.equals("")) {
            tokens.add(EMPTY_LEAF);
        } else {
            if (str.length() > 1) {
                str = str.replaceAll("[\\(\\)]", "");
            }
            String compact = str.replaceAll("\\s+", "");
            if (compact.matches("\\d+") || compact.matches("\\p{Punct}+")) {
                tokens.add(compact);
            } else {
                tokens = Arrays.asList(str.split("\\s+"));
            }
        }
        if (tokens.size() == 0) {
            throw new RuntimeException(getClass().getName() + ": Zero length token list for: " + str);
        }
        return tokens;
    }

    /**
     * Recursively converts an XML element into a tree.
     *
     * @param node an element node (it is cast to {@link Element})
     * @return the tree, or {@code null} if a non-terminal element has no usable children
     */
    private Tree getTreeFromXML(Node node) {
        Element element = (Element) node;
        // A word element with no nested word elements is a terminal; everything else
        // (phrases and multi-word expressions) is built from its children.
        boolean isTerminalWord = element.getNodeName().equals(NODE_WORD)
                && element.getElementsByTagName(NODE_WORD).getLength() == 0;
        return isTerminalWord ? buildWordTree(element) : buildPhrasalTree(element);
    }

    /** Builds a phrasal or multi-word-expression node from an element's child elements. */
    private Tree buildPhrasalTree(Element element) {
        List<Tree> kids = new ArrayList<Tree>();
        for (Node child = element.getFirstChild(); child != null; child = child.getNextSibling()) {
            if (child.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            Tree kid = getTreeFromXML(child);
            if (kid == null) {
                System.err.printf("%s: Discarding empty tree (root: %s)%n", getClass().getName(), child.getNodeName());
            } else {
                kids.add(kid);
            }
        }

        String label = element.getNodeName().trim();
        // A word element that reaches this point contains nested words, i.e. it is an MWE;
        // its phrasal category comes from the "cat" attribute.
        boolean isMWE = label.equals(NODE_WORD) && element.hasAttribute(ATTR_POS);
        if (isMWE) {
            label = element.getAttribute(ATTR_POS).trim();
        }

        if (kids.isEmpty()) {
            return null;
        }
        Tree phrase = this.treeFactory.newTreeNode(this.treeNormalizer.normalizeNonterminal(label), kids);
        return (phrase != null && isMWE) ? postProcessMWE(phrase) : phrase;
    }

    /**
     * Builds the subtree for a terminal word element. A single token becomes a preterminal
     * tagged with the element's POS; several tokens become a {@link #MISSING_PHRASAL} node over
     * one {@link #MISSING_POS} preterminal per token.
     */
    private Tree buildWordTree(Element element) {
        String pos = this.treeNormalizer.normalizeNonterminal(getPOS(element));
        List<String> lemmas = getLemma(element);
        String morph = getMorph(element);
        List<String> tokens = getWordString(element.getTextContent().trim());

        if (lemmas != null && lemmas.size() != tokens.size()) {
            System.err.println("Lemmas don't match tokens, ignoring lemmas: lemmas " + lemmas + ", tokens " + tokens);
            lemmas = null;
        }

        if (tokens.size() > 1) {
            List<Tree> preterminals = new ArrayList<Tree>();
            for (int i = 0; i < tokens.size(); i++) {
                Tree leaf = buildLeaf(tokens.get(i), lemmas == null ? null : lemmas.get(i), morph);
                preterminals.add(buildPreterminal(MISSING_POS, leaf));
            }
            return this.treeFactory.newTreeNode(MISSING_PHRASAL, preterminals);
        }

        Tree leaf = buildLeaf(tokens.get(0), lemmas == null ? null : lemmas.get(0), morph);
        return buildPreterminal(pos, leaf);
    }

    /**
     * Creates a leaf for one token and fills in whichever of word, lemma and original text its
     * label type supports. The lemma is skipped when {@code lemma} is {@code null}.
     */
    private Tree buildLeaf(String token, String lemma, String morph) {
        String word = this.treeNormalizer.normalizeTerminal(token);
        Tree leaf = this.treeFactory.newLeaf(word);
        if (leaf.label() instanceof HasWord) {
            ((HasWord) leaf.label()).setWord(word);
        }
        if ((leaf.label() instanceof CoreLabel) && lemma != null) {
            ((CoreLabel) leaf.label()).setLemma(lemma);
        }
        if (leaf.label() instanceof HasContext) {
            ((HasContext) leaf.label()).setOriginalText(morph);
        }
        return leaf;
    }

    /** Wraps a leaf in a preterminal node labelled (and, if supported, tagged) with {@code tag}. */
    private Tree buildPreterminal(String tag, Tree leaf) {
        List<Tree> kids = new ArrayList<Tree>();
        kids.add(leaf);
        Tree preterminal = this.treeFactory.newTreeNode(tag, kids);
        if (preterminal.label() instanceof HasTag) {
            ((HasTag) preterminal.label()).setTag(tag);
        }
        return preterminal;
    }

    /**
     * Post-processes a multi-word-expression node. If its yield, with whitespace removed,
     * consists only of digits and punctuation, the node is collapsed to a single leaf under the
     * same label; otherwise the label is prefixed with {@code "MW"}.
     */
    private Tree postProcessMWE(Tree tree) {
        String yield = Sentence.listToString(tree.yield()).replaceAll("\\s+", "");
        if (yield.matches("[\\d\\p{Punct}]*")) {
            List<Tree> kids = new ArrayList<Tree>();
            kids.add(this.treeFactory.newLeaf(yield));
            tree = this.treeFactory.newTreeNode(tree.value(), kids);
        } else {
            tree.setValue(MWE_PHRASAL + tree.value());
        }
        return tree;
    }

    /** Returns {@code fileName} up to its last dot, or unchanged if it contains no dot. */
    private static String stripExtension(String fileName) {
        int dot = fileName.lastIndexOf('.');
        return dot < 0 ? fileName : fileName.substring(0, dot);
    }

    /**
     * Command-line driver: reads every file given as an argument (as UTF-8), prints each tree
     * to standard output as {@code <file-base-name>-<doc id> TAB <tree>}, and reports per-file
     * and total tree counts on standard error.
     *
     * @param strArr paths of the tree files to read
     */
    public static void main(String[] strArr) {
        if (strArr.length < 1) {
            System.err.printf("Usage: java %s tree_file(s)%n%n", FrenchTreeReader.class.getName());
            System.exit(-1);
        }
        List<File> files = new ArrayList<File>();
        for (String path : strArr) {
            files.add(new File(path));
        }
        FrenchTreeReaderFactory factory = new FrenchTreeReaderFactory(true);
        int totalTrees = 0;
        try {
            for (File file : files) {
                TreeReader reader = factory.newTreeReader(new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8")));
                String baseName = stripExtension(file.getName());
                int numTrees = 0;
                try {
                    // readTree() returns null once the file is exhausted.
                    for (Tree tree = reader.readTree(); tree != null; tree = reader.readTree()) {
                        System.out.printf("%s-%s\t%s%n", baseName, ((HasIndex) tree.label()).docID(), tree.toString());
                        numTrees++;
                    }
                } finally {
                    reader.close();
                }
                System.err.printf("%s: %d trees%n", file.getName(), numTrees);
                totalTrees += numTrees;
            }
            System.err.printf("%nRead %d trees%n", totalTrees);
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
