package edu.stanford.nlp.pipeline;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotator;
import edu.stanford.nlp.util.XMLUtils;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.Stack;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/stanford/nlp/pipeline/CleanXmlAnnotator.class */
/**
 * Annotator that strips XML tag tokens out of an already tokenized document.
 *
 * <p>Each token whose trimmed word parses as an XML tag (per
 * {@link XMLUtils#parseTag(String)}) is dropped from the token list. Non-tag
 * tokens have their word XML-unescaped, are labelled with the list of tags
 * enclosing them ({@code XmlContextAnnotation}), and are kept only when they
 * are inside at least one tag whose name matches the configured tag pattern
 * (or when no tag pattern is configured, or the pattern matches the empty
 * string). Text removed together with the tags is folded into the
 * before/after annotations of the neighbouring kept tokens when those
 * annotations are present.
 */
public class CleanXmlAnnotator implements Annotator {
    /** Default pattern for tags whose contents are kept: every tag. */
    public static final String DEFAULT_XML_TAGS = ".*";
    /** Default pattern for sentence-ending tags: empty, i.e. disabled. */
    public static final String DEFAULT_SENTENCE_ENDERS = "";
    /** Default pattern (matched case-insensitively) for tags that hold the document date. */
    public static final String DEFAULT_DATE_TAGS = "datetime|date";
    /** By default mismatched / unclosed tags are tolerated. */
    public static final boolean DEFAULT_ALLOW_FLAWS = true;

    /** Tags whose contents are kept; {@code null} means no tag filtering is configured. */
    private final Pattern xmlTagMatcher;
    /** Tags that force a sentence end on the preceding kept token; {@code null} disables this. */
    private final Pattern sentenceEndingTagMatcher;
    /** Tags whose contents are collected as date tokens; {@code null} disables this. */
    private final Pattern dateTagMatcher;
    /** When {@code false}, mismatched or unclosed tags raise {@link IllegalArgumentException}. */
    private final boolean allowFlawedXml;

    /** Creates an annotator configured with the {@code DEFAULT_*} settings. */
    public CleanXmlAnnotator() {
        this(DEFAULT_XML_TAGS, DEFAULT_SENTENCE_ENDERS, DEFAULT_DATE_TAGS, DEFAULT_ALLOW_FLAWS);
    }

    /**
     * Creates an annotator with explicit settings.
     *
     * @param xmlTagsToRemove regex for tag names whose contents are kept; when
     *     {@code null}, no tag filtering is configured and the sentence-ending
     *     pattern is ignored as well
     * @param sentenceEndingTags regex for tag names that force a sentence end;
     *     {@code null} or empty disables the feature
     * @param dateTags regex (case-insensitive) for tag names that contain the
     *     document date; {@code null} disables date collection
     * @param allowFlawedXml whether mismatched or unclosed tags are tolerated
     */
    public CleanXmlAnnotator(String xmlTagsToRemove, String sentenceEndingTags, String dateTags,
            boolean allowFlawedXml) {
        this.allowFlawedXml = allowFlawedXml;
        if (xmlTagsToRemove != null) {
            this.xmlTagMatcher = Pattern.compile(xmlTagsToRemove);
            if (sentenceEndingTags == null || sentenceEndingTags.length() <= 0) {
                this.sentenceEndingTagMatcher = null;
            } else {
                this.sentenceEndingTagMatcher = Pattern.compile(sentenceEndingTags);
            }
        } else {
            this.xmlTagMatcher = null;
            this.sentenceEndingTagMatcher = null;
        }
        if (dateTags != null) {
            this.dateTagMatcher = Pattern.compile(dateTags, Pattern.CASE_INSENSITIVE);
        } else {
            this.dateTagMatcher = null;
        }
    }

    /**
     * Replaces the annotation's token list with the XML-cleaned list and, when
     * any tokens were found inside a date tag, stores their words joined by a
     * single space as the {@code DocDateAnnotation}. Does nothing when the
     * annotation has no {@code TokensAnnotation}.
     *
     * @param annotation the document annotation to clean in place
     */
    @Override // edu.stanford.nlp.pipeline.Annotator
    public void annotate(Annotation annotation) {
        if (!annotation.has(CoreAnnotations.TokensAnnotation.class)) {
            return;
        }
        List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class);
        List<CoreLabel> dateTokens = new ArrayList<CoreLabel>();
        annotation.set(CoreAnnotations.TokensAnnotation.class, process(tokens, dateTokens));
        if (dateTokens.isEmpty()) {
            return;
        }
        StringBuilder docDate = new StringBuilder();
        boolean first = true;
        for (CoreLabel dateToken : dateTokens) {
            if (!first) {
                docDate.append(" ");
            }
            docDate.append(dateToken.word());
            first = false;
        }
        annotation.set(CoreAnnotations.DocDateAnnotation.class, docDate.toString());
    }

    /**
     * Cleans the given tokens without collecting date tokens.
     *
     * @param list the tokens to clean; the tokens themselves are modified
     * @return a new list holding the tokens that were kept
     * @throws IllegalArgumentException on a close tag with no matching open
     *     tag, or on mismatched / unclosed tags when flawed XML is not allowed
     */
    public List<CoreLabel> process(List<CoreLabel> list) {
        return process(list, null);
    }

    /**
     * Cleans the given tokens, optionally collecting the tokens found directly
     * inside a date tag.
     *
     * @param list the tokens to clean; the tokens themselves are modified
     *     (word unescaped, context / before / after annotations updated)
     * @param list2 receives every non-tag token whose innermost enclosing tag
     *     matches the date pattern; may be {@code null} to skip collection
     * @return a new list holding the tokens that were kept
     * @throws IllegalArgumentException on a close tag with no matching open
     *     tag, or on mismatched / unclosed tags when flawed XML is not allowed
     */
    public List<CoreLabel> process(List<CoreLabel> list, List<CoreLabel> list2) {
        // Names of the tags we are currently inside, outermost first.
        Stack<String> enclosingTags = new Stack<String>();
        // Immutable snapshot of enclosingTags shared by consecutive tokens;
        // reset to null whenever the set of open tags changes.
        List<String> currentTagSet = null;
        // Number of currently open tags whose name matches xmlTagMatcher.
        int matchDepth = 0;
        List<CoreLabel> keptTokens = new ArrayList<CoreLabel>();
        // Text of removed tag tokens not yet attached to a neighbouring token.
        StringBuilder removedText = new StringBuilder();

        for (CoreLabel token : list) {
            XMLUtils.XMLTag tag = XMLUtils.parseTag(token.word().trim());

            if (tag == null) {
                // Ordinary token: unescape it and decide whether to keep it.
                token.setWord(XMLUtils.unescapeStringForXML(token.word()));
                if (matchDepth > 0 || this.xmlTagMatcher == null
                        || this.xmlTagMatcher.matcher("").matches()) {
                    keptTokens.add(token);
                }
                if (removedText.length() > 0) {
                    // Fold the removed text into this token's "before" text and,
                    // if that succeeded, into the "after" text of the token at
                    // position size - 2 of the kept list.
                    boolean added = false;
                    String before = token.get(CoreAnnotations.BeforeAnnotation.class);
                    if (before != null) {
                        token.set(CoreAnnotations.BeforeAnnotation.class,
                                removedText.toString() + before);
                        added = true;
                    }
                    if (added && keptTokens.size() > 1) {
                        CoreLabel previous = keptTokens.get(keptTokens.size() - 2);
                        String after = previous.get(CoreAnnotations.AfterAnnotation.class);
                        if (after != null) {
                            previous.set(CoreAnnotations.AfterAnnotation.class,
                                    after + removedText.toString());
                        } else {
                            previous.set(CoreAnnotations.AfterAnnotation.class,
                                    removedText.toString());
                        }
                    }
                    removedText = new StringBuilder();
                }
                if (currentTagSet == null) {
                    // Unmodifiable because the same list object is attached to
                    // many tokens.
                    currentTagSet =
                            Collections.unmodifiableList(new ArrayList<String>(enclosingTags));
                }
                token.set(CoreAnnotations.XmlContextAnnotation.class, currentTagSet);
                // Collect date tokens only when the caller supplied a sink; the
                // single-argument overload passes null.
                if (list2 != null && this.dateTagMatcher != null && !currentTagSet.isEmpty()
                        && this.dateTagMatcher
                                .matcher(currentTagSet.get(currentTagSet.size() - 1)).matches()) {
                    list2.add(token);
                }
                continue;
            }

            // The token is a tag and will be dropped: remember its text.
            String before = token.get(CoreAnnotations.BeforeAnnotation.class);
            if (before != null) {
                removedText.append(before);
            }
            String original = token.get(CoreAnnotations.OriginalTextAnnotation.class);
            if (original != null) {
                removedText.append(original);
            }
            if (token == list.get(list.size() - 1)) {
                String after = token.get(CoreAnnotations.AfterAnnotation.class);
                if (after != null) {
                    removedText.append(after);
                }
            }

            // A sentence-ending tag marks the most recently kept token.
            if (this.sentenceEndingTagMatcher != null
                    && this.sentenceEndingTagMatcher.matcher(tag.name).matches()
                    && keptTokens.size() > 0) {
                keptTokens.get(keptTokens.size() - 1)
                        .set(CoreAnnotations.ForcedSentenceEndAnnotation.class, true);
            }

            if (this.xmlTagMatcher == null || tag.isSingleTag) {
                continue;
            }

            // The set of open tags is about to change.
            currentTagSet = null;
            if (tag.isEndTag) {
                // Pop until the matching open tag is found. Intervening tags
                // are mismatches, tolerated only when flawed XML is allowed.
                boolean matched = false;
                while (!enclosingTags.isEmpty()) {
                    String lastTag = enclosingTags.pop();
                    if (this.xmlTagMatcher.matcher(lastTag).matches()) {
                        matchDepth--;
                    }
                    if (lastTag.equals(tag.name)) {
                        matched = true;
                        break;
                    }
                    if (!this.allowFlawedXml) {
                        throw new IllegalArgumentException("Mismatched tags... " + tag.name
                                + " closed a " + lastTag + " tag.");
                    }
                }
                if (!matched) {
                    throw new IllegalArgumentException("Got a close tag " + tag.name
                            + "which does not match any open tag");
                }
                if (matchDepth < 0) {
                    throw new AssertionError("Programming error?  We think there have been more close tags than open tags");
                }
            } else {
                // Open tag.
                enclosingTags.push(tag.name);
                if (this.xmlTagMatcher.matcher(tag.name).matches()) {
                    matchDepth++;
                }
            }
        }

        if (enclosingTags.size() > 0 && !this.allowFlawedXml) {
            throw new IllegalArgumentException("Unclosed tags, starting with " + enclosingTags.pop());
        }

        // Text removed after the last kept token becomes that token's "after"
        // text, but only if the token carries an OriginalTextAnnotation.
        if (keptTokens.size() > 0 && removedText.length() > 0) {
            CoreLabel lastToken = keptTokens.get(keptTokens.size() - 1);
            if (lastToken.get(CoreAnnotations.OriginalTextAnnotation.class) != null) {
                lastToken.set(CoreAnnotations.AfterAnnotation.class, removedText.toString());
            }
        }
        return keptTokens;
    }

    /** Returns the single requirement {@code TOKENIZE_REQUIREMENT}. */
    @Override // edu.stanford.nlp.pipeline.Annotator
    public Set<Annotator.Requirement> requires() {
        return Collections.singleton(TOKENIZE_REQUIREMENT);
    }

    /** Returns the single requirement {@code CLEAN_XML_REQUIREMENT}. */
    @Override // edu.stanford.nlp.pipeline.Annotator
    public Set<Annotator.Requirement> requirementsSatisfied() {
        return Collections.singleton(CLEAN_XML_REQUIREMENT);
    }
}
