package edu.stanford.nlp.scenegraph;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.scenegraph.image.SceneGraphImage;
import edu.stanford.nlp.scenegraph.image.SceneGraphImageAttribute;
import edu.stanford.nlp.scenegraph.image.SceneGraphImageObject;
import edu.stanford.nlp.scenegraph.image.SceneGraphImageRegion;
import edu.stanford.nlp.scenegraph.image.SceneGraphImageRelationship;
import edu.stanford.nlp.scenegraph.image.SceneGraphImageUtils;
import edu.stanford.nlp.semgraph.semgrex.ssurgeon.AddNode;
import edu.stanford.nlp.semgraph.semgrex.ssurgeon.SsurgeonPattern;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.StringUtils;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/stanford/nlp/scenegraph/SceneGraphImageCleaner.class */
public class SceneGraphImageCleaner {
    private static String FINAL_PUNCT_REGEX = "\\.+$";
    private static String INITIAL_DET_REGEX = "^(an?|the) ";
    private static String FINAL_DET_REGEX = " (an?|the)$";
    private static String TRAILING_NUMBER_REGEX = " [0-9]+$";
    private static Set<String> ALL_ATTRIBUTES = Generics.newHashSet();
    private static StanfordCoreNLP pipeline;
    private static StanfordCoreNLP tokenizerPipeline;

    private static StanfordCoreNLP getPipeline() {
        if (pipeline == null) {
            Properties properties = new Properties();
            properties.put("annotators", "tokenize,ssplit,pos,lemma,ner");
            properties.put(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY, "true");
            pipeline = new StanfordCoreNLP(properties);
        }
        return pipeline;
    }

    private static StanfordCoreNLP getTokenizerPipeline() {
        if (tokenizerPipeline == null) {
            Properties properties = new Properties();
            properties.put("annotators", "tokenize,ssplit,pos,lemma,ner");
            properties.put(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY, "true");
            tokenizerPipeline = new StanfordCoreNLP(properties);
        }
        return tokenizerPipeline;
    }

    public static void extractAllAttributes(List<SceneGraphImage> list) {
        Iterator<SceneGraphImage> it = list.iterator();
        while (it.hasNext()) {
            Iterator<SceneGraphImageAttribute> it2 = it.next().attributes.iterator();
            while (it2.hasNext()) {
                ALL_ATTRIBUTES.add(it2.next().attributeLemmaGloss());
            }
        }
    }

    public void cleanupImage(SceneGraphImage sceneGraphImage) {
        lemmatize(sceneGraphImage);
    }

    private String removeFinalPunctuation(String str) {
        return str.replaceAll(FINAL_PUNCT_REGEX, "");
    }

    private String removeDeterminersAndNumbers(String str) {
        return str.replaceAll(INITIAL_DET_REGEX, "").replaceAll(FINAL_DET_REGEX, "").replaceAll(TRAILING_NUMBER_REGEX, "");
    }

    private String lemmaGloss(List<CoreLabel> list) {
        return StringUtils.join(list.stream().map(coreLabel -> {
            return coreLabel.lemma() == null ? coreLabel.word() : coreLabel.lemma();
        }), AddNode.ATOM_DELIMITER);
    }

    public void splitAttributeConjunctions(SceneGraphImage sceneGraphImage) {
        if (ALL_ATTRIBUTES.isEmpty()) {
            System.err.println("WARNING: List of attributes is empty! Won't split any conjunctions.");
            return;
        }
        LinkedList newLinkedList = Generics.newLinkedList();
        for (SceneGraphImageAttribute sceneGraphImageAttribute : sceneGraphImage.attributes) {
            if (SceneGraphImageUtils.containsLemma(sceneGraphImageAttribute.attributeGloss, SsurgeonPattern.PREDICATE_AND_TAG) || SceneGraphImageUtils.containsLemma(sceneGraphImageAttribute.attributeGloss, "&")) {
                LinkedList newLinkedList2 = Generics.newLinkedList();
                boolean z = true;
                LinkedList newLinkedList3 = Generics.newLinkedList();
                int i = 0;
                int size = sceneGraphImageAttribute.attributeGloss.size();
                while (true) {
                    if (i > size) {
                        break;
                    }
                    CoreLabel coreLabel = i < size ? sceneGraphImageAttribute.attributeGloss.get(i) : null;
                    if (coreLabel != null && !coreLabel.lemma().equals(SsurgeonPattern.PREDICATE_AND_TAG) && !coreLabel.lemma().equals(",") && !coreLabel.lemma().equals("&")) {
                        newLinkedList3.add(coreLabel);
                    } else if (newLinkedList3.isEmpty()) {
                        continue;
                    } else if (!ALL_ATTRIBUTES.contains(lemmaGloss(newLinkedList3))) {
                        z = false;
                        break;
                    } else {
                        newLinkedList2.add(newLinkedList3);
                        newLinkedList3 = Generics.newLinkedList();
                    }
                    i++;
                }
                if (z && newLinkedList2.size() > 0) {
                    sceneGraphImageAttribute.attributeGloss = (List) newLinkedList2.get(0);
                    sceneGraphImageAttribute.attribute = sceneGraphImageAttribute.attributeGloss();
                    sceneGraphImageAttribute.object = sceneGraphImageAttribute.attributeGloss();
                    sceneGraphImageAttribute.text[2] = sceneGraphImageAttribute.attributeGloss();
                    int size2 = newLinkedList2.size();
                    for (int i2 = 1; i2 < size2; i2++) {
                        SceneGraphImageAttribute m3427clone = sceneGraphImageAttribute.m3427clone();
                        m3427clone.attributeGloss = (List) newLinkedList2.get(i2);
                        m3427clone.attribute = m3427clone.attributeGloss();
                        m3427clone.object = m3427clone.attributeGloss();
                        m3427clone.text[2] = m3427clone.attributeGloss();
                        newLinkedList.add(m3427clone);
                    }
                }
            }
        }
        Iterator it = newLinkedList.iterator();
        while (it.hasNext()) {
            sceneGraphImage.addAttribute((SceneGraphImageAttribute) it.next());
        }
    }

    public void lemmatize(SceneGraphImage sceneGraphImage) {
        StanfordCoreNLP pipeline2 = getPipeline();
        for (SceneGraphImageAttribute sceneGraphImageAttribute : sceneGraphImage.attributes) {
            Annotation annotation = new Annotation(String.format("She is %s .\n", removeDeterminersAndNumbers(removeFinalPunctuation(sceneGraphImageAttribute.attribute))));
            pipeline2.annotate(annotation);
            List list = (List) ((CoreMap) ((List) annotation.get(CoreAnnotations.SentencesAnnotation.class)).get(0)).get(CoreAnnotations.TokensAnnotation.class);
            sceneGraphImageAttribute.attributeGloss = list.subList(2, list.size() - 1);
            Annotation annotation2 = new Annotation(String.format("The %s is tall .", removeDeterminersAndNumbers(removeFinalPunctuation(sceneGraphImageAttribute.text[0]))));
            pipeline2.annotate(annotation2);
            List list2 = (List) ((CoreMap) ((List) annotation2.get(CoreAnnotations.SentencesAnnotation.class)).get(0)).get(CoreAnnotations.TokensAnnotation.class);
            sceneGraphImageAttribute.subjectGloss = list2.subList(1, list2.size() - 3);
            sceneGraphImageAttribute.subject.labels.add(sceneGraphImageAttribute.subjectGloss);
        }
        for (SceneGraphImageRelationship sceneGraphImageRelationship : sceneGraphImage.relationships) {
            Annotation annotation3 = new Annotation(String.format("She is the %s .\n", removeDeterminersAndNumbers(removeFinalPunctuation(sceneGraphImageRelationship.text[2]))));
            pipeline2.annotate(annotation3);
            List list3 = (List) ((CoreMap) ((List) annotation3.get(CoreAnnotations.SentencesAnnotation.class)).get(0)).get(CoreAnnotations.TokensAnnotation.class);
            sceneGraphImageRelationship.objectGloss = list3.subList(3, list3.size() - 1);
            sceneGraphImageRelationship.object.labels.add(sceneGraphImageRelationship.objectGloss);
            Annotation annotation4 = new Annotation(String.format("The %s is tall .", removeDeterminersAndNumbers(removeFinalPunctuation(sceneGraphImageRelationship.text[0]))));
            pipeline2.annotate(annotation4);
            List list4 = (List) ((CoreMap) ((List) annotation4.get(CoreAnnotations.SentencesAnnotation.class)).get(0)).get(CoreAnnotations.TokensAnnotation.class);
            sceneGraphImageRelationship.subjectGloss = list4.subList(1, list4.size() - 3);
            sceneGraphImageRelationship.subject.labels.add(sceneGraphImageRelationship.subjectGloss);
            Annotation annotation5 = new Annotation(String.format("A horse %s an apple .", removeDeterminersAndNumbers(removeFinalPunctuation(sceneGraphImageRelationship.predicate))));
            pipeline2.annotate(annotation5);
            List list5 = (List) ((CoreMap) ((List) annotation5.get(CoreAnnotations.SentencesAnnotation.class)).get(0)).get(CoreAnnotations.TokensAnnotation.class);
            sceneGraphImageRelationship.predicateGloss = list5.subList(2, list5.size() - 3);
        }
        for (SceneGraphImageObject sceneGraphImageObject : sceneGraphImage.objects) {
            if (sceneGraphImageObject.names.size() > sceneGraphImageObject.labels.size()) {
                Iterator<String> it = sceneGraphImageObject.names.iterator();
                while (it.hasNext()) {
                    Annotation annotation6 = new Annotation(String.format("The %s is tall .", removeDeterminersAndNumbers(removeFinalPunctuation(it.next()))));
                    pipeline2.annotate(annotation6);
                    List list6 = (List) ((CoreMap) ((List) annotation6.get(CoreAnnotations.SentencesAnnotation.class)).get(0)).get(CoreAnnotations.TokensAnnotation.class);
                    sceneGraphImageObject.labels.add(list6.subList(1, list6.size() - 3));
                }
            }
        }
        StanfordCoreNLP tokenizerPipeline2 = getTokenizerPipeline();
        for (SceneGraphImageRegion sceneGraphImageRegion : sceneGraphImage.regions) {
            Annotation annotation7 = new Annotation(sceneGraphImageRegion.phrase.toLowerCase());
            tokenizerPipeline2.annotate(annotation7);
            sceneGraphImageRegion.tokens = (List) ((CoreMap) ((List) annotation7.get(CoreAnnotations.SentencesAnnotation.class)).get(0)).get(CoreAnnotations.TokensAnnotation.class);
        }
    }

    public void trimFunctionWords(SceneGraphImage sceneGraphImage) {
        for (SceneGraphImageRelationship sceneGraphImageRelationship : sceneGraphImage.relationships) {
            if (sceneGraphImageRelationship.predicateGloss.get(0).lemma().matches("be|an?|the") && sceneGraphImageRelationship.predicateGloss.size() > 1) {
                sceneGraphImageRelationship.predicateGloss = sceneGraphImageRelationship.predicateGloss.subList(1, sceneGraphImageRelationship.predicateGloss.size());
            }
        }
    }
}
