package edu.stanford.nlp.coref.docreader;

import edu.stanford.nlp.coref.CorefCoreAnnotations;
import edu.stanford.nlp.coref.CorefUtils;
import edu.stanford.nlp.coref.data.CorefChain;
import edu.stanford.nlp.coref.data.InputDoc;
import edu.stanford.nlp.coref.data.Mention;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.io.RuntimeIOException;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.AnnotationIterator;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.Triple;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.function.Predicate;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

/* loaded from: input_file:edu/stanford/nlp/coref/docreader/CoreNLPDocumentReader.class */
/**
 * A {@link DocReader} that walks a directory tree of serialized CoreNLP annotation files and
 * hands back one document at a time.
 *
 * <p>Files are discovered once, at construction time, and then read lazily: a single
 * {@link AnnotationIterator} is kept open on the current file and advanced to the next file when
 * it runs dry. Instances hold mutable cursor state and are not synchronized.
 */
public class CoreNLPDocumentReader implements DocReader {
    protected static final Redwood.RedwoodChannels logger = Redwood.channels(CoreNLPDocumentReader.class);

    /** Iterator over the documents of the file at {@link #curFileIndex}; {@code null} when no file is open. */
    private AnnotationIterator docIterator;

    /** Files to read, in reading order. */
    protected final List<File> fileList;

    /** Index into {@link #fileList} of the file currently being read. */
    private int curFileIndex;

    private final Options options;

    /** Copied from {@link Options#filterCorefMentions}; a non-null value switches on chain filtering in {@link #nextDoc()}. */
    private final Predicate<Pair<CorefChain.CorefMention, List<CoreLabel>>> filterCorefMentions;

    /** Configuration for a {@link CoreNLPDocumentReader}. */
    public static class Options {
        /** Whether the constructor logs how many files were found. Defaults to {@code true}. */
        public boolean printLoadingMessage;

        /** Language of the documents. Defaults to {@link Locale#ENGLISH}; not consulted by the reader itself. */
        public Locale lang;

        /** Optional mention filter; when non-null the reader filters coref chains in {@code nextDoc()}. */
        public Predicate<Pair<CorefChain.CorefMention, List<CoreLabel>>> filterCorefMentions;

        /** Regular expression (source form) used to select files. */
        protected String fileFilter;

        /** Compiled form of {@link #fileFilter}. */
        protected Pattern filePattern;

        /** When set, the constructor sorts the file list (the list is already sorted on discovery). */
        protected boolean sortFiles;

        /** Creates options that select files matching {@code .json$}. */
        public Options() {
            this(".json$");
        }

        /**
         * Creates options with the given file filter.
         *
         * @param str regular expression used to select files
         */
        public Options(String str) {
            this.printLoadingMessage = true;
            this.lang = Locale.ENGLISH;
            this.fileFilter = str;
            this.filePattern = Pattern.compile(this.fileFilter);
        }

        /**
         * Replaces the file filter and recompiles the pattern.
         *
         * @param str regular expression used to select files
         */
        public void setFilter(String str) {
            this.fileFilter = str;
            this.filePattern = Pattern.compile(this.fileFilter);
        }
    }

    /**
     * Creates a reader over {@code str} using default {@link Options}.
     *
     * @param str file or directory to read from; may be {@code null}
     */
    public CoreNLPDocumentReader(String str) {
        this(str, new Options());
    }

    /**
     * Creates a reader over {@code str}.
     *
     * @param str     file or directory to read from; a {@code null} or non-existent path yields a
     *                reader with no files
     * @param options reader configuration; must not be {@code null}
     */
    public CoreNLPDocumentReader(String str, Options options) {
        // Probe the file system once and reuse the answer for both the listing and the log message.
        boolean pathExists = str != null && new File(str).exists();
        if (pathExists) {
            this.fileList = getFiles(str, options.filePattern);
        } else {
            this.fileList = Collections.emptyList();
        }
        this.options = options;
        this.filterCorefMentions = options.filterCorefMentions;
        if (options.sortFiles) {
            Collections.sort(this.fileList);
        }
        this.curFileIndex = 0;
        if (pathExists && options.printLoadingMessage) {
            logger.info("Reading " + this.fileList.size() + " CoreNLP files from " + str);
        }
    }

    /**
     * Collects every file under {@code str} whose name matches {@code pattern}, sorted in natural
     * {@link File} order.
     */
    private static List<File> getFiles(String str, Pattern pattern) {
        List<File> files = new ArrayList<>();
        for (File file : IOUtils.iterFilesRecursive(new File(str), pattern)) {
            files.add(file);
        }
        Collections.sort(files);
        return files;
    }

    /**
     * Returns the next raw annotation, moving on to subsequent files as each one is exhausted.
     *
     * @return the next document, or {@code null} when every file has been consumed
     * @throws RuntimeIOException if a file cannot be opened or read
     */
    public Annotation getNextDocument() {
        try {
            if (this.curFileIndex >= this.fileList.size()) {
                return null;
            }
            File file = this.fileList.get(this.curFileIndex);
            if (this.docIterator == null) {
                this.docIterator = new AnnotationIterator(file.getAbsolutePath());
            }
            while (!this.docIterator.hasNext()) {
                Redwood.log("debug-docreader", "Processed " + this.docIterator.getDocCnt() + " documents in " + file.getAbsolutePath());
                this.docIterator.close();
                // Drop the reference so a closed iterator is never queried again or closed twice
                // (by reset(), or by a later call after opening the next file failed).
                this.docIterator = null;
                this.curFileIndex++;
                if (this.curFileIndex >= this.fileList.size()) {
                    return null;
                }
                file = this.fileList.get(this.curFileIndex);
                this.docIterator = new AnnotationIterator(file.getAbsolutePath());
            }
            Annotation next = this.docIterator.next();
            Redwood.log("debug-docreader", "Reading document: " + next.get(CoreAnnotations.DocIDAnnotation.class));
            return next;
        } catch (IOException e) {
            throw new RuntimeIOException(e);
        }
    }

    /**
     * Reads the next document and wraps it, together with its gold mentions, in an {@link InputDoc}.
     *
     * <p>When a mention filter is configured, the document's coref chains are first reduced to
     * those accepted by {@link CorefUtils#filterCorefChainWithMentionSpans}, and the span set used
     * for that decision is stored on the returned document.
     *
     * @return the next document, or {@code null} when there are no more documents
     */
    @Override // edu.stanford.nlp.coref.docreader.DocReader
    public InputDoc nextDoc() {
        Annotation annotation = getNextDocument();
        if (annotation == null) {
            return null;
        }
        // Assigned exactly once on every path so that the lambda below may capture it.
        final Set<Triple<Integer, Integer, Integer>> filterSet;
        if (this.filterCorefMentions != null) {
            filterSet = CorefUtils.getMatchingSpans(annotation);
            Map<Integer, CorefChain> chains = annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class);
            // A document without coref chains has nothing to filter.
            if (chains != null) {
                Map<Integer, CorefChain> keptChains = chains.values().stream()
                        .filter(chain -> CorefUtils.filterCorefChainWithMentionSpans(chain, filterSet))
                        .collect(Collectors.toMap(CorefChain::getChainID, chain -> chain));
                annotation.set(CorefCoreAnnotations.CorefChainAnnotation.class, keptChains);
            }
        } else {
            filterSet = null;
        }
        InputDoc inputDoc = new InputDoc(annotation, makeDocInfo(annotation), extractGoldMentions(annotation), null);
        inputDoc.filterMentionSet = filterSet;
        return inputDoc;
    }

    /**
     * Builds the gold mentions of a document from its coref chains, grouped by sentence.
     *
     * @param annotation document carrying sentence and (optionally) coref chain annotations
     * @return one list per sentence, in sentence order; every list is empty when the document has
     *         no coref chain annotation
     */
    public List<List<Mention>> extractGoldMentions(Annotation annotation) {
        List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
        Map<Integer, CorefChain> chains = annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class);
        List<List<Mention>> mentionsBySentence = new ArrayList<>(sentences.size());
        for (int i = 0; i < sentences.size(); i++) {
            mentionsBySentence.add(new ArrayList<>());
        }
        if (chains == null) {
            return mentionsBySentence;
        }
        for (CorefChain chain : chains.values()) {
            for (CorefChain.CorefMention corefMention : chain.getMentionsInTextualOrder()) {
                Mention mention = toMention(corefMention, sentences);
                mentionsBySentence.get(mention.sentNum).add(mention);
            }
        }
        return mentionsBySentence;
    }

    /**
     * Converts a chain mention into a {@link Mention}.
     *
     * <p>The sentence number and token offsets are each shifted down by one before being stored
     * (the chain mention's indices are presumably 1-based; verify against {@code CorefChain}).
     *
     * @param corefMention mention taken from a coref chain
     * @param list         sentences of the document the mention belongs to
     * @return a mention carrying the gold cluster id, shifted indices and the covered token span
     */
    public Mention toMention(CorefChain.CorefMention corefMention, List<CoreMap> list) {
        Mention mention = new Mention();
        mention.goldCorefClusterID = corefMention.corefClusterID;
        mention.startIndex = corefMention.startIndex - 1;
        mention.endIndex = corefMention.endIndex - 1;
        mention.sentNum = corefMention.sentNum - 1;
        List<CoreLabel> sentenceTokens = list.get(mention.sentNum).get(CoreAnnotations.TokensAnnotation.class);
        mention.originalSpan = sentenceTokens.subList(mention.startIndex, mention.endIndex);
        return mention;
    }

    /** Builds the document-info map, which holds only the document id under the key {@code DOC_ID}. */
    private static Map<String, String> makeDocInfo(Annotation annotation) {
        Map<String, String> docInfo = Generics.newHashMap();
        docInfo.put("DOC_ID", annotation.get(CoreAnnotations.DocIDAnnotation.class));
        return docInfo;
    }

    /** Rewinds the reader to the first file, closing any iterator that is still open. */
    @Override // edu.stanford.nlp.coref.docreader.DocReader
    public void reset() {
        this.curFileIndex = 0;
        if (this.docIterator != null) {
            this.docIterator.close();
            this.docIterator = null;
        }
    }
}
