package de.tudarmstadt.ukp.dkpro.keyphrases.core.evaluator.util;

import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Stem;
import de.tudarmstadt.ukp.dkpro.keyphrases.core.evaluator.KeyphraseEvaluator;
import de.tudarmstadt.ukp.dkpro.keyphrases.core.type.Keyphrase;
import de.tudarmstadt.ukp.dkpro.keyphrases.core.util.KeyphraseScoreComparator;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;

/* loaded from: input_file:de/tudarmstadt/ukp/dkpro/keyphrases/core/evaluator/util/EvaluatorUtils.class */
/**
 * Static helper methods used when evaluating extracted keyphrases: reading the gold
 * keyphrases that accompany a document, rendering a document as token, stem or lemma
 * text, and de-duplicating a list of extracted keyphrases.
 */
public class EvaluatorUtils {
    /**
     * Reads the gold keyphrases stored next to a document.
     *
     * <p>The gold file location is derived from the document URI by replacing everything
     * from the extension dot onwards with {@code str}. If the URI has no extension,
     * {@code str} is appended to the complete URI. The file is read as UTF-8; each line
     * may hold one keyphrase or several keyphrases separated by {@code ';'}. Every
     * keyphrase is trimmed and empty entries are dropped.
     *
     * @param documentMetaData meta data whose document URI locates the gold file
     * @param str              suffix (including the dot, if one is wanted) that replaces
     *                         the document's extension
     * @param z                if {@code true}, every line is lower-cased before it is split
     * @return the distinct gold keyphrases in natural (sorted) string order
     * @throws AnalysisEngineProcessException if the gold file cannot be opened or read
     */
    public static Set<String> getGoldKeyphrases(DocumentMetaData documentMetaData, String str, boolean z) throws AnalysisEngineProcessException {
        String documentUri = documentMetaData.getDocumentUri();

        // indexOfExtension yields -1 when there is no extension; substring(0, -1) would throw.
        int extensionIndex = FilenameUtils.indexOfExtension(documentUri);
        String baseUri = extensionIndex < 0 ? documentUri : documentUri.substring(0, extensionIndex);

        List<String> lines;
        InputStream goldStream = null;
        try {
            goldStream = URI.create(baseUri + str).toURL().openStream();
            lines = IOUtils.readLines(goldStream, "UTF-8");
        } catch (IOException e) {
            throw new AnalysisEngineProcessException(e);
        } finally {
            // Always release the stream, also when reading fails half-way through.
            IOUtils.closeQuietly(goldStream);
        }

        Set<String> goldKeyphrases = new TreeSet<String>();
        for (String rawLine : lines) {
            String line = z ? rawLine.toLowerCase().trim() : rawLine.trim();
            // A line without ';' splits into a single element, so one loop covers both layouts.
            for (String part : line.split(";")) {
                String keyphrase = part.trim();
                // Skip blanks produced by empty lines or by input such as "a;;b" or "a; ;b".
                if (keyphrase.length() > 0) {
                    goldKeyphrases.add(keyphrase);
                }
            }
        }
        return goldKeyphrases;
    }

    /**
     * Renders the document as plain text on the level selected by {@code evaluatorType}.
     *
     * <ul>
     *   <li>{@code Token}: the unmodified document text of the CAS.</li>
     *   <li>{@code Stem}: the values of the {@link Stem} annotations, each followed by a
     *       single space.</li>
     *   <li>{@code Lemma}: the values of the {@link Lemma} annotations, each followed by a
     *       single space.</li>
     * </ul>
     *
     * <p>For stems and lemmas an annotation is only appended when its begin offset is not
     * smaller than the begin offset of the previously appended annotation.
     *
     * @param jCas          the CAS holding the document text and annotations
     * @param evaluatorType the text level to render
     * @return the rendered text; empty if {@code evaluatorType} is none of the three
     *         levels listed above
     */
    public static String getDocumentText(JCas jCas, KeyphraseEvaluator.EvaluatorType evaluatorType) {
        if (evaluatorType.equals(KeyphraseEvaluator.EvaluatorType.Token)) {
            // Same result as appending the text to an empty StringBuilder (null becomes "null").
            return String.valueOf(jCas.getDocumentText());
        }

        StringBuilder text = new StringBuilder();
        if (evaluatorType.equals(KeyphraseEvaluator.EvaluatorType.Stem)) {
            int lastBegin = 0;
            for (Stem stem : JCasUtil.select(jCas, Stem.class)) {
                if (stem.getBegin() < lastBegin) {
                    continue;
                }
                lastBegin = stem.getBegin();
                text.append(stem.getValue()).append(" ");
            }
        } else if (evaluatorType.equals(KeyphraseEvaluator.EvaluatorType.Lemma)) {
            int lastBegin = 0;
            for (Lemma lemma : JCasUtil.select(jCas, Lemma.class)) {
                if (lemma.getBegin() < lastBegin) {
                    continue;
                }
                lastBegin = lemma.getBegin();
                text.append(lemma.getValue()).append(" ");
            }
        }
        return text.toString();
    }

    /**
     * Convenience overload of {@link #filterAndSortKeyphrases(Collection, boolean)} for
     * arbitrary iterables: the elements are copied into a list and then processed by the
     * collection variant.
     *
     * @param iterable the keyphrases to process
     * @param z        if {@code true}, duplicates are detected case-insensitively
     * @return the sorted keyphrases without duplicates
     */
    public static List<Keyphrase> filterAndSortKeyphrases(Iterable<Keyphrase> iterable, boolean z) {
        List<Keyphrase> copy = new ArrayList<Keyphrase>();
        for (Keyphrase keyphrase : iterable) {
            copy.add(keyphrase);
        }
        return filterAndSortKeyphrases((Collection<Keyphrase>) copy, z);
    }

    /**
     * Sorts the keyphrases with a {@link KeyphraseScoreComparator} and removes entries
     * whose text is missing, empty, or a repetition of an earlier entry in that order.
     * The input collection itself is not modified.
     *
     * @param collection the keyphrases to process
     * @param z          if {@code true}, two keyphrases count as duplicates when their
     *                   lower-cased texts are equal; the returned keyphrases keep their
     *                   original text either way
     * @return a new list holding the first occurrence of every distinct, non-empty
     *         keyphrase in comparator order
     */
    public static List<Keyphrase> filterAndSortKeyphrases(Collection<Keyphrase> collection, boolean z) {
        List<Keyphrase> sorted = new ArrayList<Keyphrase>(collection);
        Collections.sort(sorted, new KeyphraseScoreComparator());

        List<Keyphrase> filtered = new ArrayList<Keyphrase>();
        Set<String> seen = new HashSet<String>();
        for (Keyphrase keyphrase : sorted) {
            String text = keyphrase.getKeyphrase();
            // A keyphrase without text carries no information; treat null like empty.
            if (text == null || text.length() == 0) {
                continue;
            }
            String key = z ? text.toLowerCase() : text;
            // Set.add returns false when the key was already present.
            if (seen.add(key)) {
                filtered.add(keyphrase);
            }
        }
        return filtered;
    }
}
