package de.tudarmstadt.ukp.dkpro.keyphrases.core.filter;

import de.tudarmstadt.ukp.dkpro.keyphrases.core.type.Keyphrase;
import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Locale;
import java.util.Set;
import java.util.StringTokenizer;
import org.apache.commons.io.FileUtils;
import org.apache.uima.UimaContext;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.resource.ResourceInitializationException;

/* loaded from: input_file:de/tudarmstadt/ukp/dkpro/keyphrases/core/filter/CorpusFilter.class */
public class CorpusFilter extends AbstractKeyphraseFilter {
    public static final String CORPUS_FOLDER = "corpusFolder";

    @ConfigurationParameter(name = CORPUS_FOLDER, mandatory = true)
    private File corpusFolder;
    public static final String FILE_EXTENSION = "fileExtension";

    @ConfigurationParameter(name = FILE_EXTENSION, mandatory = true, defaultValue = {"txt"})
    private String fileExtension;
    private Set<String> tokensSet = new HashSet();

    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);
        try {
            for (File file : this.corpusFolder.listFiles(new FileFilter() { // from class: de.tudarmstadt.ukp.dkpro.keyphrases.core.filter.CorpusFilter.1
                @Override // java.io.FileFilter
                public boolean accept(File file2) {
                    return file2.getName().endsWith(new StringBuilder().append(".").append(CorpusFilter.this.fileExtension).toString());
                }
            })) {
                Iterator it = FileUtils.readLines(file).iterator();
                while (it.hasNext()) {
                    StringTokenizer stringTokenizer = new StringTokenizer((String) it.next());
                    while (stringTokenizer.hasMoreElements()) {
                        this.tokensSet.add(stringTokenizer.nextToken().toLowerCase());
                    }
                }
            }
        } catch (IOException e) {
            throw new ResourceInitializationException(e);
        }
    }

    @Override // de.tudarmstadt.ukp.dkpro.keyphrases.core.filter.AbstractKeyphraseFilter
    protected Collection<Keyphrase> filterCandidates(Collection<Keyphrase> collection) {
        LinkedList linkedList = new LinkedList();
        for (Keyphrase keyphrase : collection) {
            if (!this.tokensSet.contains(keyphrase.getCoveredText().toLowerCase())) {
                linkedList.add(keyphrase);
            }
        }
        return linkedList;
    }
}
