package edu.stanford.nlp.trees.treebank;

import edu.stanford.nlp.trees.TreeVisitor;
import edu.stanford.nlp.trees.Treebank;
import edu.stanford.nlp.trees.treebank.Dataset;
import edu.stanford.nlp.util.DataFilePaths;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.File;
import java.io.FileFilter;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.LineNumberReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/stanford/nlp/trees/treebank/AbstractDataset.class */
/**
 * Skeleton {@link Dataset} implementation that stores the options common to
 * its subclasses and parses them from a {@link Properties} object in
 * {@link #setOptions(Properties)}. Subclasses supply {@link #build()}.
 */
public abstract class AbstractDataset implements Dataset {
    private static final Redwood.RedwoodChannels log = Redwood.channels(AbstractDataset.class);

    /** Output file name; assembled in {@link #setOptions(Properties)}. */
    protected String outFileName;
    /** Flat-file output name; only assigned when {@link #makeFlatFile} is set. */
    protected String flatFileName;
    protected Treebank treebank;
    /** The properties most recently passed to {@link #setOptions(Properties)}. */
    protected Properties options;
    protected Mapper posMapper = null;
    protected String posMapOptions = "";
    protected Mapper lexMapper = null;
    protected String lexMapOptions = "";
    protected Dataset.Encoding encoding = Dataset.Encoding.UTF8;
    /** Filter built from the split file, or {@code null} when no split is configured. */
    protected FileFilter splitFilter = null;
    protected boolean addDeterminer = false;
    protected boolean removeDashTags = false;
    protected boolean addRoot = false;
    protected boolean removeEscapeTokens = false;
    protected int maxLen = Integer.MAX_VALUE;
    protected String morphDelim = null;
    protected TreeVisitor customTreeVisitor = null;
    protected boolean makeFlatFile = false;
    /** Matches runs of whitespace; used to turn the dataset name into a file name. */
    protected final Pattern fileNameNormalizer = Pattern.compile("\\s+");
    protected String treeFileExtension = "tree";
    protected final List<String> outputFileList = new ArrayList<>();
    protected final List<File> pathsToData = new ArrayList<>();
    protected final List<File> pathsToMappings = new ArrayList<>();
    /** Backing text for {@link #toString()}. */
    protected final StringBuilder toStringBuffer = new StringBuilder();
    /** Option names seen by {@link #setOptions(Properties)}. */
    protected final Set<String> configuredOptions = Generics.newHashSet();
    /** Option names that must be configured for {@link #setOptions(Properties)} to succeed. */
    protected final Set<String> requiredOptions = Generics.newHashSet();

    /**
     * {@link FileFilter} that accepts a file only when its simple name
     * ({@link File#getName()}) is contained in the supplied set.
     */
    protected static class SplitFilter implements FileFilter {
        private final Set<String> filterSet;

        /**
         * @param set the file names to accept; stored by reference, not copied
         */
        public SplitFilter(Set<String> set) {
            this.filterSet = set;
        }

        @Override // java.io.FileFilter
        public boolean accept(File file) {
            return this.filterSet.contains(file.getName());
        }
    }

    /**
     * Registers the name, path and encoding parameters as required options.
     */
    public AbstractDataset() {
        this.requiredOptions.add(ConfigParser.paramName);
        this.requiredOptions.add(ConfigParser.paramPath);
        this.requiredOptions.add(ConfigParser.paramEncode);
    }

    @Override // edu.stanford.nlp.trees.treebank.Dataset
    public abstract void build();

    /**
     * Reflectively instantiates a {@link Mapper} through its no-argument
     * constructor, using the system class loader.
     *
     * @param className fully-qualified name of the mapper class
     * @return the new mapper, or {@code null} if the class could not be
     *         found, accessed or instantiated (a message is written to
     *         {@code System.err} in that case)
     */
    private Mapper loadMapper(String className) {
        try {
            // getDeclaredConstructor().newInstance() replaces the deprecated
            // Class.newInstance(), which lets checked constructor exceptions escape undeclared.
            return (Mapper) ClassLoader.getSystemClassLoader()
                    .loadClass(className)
                    .getDeclaredConstructor()
                    .newInstance();
        } catch (ClassNotFoundException e) {
            System.err.printf("%s: Mapper type %s does not exist\n", getClass().getName(), className);
        } catch (IllegalAccessException e) {
            System.err.printf("%s: Unable to access mapper type %s\n", getClass().getName(), className);
        } catch (ReflectiveOperationException e) {
            // Remaining cases: InstantiationException, NoSuchMethodException,
            // InvocationTargetException (constructor threw).
            System.err.printf("%s: Unable to instantiate mapper type %s\n", getClass().getName(), className);
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Reads the dataset configuration from {@code properties}.
     * <p>
     * Keys are processed in sorted order. Every key is recorded in
     * {@link #configuredOptions}; recognised keys additionally update the
     * corresponding field. After all keys are processed, the method checks
     * that every entry of {@link #requiredOptions} was configured and then
     * derives {@link #outFileName} (and {@link #flatFileName} when a flat
     * file was requested).
     *
     * @param properties the option name/value pairs
     * @return {@code false} if a required option is missing, {@code true} otherwise
     * @throws IllegalArgumentException if the encoding value is not a
     *         {@link Dataset.Encoding} constant, or (as
     *         {@link NumberFormatException}) if the maximum length is not an integer
     */
    @Override // edu.stanford.nlp.trees.treebank.Dataset
    public boolean setOptions(Properties properties) {
        this.options = properties;

        List<String> sortedKeys = new ArrayList<>(properties.stringPropertyNames());
        Collections.sort(sortedKeys);

        for (String key : sortedKeys) {
            String value = properties.getProperty(key);
            this.configuredOptions.add(key);

            Matcher pathMatcher = ConfigParser.matchPath.matcher(key);
            Matcher mappingMatcher = ConfigParser.matchMapping.matcher(key);

            if (pathMatcher.lookingAt()) {
                // Any key matching the path pattern counts as the canonical path parameter.
                this.pathsToData.add(new File(value));
                this.configuredOptions.add(ConfigParser.paramPath);
            } else if (mappingMatcher.lookingAt()) {
                this.pathsToMappings.add(new File(value));
                this.configuredOptions.add(ConfigParser.paramMapping);
            } else if (key.equals(ConfigParser.paramEncode)) {
                this.encoding = Dataset.Encoding.valueOf(value);
            } else if (key.equals(ConfigParser.paramName)) {
                // Whitespace runs in the name become single dashes in the file name.
                this.outFileName = this.fileNameNormalizer.matcher(value.trim()).replaceAll("-");
                this.toStringBuffer.append(String.format("Dataset Name: %s\n", value.trim()));
            } else if (key.equals(ConfigParser.paramDT)) {
                this.addDeterminer = Boolean.parseBoolean(value);
            } else if (key.equals(ConfigParser.paramSplit)) {
                this.splitFilter = new SplitFilter(buildSplitMap(value));
            } else if (key.equals(ConfigParser.paramFlat) && Boolean.parseBoolean(value)) {
                this.makeFlatFile = true;
            } else if (key.equals(ConfigParser.paramFileExt)) {
                this.treeFileExtension = value;
            } else if (key.equals(ConfigParser.paramLexMapper)) {
                this.lexMapper = loadMapper(value);
            } else if (key.equals(ConfigParser.paramNoDashTags)) {
                this.removeDashTags = Boolean.parseBoolean(value);
            } else if (key.equals(ConfigParser.paramAddRoot)) {
                this.addRoot = Boolean.parseBoolean(value);
            } else if (key.equals(ConfigParser.paramUnEscape)) {
                // NOTE(review): the value is ignored here (presence alone enables the flag),
                // unlike the other boolean options above -- confirm this is intended.
                this.removeEscapeTokens = true;
            } else if (key.equals(ConfigParser.paramLexMapOptions)) {
                this.lexMapOptions = value;
            } else if (key.equals(ConfigParser.paramPosMapper)) {
                this.posMapper = loadMapper(value);
            } else if (key.equals(ConfigParser.paramPosMapOptions)) {
                this.posMapOptions = value;
            } else if (key.equals(ConfigParser.paramMaxLen)) {
                this.maxLen = Integer.parseInt(value);
            } else if (key.equals(ConfigParser.paramMorph)) {
                this.morphDelim = value;
            } else if (key.equals(ConfigParser.paramTransform)) {
                this.customTreeVisitor = loadTreeVistor(value);
            }
        }

        if (!this.configuredOptions.containsAll(this.requiredOptions)) {
            return false;
        }

        // File name layout: [outputPath/]<name>.<utf8|bw>[.flat].txt
        this.outFileName += (this.encoding == Dataset.Encoding.UTF8) ? ".utf8" : ".bw";

        String outputDirectory = properties.getProperty(ConfigParser.paramOutputPath);
        if (outputDirectory != null) {
            this.outFileName = outputDirectory + File.separator + this.outFileName;
        }
        if (this.makeFlatFile) {
            // Must be derived before ".txt" is appended to outFileName below.
            this.flatFileName = this.outFileName + ".flat.txt";
        }
        this.outFileName += ".txt";
        return true;
    }

    /**
     * Reflectively instantiates a {@link TreeVisitor} through its no-argument
     * constructor, using the system class loader.
     *
     * @param className fully-qualified name of the visitor class
     * @return the new visitor, or {@code null} if loading or instantiation
     *         failed (the stack trace is printed in that case)
     */
    private static TreeVisitor loadTreeVistor(String className) {
        try {
            // See loadMapper: avoids the deprecated Class.newInstance().
            return (TreeVisitor) ClassLoader.getSystemClassLoader()
                    .loadClass(className)
                    .getDeclaredConstructor()
                    .newInstance();
        } catch (ReflectiveOperationException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Reads a split file and returns its lines, each trimmed of surrounding
     * whitespace, as a set.
     * <p>
     * The path is first passed through {@link DataFilePaths#convert(String)}.
     * I/O failures are reported on {@code System.err} and do not propagate;
     * whatever was read before the failure is returned.
     *
     * @param path location of the split file
     * @return the trimmed lines read from the file; empty if the file could not be opened
     */
    protected Set<String> buildSplitMap(String path) {
        String resolvedPath = DataFilePaths.convert(path);
        Set<String> fileNames = Generics.newHashSet();
        // Tracked outside the try block because the reader is out of scope in the catch clauses.
        int linesRead = 0;
        // try-with-resources closes the reader even when a read fails part-way through.
        try (LineNumberReader reader = new LineNumberReader(new FileReader(resolvedPath))) {
            String line;
            // readLine() returning null is the end-of-stream signal; looping on ready()
            // could hand a null line to trim().
            while ((line = reader.readLine()) != null) {
                fileNames.add(line.trim());
                linesRead = reader.getLineNumber();
            }
        } catch (FileNotFoundException e) {
            System.err.printf("%s: Could not open split file %s\n", getClass().getName(), resolvedPath);
        } catch (IOException e) {
            System.err.printf("%s: Error reading split file %s (line %d)\n", getClass().getName(), resolvedPath, linesRead);
        }
        return fileNames;
    }

    /**
     * @return a read-only view of {@link #outputFileList}
     */
    @Override // edu.stanford.nlp.trees.treebank.Dataset
    public List<String> getFilenames() {
        return Collections.unmodifiableList(this.outputFileList);
    }

    /**
     * @return the text accumulated in {@link #toStringBuffer}
     */
    @Override
    public String toString() {
        return this.toStringBuffer.toString();
    }
}
