package opennlp.tools.formats.leipzig;

import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import opennlp.tools.langdetect.Language;
import opennlp.tools.langdetect.LanguageSample;
import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.MarkableFileInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import org.apache.commons.lang3.StringUtils;

/* loaded from: input_file:opennlp-tools-1.9.1.jar:opennlp/tools/formats/leipzig/LeipzigLanguageSampleStream.class */
/**
 * An {@link ObjectStream} of {@link LanguageSample}s built from a folder of sentences files.
 *
 * <p>Every regular, non-hidden file in the folder whose name starts with three lower-case
 * ASCII letters is treated as a sentences file; those three letters are used as the language
 * code of all samples produced from that file. The requested number of samples per language
 * is divided (integer division) by the number of files sharing the same language code, so
 * that each file contributes an equal share.
 *
 * <p>Files are processed in sorted order, one at a time; a file is only opened when the
 * previous one is exhausted. Line selection and ordering use a {@link Random} with a fixed
 * seed. Note that {@link #reset()} rewinds the file iteration but does not re-seed the
 * generator.
 */
public class LeipzigLanguageSampleStream implements ObjectStream<LanguageSample> {
    /** Length of the language-code prefix taken from each file name. */
    private static final int LANG_CODE_LENGTH = 3;

    private final int sentencesPerSample;
    private final Map<String, Integer> langSampleCounts;
    private final File[] sentencesFiles;
    private Iterator<File> sentencesFilesIt;
    private ObjectStream<LanguageSample> sampleStream;
    private final Random random;

    /**
     * Produces the samples of a single sentences file.
     *
     * <p>The whole selection is loaded into memory by the constructor; no file handle is kept
     * open afterwards.
     */
    public class LeipzigSentencesStream implements ObjectStream<LanguageSample> {
        private final String lang;
        private final Iterator<String> lineIterator;

        /**
         * Randomly selects {@code sentencesPerSample * numberOfSamples} line numbers of the
         * file and keeps the selected lines that contain a tab character, in shuffled order.
         *
         * @param lang language code attached to every sample of this stream
         * @param sentencesFile the file to read, decoded as UTF-8
         * @param sentencesPerSample number of sentences combined into one sample
         * @param numberOfSamples number of samples this file should contribute
         * @throws InvalidFormatException if the file has fewer lines than required
         * @throws IOException if the file cannot be read
         */
        LeipzigSentencesStream(String lang, File sentencesFile, int sentencesPerSample,
                               int numberOfSamples) throws IOException {
            this.lang = lang;

            // Files.lines keeps the file open until the stream is closed, so it must be
            // wrapped in try-with-resources.
            final int totalLines;
            try (Stream<String> lines = Files.lines(sentencesFile.toPath())) {
                totalLines = (int) lines.count();
            }

            int requiredLines = sentencesPerSample * numberOfSamples;
            if (totalLines < requiredLines) {
                throw new InvalidFormatException(String.format("%s does not contain enough lines (%d lines < %d required lines).", sentencesFile.getPath(), totalLines, requiredLines));
            }

            // Pick the line numbers to keep: shuffle all indices and take the first ones.
            List<Integer> indices = IntStream.range(0, totalLines).boxed().collect(Collectors.toList());
            Collections.shuffle(indices, LeipzigLanguageSampleStream.this.random);
            Set<Integer> selectedIndices = new HashSet<>(indices.subList(0, requiredLines));

            List<String> selectedLines = new ArrayList<>();
            try (ObjectStream<String> lineStream = new PlainTextByLineStream(
                    new MarkableFileInputStreamFactory(sentencesFile), StandardCharsets.UTF_8)) {
                int lineNumber = 0;
                String line;
                while ((line = lineStream.read()) != null) {
                    // Lines without a tab are skipped but still count towards the line number.
                    if (line.indexOf('\t') != -1 && selectedIndices.contains(lineNumber)) {
                        selectedLines.add(line);
                    }
                    lineNumber++;
                }
            }

            Collections.shuffle(selectedLines, LeipzigLanguageSampleStream.this.random);
            this.lineIterator = selectedLines.iterator();
        }

        /**
         * Builds the next sample by concatenating up to {@code sentencesPerSample} of the
         * remaining lines. Only the text after the first tab of each line is used, and each
         * sentence is followed by a single space.
         *
         * @return the next sample, or {@code null} when no lines are left
         */
        @Override
        public LanguageSample read() throws IOException {
            StringBuilder text = new StringBuilder();
            int taken = 0;
            while (taken < LeipzigLanguageSampleStream.this.sentencesPerSample && lineIterator.hasNext()) {
                String line = lineIterator.next();
                text.append(line, line.indexOf('\t') + 1, line.length()).append(' ');
                taken++;
            }
            if (text.length() == 0) {
                return null;
            }
            return new LanguageSample(new Language(lang), text);
        }
    }

    /**
     * Scans the folder for sentences files and prepares the stream.
     *
     * @param file folder containing the sentences files
     * @param i number of sentences combined into one sample
     * @param i2 number of samples per language, split evenly between the files of a language
     * @throws IOException if the folder cannot be listed
     */
    public LeipzigLanguageSampleStream(File file, int i, int i2) throws IOException {
        this.sentencesPerSample = i;

        FileFilter sentencesFileFilter = LeipzigLanguageSampleStream::isSentencesFile;
        File[] files = file.listFiles(sentencesFileFilter);
        if (files == null) {
            // listFiles returns null when the path is not a directory or an I/O error occurs.
            throw new IOException("Unable to list sentences files in " + file.getPath());
        }
        Arrays.sort(files);
        this.sentencesFiles = files;

        final int samplesPerLanguage = i2;
        Map<String, Integer> filesPerLanguage = Arrays.stream(files)
                .map(LeipzigLanguageSampleStream::languageOf)
                .collect(Collectors.groupingBy(lang -> lang, Collectors.summingInt(lang -> 1)));
        this.langSampleCounts = filesPerLanguage.entrySet().stream()
                .collect(Collectors.toMap(Map.Entry::getKey,
                        entry -> samplesPerLanguage / entry.getValue()));

        this.random = new Random(23L);
        reset();
    }

    /** Accepts visible regular files whose name starts with three lower-case ASCII letters. */
    private static boolean isSentencesFile(File candidate) {
        if (candidate.isHidden() || !candidate.isFile()) {
            return false;
        }
        String name = candidate.getName();
        return name.length() >= LANG_CODE_LENGTH
                && name.substring(0, LANG_CODE_LENGTH).matches("[a-z]+");
    }

    /** Returns the language code, i.e. the first three characters of the file name. */
    private static String languageOf(File sentencesFile) {
        return sentencesFile.getName().substring(0, LANG_CODE_LENGTH);
    }

    /**
     * Returns the next sample, moving on to the next sentences file whenever the current one
     * is exhausted.
     *
     * @return the next sample, or {@code null} when all files have been consumed
     * @throws IOException if a sentences file cannot be read or is too short
     */
    @Override
    public LanguageSample read() throws IOException {
        while (true) {
            if (sampleStream != null) {
                LanguageSample sample = sampleStream.read();
                if (sample != null) {
                    return sample;
                }
            }
            if (!sentencesFilesIt.hasNext()) {
                return null;
            }
            File sentencesFile = sentencesFilesIt.next();
            String lang = languageOf(sentencesFile);
            sampleStream = new LeipzigSentencesStream(lang, sentencesFile, sentencesPerSample,
                    langSampleCounts.get(lang));
        }
    }

    /**
     * Restarts iteration from the first sentences file. The random generator keeps its
     * current state, so the lines selected after a reset may differ from the first pass.
     */
    @Override
    public void reset() throws IOException {
        this.sentencesFilesIt = Arrays.asList(this.sentencesFiles).iterator();
        this.sampleStream = null;
    }
}
