/*
 * Decompiled with CFR 0.152.
 */
package opennlp.tools.formats;

import java.io.IOException;
import java.io.InputStream;
import opennlp.tools.doccat.DocumentSample;
import opennlp.tools.tokenize.SimpleTokenizer;
import opennlp.tools.util.FilterObjectStream;
import opennlp.tools.util.PlainTextByLineStream;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Groups the lines of a UTF-8 text input into {@link DocumentSample}s.
 *
 * <p>Every line is tokenized with {@link SimpleTokenizer}. The first token of each line is
 * discarded (presumably the sentence number that prefixes each line in the Leipzig corpus
 * format -- confirm against the corpus files) and the remaining tokens are appended to the
 * document text, each followed by a single space. Up to {@code sentencesPerDocument}
 * consecutive lines are merged into one sample, and every sample carries the same
 * language label.
 */
public class LeipzigDoccatSampleStream
extends FilterObjectStream<String, DocumentSample> {
    /** Label handed to every {@link DocumentSample} produced by this stream. */
    private final String language;
    /** Maximum number of input lines merged into a single sample. */
    private final int sentencesPerDocument;

    /**
     * Creates a sample stream that reads lines from {@code in}, decoded as UTF-8.
     *
     * @param language the label assigned to every produced sample
     * @param sentencesPerDocument the maximum number of lines merged into one sample;
     *        with a value of zero or less {@link #read()} never consumes a line and
     *        always returns {@code null}
     * @param in the raw input to read lines from
     * @throws IOException declared by the constructor signature; propagated from the
     *         underlying line stream if it fails during set-up
     */
    LeipzigDoccatSampleStream(String language, int sentencesPerDocument, InputStream in) throws IOException {
        super(new PlainTextByLineStream(in, "UTF-8"));
        this.language = language;
        this.sentencesPerDocument = sentencesPerDocument;
    }

    /**
     * Reads the next batch of lines and combines them into one sample.
     *
     * <p>A batch whose lines contain nothing but their leading token produces no text.
     * Such a batch is skipped and the next one is read, so {@code null} is returned only
     * when no further text can be produced from the underlying stream.
     *
     * @return the next sample, or {@code null} once the underlying stream is exhausted
     * @throws IOException if a line yields no tokens at all, or if reading the underlying
     *         stream fails
     */
    @Override
    public DocumentSample read() throws IOException {
        StringBuilder sampleText = new StringBuilder();
        int linesRead;
        do {
            linesRead = 0;
            String line;
            while (linesRead < this.sentencesPerDocument && (line = this.samples.read()) != null) {
                String[] tokens = SimpleTokenizer.INSTANCE.tokenize(line);
                if (tokens.length == 0) {
                    throw new IOException("Empty lines are not allowed!");
                }
                // Index 0 is deliberately skipped: the leading token is not part of the text.
                for (int i = 1; i < tokens.length; ++i) {
                    sampleText.append(tokens[i]).append(' ');
                }
                ++linesRead;
            }
            // Retry only after a full, text-less batch: a short (or empty) batch means the
            // underlying stream ended, and a non-positive batch size never reads anything.
        } while (sampleText.length() == 0
                && linesRead > 0
                && linesRead == this.sentencesPerDocument);

        if (sampleText.length() > 0) {
            return new DocumentSample(this.language, sampleText.toString());
        }
        return null;
    }
}

