package hex.tfidf;

import java.util.regex.Pattern;

import water.MRTask;
import water.fvec.Chunk;
import water.fvec.NewChunk;
import water.parser.BufferedString;

/* loaded from: input_file:hex/tfidf/TfIdfPreprocessorTask.class */
/**
 * Task that splits document contents into individual words.
 *
 * <p>For every row whose document-contents cell is not NA, the document string is split on
 * runs of whitespace and one output row is appended per non-empty word: the document ID goes
 * to output chunk 0 and the word to output chunk 1.
 */
public class TfIdfPreprocessorTask extends MRTask<TfIdfPreprocessorTask> {
    /** Regular expression matching the whitespace runs that separate words in a document. */
    private static final String WORDS_DELIMITER_REGEX = "\\s+";

    /** {@link #WORDS_DELIMITER_REGEX} compiled once, so it is not recompiled for every row. */
    private static final Pattern WORDS_DELIMITER = Pattern.compile(WORDS_DELIMITER_REGEX);

    /** Index of the input column holding document IDs. */
    private final int _docIdsColIdx;

    /** Index of the input column holding document contents. */
    private final int _docContentsColIdx;

    /**
     * Creates the task.
     *
     * @param i  index of the input column holding document IDs
     * @param i2 index of the input column holding document contents
     */
    public TfIdfPreprocessorTask(int i, int i2) {
        this._docIdsColIdx = i;
        this._docContentsColIdx = i2;
    }

    /**
     * Tokenizes the documents of one chunk.
     *
     * @param chunkArr    input chunks; the document-ID and document-contents chunks are looked up
     *                    by the column indices given to the constructor
     * @param newChunkArr output chunks; index 0 receives document IDs, index 1 receives words
     */
    @Override // water.MRTask
    public void map(Chunk[] chunkArr, NewChunk[] newChunkArr) {
        final Chunk docIds = chunkArr[this._docIdsColIdx];
        final Chunk docContents = chunkArr[this._docContentsColIdx];
        final NewChunk outDocIds = newChunkArr[0];
        final NewChunk outWords = newChunkArr[1];

        for (int row = 0; row < docContents._len; row++) {
            if (docContents.isNA(row)) {
                continue; // rows without a document produce no output
            }
            final String document = docContents.atStr(new BufferedString(), row).toString();
            final long docId = docIds.at8(row);
            for (String word : WORDS_DELIMITER.split(document)) {
                // split() yields an empty first element when the document starts with
                // whitespace, and a single empty element for an empty document; an empty
                // string is not a word, so it must not be emitted as one.
                if (word.isEmpty()) {
                    continue;
                }
                outDocIds.addNum(docId);
                outWords.addStr(word);
            }
        }
    }
}
