package org.apache.sysds.runtime.transform.tokenize.builder;

import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import org.apache.sysds.runtime.frame.data.FrameBlock;
import org.apache.sysds.runtime.transform.tokenize.DocumentRepresentation;
import org.apache.sysds.runtime.transform.tokenize.Token;
import org.apache.sysds.runtime.util.UtilFunctions;
import org.apache.wink.json4j.JSONException;
import org.apache.wink.json4j.JSONObject;

/* loaded from: input_file:org/apache/sysds/runtime/transform/tokenize/builder/TokenizerBuilderWhitespaceSplit.class */
/**
 * Tokenizer builder that splits the text of one frame column into tokens using a
 * delimiter regular expression (whitespace runs by default) and records, for each
 * token, the character offset at which it was found in the original text.
 */
public class TokenizerBuilderWhitespaceSplit extends TokenizerBuilder {
    private static final long serialVersionUID = 539127244034913364L;

    /** 1-based indices of the columns whose values form the document key. */
    private final int[] idCols;

    /** 1-based index of the column holding the text to tokenize. */
    private final int tokenizeCol;

    /** Delimiter regex passed to {@link String#split(String)}; defaults to {@code "\\s+"}. */
    public String regex;

    /**
     * Creates a builder for the given key columns and text column.
     *
     * @param idCols      1-based indices of the identifier columns
     * @param tokenizeCol 1-based index of the column containing the text
     * @param params      optional parameters; when non-null and containing the key
     *                    {@code "regex"}, its string value replaces the default delimiter
     * @throws JSONException if the {@code "regex"} entry cannot be read as a string
     */
    public TokenizerBuilderWhitespaceSplit(int[] idCols, int tokenizeCol, JSONObject params) throws JSONException {
        this.regex = "\\s+";
        if (params != null && params.has("regex")) {
            this.regex = params.getString("regex");
        }
        this.idCols = idCols;
        this.tokenizeCol = tokenizeCol;
    }

    /**
     * Splits {@code text} on {@link #regex} and returns the non-empty pieces as tokens,
     * each paired with the offset at which it was located in {@code text}.
     *
     * @param text the text to split; may be {@code null}
     * @return the tokens in order of appearance; empty when {@code text} is {@code null}
     */
    public List<Token> splitToTokens(String text) {
        List<Token> tokens = new ArrayList<>();
        if (text == null) {
            return tokens;
        }
        int searchFrom = 0;
        for (String piece : text.split(this.regex)) {
            // split() never yields null elements, so an emptiness check is sufficient.
            if (piece.isEmpty()) {
                continue;
            }
            int start = text.indexOf(piece, searchFrom);
            tokens.add(new Token(piece, start));
            // Resume the search after this token. Resuming at its start would let a
            // repeated token (or one that is a prefix of the previous token) be matched
            // at an earlier position again and be reported with the wrong offset.
            // NOTE(review): with a custom regex whose delimiter text can contain the
            // next token, indexOf may still land inside the delimiter; exact for the
            // default whitespace delimiter.
            searchFrom = start + piece.length();
        }
        return tokens;
    }

    /**
     * Tokenizes the text column of a range of rows and stores one
     * {@link DocumentRepresentation} per row, at the row's index, in
     * {@code internalRepresentation}.
     *
     * @param in                     frame providing the identifier and text columns
     * @param internalRepresentation output array, written at the processed row indices
     * @param rowStart               first row to process (inclusive)
     * @param blk                    block size forwarded to {@code UtilFunctions.getEndIndex}
     *                               together with the row count to obtain the exclusive end
     *                               row (presumably the number of rows in this block)
     */
    @Override // org.apache.sysds.runtime.transform.tokenize.builder.TokenizerBuilder
    public void createInternalRepresentation(FrameBlock in, DocumentRepresentation[] internalRepresentation, int rowStart, int blk) {
        int endIndex = UtilFunctions.getEndIndex(in.getNumRows(), rowStart, blk);
        for (int row = rowStart; row < endIndex; row++) {
            List<Token> tokens = splitToTokens(in.getString(row, this.tokenizeCol - 1));
            List<Object> keys = new ArrayList<>(this.idCols.length);
            for (int idCol : this.idCols) {
                // Column indices are configured 1-based; the frame is accessed 0-based.
                keys.add(in.get(row, idCol - 1));
            }
            // Build the representation once per row, after all keys are collected. This
            // avoids one discarded allocation per id column and ensures the slot is
            // populated even when no id columns are configured.
            internalRepresentation[row] = new DocumentRepresentation(keys, tokens);
        }
    }
}
