package org.apache.sysds.runtime.transform.tokenize.applier;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import org.apache.sysds.common.Types;
import org.apache.sysds.runtime.frame.data.FrameBlock;
import org.apache.sysds.runtime.transform.tokenize.DocumentRepresentation;
import org.apache.sysds.runtime.transform.tokenize.Token;
import org.apache.sysds.runtime.util.UtilFunctions;
import org.apache.wink.json4j.JSONException;
import org.apache.wink.json4j.JSONObject;

/* loaded from: input_file:org/apache/sysds/runtime/transform/tokenize/applier/TokenizerApplierCount.class */
/**
 * Tokenizer applier that emits one output row per distinct token of a document together with the
 * number of times that token occurs in the document.
 *
 * <p>Usage is two-phase: {@link #allocateInternalMeta(int)} sizes the per-document count storage,
 * {@link #build(DocumentRepresentation[], int, int)} fills it, and
 * {@link #applyInternalRepresentation(DocumentRepresentation[], FrameBlock, int, int)} writes the
 * rows. Each output row consists of the document's key columns, the token, and its count.
 */
public class TokenizerApplierCount extends TokenizerApplier {
    private static final long serialVersionUID = 6382000606237705019L;

    /** When {@code true}, the tokens of each document are emitted in natural (String) order. */
    public boolean sort_alpha;

    /** Per-document map from token text to occurrence count; index = document index. */
    private List<Map<String, Integer>> counts;

    /**
     * Creates the applier and reads the optional {@code "sort_alpha"} flag from the parameters.
     *
     * <p>The first four arguments are forwarded unchanged to the superclass constructor.
     * NOTE(review): presumably (numIdCols, maxTokens, wideFormat, applyPadding) — confirm against
     * {@code TokenizerApplier}.
     *
     * @param i first superclass constructor argument
     * @param i2 second superclass constructor argument
     * @param z third superclass constructor argument
     * @param z2 fourth superclass constructor argument
     * @param jSONObject optional parameters; may be {@code null}, in which case
     *     {@link #sort_alpha} stays {@code false}
     * @throws JSONException if the {@code "sort_alpha"} entry cannot be read as a boolean
     */
    public TokenizerApplierCount(int i, int i2, boolean z, boolean z2, JSONObject jSONObject) throws JSONException {
        super(i, i2, z, z2);
        this.sort_alpha = false;
        if (jSONObject != null && jSONObject.has("sort_alpha")) {
            this.sort_alpha = jSONObject.getBoolean("sort_alpha");
        }
    }

    /**
     * Computes the number of output rows for the given documents.
     *
     * <ul>
     *   <li>wide format: one row per document;</li>
     *   <li>padding enabled: exactly {@code maxTokens} rows per document;</li>
     *   <li>otherwise: per document, the number of distinct tokens capped at {@code maxTokens}.</li>
     * </ul>
     *
     * <p>The last case reads the per-document counts, so every entry must already have been
     * populated by {@link #build(DocumentRepresentation[], int, int)}.
     *
     * @param documentRepresentationArr the tokenized documents
     * @return the number of rows the output frame needs
     */
    @Override
    public int getNumRows(DocumentRepresentation[] documentRepresentationArr) {
        if (this.wideFormat) {
            return documentRepresentationArr.length;
        }
        if (this.applyPadding) {
            return this.maxTokens * documentRepresentationArr.length;
        }
        return this.counts.stream()
            .mapToInt(tokenCounts -> Math.min(tokenCounts.size(), this.maxTokens))
            .sum();
    }

    /**
     * Allocates the per-document count storage with one (initially {@code null}) slot per document.
     *
     * @param i number of documents
     */
    @Override
    public void allocateInternalMeta(int i) {
        // Pre-fill with nulls so that build() can assign slots by index via List.set.
        this.counts = new ArrayList<>(Collections.nCopies(i, null));
    }

    /**
     * Counts token occurrences for the documents in {@code [i, endIndex)}, where {@code endIndex}
     * is obtained from {@code UtilFunctions.getEndIndex(length, i, i2)}, and stores one count map
     * per document.
     *
     * @param documentRepresentationArr the tokenized documents
     * @param i index of the first document to process
     * @param i2 forwarded to {@code UtilFunctions.getEndIndex} (presumably the block size — confirm)
     */
    @Override
    public void build(DocumentRepresentation[] documentRepresentationArr, int i, int i2) {
        int endIndex = UtilFunctions.getEndIndex(documentRepresentationArr.length, i, i2);
        for (int docIdx = i; docIdx < endIndex; docIdx++) {
            Map<String, Integer> tokenCounts = new HashMap<>();
            for (Token token : documentRepresentationArr[docIdx].tokens) {
                // Inserts 1 for a new token, otherwise adds 1 to the existing count.
                tokenCounts.merge(token.toString(), 1, Integer::sum);
            }
            this.counts.set(docIdx, tokenCounts);
        }
    }

    /**
     * Writes the (keys, token, count) rows for the documents in {@code [i, endIndex)} into the
     * output frame, emitting at most {@code maxTokens} rows per document.
     *
     * @param documentRepresentationArr the tokenized documents (only their keys are read here)
     * @param frameBlock the output frame to write into
     * @param i index of the first document to process
     * @param i2 forwarded to {@code UtilFunctions.getEndIndex} (presumably the block size — confirm)
     * @return the output row index following the last row written
     */
    @Override
    public int applyInternalRepresentation(DocumentRepresentation[] documentRepresentationArr, FrameBlock frameBlock, int i, int i2) {
        int endIndex = UtilFunctions.getEndIndex(documentRepresentationArr.length, i, i2);
        int outputRow = getOutputRow(i, this.counts);
        for (int docIdx = i; docIdx < endIndex; docIdx++) {
            List<Object> docKeys = documentRepresentationArr[docIdx].keys;
            Map<String, Integer> tokenCounts = this.counts.get(docIdx);

            // Without sort_alpha the iteration order is whatever the underlying map provides.
            Set<String> distinctTokens = tokenCounts.keySet();
            if (this.sort_alpha) {
                distinctTokens = new TreeSet<>(distinctTokens);
            }

            int emitted = 0;
            for (String token : distinctTokens) {
                if (emitted >= this.maxTokens) {
                    break;
                }
                // setKeys returns the column index at which the token is written.
                int tokenCol = setKeys(outputRow, docKeys, frameBlock);
                long count = tokenCounts.get(token).intValue();
                frameBlock.set(outputRow, tokenCol, token);
                // The count is stored as a Long (see the INT64 column in getOutSchema).
                frameBlock.set(outputRow, tokenCol + 1, Long.valueOf(count));
                outputRow++;
                emitted++;
            }
            if (this.applyPadding) {
                // NOTE(review): presumably fills the remaining rows up to maxTokens with "" / -1 —
                // confirm against TokenizerApplier.applyPaddingLong.
                outputRow = applyPaddingLong(outputRow, emitted, docKeys, frameBlock, "", -1);
            }
        }
        return outputRow;
    }

    /**
     * Returns the output schema: {@code numIdCols + 1} STRING columns followed by a single INT64
     * column holding the count.
     *
     * @return the value types of the output columns
     * @throws IllegalArgumentException if wide format is enabled, which this applier does not support
     */
    @Override
    public Types.ValueType[] getOutSchema() {
        if (this.wideFormat) {
            throw new IllegalArgumentException("Wide Format is not supported for Count Representation.");
        }
        Types.ValueType[] schema = UtilFunctions.nCopies(this.numIdCols + 2, Types.ValueType.STRING);
        schema[this.numIdCols + 1] = Types.ValueType.INT64;
        return schema;
    }
}
