package org.apache.sysds.runtime.transform.tokenize;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.sysds.common.Types;
import org.apache.sysds.conf.ConfigurationManager;
import org.apache.sysds.runtime.DMLRuntimeException;
import org.apache.sysds.runtime.frame.data.FrameBlock;
import org.apache.sysds.runtime.transform.tokenize.applier.TokenizerApplier;
import org.apache.sysds.runtime.transform.tokenize.applier.TokenizerApplierCount;
import org.apache.sysds.runtime.transform.tokenize.applier.TokenizerApplierHash;
import org.apache.sysds.runtime.transform.tokenize.applier.TokenizerApplierPosition;
import org.apache.sysds.runtime.transform.tokenize.builder.TokenizerBuilder;
import org.apache.sysds.runtime.util.DependencyTask;
import org.apache.sysds.runtime.util.DependencyThreadPool;

/* loaded from: input_file:org/apache/sysds/runtime/transform/tokenize/Tokenizer.class */
/**
 * Drives tokenization of a {@link FrameBlock}: a {@link TokenizerBuilder} fills an internal
 * per-row {@code DocumentRepresentation} array, and a {@link TokenizerApplier} turns that
 * array into the output frame.
 *
 * <p>Both phases can run single-threaded or as tasks on a {@link DependencyThreadPool}.
 * Failures of multi-threaded execution are reported as {@link DMLRuntimeException} with the
 * underlying cause attached; the thread pool is always shut down.
 */
public class Tokenizer implements Serializable {
    private static final long serialVersionUID = 7155673772374114577L;
    private static final boolean MULTI_THREADED_STAGES_TOKENIZER = false;
    private DocumentRepresentation[] internalRepresentation = null;
    private final TokenizerBuilder tokenizerBuilder;
    private final TokenizerApplier tokenizerApplier;
    protected static final Log LOG = LogFactory.getLog(Tokenizer.class.getName());
    public static final int TOKENIZE_NUM_BLOCKS = ConfigurationManager.getNumberTokenizeBlocks();

    /**
     * Task that allocates the columns of the output frame, sized by
     * {@link Tokenizer#getNumRowsEstimate()} of the owning tokenizer.
     */
    public static class AllocateOutputFrame implements Callable<Object> {
        protected final Tokenizer _tokenizer;
        protected final FrameBlock _out;

        protected AllocateOutputFrame(Tokenizer tokenizer, FrameBlock frameBlock) {
            this._tokenizer = tokenizer;
            this._out = frameBlock;
        }

        /**
         * Allocates the output columns.
         *
         * @return always {@code null}
         * @throws Exception if the row estimate is not available yet or allocation fails
         */
        @Override
        public Object call() throws Exception {
            this._out.ensureAllocatedColumns(this._tokenizer.getNumRowsEstimate());
            return null;
        }
    }

    /**
     * Creates a tokenizer from its two collaborating stages.
     *
     * @param tokenizerBuilder stage that creates the internal representation from the input frame
     * @param tokenizerApplier stage that writes the internal representation to the output frame
     */
    public Tokenizer(TokenizerBuilder tokenizerBuilder, TokenizerApplier tokenizerApplier) {
        this.tokenizerBuilder = tokenizerBuilder;
        this.tokenizerApplier = tokenizerApplier;
    }

    /**
     * @return the output schema reported by the applier
     */
    public Types.ValueType[] getSchema() {
        return this.tokenizerApplier.getOutSchema();
    }

    /**
     * @param i value forwarded unchanged to the applier's {@code getMaxNumRows}
     * @return the applier's answer for {@code i}
     */
    public int getMaxNumRows(int i) {
        return this.tokenizerApplier.getMaxNumRows(i);
    }

    /**
     * Computes the number of output rows from the already built internal representation.
     *
     * <ul>
     * <li>wide format: one row per internal document;</li>
     * <li>otherwise with padding: {@code documents * maxTokens};</li>
     * <li>otherwise: sum over documents of {@code min(tokenCount, maxTokens)}.</li>
     * </ul>
     *
     * @return the row count derived as described above
     * @throws DMLRuntimeException if the internal representation has not been allocated yet
     */
    public int getNumRowsEstimate() {
        if (this.internalRepresentation == null) {
            throw new DMLRuntimeException("Internal Token Representation was not computed yet. Can not get exact size.");
        }
        if (this.tokenizerApplier.isWideFormat()) {
            return this.internalRepresentation.length;
        }
        if (this.tokenizerApplier.hasPadding()) {
            return this.internalRepresentation.length * this.tokenizerApplier.getMaxTokens();
        }
        return Arrays.stream(this.internalRepresentation)
            .mapToInt(doc -> Math.min(doc.tokens.size(), this.tokenizerApplier.getMaxTokens()))
            .sum();
    }

    /**
     * @return the number of output columns reported by the applier
     */
    public long getNumCols() {
        return this.tokenizerApplier.getNumCols();
    }

    /**
     * Allocates a fresh internal representation array and the applier's internal metadata.
     *
     * @param i number of documents (array length)
     */
    public void allocateInternalRepresentation(int i) {
        this.internalRepresentation = new DocumentRepresentation[i];
        this.tokenizerApplier.allocateInternalMeta(i);
    }

    /**
     * Tokenizes the input frame single-threaded.
     *
     * @param frameBlock input frame
     * @return the tokenized output frame
     */
    public FrameBlock tokenize(FrameBlock frameBlock) {
        return tokenize(frameBlock, 1);
    }

    /**
     * Tokenizes the input frame, using a dependency task graph when more than one thread is
     * requested and the sequential build/apply path otherwise.
     *
     * @param frameBlock input frame
     * @param i number of threads; values {@code <= 1} select the sequential path
     * @return the tokenized output frame, sliced to the last written row if necessary
     * @throws DMLRuntimeException if the multi-threaded execution fails or is interrupted
     */
    public FrameBlock tokenize(FrameBlock frameBlock, int i) {
        allocateInternalRepresentation(frameBlock.getNumRows());
        FrameBlock out = new FrameBlock(getSchema());
        if (i <= 1) {
            build(frameBlock, i);
            out.ensureAllocatedColumns(this.tokenizerApplier.getNumRows(this.internalRepresentation));
            return apply(out, i);
        }

        DependencyThreadPool pool = new DependencyThreadPool(i);
        LOG.debug("Tokenizing with full DAG on " + i + " Threads");
        try {
            int lastRow = maxResultRow(pool.submitAllAndWait(getTokenizeTasks(frameBlock, out, pool)));
            if (lastRow != out.getNumRows()) {
                out = out.slice(0, lastRow - 1, 0, out.getNumColumns() - 1, (FrameBlock) null);
            }
            return out;
        } catch (InterruptedException | ExecutionException e) {
            // Returning the frame here would hand a partially filled result to the caller.
            throw taskFailure("MT tokenize failed", e);
        } finally {
            // Also reached when building the task graph throws, so the pool never leaks.
            pool.shutdown();
        }
    }

    /**
     * Assembles the full task list: output allocation (index 0), the build tasks, then the
     * apply tasks, together with the index-range dependency map handed to
     * {@link DependencyThreadPool}.
     *
     * <p>NOTE(review): the map entries are passed as-is to
     * {@code DependencyThreadPool.createDependencyList}; presumably each key range of task
     * indices depends on the corresponding value range - confirm against that method.
     *
     * @throws DMLRuntimeException if build and apply task counts do not line up, or the applier
     *         type is not one of the known implementations
     */
    private List<DependencyTask<?>> getTokenizeTasks(FrameBlock frameBlock, FrameBlock frameBlock2, DependencyThreadPool dependencyThreadPool) {
        List<DependencyTask<?>> tasks = new ArrayList<>();
        HashMap<Integer[], Integer[]> depMap = new HashMap<>();

        tasks.add(DependencyThreadPool.createDependencyTask(new AllocateOutputFrame(this, frameBlock2)));
        List<DependencyTask<?>> buildTasks = getBuildTasks(frameBlock);
        tasks.addAll(buildTasks);

        List<DependencyTask<?>> applyTasks = this.tokenizerApplier.getApplyTasks(this.internalRepresentation, frameBlock2);
        // getBuildTasks returns builder tasks followed by the same number of applier build tasks.
        final int blocks = buildTasks.size() / 2;
        if (applyTasks.size() != blocks) {
            throw new DMLRuntimeException("Different block sizes between build and apply tasks currently not supported");
        }

        boolean widePadded = this.tokenizerApplier.isWideFormat() && this.tokenizerApplier.hasPadding();
        if (!widePadded) {
            // Offset into the build tasks that the apply tasks are wired to: 0 for the position
            // applier, the number of blocks for the count and hash appliers.
            final int offset;
            if (this.tokenizerApplier instanceof TokenizerApplierPosition) {
                offset = 0;
            } else if (this.tokenizerApplier instanceof TokenizerApplierCount
                || this.tokenizerApplier instanceof TokenizerApplierHash) {
                offset = applyTasks.size();
            } else {
                throw new DMLRuntimeException("Unknown TokenizerApplier");
            }

            // Index of the first apply task once they are appended below.
            final int applyStart = tasks.size();
            final int applyEnd = applyStart + applyTasks.size();
            depMap.put(new Integer[] {0, 1}, new Integer[] {1, blocks + 1});
            depMap.put(new Integer[] {applyStart, applyEnd}, new Integer[] {0, 1});
            for (int b = 0; b < applyTasks.size(); b++) {
                depMap.put(new Integer[] {applyStart + b, applyEnd},
                    new Integer[] {1 + offset + b, 2 + offset + b});
            }
        }

        tasks.addAll(applyTasks);
        List<List<? extends Callable<?>>> deps = new ArrayList<>(Collections.nCopies(tasks.size(), null));
        DependencyThreadPool.createDependencyList(tasks, depMap, deps);
        return DependencyThreadPool.createDependencyTasks(tasks, deps);
    }

    /**
     * Writes the internal representation into the given output frame.
     *
     * @param frameBlock pre-allocated output frame
     * @param i number of threads; values {@code <= 1} apply sequentially
     * @return the output frame, sliced to the last written row if that differs from its row count
     * @throws DMLRuntimeException if the multi-threaded execution fails or is interrupted
     */
    public FrameBlock apply(FrameBlock frameBlock, int i) {
        final int lastRow;
        if (i > 1) {
            DependencyThreadPool pool = new DependencyThreadPool(i);
            try {
                lastRow = maxResultRow(pool.submitAllAndWait(
                    this.tokenizerApplier.getApplyTasks(this.internalRepresentation, frameBlock)));
            } catch (InterruptedException | ExecutionException e) {
                // Without a valid last row the slice below would run with a bogus range.
                throw taskFailure("MT Tokenizer apply failed", e);
            } finally {
                pool.shutdown();
            }
        } else {
            lastRow = this.tokenizerApplier.applyInternalRepresentation(this.internalRepresentation, frameBlock);
        }

        if (lastRow != frameBlock.getNumRows()) {
            frameBlock = frameBlock.slice(0, lastRow - 1, 0, frameBlock.getNumColumns() - 1, (FrameBlock) null);
        }
        return frameBlock;
    }

    /**
     * Creates the build tasks: the builder's tasks followed by the applier's build tasks, where
     * applier build task {@code k} is linked to builder task {@code k} in the dependency map.
     *
     * <p>Note that the applier's tasks are appended to the list returned by the builder.
     *
     * @param frameBlock input frame
     * @return dependency-wrapped build tasks (builder tasks first, applier build tasks second)
     * @throws DMLRuntimeException if builder and applier return a different number of tasks
     */
    public List<DependencyTask<?>> getBuildTasks(FrameBlock frameBlock) {
        List<DependencyTask<?>> tasks = this.tokenizerBuilder.getTasks(frameBlock, this.internalRepresentation);
        List<DependencyTask<?>> applierBuildTasks = this.tokenizerApplier.getBuildTasks(this.internalRepresentation);
        if (tasks.size() != applierBuildTasks.size()) {
            throw new DMLRuntimeException("Cannot create dependencies for mismatched array sizes");
        }
        tasks.addAll(applierBuildTasks);

        List<List<? extends Callable<?>>> deps = new ArrayList<>(Collections.nCopies(tasks.size(), null));
        HashMap<Integer[], Integer[]> depMap = new HashMap<>();
        final int applierOffset = applierBuildTasks.size();
        final int blocks = tasks.size() / 2;
        for (int b = 0; b < blocks; b++) {
            depMap.put(new Integer[] {b + applierOffset, b + applierOffset + 1}, new Integer[] {b, b + 1});
        }
        DependencyThreadPool.createDependencyList(tasks, depMap, deps);
        return DependencyThreadPool.createDependencyTasks(tasks, deps);
    }

    /**
     * Builds the internal representation from the input frame and lets the applier build its
     * metadata from it.
     *
     * @param frameBlock input frame
     * @param i number of threads; values {@code <= 1} build sequentially
     * @throws DMLRuntimeException if the multi-threaded execution fails or is interrupted
     */
    public void build(FrameBlock frameBlock, int i) {
        this.tokenizerApplier.allocateInternalMeta(frameBlock.getNumRows());
        if (i <= 1) {
            this.tokenizerBuilder.createInternalRepresentation(frameBlock, this.internalRepresentation);
            this.tokenizerApplier.build(this.internalRepresentation, 0, -1);
            return;
        }

        DependencyThreadPool pool = new DependencyThreadPool(i);
        try {
            pool.submitAllAndWait(getBuildTasks(frameBlock));
        } catch (InterruptedException | ExecutionException e) {
            // A failed build leaves the internal representation incomplete; do not continue.
            throw taskFailure("MT Tokenizer build failed", e);
        } finally {
            pool.shutdown();
        }
    }

    /**
     * Returns the largest integer result among the task results; {@code null} results (tasks
     * that do not report a row) count as 0.
     */
    private static int maxResultRow(List<?> results) {
        return results.stream()
            .mapToInt(result -> result == null ? 0 : (Integer) result)
            .max()
            .orElseThrow(() -> new DMLRuntimeException("No tokenizer task results available"));
    }

    /**
     * Wraps a failure of multi-threaded execution, restoring the interrupt flag when the
     * waiting thread was interrupted.
     */
    private static DMLRuntimeException taskFailure(String message, Exception cause) {
        if (cause instanceof InterruptedException) {
            Thread.currentThread().interrupt();
        }
        return new DMLRuntimeException(message, cause);
    }
}
