package org.apache.sysds.runtime.codegen;

import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Iterator;
import jcuda.Pointer;
import org.apache.sysds.hops.codegen.SpoofCompiler;
import org.apache.sysds.runtime.controlprogram.caching.MatrixObject;
import org.apache.sysds.runtime.controlprogram.context.ExecutionContext;
import org.apache.sysds.runtime.controlprogram.parfor.Task;
import org.apache.sysds.runtime.instructions.cp.ScalarObject;
import org.apache.sysds.runtime.instructions.gpu.context.GPUObject;
import org.apache.sysds.runtime.matrix.data.LibMatrixCUDA;

/* loaded from: input_file:org/apache/sysds/runtime/codegen/SpoofCUDAOperator.class */
/**
 * Contract for generated (SPOOF) operators that run through the native CUDA backend.
 *
 * <p>The default methods serialise operator arguments into a native staging buffer.
 * The layout written by {@link #packDataForTransfer} is: an 8-int header
 * ({@link #TRANSFERRED_DATA_HEADER_SIZE} bytes), one matrix descriptor of
 * {@link #JNI_MAT_ENTRY_SIZE} bytes per input matrix, one descriptor for the output,
 * and finally one {@code double} per scalar argument.
 */
public interface SpoofCUDAOperator {
    /** Bytes written per matrix descriptor: one long, two ints, three longs. */
    public static final int JNI_MAT_ENTRY_SIZE = 40;

    /** Bytes written for the transfer header: eight ints. */
    public static final int TRANSFERRED_DATA_HEADER_SIZE = 32;

    /* loaded from: input_file:org/apache/sysds/runtime/codegen/SpoofCUDAOperator$PrecisionProxy.class */
    /**
     * Base class for objects that execute a {@link SpoofCUDAOperator}; captures the native
     * context handle registered for the CUDA generator API at construction time.
     */
    public static abstract class PrecisionProxy {
        /** Native context handle looked up from {@code SpoofCompiler.native_contexts} for CUDA. */
        protected final long ctx = SpoofCompiler.native_contexts.get(SpoofCompiler.GeneratorAPI.CUDA).longValue();

        /**
         * Executes the given operator.
         *
         * @param spoofCUDAOperator operator to execute
         * @return integer status/result code; NOTE(review): exact meaning is defined by implementations
         */
        public abstract int exec(SpoofCUDAOperator spoofCUDAOperator);
    }

    /**
     * @return the operator name; used here as the label passed to GPU allocation and transpose calls
     */
    String getName();

    /**
     * Appends one matrix descriptor ({@link #JNI_MAT_ENTRY_SIZE} bytes) to the buffer.
     * Fields are written in the order: {@code j4}, {@code i}, {@code i2}, {@code j}, {@code j2},
     * {@code j3}.
     *
     * @param byteBuffer destination buffer, written at its current position
     * @param i          number of rows
     * @param i2         number of columns
     * @param j          device address of the sparse row-pointer array ({@code 0} for dense data)
     * @param j2         device address of the sparse column-index array ({@code 0} for dense data)
     * @param j3         device address of the values (sparse values or the dense data)
     * @param j4         number of non-zeros
     */
    default void writeMatrixDescriptorToBuffer(ByteBuffer byteBuffer, int i, int i2, long j, long j2, long j3, long j4) {
        byteBuffer.putLong(j4);
        byteBuffer.putInt(i);
        byteBuffer.putInt(i2);
        byteBuffer.putLong(j);
        byteBuffer.putLong(j2);
        byteBuffer.putLong(j3);
    }

    /**
     * Writes the descriptor for one matrix, choosing between its sparse and dense GPU representation.
     *
     * <p>For a dense matrix with {@code z == true}, a new device buffer is allocated, filled via
     * {@code LibMatrixCUDA.denseTranspose}, and its address is written instead of the original one.
     * The {@code z} flag is ignored for sparse matrices.
     *
     * @param byteBuffer       destination buffer
     * @param executionContext execution context providing GPU context 0 and the device pointers
     * @param matrixObject     matrix to describe
     * @param z                whether a dense matrix should be passed in transposed form
     */
    default void prepareMatrixPointers(ByteBuffer byteBuffer, ExecutionContext executionContext, MatrixObject matrixObject, boolean z) {
        if (matrixObject.getGPUObject(executionContext.getGPUContext(0)).isSparse()) {
            writeMatrixDescriptorToBuffer(byteBuffer, (int) matrixObject.getNumRows(), (int) matrixObject.getNumColumns(), GPUObject.getPointerAddress(executionContext.getGPUSparsePointerAddress(matrixObject).rowPtr), GPUObject.getPointerAddress(executionContext.getGPUSparsePointerAddress(matrixObject).colInd), GPUObject.getPointerAddress(executionContext.getGPUSparsePointerAddress(matrixObject).val), executionContext.getGPUSparsePointerAddress(matrixObject).nnz);
            return;
        }
        if (!z) {
            writeMatrixDescriptorToBuffer(byteBuffer, (int) matrixObject.getNumRows(), (int) matrixObject.getNumColumns(), 0L, 0L, executionContext.getGPUDensePointerAddress(matrixObject), matrixObject.getNnz());
            return;
        }
        int numRows = (int) matrixObject.getNumRows();
        int numColumns = (int) matrixObject.getNumColumns();
        // Compute the byte count in 64-bit arithmetic: rows * cols * elementSize exceeds the int
        // range for large dense matrices and would otherwise wrap before reaching the allocator.
        long transposedBytes = (long) numRows * numColumns * LibMatrixCUDA.sizeOfDataType;
        Pointer densePointer = matrixObject.getGPUObject(executionContext.getGPUContext(0)).getDensePointer();
        // NOTE(review): this buffer is not released in this method — confirm who owns and frees it.
        Pointer transposed = executionContext.getGPUContext(0).allocate(getName(), transposedBytes, false);
        LibMatrixCUDA.denseTranspose(executionContext, executionContext.getGPUContext(0), getName(), densePointer, transposed, numRows, numColumns);
        writeMatrixDescriptorToBuffer(byteBuffer, numRows, numColumns, 0L, 0L, GPUObject.getPointerAddress(transposed), matrixObject.getNnz());
    }

    /**
     * Serialises all operator arguments into the native staging buffer obtained from
     * {@code SpoofOperator.getNativeStagingBuffer}.
     *
     * <p>Header ints, in order: total size in bytes, {@code i2}, {@code (int) j}, {@code i},
     * {@code arrayList.size() - i}, output-present flag (0/1), number of scalars, and {@code -1}.
     * NOTE(review): {@code i} presumably is the number of primary inputs and the remainder are
     * side inputs — confirm against callers.
     *
     * <p>When {@code matrixObject} is {@code null}, an output buffer is allocated on the device,
     * stored in {@code pointerArr[0]}, and described as a 1x1 matrix.
     *
     * @param executionContext execution context providing GPU context 0
     * @param arrayList        input matrices; each gets one descriptor, in list order
     * @param arrayList2       scalar arguments, written as doubles after all descriptors
     * @param matrixObject     output matrix, or {@code null} to allocate a temporary result buffer
     * @param i                written to the header; also the index of the only input for which
     *                         {@code z} is honoured
     * @param i2               written to the header (NOTE(review): presumably an operator id)
     * @param j                written to the header, truncated to {@code int}
     * @param z                transpose flag forwarded for the input at index {@code i}
     * @param pointerArr       receives the allocated result pointer at index 0 when
     *                         {@code matrixObject} is {@code null}
     * @throws RuntimeException if the native staging buffer cannot be obtained
     */
    default void packDataForTransfer(ExecutionContext executionContext, ArrayList<MatrixObject> arrayList, ArrayList<ScalarObject> arrayList2, MatrixObject matrixObject, int i, int i2, long j, boolean z, Pointer[] pointerArr) {
        // One descriptor per input plus one for the output, 8 bytes per scalar, plus the header.
        int size = ((arrayList.size() + 1) * JNI_MAT_ENTRY_SIZE) + (arrayList2.size() * Double.BYTES) + TRANSFERRED_DATA_HEADER_SIZE;
        Pointer staging = new Pointer();
        if (SpoofOperator.getNativeStagingBuffer(staging, getContext(), size) != 0) {
            throw new RuntimeException("Failed to get native staging buffer from spoof operator");
        }
        ByteBuffer byteBuffer = staging.getByteBuffer();

        // Header: eight ints == TRANSFERRED_DATA_HEADER_SIZE bytes.
        byteBuffer.putInt(size);
        byteBuffer.putInt(i2);
        byteBuffer.putInt((int) j);
        byteBuffer.putInt(i);
        byteBuffer.putInt(arrayList.size() - i);
        byteBuffer.putInt(matrixObject == null ? 0 : 1);
        byteBuffer.putInt(arrayList2.size());
        byteBuffer.putInt(-1);

        // Input descriptors; only the input at index i may be passed transposed.
        for (int idx = 0; idx < arrayList.size(); idx++) {
            prepareMatrixPointers(byteBuffer, executionContext, arrayList.get(idx), idx == i && z);
        }

        // Output descriptor.
        if (matrixObject == null) {
            if (this instanceof SpoofCUDACellwise) {
                // Cell-wise operators get one element per chunk of input cells (ceiling division).
                // NOTE(review): the chunk size is taken from Task.MAX_VARNAME_SIZE * 2 as found in
                // SOURCE; this looks like a coincidental constant rather than a variable-name limit.
                long cellsPerChunk = Task.MAX_VARNAME_SIZE * 2;
                long cells = arrayList.get(0).getNumRows() * arrayList.get(0).getNumColumns();
                long chunks = (cells + cellsPerChunk - 1) / cellsPerChunk;
                pointerArr[0] = executionContext.getGPUContext(0).allocate(getName(), LibMatrixCUDA.sizeOfDataType * chunks, false);
            } else {
                // Single-element result buffer, allocated with the initialise flag set.
                pointerArr[0] = executionContext.getGPUContext(0).allocate(getName(), LibMatrixCUDA.sizeOfDataType, true);
            }
            writeMatrixDescriptorToBuffer(byteBuffer, 1, 1, 0L, 0L, GPUObject.getPointerAddress(pointerArr[0]), 1L);
        } else {
            prepareMatrixPointers(byteBuffer, executionContext, matrixObject, false);
        }

        // Scalars follow the descriptors, each as an 8-byte double.
        for (ScalarObject scalar : arrayList2) {
            byteBuffer.putDouble(scalar.getDoubleValue());
        }
    }

    /**
     * Executes the operator and returns a matrix result.
     *
     * @param executionContext execution context
     * @param arrayList        input matrices
     * @param arrayList2       scalar arguments
     * @param str              NOTE(review): presumably the output variable name — confirm with implementations
     * @return the resulting matrix object
     */
    MatrixObject execute(ExecutionContext executionContext, ArrayList<MatrixObject> arrayList, ArrayList<ScalarObject> arrayList2, String str);

    /**
     * Executes the operator and returns a scalar result.
     *
     * @param executionContext execution context
     * @param arrayList        input matrices
     * @param arrayList2       scalar arguments
     * @return the resulting scalar object
     */
    ScalarObject execute(ExecutionContext executionContext, ArrayList<MatrixObject> arrayList, ArrayList<ScalarObject> arrayList2);

    /**
     * NOTE(review): presumably the double-precision native entry point — confirm with implementations.
     *
     * @param j NOTE(review): presumably the native context handle
     * @return integer status/result code
     */
    int execute_dp(long j);

    /**
     * NOTE(review): presumably the single-precision native entry point — confirm with implementations.
     *
     * @param j NOTE(review): presumably the native context handle
     * @return integer status/result code
     */
    int execute_sp(long j);

    /**
     * @return the native context handle passed to {@code SpoofOperator.getNativeStagingBuffer}
     */
    long getContext();
}
