/*
 * Decompiled with CFR 0.152.
 */
package jdk.graal.compiler.lir.amd64;

import java.nio.ByteOrder;
import java.util.Arrays;
import java.util.EnumSet;
import jdk.graal.compiler.asm.Label;
import jdk.graal.compiler.asm.amd64.AMD64Address;
import jdk.graal.compiler.asm.amd64.AMD64Assembler;
import jdk.graal.compiler.asm.amd64.AMD64BaseAssembler;
import jdk.graal.compiler.asm.amd64.AMD64MacroAssembler;
import jdk.graal.compiler.asm.amd64.AVXKind;
import jdk.graal.compiler.code.DataSection;
import jdk.graal.compiler.core.common.LIRKind;
import jdk.graal.compiler.core.common.Stride;
import jdk.graal.compiler.debug.Assertions;
import jdk.graal.compiler.debug.GraalError;
import jdk.graal.compiler.lir.LIRInstructionClass;
import jdk.graal.compiler.lir.amd64.AMD64LIRInstruction;
import jdk.graal.compiler.lir.asm.ArrayDataPointerConstant;
import jdk.graal.compiler.lir.asm.CompilationResultBuilder;
import jdk.graal.compiler.lir.gen.LIRGeneratorTool;
import jdk.vm.ci.amd64.AMD64;
import jdk.vm.ci.amd64.AMD64Kind;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.TargetDescription;
import jdk.vm.ci.meta.JavaKind;
import jdk.vm.ci.meta.PlatformKind;
import jdk.vm.ci.meta.Value;

/**
 * Abstract base for AMD64 LIR instructions that emit vector code. It records the vector size to
 * use, the CPU features that may be relied upon, and the target description, and offers helpers
 * for querying feature support, allocating vector variables, building mask constants and
 * emitting short-array loads.
 */
public abstract class AMD64ComplexVectorOp
extends AMD64LIRInstruction {
    /** LIR instruction class descriptor created for this type. */
    public static final LIRInstructionClass<AMD64ComplexVectorOp> TYPE = LIRInstructionClass.create(AMD64ComplexVectorOp.class);
    /** Vector size chosen by the constructor: the requested size if it fits within the supported maximum, otherwise that maximum. */
    protected final AVXKind.AVXSize vectorSize;
    /** CPU features passed to the constructor; the static {@code supports} helper tolerates {@code null} here. */
    protected final EnumSet<AMD64.CPUFeature> runtimeCheckedCPUFeatures;
    /** Target description obtained from the generator tool in the constructor. */
    protected final TargetDescription targetDescription;

    /**
     * Creates the op and selects the vector size it will use.
     *
     * @param c the LIR instruction class of the concrete subclass
     * @param tool generator tool supplying the target description and the maximum vector size
     *            available for the given feature set
     * @param runtimeCheckedCPUFeatures CPU features stored for later {@code supports} queries and
     *            passed to {@code tool.getMaxVectorSize}
     * @param maxUsedVectorSize the largest vector size the op wants to use; the effective
     *            {@link #vectorSize} is this value if it fits within the supported maximum,
     *            otherwise the supported maximum
     */
    public AMD64ComplexVectorOp(LIRInstructionClass<? extends AMD64ComplexVectorOp> c, LIRGeneratorTool tool, EnumSet<AMD64.CPUFeature> runtimeCheckedCPUFeatures, AVXKind.AVXSize maxUsedVectorSize) {
        super((LIRInstructionClass<? extends AMD64LIRInstruction>)c);
        this.targetDescription = tool.target();
        this.runtimeCheckedCPUFeatures = runtimeCheckedCPUFeatures;
        AVXKind.AVXSize maxSupportedVectorSize = (AVXKind.AVXSize)tool.getMaxVectorSize(runtimeCheckedCPUFeatures);
        // Report the value that was actually checked: the failure context for maxUsedVectorSize
        // previously printed maxSupportedVectorSize, which made the assertion message misleading.
        assert (AMD64ComplexVectorOp.isXMMOrGreater(maxUsedVectorSize)) : Assertions.errorMessageContext("maxUsedVectorSize", maxUsedVectorSize);
        assert (AMD64ComplexVectorOp.isXMMOrGreater(maxSupportedVectorSize)) : Assertions.errorMessageContext("maxSupportedVectorSize", maxSupportedVectorSize);
        // Never exceed what the target/feature set supports.
        this.vectorSize = maxUsedVectorSize.fitsWithin(maxSupportedVectorSize) ? maxUsedVectorSize : maxSupportedVectorSize;
    }

    /**
     * Tells whether {@code size} is one of the full vector register sizes.
     *
     * @param size the size to test; {@code null} yields {@code false}
     * @return {@code true} for XMM, YMM or ZMM, {@code false} for anything else
     */
    private static boolean isXMMOrGreater(AVXKind.AVXSize size) {
        if (size == AVXKind.AVXSize.XMM) {
            return true;
        }
        if (size == AVXKind.AVXSize.YMM) {
            return true;
        }
        return size == AVXKind.AVXSize.ZMM;
    }

    /**
     * Maps a Java element kind to the AMD64 vector kind matching this op's {@link #vectorSize}.
     *
     * @param valueKind element kind; Byte, Char, Int, Long, Float and Double are handled
     * @return the vector kind of the current vector size with the corresponding element type
     * @throws RuntimeException (from {@code GraalError.shouldNotReachHere}) if the vector size is
     *             not XMM/YMM/ZMM or the element kind is not handled
     */
    protected AMD64Kind getVectorKind(JavaKind valueKind) {
        switch (this.vectorSize) {
            case XMM:
                return xmmKind(valueKind);
            case YMM:
                return ymmKind(valueKind);
            case ZMM:
                return zmmKind(valueKind);
            default:
                throw GraalError.shouldNotReachHere("Unsupported vector size.");
        }
    }

    /** Selects the 128-bit vector kind for the given element kind. */
    private static AMD64Kind xmmKind(JavaKind valueKind) {
        switch (valueKind) {
            case Byte:
                return AMD64Kind.V128_BYTE;
            case Char:
                return AMD64Kind.V128_WORD;
            case Int:
                return AMD64Kind.V128_DWORD;
            case Long:
                return AMD64Kind.V128_QWORD;
            case Float:
                return AMD64Kind.V128_SINGLE;
            case Double:
                return AMD64Kind.V128_DOUBLE;
            default:
                throw GraalError.shouldNotReachHere("Unsupported base value kind.");
        }
    }

    /** Selects the 256-bit vector kind for the given element kind. */
    private static AMD64Kind ymmKind(JavaKind valueKind) {
        switch (valueKind) {
            case Byte:
                return AMD64Kind.V256_BYTE;
            case Char:
                return AMD64Kind.V256_WORD;
            case Int:
                return AMD64Kind.V256_DWORD;
            case Long:
                return AMD64Kind.V256_QWORD;
            case Float:
                return AMD64Kind.V256_SINGLE;
            case Double:
                return AMD64Kind.V256_DOUBLE;
            default:
                throw GraalError.shouldNotReachHere("Unsupported base value kind.");
        }
    }

    /** Selects the 512-bit vector kind for the given element kind. */
    private static AMD64Kind zmmKind(JavaKind valueKind) {
        switch (valueKind) {
            case Byte:
                return AMD64Kind.V512_BYTE;
            case Char:
                return AMD64Kind.V512_WORD;
            case Int:
                return AMD64Kind.V512_DWORD;
            case Long:
                return AMD64Kind.V512_QWORD;
            case Float:
                return AMD64Kind.V512_SINGLE;
            case Double:
                return AMD64Kind.V512_DOUBLE;
            default:
                throw GraalError.shouldNotReachHere("Unsupported base value kind.");
        }
    }

    /**
     * Maps an element stride to the integer AMD64 vector kind matching this op's
     * {@link #vectorSize}.
     *
     * @param stride element stride; S1, S2, S4 and S8 are handled
     * @return the BYTE/WORD/DWORD/QWORD vector kind of the current vector size
     * @throws RuntimeException (from {@code GraalError.shouldNotReachHere}) if the vector size is
     *             not XMM/YMM/ZMM or the stride is not handled
     */
    protected AMD64Kind getVectorKind(Stride stride) {
        switch (this.vectorSize) {
            case XMM:
                return xmmKind(stride);
            case YMM:
                return ymmKind(stride);
            case ZMM:
                return zmmKind(stride);
            default:
                throw GraalError.shouldNotReachHere("Unsupported vector size.");
        }
    }

    /** Selects the 128-bit integer vector kind for the given stride. */
    private static AMD64Kind xmmKind(Stride stride) {
        switch (stride) {
            case S1:
                return AMD64Kind.V128_BYTE;
            case S2:
                return AMD64Kind.V128_WORD;
            case S4:
                return AMD64Kind.V128_DWORD;
            case S8:
                return AMD64Kind.V128_QWORD;
            default:
                throw GraalError.shouldNotReachHere("Unsupported base value kind.");
        }
    }

    /** Selects the 256-bit integer vector kind for the given stride. */
    private static AMD64Kind ymmKind(Stride stride) {
        switch (stride) {
            case S1:
                return AMD64Kind.V256_BYTE;
            case S2:
                return AMD64Kind.V256_WORD;
            case S4:
                return AMD64Kind.V256_DWORD;
            case S8:
                return AMD64Kind.V256_QWORD;
            default:
                throw GraalError.shouldNotReachHere("Unsupported base value kind.");
        }
    }

    /** Selects the 512-bit integer vector kind for the given stride. */
    private static AMD64Kind zmmKind(Stride stride) {
        switch (stride) {
            case S1:
                return AMD64Kind.V512_BYTE;
            case S2:
                return AMD64Kind.V512_WORD;
            case S4:
                return AMD64Kind.V512_DWORD;
            case S8:
                return AMD64Kind.V512_QWORD;
            default:
                throw GraalError.shouldNotReachHere("Unsupported base value kind.");
        }
    }

    /**
     * Creates {@code n} fresh LIR variables of the given platform kind.
     *
     * @param tool generator tool used to create the variables
     * @param kind platform kind of every variable
     * @param n number of variables to create
     * @return a new array holding the variables in creation order
     */
    protected Value[] allocateTempRegisters(LIRGeneratorTool tool, AMD64Kind kind, int n) {
        Value[] temps = new Value[n];
        // A new LIRKind is requested per slot, as each variable gets its own kind instance.
        Arrays.setAll(temps, slot -> tool.newVariable(LIRKind.value(kind)));
        return temps;
    }

    /**
     * Creates {@code n} vector variables whose kind is derived from a Java element kind and this
     * op's vector size.
     *
     * @param tool generator tool used to create the variables
     * @param valueKind element kind, resolved through {@code getVectorKind(JavaKind)}
     * @param n number of variables to create
     * @return the newly created variables
     */
    protected Value[] allocateVectorRegisters(LIRGeneratorTool tool, JavaKind valueKind, int n) {
        AMD64Kind vectorKind = this.getVectorKind(valueKind);
        LIRKind lirKind = LIRKind.value(vectorKind);
        return this.allocateVectorRegisters(tool, lirKind, n);
    }

    /**
     * Creates {@code n} vector variables whose kind is derived from an element stride and this
     * op's vector size.
     *
     * @param tool generator tool used to create the variables
     * @param stride element stride, resolved through {@code getVectorKind(Stride)}
     * @param n number of variables to create
     * @return the newly created variables
     */
    protected Value[] allocateVectorRegisters(LIRGeneratorTool tool, Stride stride, int n) {
        AMD64Kind vectorKind = this.getVectorKind(stride);
        LIRKind lirKind = LIRKind.value(vectorKind);
        return this.allocateVectorRegisters(tool, lirKind, n);
    }

    /**
     * Creates {@code n} fresh LIR variables that all share the given LIR kind.
     *
     * @param tool generator tool used to create the variables
     * @param kind LIR kind passed to every {@code newVariable} call
     * @param n number of variables to create
     * @return a new array holding the variables in creation order
     */
    protected Value[] allocateVectorRegisters(LIRGeneratorTool tool, LIRKind kind, int n) {
        Value[] registers = new Value[n];
        Arrays.setAll(registers, slot -> tool.newVariable(kind));
        return registers;
    }

    /**
     * Varargs convenience form of the set-based {@code supports} check.
     *
     * @param target target whose architecture features are consulted
     * @param runtimeCheckedCPUFeatures additional feature set to consult; may be {@code null}
     * @param requiredFeature first required feature
     * @param additionalRequiredFeatures further required features, possibly none
     * @return the result of the set-based check over all listed features
     */
    public static boolean supports(TargetDescription target, EnumSet<AMD64.CPUFeature> runtimeCheckedCPUFeatures, AMD64.CPUFeature requiredFeature, AMD64.CPUFeature ... additionalRequiredFeatures) {
        EnumSet<AMD64.CPUFeature> required = EnumSet.of(requiredFeature, additionalRequiredFeatures);
        return AMD64ComplexVectorOp.supports(target, runtimeCheckedCPUFeatures, required);
    }

    /**
     * Checks whether all required features are available from a single source: either entirely
     * from {@code runtimeCheckedCPUFeatures} or entirely from the target architecture's features.
     * The two sets are not merged.
     *
     * @param target target whose architecture features are consulted
     * @param runtimeCheckedCPUFeatures additional feature set; skipped when {@code null}
     * @param requiredFeatures the features that must all be present
     * @return {@code true} if one of the two sets contains every required feature
     */
    public static boolean supports(TargetDescription target, EnumSet<AMD64.CPUFeature> runtimeCheckedCPUFeatures, EnumSet<AMD64.CPUFeature> requiredFeatures) {
        if (runtimeCheckedCPUFeatures != null && runtimeCheckedCPUFeatures.containsAll(requiredFeatures)) {
            return true;
        }
        AMD64 arch = (AMD64)target.arch;
        return arch.getFeatures().containsAll(requiredFeatures);
    }

    /**
     * Checks for both AVX512VL and AVX512BW. Each feature is checked on its own, so one may come
     * from the runtime-checked set and the other from the target architecture.
     *
     * @param target target whose architecture features are consulted
     * @param runtimeCheckedCPUFeatures additional feature set; may be {@code null}
     * @return {@code true} if both features are supported
     */
    public static boolean supportsAVX512VLBW(TargetDescription target, EnumSet<AMD64.CPUFeature> runtimeCheckedCPUFeatures) {
        if (!AMD64ComplexVectorOp.supports(target, runtimeCheckedCPUFeatures, AMD64.CPUFeature.AVX512VL)) {
            return false;
        }
        return AMD64ComplexVectorOp.supports(target, runtimeCheckedCPUFeatures, AMD64.CPUFeature.AVX512BW);
    }

    /**
     * Checks a single CPU feature against this op's target and runtime-checked feature set.
     *
     * @param cpuFeature the feature to look for
     * @return {@code true} if the static {@code supports} check succeeds for that feature
     */
    protected boolean supports(AMD64.CPUFeature cpuFeature) {
        return AMD64ComplexVectorOp.supports(this.targetDescription, this.runtimeCheckedCPUFeatures, cpuFeature);
    }

    /**
     * @return {@code true} if YMM fits within this op's vector size and AVX2 is supported
     */
    protected boolean supportsAVX2AndYMM() {
        if (!AVXKind.AVXSize.YMM.fitsWithin(this.vectorSize)) {
            return false;
        }
        return this.supports(AMD64.CPUFeature.AVX2);
    }

    /**
     * @return {@code true} if ZMM fits within this op's vector size and both AVX512VL and
     *         AVX512BW are supported
     */
    protected boolean supportsAVX512VLBWAndZMM() {
        if (!AVXKind.AVXSize.ZMM.fitsWithin(this.vectorSize)) {
            return false;
        }
        return AMD64ComplexVectorOp.supportsAVX512VLBW(this.targetDescription, this.runtimeCheckedCPUFeatures);
    }

    /**
     * @return {@code true} if the BMI2 feature is supported according to {@code supports}
     */
    protected boolean supportsBMI2() {
        boolean hasBmi2 = this.supports(AMD64.CPUFeature.BMI2);
        return hasBmi2;
    }

    /**
     * @return {@code true} if BMI1 is supported and the target architecture's flags contain
     *         {@code UseCountTrailingZerosInstruction}
     */
    protected boolean supportsTZCNT() {
        if (!this.supports(AMD64.CPUFeature.BMI1)) {
            return false;
        }
        AMD64 arch = (AMD64)this.targetDescription.arch;
        return arch.getFlags().contains(AMD64.Flag.UseCountTrailingZerosInstruction);
    }

    /**
     * Emits a 64-bit bit-scan-forward from {@code src} into {@code dst}, using TZCNT when
     * {@link #supportsTZCNT()} allows it and BSF otherwise.
     *
     * @param masm assembler to emit into
     * @param dst destination register
     * @param src source register
     */
    protected void bsfq(AMD64MacroAssembler masm, Register dst, Register src) {
        if (!this.supportsTZCNT()) {
            masm.bsfq(dst, src);
            return;
        }
        AMD64Assembler.AMD64RMOp.TZCNT.emit((AMD64Assembler)masm, AMD64BaseAssembler.OperandSize.QWORD, dst, src);
    }

    /**
     * Always answers {@code true} for ops derived from this class.
     * NOTE(review): presumably this asks the backend to clear the upper halves of vector
     * registers after the op - confirm against the overridden declaration.
     */
    @Override
    public boolean needsClearUpperVectorRegisters() {
        return true;
    }

    /**
     * Computes how many elements of the given stride fit into a vector of the given size.
     *
     * @param size vector size, queried for its byte count
     * @param stride element stride; its {@code log2} is used as the shift amount
     * @return the vector's byte count shifted right by {@code stride.log2}
     */
    static int elementsPerVector(AVXKind.AVXSize size, Stride stride) {
        int vectorBytes = size.getBytes();
        return vectorBytes >> stride.log2;
    }

    /**
     * Emits code that loads an array tail of {@code lengthTail} elements at {@code arr} into
     * {@code vecArray}, rearranging the bytes with PSHUFB using a control vector read from
     * {@code xmmTailShuffleMask} at an offset that depends on the negated tail length.
     * <p>
     * NOTE(review): judging by the name, the result presumably holds the tail in array order;
     * the exact outcome depends on the layout of {@code xmmTailShuffleMask}, which is supplied by
     * the caller - confirm there.
     * <p>
     * NOTE(review): the first load reads the 16 bytes that end at the end of the tail, so for
     * tails shorter than 16 bytes it starts before {@code arr}; presumably callers guarantee that
     * this memory is readable.
     * <p>
     * Registers written besides {@code vecArray}: {@code tmp} (mask address), {@code lengthTail}
     * (negated in place), {@code vecTmp2} (shuffle control) and, on the AVX2/YMM path only,
     * {@code vecTmp1}.
     *
     * @param crb compilation result builder used to reference the mask in the data section
     * @param asm assembler to emit into
     * @param stride element stride used to scale {@code lengthTail} in addresses
     * @param xmmTailShuffleMask data-section entry holding the shuffle mask
     * @param arr register holding the address of the first tail element
     * @param lengthTail register holding the tail length in elements; negated by the emitted code
     * @param vecArray destination vector register
     * @param tmp scratch general-purpose register
     * @param vecTmp1 scratch vector register
     * @param vecTmp2 scratch vector register
     */
    protected void loadTailIntoYMMOrdered(CompilationResultBuilder crb, AMD64MacroAssembler asm, Stride stride, DataSection.Data xmmTailShuffleMask, Register arr, Register lengthTail, Register vecArray, Register tmp, Register vecTmp1, Register vecTmp2) {
        if (this.supportsAVX2AndYMM()) {
            Label lessThan16 = new Label();
            Label done = new Label();
            // tmp = address of the shuffle mask
            asm.leaq(tmp, (AMD64Address)crb.recordDataSectionReference(xmmTailShuffleMask));
            // vecArray = the 16 bytes ending at the end of the tail
            asm.movdqu(AVXKind.AVXSize.XMM, vecArray, new AMD64Address(arr, lengthTail, stride, -AVXKind.AVXSize.XMM.getBytes()));
            // fewer elements than one XMM vector holds: take the short path
            asm.cmpqAndJcc(lengthTail, AMD64ComplexVectorOp.elementsPerVector(AVXKind.AVXSize.XMM, stride), AMD64Assembler.ConditionFlag.Less, lessThan16, true);
            // vecTmp1 = the first 16 bytes of the tail
            asm.movdqu(AVXKind.AVXSize.XMM, vecTmp1, new AMD64Address(arr));
            // index the mask backwards from displacement 32 by the tail length
            asm.negq(lengthTail);
            asm.movdqu(AVXKind.AVXSize.XMM, vecTmp2, new AMD64Address(tmp, lengthTail, stride, AVXKind.AVXSize.XMM.getBytes() * 2));
            asm.pshufb(AVXKind.AVXSize.XMM, vecArray, vecTmp2);
            // NOTE(review): with immediate 2, VPERM2I128 presumably places vecTmp1's low lane in
            // the low half and the shuffled vecArray in the high half - confirm operand order.
            AMD64Assembler.VexRVMIOp.VPERM2I128.emit((AMD64Assembler)asm, AVXKind.AVXSize.YMM, vecArray, vecArray, vecTmp1, 2);
            asm.jmpb(done);
            asm.bind(lessThan16);
            // short path: index the mask backwards from displacement 16 and shuffle in place
            asm.negq(lengthTail);
            asm.movdqu(AVXKind.AVXSize.XMM, vecTmp2, new AMD64Address(tmp, lengthTail, stride, AVXKind.AVXSize.XMM.getBytes()));
            asm.pshufb(AVXKind.AVXSize.XMM, vecArray, vecTmp2);
            asm.bind(done);
        } else {
            // without AVX2/YMM only the single-XMM sequence is emitted (same as the short path)
            asm.leaq(tmp, (AMD64Address)crb.recordDataSectionReference(xmmTailShuffleMask));
            asm.movdqu(AVXKind.AVXSize.XMM, vecArray, new AMD64Address(arr, lengthTail, stride, -AVXKind.AVXSize.XMM.getBytes()));
            asm.negq(lengthTail);
            asm.movdqu(AVXKind.AVXSize.XMM, vecTmp2, new AMD64Address(tmp, lengthTail, stride, AVXKind.AVXSize.XMM.getBytes()));
            asm.pshufb(AVXKind.AVXSize.XMM, vecArray, vecTmp2);
        }
    }

    /**
     * Emits code that fills {@code vecArray} from two overlapping 16-byte loads: one at the
     * start of the array and one ending at the array's end. The second load is rearranged with
     * PSHUFB and both are combined with VPERM2I128. Requires AVX2 and YMM support.
     * <p>
     * NOTE(review): judging by the name, this presumably targets arrays of 16 to 31 bytes and
     * yields them in array order; the precise result depends on the caller-supplied
     * {@code xmmTailShuffleMask} - confirm there.
     * <p>
     * Registers written besides {@code vecArray}: {@code tmp}, {@code lengthTail} (negated in
     * place), {@code vecTmp1} and {@code vecTmp2}.
     *
     * @param crb compilation result builder used to reference the mask in the data section
     * @param asm assembler to emit into
     * @param stride element stride used to scale {@code lengthTail} in addresses
     * @param xmmTailShuffleMask data-section entry holding the shuffle mask
     * @param arr register holding the array start address
     * @param lengthTail register holding the length in elements; negated by the emitted code
     * @param tmp scratch general-purpose register
     * @param vecArray destination vector register
     * @param vecTmp1 scratch vector register
     * @param vecTmp2 scratch vector register
     */
    protected void loadLessThan32IntoYMMOrdered(CompilationResultBuilder crb, AMD64MacroAssembler asm, Stride stride, DataSection.Data xmmTailShuffleMask, Register arr, Register lengthTail, Register tmp, Register vecArray, Register vecTmp1, Register vecTmp2) {
        GraalError.guarantee(this.supportsAVX2AndYMM(), "AVX2 and YMM support required");
        // vecTmp1 = first 16 bytes, vecArray = last 16 bytes (the two loads may overlap)
        asm.movdqu(AVXKind.AVXSize.XMM, vecTmp1, new AMD64Address(arr));
        asm.movdqu(AVXKind.AVXSize.XMM, vecArray, new AMD64Address(arr, lengthTail, stride, -AVXKind.AVXSize.XMM.getBytes()));
        asm.leaq(tmp, (AMD64Address)crb.recordDataSectionReference(xmmTailShuffleMask));
        // index the mask backwards from displacement 32 by the length
        asm.negq(lengthTail);
        asm.movdqu(AVXKind.AVXSize.XMM, vecTmp2, new AMD64Address(tmp, lengthTail, stride, AVXKind.AVXSize.XMM.getBytes() * 2));
        asm.pshufb(AVXKind.AVXSize.XMM, vecArray, vecTmp2);
        // NOTE(review): with immediate 2 this presumably puts vecTmp1 in the low lane and the
        // shuffled vecArray in the high lane - confirm operand order of emit.
        AMD64Assembler.VexRVMIOp.VPERM2I128.emit((AMD64Assembler)asm, AVXKind.AVXSize.YMM, vecArray, vecArray, vecTmp1, 2);
    }

    /**
     * Emits code that fills {@code vecArray} from two overlapping 16-byte loads: one at the
     * start of the array and one ending at the array's end. The second load is ANDed with a mask
     * read from {@code maskTail} instead of being shuffled, then both are combined with
     * VPERM2I128. Requires AVX2 and YMM support.
     * <p>
     * NOTE(review): "unordered" presumably means that the masked second half is not shifted into
     * array order; the bytes kept depend on the layout of {@code maskTail} - confirm with the
     * code that creates it.
     * <p>
     * Registers written besides {@code vecArray}: {@code tmp} and {@code vecTmp1};
     * {@code vecTmp2} is handed to {@code pandU} (presumably as scratch). {@code lengthTail} is
     * only read.
     *
     * @param crb compilation result builder used to reference the mask in the data section
     * @param asm assembler to emit into
     * @param stride element stride used to scale {@code lengthTail} in addresses
     * @param maskTail data-section entry holding the AND mask
     * @param arr register holding the array start address
     * @param lengthTail register holding the length in elements
     * @param tmp scratch general-purpose register
     * @param vecArray destination vector register
     * @param vecTmp1 scratch vector register
     * @param vecTmp2 scratch vector register
     */
    protected void loadLessThan32IntoYMMUnordered(CompilationResultBuilder crb, AMD64MacroAssembler asm, Stride stride, DataSection.Data maskTail, Register arr, Register lengthTail, Register tmp, Register vecArray, Register vecTmp1, Register vecTmp2) {
        GraalError.guarantee(this.supportsAVX2AndYMM(), "AVX2 and YMM support required");
        // vecArray = first 16 bytes, vecTmp1 = last 16 bytes (the two loads may overlap)
        asm.movdqu(AVXKind.AVXSize.XMM, vecArray, new AMD64Address(arr));
        asm.movdqu(AVXKind.AVXSize.XMM, vecTmp1, new AMD64Address(arr, lengthTail, stride, -AVXKind.AVXSize.XMM.getBytes()));
        asm.leaq(tmp, (AMD64Address)crb.recordDataSectionReference(maskTail));
        // mask vecTmp1 with the mask bytes found at tmp + lengthTail * stride
        asm.pandU(AVXKind.AVXSize.YMM, vecTmp1, new AMD64Address(tmp, lengthTail, stride), vecTmp2);
        AMD64Assembler.VexRVMIOp.VPERM2I128.emit((AMD64Assembler)asm, AVXKind.AVXSize.YMM, vecArray, vecArray, vecTmp1, 2);
    }

    /**
     * Emits code that fills {@code vecArray} from two overlapping 8-byte loads: one at the start
     * of the array and one ending at the array's end. The second load is rearranged with PSHUFB
     * using a window of the mask from {@code createXMMTailShuffleMask(8)} and then moved into the
     * upper half of {@code vecArray} with MOVLHPS.
     * <p>
     * NOTE(review): judging by the name, this presumably targets arrays of 8 to 15 bytes and
     * yields them in array order - confirm with callers.
     * <p>
     * Registers written besides {@code vecArray}: {@code tmp}, {@code lengthTail} (negated in
     * place), {@code vecTmp1} and {@code vecTmp2}.
     *
     * @param crb compilation result builder used to place the shuffle mask constant
     * @param asm assembler to emit into
     * @param stride element stride used to scale {@code lengthTail} in addresses
     * @param arr register holding the array start address
     * @param lengthTail register holding the length in elements; negated by the emitted code
     * @param tmp scratch general-purpose register
     * @param vecArray destination vector register
     * @param vecTmp1 scratch vector register
     * @param vecTmp2 scratch vector register
     */
    protected static void loadLessThan16IntoXMMOrdered(CompilationResultBuilder crb, AMD64MacroAssembler asm, Stride stride, Register arr, Register lengthTail, Register tmp, Register vecArray, Register vecTmp1, Register vecTmp2) {
        // vecArray = first 8 bytes, vecTmp1 = last 8 bytes (the two loads may overlap)
        asm.movdq(vecArray, new AMD64Address(arr));
        asm.movdq(vecTmp1, new AMD64Address(arr, lengthTail, stride, -8));
        // the 24-byte mask is placed with an alignment request of 32 bytes
        asm.leaq(tmp, AMD64ComplexVectorOp.getMaskOnce(crb, AMD64ComplexVectorOp.createXMMTailShuffleMask(8), AVXKind.AVXSize.XMM.getBytes() * 2));
        // index the mask backwards from displacement 16 by the length
        asm.negq(lengthTail);
        asm.movdqu(AVXKind.AVXSize.XMM, vecTmp2, new AMD64Address(tmp, lengthTail, stride, AVXKind.AVXSize.XMM.getBytes()));
        asm.pshufb(AVXKind.AVXSize.XMM, vecTmp1, vecTmp2);
        // low 8 bytes of vecTmp1 become the high 8 bytes of vecArray
        asm.movlhps(vecArray, vecTmp1);
    }

    /**
     * Emits code that fills {@code vecArray} from two overlapping 8-byte loads: one at the start
     * of the array and one ending at the array's end. The second load is ANDed with a mask read
     * from {@code maskTail} and then moved into the upper half of {@code vecArray} with MOVLHPS.
     * <p>
     * The mask is addressed with an extra displacement of 16 bytes when AVX2 and YMM are
     * supported. NOTE(review): presumably this is because the tail mask is sized from
     * {@code vectorSize} and is therefore twice as long in that case - confirm against the code
     * that creates {@code maskTail}.
     * <p>
     * Registers written besides {@code vecArray}: {@code tmp} and {@code vecTmp1};
     * {@code vecTmp2} is handed to {@code pandU} (presumably as scratch). {@code lengthTail} is
     * only read.
     *
     * @param crb compilation result builder used to reference the mask in the data section
     * @param asm assembler to emit into
     * @param stride element stride used to scale {@code lengthTail} in addresses
     * @param maskTail data-section entry holding the AND mask
     * @param arr register holding the array start address
     * @param lengthTail register holding the length in elements
     * @param tmp scratch general-purpose register
     * @param vecArray destination vector register
     * @param vecTmp1 scratch vector register
     * @param vecTmp2 scratch vector register
     */
    protected void loadLessThan16IntoXMMUnordered(CompilationResultBuilder crb, AMD64MacroAssembler asm, Stride stride, DataSection.Data maskTail, Register arr, Register lengthTail, Register tmp, Register vecArray, Register vecTmp1, Register vecTmp2) {
        // vecArray = first 8 bytes, vecTmp1 = last 8 bytes (the two loads may overlap)
        asm.movdq(vecArray, new AMD64Address(arr));
        asm.movdq(vecTmp1, new AMD64Address(arr, lengthTail, stride, -8));
        asm.leaq(tmp, (AMD64Address)crb.recordDataSectionReference(maskTail));
        // mask vecTmp1 with the mask bytes selected by the length
        asm.pandU(this.vectorSize, vecTmp1, new AMD64Address(tmp, lengthTail, stride, this.supportsAVX2AndYMM() ? AVXKind.AVXSize.XMM.getBytes() : 0), vecTmp2);
        // low 8 bytes of vecTmp1 become the high 8 bytes of vecArray
        asm.movlhps(vecArray, vecTmp1);
    }

    /**
     * Emits code that assembles a 64-bit value from two overlapping 4-byte loads (array start and
     * array end) using general-purpose shifts, then moves it into {@code vecArray}. Only strides
     * with {@code log2 < 2} are accepted, and {@code lengthTail} must be RCX because the variable
     * shift uses it implicitly.
     * <p>
     * NOTE(review): judging by the name, this presumably targets arrays of 4 to 7 bytes and
     * yields them in array order - confirm with callers.
     * <p>
     * Registers written besides {@code vecArray}: {@code tmp}, {@code tmp2} and
     * {@code lengthTail} (reduced to a bit count).
     *
     * @param asm assembler to emit into
     * @param stride element stride; must satisfy {@code stride.log2 < 2}
     * @param arr register holding the array start address
     * @param lengthTail register holding the length in elements; must be RCX, overwritten
     * @param vecArray destination vector register
     * @param tmp scratch general-purpose register
     * @param tmp2 scratch general-purpose register
     */
    protected static void loadLessThan8IntoXMMOrdered(AMD64MacroAssembler asm, Stride stride, Register arr, Register lengthTail, Register vecArray, Register tmp, Register tmp2) {
        GraalError.guarantee(stride.log2 < 2, "stride of more than 2 bytes not supported");
        // tmp = first 4 bytes, tmp2 = last 4 bytes (the two loads may overlap)
        asm.movl(tmp, new AMD64Address(arr));
        asm.movl(tmp2, new AMD64Address(arr, lengthTail, stride, -4));
        // keep only the element count beyond the first 4 bytes (mask 3 for S1, 1 for S2) ...
        asm.andq(lengthTail, 3 >> stride.log2);
        // ... and convert it from elements to bits
        asm.shlq(lengthTail, 3 + stride.log2);
        GraalError.guarantee(lengthTail.equals((Object)AMD64.rcx), "lengthTail must be RCX, as it is used as an implicit argument to shlq");
        // shift tmp2 left by the bit count held in RCX
        asm.shlq(tmp2);
        // clear the low 32 bits of tmp2 so it contributes only the upper half
        asm.shrq(tmp2, 32);
        asm.shlq(tmp2, 32);
        asm.orq(tmp, tmp2);
        asm.movdq(vecArray, tmp);
    }

    /**
     * Emits code that assembles a 64-bit value from two overlapping 4-byte loads (array start and
     * array end): the end load is ANDed with mask bytes from {@code maskTail}, shifted into the
     * upper 32 bits, ORed with the start load and moved into {@code vecArray}.
     * <p>
     * The mask is addressed with a displacement of 8, plus 16 more bytes when AVX2 and YMM are
     * supported. NOTE(review): presumably this matches the layout of a tail mask sized from
     * {@code vectorSize} - confirm against the code that creates {@code maskTail}.
     * <p>
     * Registers written besides {@code vecArray}: {@code tmp} (first the mask address, then the
     * start load and result) and {@code tmp2}. {@code lengthTail} is only read.
     *
     * @param crb compilation result builder used to reference the mask in the data section
     * @param asm assembler to emit into
     * @param stride element stride used to scale {@code lengthTail} in addresses
     * @param maskTail data-section entry holding the AND mask
     * @param arr register holding the array start address
     * @param lengthTail register holding the length in elements
     * @param vecArray destination vector register
     * @param tmp scratch general-purpose register
     * @param tmp2 scratch general-purpose register
     */
    protected void loadLessThan8IntoXMMUnordered(CompilationResultBuilder crb, AMD64MacroAssembler asm, Stride stride, DataSection.Data maskTail, Register arr, Register lengthTail, Register vecArray, Register tmp, Register tmp2) {
        asm.leaq(tmp, (AMD64Address)crb.recordDataSectionReference(maskTail));
        // tmp2 = last 4 bytes of the array, then masked
        asm.movl(tmp2, new AMD64Address(arr, lengthTail, stride, -4));
        asm.andq(tmp2, new AMD64Address(tmp, lengthTail, stride, (this.supportsAVX2AndYMM() ? AVXKind.AVXSize.XMM.getBytes() : 0) + 8));
        // tmp is reused: the mask address is no longer needed, load the first 4 bytes
        asm.movl(tmp, new AMD64Address(arr));
        // masked end bytes go to the upper 32 bits, start bytes stay in the lower 32 bits
        asm.shlq(tmp2, 32);
        asm.orq(tmp, tmp2);
        asm.movdq(vecArray, tmp);
    }

    /**
     * Emits a vector load of the given constant bytes into {@code vecMask}, using this op's
     * vector size.
     *
     * @param crb compilation result builder used to place the constant
     * @param asm assembler to emit into
     * @param vecMask destination vector register
     * @param mask constant bytes to load
     */
    protected void loadMask(CompilationResultBuilder crb, AMD64MacroAssembler asm, Register vecMask, byte[] mask) {
        AMD64Address maskAddress = AMD64ComplexVectorOp.getMaskOnce(crb, mask);
        asm.movdqu(this.vectorSize, vecMask, maskAddress);
    }

    /**
     * Emits a vector load of a constant in which every element of the given stride holds
     * {@code value}, as produced by {@code createMaskBytes}.
     *
     * @param crb compilation result builder used to place the constant
     * @param asm assembler to emit into
     * @param stride element stride of the repeated value
     * @param vecMask destination vector register
     * @param value value written to every element
     */
    protected void loadMask(CompilationResultBuilder crb, AMD64MacroAssembler asm, Stride stride, Register vecMask, int value) {
        byte[] broadcast = this.createMaskBytes(value, stride);
        AMD64Address maskAddress = AMD64ComplexVectorOp.getMaskOnce(crb, broadcast);
        asm.movdqu(this.vectorSize, vecMask, maskAddress);
    }

    /**
     * Records a code reference to the given constant bytes, requesting an alignment derived from
     * the array's own length.
     *
     * @param crb compilation result builder used to place the constant
     * @param mask constant bytes
     * @return the address through which the constant can be accessed
     */
    protected static AMD64Address getMaskOnce(CompilationResultBuilder crb, byte[] mask) {
        int alignLength = mask.length;
        return AMD64ComplexVectorOp.getMaskOnce(crb, mask, alignLength);
    }

    /**
     * Records a code reference to the given constant bytes with an explicit alignment request.
     *
     * @param crb compilation result builder used to place the constant
     * @param mask constant bytes
     * @param alignLength requested alignment, validated by
     *            {@code crb.dataBuilder.ensureValidDataAlignment}
     * @return the address through which the constant can be accessed
     */
    protected static AMD64Address getMaskOnce(CompilationResultBuilder crb, byte[] mask, int alignLength) {
        return (AMD64Address)crb.recordDataReferenceInCode(mask, crb.dataBuilder.ensureValidDataAlignment(alignLength));
    }

    /**
     * Creates a data-section entry in which every element of the given stride holds
     * {@code value}, sized to this op's vector size.
     *
     * @param crb compilation result builder owning the data section
     * @param stride element stride of the repeated value
     * @param value value written to every element
     * @return the created data-section entry
     */
    protected DataSection.Data createMask(CompilationResultBuilder crb, Stride stride, int value) {
        byte[] broadcast = this.createMaskBytes(value, stride);
        return AMD64ComplexVectorOp.writeToDataSection(crb, broadcast);
    }

    /**
     * Creates a data-section entry twice as long as this op's vector size: the first half stays
     * zero, and every element in the second half is written with {@code -1}.
     *
     * @param crb compilation result builder owning the data section
     * @param stride element stride used to address the elements of the second half
     * @return the created data-section entry
     */
    protected DataSection.Data createTailMask(CompilationResultBuilder crb, Stride stride) {
        byte[] tailMask = new byte[this.vectorSize.getBytes() * 2];
        int perVector = AMD64ComplexVectorOp.elementsPerVector(this.vectorSize, stride);
        int end = perVector * 2;
        for (int element = perVector; element < end; element++) {
            AMD64ComplexVectorOp.writeValue(tailMask, stride, element, -1);
        }
        return AMD64ComplexVectorOp.writeToDataSection(crb, tailMask);
    }

    /**
     * Builds a byte table of {@code length} ascending indices ({@code 0 .. length - 1}) followed
     * by one XMM register's worth of {@code 0xFF} bytes.
     *
     * @param length number of leading index bytes
     * @return a new array of {@code XMM bytes + length} bytes
     */
    protected static byte[] createXMMTailShuffleMask(int length) {
        byte[] shuffle = new byte[AVXKind.AVXSize.XMM.getBytes() + length];
        // trailing part: all bits set
        Arrays.fill(shuffle, length, shuffle.length, (byte)-1);
        // leading part: identity indices
        int position = 0;
        while (position < length) {
            shuffle[position] = (byte)position;
            position++;
        }
        return shuffle;
    }

    /**
     * Builds a byte array of this op's vector size in which every element of the given stride is
     * written with {@code value}.
     *
     * @param value value written to every element
     * @param stride element stride
     * @return a new array of {@code vectorSize.getBytes()} bytes
     */
    protected byte[] createMaskBytes(int value, Stride stride) {
        byte[] broadcast = new byte[this.vectorSize.getBytes()];
        int elementCount = AMD64ComplexVectorOp.elementsPerVector(this.vectorSize, stride);
        for (int element = 0; element < elementCount; element++) {
            AMD64ComplexVectorOp.writeValue(broadcast, stride, element, value);
        }
        return broadcast;
    }

    /**
     * Builds a 32-byte table of eight 4-byte groups whose first bytes are
     * {@code 0, 4, 1, 5, 2, 6, 3, 7}; all other bytes are zero.
     * NOTE(review): presumably these are little-endian dword indices for a cross-lane permute
     * that undoes the lane interleaving of AVX2 pack instructions - confirm with callers.
     *
     * @return a freshly allocated table on every call
     */
    protected static byte[] getAVX2IntToBytePackingUnscrambleMap() {
        byte[] map = new byte[32];
        for (int group = 0; group < 8; group++) {
            // even groups take 0..3, odd groups take 4..7
            map[group * 4] = (byte)((group >> 1) | ((group & 1) << 2));
        }
        return map;
    }

    /**
     * Wraps the given bytes in an {@code ArrayDataPointerConstant} and registers it as
     * serializable data, using an alignment derived from the array length.
     *
     * @param crb compilation result builder owning the data builder
     * @param array constant bytes
     * @return the created data-section entry
     */
    protected static DataSection.Data writeToDataSection(CompilationResultBuilder crb, byte[] array) {
        int alignment = crb.dataBuilder.ensureValidDataAlignment(array.length);
        ArrayDataPointerConstant constant = new ArrayDataPointerConstant(array, alignment);
        return crb.dataBuilder.createSerializableData(constant, alignment);
    }

    /**
     * Stores {@code value} into the element at {@code index} of a byte array whose elements are
     * {@code stride} wide. One byte is written for S1, two for S2 and four for every other
     * stride, in the byte order reported by {@code ByteOrder.nativeOrder()}.
     * <p>
     * NOTE(review): for S8 the element offset advances by eight bytes but only four bytes are
     * written, leaving the remaining four untouched - confirm whether S8 is ever passed here.
     *
     * @param array destination array
     * @param stride element stride; determines offset and number of bytes written
     * @param index element index, scaled by the stride
     * @param value value to store, truncated to the written width
     */
    private static void writeValue(byte[] array, Stride stride, int index, int value) {
        int offset = index << stride.log2;
        if (stride == Stride.S1) {
            array[offset] = (byte)value;
            return;
        }
        boolean littleEndian = ByteOrder.nativeOrder().equals(ByteOrder.LITTLE_ENDIAN);
        int width = stride == Stride.S2 ? 2 : 4;
        for (int k = 0; k < width; ++k) {
            // little endian: least significant byte first; big endian: most significant first
            int shift = littleEndian ? 8 * k : 8 * (width - 1 - k);
            array[offset + k] = (byte)(value >> shift);
        }
    }
}

