/*
 * Decompiled with CFR 0.152.
 */
package org.graalvm.compiler.lir.amd64;

import java.util.Arrays;
import java.util.Objects;
import jdk.vm.ci.amd64.AMD64;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.RegisterValue;
import jdk.vm.ci.code.TargetDescription;
import jdk.vm.ci.code.ValueUtil;
import jdk.vm.ci.meta.AllocatableValue;
import jdk.vm.ci.meta.JavaKind;
import jdk.vm.ci.meta.PlatformKind;
import jdk.vm.ci.meta.Value;
import org.graalvm.compiler.asm.Label;
import org.graalvm.compiler.asm.amd64.AMD64Address;
import org.graalvm.compiler.asm.amd64.AMD64Assembler;
import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler;
import org.graalvm.compiler.asm.amd64.AVXKind;
import org.graalvm.compiler.core.common.LIRKind;
import org.graalvm.compiler.debug.GraalError;
import org.graalvm.compiler.lir.LIRInstruction;
import org.graalvm.compiler.lir.LIRInstructionClass;
import org.graalvm.compiler.lir.Opcode;
import org.graalvm.compiler.lir.amd64.AMD64ArrayEqualsOp;
import org.graalvm.compiler.lir.amd64.AMD64ComplexVectorOp;
import org.graalvm.compiler.lir.amd64.AMD64ControlFlow;
import org.graalvm.compiler.lir.amd64.AMD64StrideUtil;
import org.graalvm.compiler.lir.asm.CompilationResultBuilder;
import org.graalvm.compiler.lir.gen.LIRGeneratorTool;

/**
 * LIR instruction that emits AMD64 code comparing a region of array A with a region of array B,
 * element by element.
 *
 * <p>The emitted code leaves in the result register the difference {@code a[i] - b[i]} of the first
 * pair of elements that differ (each element loaded via {@code movSZx} with the configured
 * {@link AMD64MacroAssembler.ExtendMode}), or {@code 0} when the regions are equal or the length is
 * zero.
 *
 * <p>Element strides are either fixed at construction time (Byte, Char or Int for each array), or
 * selected at run time through {@code dynamicStrides}; in the latter case a jump table dispatches
 * to one of nine stride combinations.
 *
 * <p>All inputs are pinned to fixed registers by {@link #movParamsAndCreate}. Every input is also
 * declared as a temp because the emitted code overwrites those registers.
 */
@Opcode(value="ARRAY_REGION_COMPARE")
public final class AMD64ArrayRegionCompareToOp extends AMD64ComplexVectorOp {
    public static final LIRInstructionClass<AMD64ArrayRegionCompareToOp> TYPE = LIRInstructionClass.create(AMD64ArrayRegionCompareToOp.class);

    // Fixed registers the parameters are moved into before the instruction.
    private static final Register REG_ARRAY_A = AMD64.rsi;
    private static final Register REG_OFFSET_A = AMD64.rax;
    private static final Register REG_ARRAY_B = AMD64.rdi;
    private static final Register REG_OFFSET_B = AMD64.rcx;
    private static final Register REG_LENGTH = AMD64.rdx;
    private static final Register REG_STRIDE = AMD64.r8;

    // "All lanes equal" values of the compare mask for a 16-byte (XMM) and a 32-byte (YMM) vector.
    private static final int ONES_16 = 0xFFFF;
    private static final int ONES_32 = 0xFFFFFFFF;

    // Constant strides; both are null when the strides are selected dynamically.
    private final AMD64Address.Scale argScaleA;
    private final AMD64Address.Scale argScaleB;
    private final AMD64MacroAssembler.ExtendMode extendMode;

    @LIRInstruction.Def(value={LIRInstruction.OperandFlag.REG})
    private Value resultValue;
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
    private Value arrayAValue;
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
    private Value offsetAValue;
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
    private Value arrayBValue;
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.ILLEGAL})
    private Value offsetBValue;
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
    private Value lengthValue;
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.ILLEGAL})
    private Value dynamicStridesValue;

    // Temp aliases of the inputs above: the same registers are clobbered by the emitted code.
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value arrayAValueTemp;
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value offsetAValueTemp;
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value arrayBValueTemp;
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.ILLEGAL})
    private Value offsetBValueTemp;
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value lengthValueTemp;
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.ILLEGAL})
    private Value dynamicStridesValueTemp;

    // Vector scratch registers; ILLEGAL when no vectorized compare can be emitted.
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.ILLEGAL})
    private Value vectorTemp1;
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.ILLEGAL})
    private Value vectorTemp2;

    /**
     * Creates the op. Callers go through {@link #movParamsAndCreate}, which pins the operands to
     * the fixed registers first.
     *
     * @param strideA element kind of array A (Byte, Char or Int), or {@code null} for dynamic
     *            strides; {@code strideB} is only inspected when this is non-null
     * @param strideB element kind of array B (Byte, Char or Int)
     * @param dynamicStrides register holding the run-time stride selector, or {@link Value#ILLEGAL}
     */
    private AMD64ArrayRegionCompareToOp(LIRGeneratorTool tool, JavaKind strideA, JavaKind strideB, Value result, Value arrayA, Value offsetA, Value arrayB, Value offsetB, Value length, Value dynamicStrides, AMD64MacroAssembler.ExtendMode extendMode) {
        super(TYPE, tool, AVXKind.AVXSize.YMM);
        this.extendMode = extendMode;
        if (strideA == null) {
            this.argScaleA = null;
            this.argScaleB = null;
        } else {
            GraalError.guarantee(strideA == JavaKind.Byte || strideA == JavaKind.Char || strideA == JavaKind.Int, "unsupported strideA");
            GraalError.guarantee(strideB == JavaKind.Byte || strideB == JavaKind.Char || strideB == JavaKind.Int, "unsupported strideB");
            this.argScaleA = Objects.requireNonNull(AMD64Address.Scale.fromInt(tool.getProviders().getMetaAccess().getArrayIndexScale(strideA)));
            this.argScaleB = Objects.requireNonNull(AMD64Address.Scale.fromInt(tool.getProviders().getMetaAccess().getArrayIndexScale(strideB)));
        }
        this.resultValue = result;
        this.arrayAValue = this.arrayAValueTemp = arrayA;
        this.offsetAValue = this.offsetAValueTemp = offsetA;
        this.arrayBValue = this.arrayBValueTemp = arrayB;
        this.offsetBValue = this.offsetBValueTemp = offsetB;
        this.lengthValue = this.lengthValueTemp = length;
        this.dynamicStridesValue = this.dynamicStridesValueTemp = dynamicStrides;
        // With dynamic strides both scales are null and therefore equal, so vector temps are
        // always allocated in that case.
        if (isVectorCompareSupported(tool.target(), this.argScaleA, this.argScaleB)) {
            LIRKind lirKind = LIRKind.value((PlatformKind) this.getVectorKind(JavaKind.Byte));
            this.vectorTemp1 = tool.newVariable(lirKind);
            this.vectorTemp2 = tool.newVariable(lirKind);
        } else {
            this.vectorTemp1 = Value.ILLEGAL;
            this.vectorTemp2 = Value.ILLEGAL;
        }
    }

    /**
     * Moves all parameters into the fixed registers used by this op and creates the instruction.
     * Both array values go through {@code emitConvertNullToZero}; the remaining parameters are
     * plain moves.
     *
     * @param strideA constant stride of array A, or {@code null} when strides are dynamic
     * @param strideB constant stride of array B
     * @param dynamicStrides run-time stride selector, or {@code null} when strides are constant
     * @return the new instruction operating on the fixed registers
     */
    public static AMD64ArrayRegionCompareToOp movParamsAndCreate(LIRGeneratorTool tool, JavaKind strideA, JavaKind strideB, Value result, Value arrayA, Value offsetA, Value arrayB, Value offsetB, Value length, Value dynamicStrides, AMD64MacroAssembler.ExtendMode extendMode) {
        RegisterValue regArrayA = REG_ARRAY_A.asValue(arrayA.getValueKind());
        RegisterValue regOffsetA = REG_OFFSET_A.asValue(offsetA.getValueKind());
        RegisterValue regArrayB = REG_ARRAY_B.asValue(arrayB.getValueKind());
        RegisterValue regOffsetB = REG_OFFSET_B.asValue(offsetB.getValueKind());
        RegisterValue regLength = REG_LENGTH.asValue(length.getValueKind());
        // The stride register must carry the kind of the value moved into it (dynamicStrides),
        // not the kind of the length operand.
        AllocatableValue regStride = dynamicStrides == null ? Value.ILLEGAL : REG_STRIDE.asValue(dynamicStrides.getValueKind());
        tool.emitConvertNullToZero(regArrayA, arrayA);
        tool.emitMove(regOffsetA, offsetA);
        tool.emitConvertNullToZero(regArrayB, arrayB);
        tool.emitMove(regOffsetB, offsetB);
        tool.emitMove(regLength, length);
        if (dynamicStrides != null) {
            tool.emitMove(regStride, dynamicStrides);
        }
        return new AMD64ArrayRegionCompareToOp(tool, strideA, strideB, result, regArrayA, regOffsetA, regArrayB, regOffsetB, regLength, regStride, extendMode);
    }

    /**
     * Emits the comparison. With constant strides a single compare routine is emitted; with
     * dynamic strides a jump table selects one of nine routines, one per stride combination.
     */
    @Override
    public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
        Register result = ValueUtil.asRegister(this.resultValue);
        Register arrayA = ValueUtil.asRegister(this.arrayAValue);
        Register arrayB = ValueUtil.asRegister(this.arrayBValue);
        // Fold the byte offsets into the array pointers.
        masm.leaq(arrayA, new AMD64Address(arrayA, ValueUtil.asRegister(this.offsetAValue), AMD64Address.Scale.Times1));
        masm.leaq(arrayB, new AMD64Address(arrayB, ValueUtil.asRegister(this.offsetBValue), AMD64Address.Scale.Times1));
        Register length = ValueUtil.asRegister(this.lengthValue);
        // The compare routines expect a copy of the length in the result register.
        masm.movl(result, length);
        // The offset registers are free now and are reused as scratch registers.
        Register tmp1 = ValueUtil.asRegister(this.offsetAValue);
        Register tmp2 = ValueUtil.asRegister(this.offsetBValue);
        if (ValueUtil.isIllegal(this.dynamicStridesValue)) {
            this.emitArrayCompare(crb, masm, this.argScaleA, this.argScaleB, result, arrayA, arrayB, length, tmp1);
        } else {
            // tmp2 is the "operands were swapped" flag: 0 = not swapped.
            masm.xorq(tmp2, tmp2);
            Label[] variants = new Label[9];
            Label done = new Label();
            for (int i = 0; i < variants.length; ++i) {
                variants[i] = new Label();
            }
            // Dispatch on the dynamic-strides register over the index range 0..8.
            AMD64ControlFlow.RangeTableSwitchOp.emitJumpTable(crb, masm, tmp1, ValueUtil.asRegister(this.dynamicStridesValue), 0, 8, Arrays.stream(variants));
            AMD64Address.Scale[] scales = {AMD64Address.Scale.Times1, AMD64Address.Scale.Times2, AMD64Address.Scale.Times4};
            for (AMD64Address.Scale scaleA : scales) {
                for (AMD64Address.Scale scaleB : scales) {
                    // Only routines with scaleA >= scaleB are emitted; the mirrored combination
                    // reuses the same routine with swapped operands.
                    if (scaleA.log2 < scaleB.log2) {
                        continue;
                    }
                    boolean mixedStrides = scaleA.log2 > scaleB.log2;
                    if (mixedStrides) {
                        // Entry point of the mirrored combination: swap the array pointers, mark
                        // the swap in tmp2 and fall through into the shared routine below.
                        masm.align(crb.target.wordSize * 2);
                        masm.bind(variants[AMD64StrideUtil.getDirectStubCallIndex(scaleB, scaleA)]);
                        masm.movq(tmp1, arrayA);
                        masm.movq(arrayA, arrayB);
                        masm.movq(arrayB, tmp1);
                        masm.incl(tmp2);
                    }
                    masm.align(crb.target.wordSize * 2);
                    masm.bind(variants[AMD64StrideUtil.getDirectStubCallIndex(scaleA, scaleB)]);
                    this.emitArrayCompare(crb, masm, scaleA, scaleB, result, arrayA, arrayB, length, tmp1);
                    if (mixedStrides) {
                        // Operands were swapped iff tmp2 != 0; in that case the sign of the
                        // result has to be flipped.
                        masm.testlAndJcc(tmp2, tmp2, AMD64Assembler.ConditionFlag.Zero, done, false);
                        masm.negl(result);
                    }
                    masm.jmp(done);
                }
            }
            masm.bind(done);
        }
    }

    /**
     * Returns whether a vectorized compare can be emitted for the given strides: always when both
     * strides are the same, otherwise only when the target has SSE4.1.
     */
    private static boolean isVectorCompareSupported(TargetDescription target, AMD64Address.Scale scaleA, AMD64Address.Scale scaleB) {
        return scaleA == scaleB || ((AMD64) target.arch).getFeatures().contains(AMD64.CPUFeature.SSE4_1);
    }

    /**
     * Emits one complete compare routine for a fixed stride pair: the vector loop when supported,
     * followed by the scalar loop that handles the remaining elements.
     *
     * <p>Expects {@code result} to hold a copy of {@code length} on entry (set up by
     * {@link #emitCode}).
     */
    private void emitArrayCompare(CompilationResultBuilder crb, AMD64MacroAssembler masm, AMD64Address.Scale scaleA, AMD64Address.Scale scaleB, Register result, Register arrayA, Register arrayB, Register length, Register tmp) {
        Label returnLabel = new Label();
        if (isVectorCompareSupported(crb.target, scaleA, scaleB)) {
            this.emitVectorLoop(crb, masm, scaleA, scaleB, result, arrayA, arrayB, length, tmp, returnLabel);
        } else {
            // The scalar loop compares before it counts, so it must never be entered with a zero
            // length. The vector path performs the same check itself. result == length here, so
            // returning now yields 0.
            masm.testlAndJcc(result, result, AMD64Assembler.ConditionFlag.Zero, returnLabel, false);
        }
        this.emitScalarLoop(crb, masm, scaleA, scaleB, result, arrayA, arrayB, length, tmp, returnLabel);
        masm.bind(returnLabel);
    }

    /**
     * Emits the vectorized part of the compare routine.
     *
     * <p>Control leaves this code either by jumping to {@code returnLabel} with the final value in
     * {@code result}, or by falling through at the end (label {@code scalarTail}) with
     * {@code length} holding the number of elements that still have to be compared by the scalar
     * loop emitted right after it.
     *
     * <p>Expects {@code result} to hold a copy of {@code length} on entry.
     */
    private void emitVectorLoop(CompilationResultBuilder crb, AMD64MacroAssembler masm, AMD64Address.Scale scaleA, AMD64Address.Scale scaleB, Register result, Register arrayA, Register arrayB, Register length, Register tmp, Label returnLabel) {
        AMD64Address.Scale maxScale = AMD64ArrayEqualsOp.max(scaleA, scaleB);
        Register vector1 = ValueUtil.asRegister(this.vectorTemp1);
        Register vector2 = ValueUtil.asRegister(this.vectorTemp2);
        int elementsPerVector = getElementsPerVector(this.vectorSize, maxScale);
        int elementsPerXmm = getElementsPerVector(AVXKind.AVXSize.XMM, maxScale);
        boolean xmmSized = this.vectorSize == AVXKind.AVXSize.XMM;
        // Value of the compare mask when every lane compared equal.
        int allEqualMask = xmmSized ? ONES_16 : ONES_32;
        Label loop = new Label();
        Label scalarTail = new Label();
        Label xmmTail = new Label();
        Label diffFound = new Label();

        // Zero length: result is already 0.
        masm.testlAndJcc(result, result, AMD64Assembler.ConditionFlag.Zero, returnLabel, false);
        // Fast path: compare the first element pair with scalar loads.
        AMD64MacroAssembler.movSZx(masm, scaleA, this.extendMode, result, new AMD64Address(arrayA));
        AMD64MacroAssembler.movSZx(masm, scaleB, this.extendMode, tmp, new AMD64Address(arrayB));
        masm.subqAndJcc(result, tmp, AMD64Assembler.ConditionFlag.NotZero, returnLabel, false);

        // result = number of tail elements, length = number of elements covered by full vectors.
        masm.movl(result, length);
        masm.andl(result, elementsPerVector - 1);
        // NOTE(review): the jump target is chosen by vectorSize == YMM, while xmmTail is only
        // bound when supportsAVX2AndYMM() holds; confirm against AMD64ComplexVectorOp that the
        // two conditions always agree.
        masm.andlAndJcc(length, -elementsPerVector, AMD64Assembler.ConditionFlag.Zero, this.vectorSize == AVXKind.AVXSize.YMM ? xmmTail : scalarTail, false);

        // Point both arrays at the end of the vectorized part and count up from -length to 0.
        masm.leaq(arrayA, new AMD64Address(arrayA, length, scaleA));
        masm.leaq(arrayB, new AMD64Address(arrayB, length, scaleB));
        masm.negq(length);

        // Main vector loop.
        masm.align(crb.target.wordSize * 2);
        masm.bind(loop);
        AMD64MacroAssembler.pmovSZx(masm, this.vectorSize, vector1, this.extendMode, maxScale, arrayA, scaleA, length, 0);
        AMD64MacroAssembler.pmovSZx(masm, this.vectorSize, vector2, this.extendMode, maxScale, arrayB, scaleB, length, 0);
        AMD64MacroAssembler.pcmpeq(masm, this.vectorSize, maxScale, vector1, vector2);
        AMD64MacroAssembler.pmovmsk(masm, this.vectorSize, tmp, vector1);
        // After the xor, tmp is non-zero iff at least one lane differed.
        masm.xorlAndJcc(tmp, allEqualMask, AMD64Assembler.ConditionFlag.NotZero, diffFound, true);
        masm.addqAndJcc(length, elementsPerVector, AMD64Assembler.ConditionFlag.NotZero, loop, true);

        // Tail: nothing left -> equal (result is 0).
        masm.testlAndJcc(result, result, AMD64Assembler.ConditionFlag.Zero, returnLabel, xmmSized);
        // Otherwise compare one more full vector positioned so that it ends at the last element.
        AMD64MacroAssembler.pmovSZx(masm, this.vectorSize, vector1, this.extendMode, maxScale, arrayA, scaleA, result, -this.vectorSize.getBytes());
        AMD64MacroAssembler.pmovSZx(masm, this.vectorSize, vector2, this.extendMode, maxScale, arrayB, scaleB, result, -this.vectorSize.getBytes());
        // length (0 after the loop) becomes the element index of the start of that last vector,
        // which diffFound needs.
        masm.addq(length, result);
        AMD64MacroAssembler.pcmpeq(masm, this.vectorSize, maxScale, vector1, vector2);
        masm.subq(length, elementsPerVector);
        AMD64MacroAssembler.pmovmsk(masm, this.vectorSize, tmp, vector1);
        masm.xorlAndJcc(tmp, allEqualMask, AMD64Assembler.ConditionFlag.NotZero, diffFound, true);
        // All elements equal.
        masm.xorq(result, result);
        if (xmmSized) {
            masm.jmpb(returnLabel);
        } else {
            masm.jmp(returnLabel);
        }

        // A difference was found in the vector starting at element index 'length'.
        masm.bind(diffFound);
        // Index of the lowest set bit in the inverted mask ...
        masm.bsfq(tmp, tmp);
        if (maxScale.value > 1) {
            // ... converted to an element index (presumably the mask has one bit per byte).
            masm.shrq(tmp, maxScale.log2);
        }
        masm.addq(length, tmp);
        // Reload the differing pair with scalar loads and return their difference.
        AMD64MacroAssembler.movSZx(masm, scaleA, this.extendMode, result, new AMD64Address(arrayA, length, scaleA));
        AMD64MacroAssembler.movSZx(masm, scaleB, this.extendMode, tmp, new AMD64Address(arrayB, length, scaleB));
        masm.subq(result, tmp);
        masm.jmpb(returnLabel);

        if (this.supportsAVX2AndYMM()) {
            // Fewer elements than one YMM vector: try XMM vectors. Here length == 0 and the array
            // pointers still point at the region start.
            masm.bind(xmmTail);
            masm.cmplAndJcc(result, elementsPerXmm, AMD64Assembler.ConditionFlag.Less, scalarTail, true);
            // First XMM vector, at the region start.
            AMD64MacroAssembler.pmovSZx(masm, AVXKind.AVXSize.XMM, this.extendMode, vector1, maxScale, arrayA, scaleA, 0);
            AMD64MacroAssembler.pmovSZx(masm, AVXKind.AVXSize.XMM, this.extendMode, vector2, maxScale, arrayB, scaleB, 0);
            AMD64MacroAssembler.pcmpeq(masm, AVXKind.AVXSize.XMM, maxScale, vector1, vector2);
            AMD64MacroAssembler.pmovmsk(masm, AVXKind.AVXSize.XMM, tmp, vector1);
            masm.xorlAndJcc(tmp, ONES_16, AMD64Assembler.ConditionFlag.NotZero, diffFound, true);
            // Second XMM vector, positioned so that it ends at the last element.
            AMD64MacroAssembler.pmovSZx(masm, AVXKind.AVXSize.XMM, vector1, this.extendMode, maxScale, arrayA, scaleA, result, -AVXKind.AVXSize.XMM.getBytes());
            AMD64MacroAssembler.pmovSZx(masm, AVXKind.AVXSize.XMM, vector2, this.extendMode, maxScale, arrayB, scaleB, result, -AVXKind.AVXSize.XMM.getBytes());
            masm.movq(length, result);
            AMD64MacroAssembler.pcmpeq(masm, AVXKind.AVXSize.XMM, maxScale, vector1, vector2);
            masm.subq(length, elementsPerXmm);
            AMD64MacroAssembler.pmovmsk(masm, AVXKind.AVXSize.XMM, tmp, vector1);
            masm.xorlAndJcc(tmp, ONES_16, AMD64Assembler.ConditionFlag.NotZero, diffFound, true);
            masm.xorq(result, result);
            masm.jmpb(returnLabel);
        }

        // Hand the remaining element count to the scalar loop that follows this code.
        masm.bind(scalarTail);
        masm.movl(length, result);
    }

    /** Returns how many elements of width {@code maxScale} fit into a vector of size {@code vSize}. */
    private static int getElementsPerVector(AVXKind.AVXSize vSize, AMD64Address.Scale maxScale) {
        return vSize.getBytes() >> maxScale.log2;
    }

    /**
     * Emits a scalar loop comparing {@code length} element pairs. On the first difference it jumps
     * to {@code returnLabel} with the difference in {@code result}; otherwise it falls through
     * with {@code result == 0}.
     *
     * <p>The loop body runs before the counter is checked, so {@code length} must be non-zero on
     * entry.
     */
    private void emitScalarLoop(CompilationResultBuilder crb, AMD64MacroAssembler masm, AMD64Address.Scale scaleA, AMD64Address.Scale scaleB, Register result, Register arrayA, Register arrayB, Register length, Register tmp, Label returnLabel) {
        Label loop = new Label();
        // Point both arrays at the region end and count up from -length to 0.
        masm.leaq(arrayA, new AMD64Address(arrayA, length, scaleA));
        masm.leaq(arrayB, new AMD64Address(arrayB, length, scaleB));
        masm.negq(length);
        masm.align(crb.target.wordSize * 2);
        masm.bind(loop);
        AMD64MacroAssembler.movSZx(masm, scaleA, this.extendMode, result, new AMD64Address(arrayA, length, scaleA));
        AMD64MacroAssembler.movSZx(masm, scaleB, this.extendMode, tmp, new AMD64Address(arrayB, length, scaleB));
        masm.subqAndJcc(result, tmp, AMD64Assembler.ConditionFlag.NotZero, returnLabel, true);
        masm.incqAndJcc(length, AMD64Assembler.ConditionFlag.NotZero, loop, true);
    }
}

