/*
 * Decompiled with CFR 0.152.
 */
package org.graalvm.compiler.lir.amd64;

import java.util.Arrays;
import java.util.Objects;
import jdk.vm.ci.amd64.AMD64;
import jdk.vm.ci.amd64.AMD64Kind;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.RegisterValue;
import jdk.vm.ci.code.TargetDescription;
import jdk.vm.ci.code.ValueUtil;
import jdk.vm.ci.meta.AllocatableValue;
import jdk.vm.ci.meta.JavaKind;
import jdk.vm.ci.meta.PlatformKind;
import jdk.vm.ci.meta.Value;
import jdk.vm.ci.meta.ValueKind;
import org.graalvm.compiler.asm.Label;
import org.graalvm.compiler.asm.amd64.AMD64Address;
import org.graalvm.compiler.asm.amd64.AMD64Assembler;
import org.graalvm.compiler.asm.amd64.AMD64BaseAssembler;
import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler;
import org.graalvm.compiler.asm.amd64.AVXKind;
import org.graalvm.compiler.core.common.LIRKind;
import org.graalvm.compiler.core.common.StrideUtil;
import org.graalvm.compiler.debug.GraalError;
import org.graalvm.compiler.lir.LIRInstruction;
import org.graalvm.compiler.lir.LIRInstructionClass;
import org.graalvm.compiler.lir.LIRValueUtil;
import org.graalvm.compiler.lir.Opcode;
import org.graalvm.compiler.lir.amd64.AMD64ComplexVectorOp;
import org.graalvm.compiler.lir.amd64.AMD64ControlFlow;
import org.graalvm.compiler.lir.amd64.AMD64StrideUtil;
import org.graalvm.compiler.lir.asm.CompilationResultBuilder;
import org.graalvm.compiler.lir.gen.LIRGeneratorTool;

@Opcode(value="ARRAY_EQUALS")
public final class AMD64ArrayEqualsOp
extends AMD64ComplexVectorOp {
    /** LIR instruction class descriptor created for this op. */
    public static final LIRInstructionClass<AMD64ArrayEqualsOp> TYPE = LIRInstructionClass.create(AMD64ArrayEqualsOp.class);
    // Fixed registers that movParamsAndCreate moves the operands into before the op is created.
    private static final Register REG_ARRAY_A = AMD64.rsi;
    private static final Register REG_OFFSET_A = AMD64.rax;
    private static final Register REG_ARRAY_B = AMD64.rdi;
    private static final Register REG_OFFSET_B = AMD64.rcx;
    private static final Register REG_MASK = AMD64.r8;
    private static final Register REG_LENGTH = AMD64.rdx;
    private static final Register REG_STRIDE = AMD64.r9;
    // kindA when the strides are constant; JavaKind.Byte when dynamic strides are used (see constructor).
    private final JavaKind elementKind;
    // Displacements added to the respective array/mask pointer before comparing (see emitCode).
    private final int baseOffsetA;
    private final int baseOffsetB;
    private final int baseOffsetMask;
    // Result of constOffset(...): 0 when no offset operand was given, a non-negative value for a
    // usable constant offset, negative when the offset has to be read from its register.
    private final int constOffsetA;
    private final int constOffsetB;
    // Constant array length, or -1 when the length operand is not a Java constant.
    private final int constLength;
    // Address scales derived from the element kinds; all three are null when dynamic strides are used.
    private final AMD64Address.Scale argScaleA;
    private final AMD64Address.Scale argScaleB;
    private final AMD64Address.Scale argScaleMask;
    // Extension mode handed to the pmovSZx/movSZx helpers when loading elements.
    private final AMD64MacroAssembler.ExtendMode extendMode;
    // Cached result of canGenerateConstantLengthCompare(...) computed in the constructor.
    private final boolean canGenerateConstantLengthCompare;
    @LIRInstruction.Def(value={LIRInstruction.OperandFlag.REG})
    private Value resultValue;
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
    private Value arrayAValue;
    // ILLEGAL when constOffsetA == 0; the register is still kept in offsetAValueTemp.
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.ILLEGAL})
    private Value offsetAValue;
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
    private Value arrayBValue;
    // ILLEGAL when constOffsetB == 0; the register is still kept in offsetBValueTemp.
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.ILLEGAL})
    private Value offsetBValue;
    // ILLEGAL when no mask array is supplied (see withMask()).
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.ILLEGAL})
    private Value arrayMaskValue;
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
    private Value lengthValue;
    // ILLEGAL when the strides are constant (see withDynamicStrides()).
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.ILLEGAL})
    private Value dynamicStridesValue;
    // The *Temp fields below are assigned the same values as the corresponding inputs in the
    // constructor; the emitted code overwrites those registers (e.g. leaq into the array registers).
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value arrayAValueTemp;
    // Also used as a general-purpose scratch register by the emit* methods.
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.ILLEGAL})
    private Value offsetAValueTemp;
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value arrayBValueTemp;
    // Second scratch register in emitDifferentKindsElementWiseCompare.
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.ILLEGAL})
    private Value offsetBValueTemp;
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.ILLEGAL})
    private Value arrayMaskValueTemp;
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value lengthValueTemp;
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.ILLEGAL})
    private Value dynamicStrideValueTemp;
    // Scratch XMM variable for the NaN check; only allocated when kindA is Float or Double, else ILLEGAL.
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.ILLEGAL})
    private Value tempXMM;
    // Vector scratch variables: empty without SSE4.1, otherwise 2 (+1 with a mask, +1 for the
    // constant-length compare).
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value[] vectorTemp;
    // NOTE(review): not referenced in the visible part of the file; presumably the byte width of
    // the 8-byte scalar compare (the decompiler may have inlined its uses) - confirm.
    private static final int VECTOR_SIZE = 8;

    private AMD64ArrayEqualsOp(LIRGeneratorTool tool, JavaKind kindA, JavaKind kindB, JavaKind kindMask, int baseOffsetA, int baseOffsetB, int baseOffsetMask, Value result, Value arrayA, Value offsetA, Value arrayB, Value offsetB, Value mask, Value length, Value dynamicStrides, AMD64MacroAssembler.ExtendMode extendMode, int constOffsetA, int constOffsetB, int constLength) {
        super(TYPE, tool, AVXKind.AVXSize.YMM);
        this.extendMode = extendMode;
        this.constOffsetA = constOffsetA;
        this.constOffsetB = constOffsetB;
        this.constLength = constLength;
        this.baseOffsetA = baseOffsetA;
        this.baseOffsetB = baseOffsetB;
        this.baseOffsetMask = baseOffsetMask;
        if (StrideUtil.useConstantStrides(dynamicStrides)) {
            assert (kindA.isNumericInteger() && kindB.isNumericInteger() || kindA == kindB);
            this.elementKind = kindA;
            this.argScaleA = Objects.requireNonNull(AMD64Address.Scale.fromInt(tool.getProviders().getMetaAccess().getArrayIndexScale(kindA)));
            this.argScaleB = Objects.requireNonNull(AMD64Address.Scale.fromInt(tool.getProviders().getMetaAccess().getArrayIndexScale(kindB)));
            this.argScaleMask = Objects.requireNonNull(AMD64Address.Scale.fromInt(tool.getProviders().getMetaAccess().getArrayIndexScale(kindMask)));
        } else {
            this.elementKind = JavaKind.Byte;
            this.argScaleA = null;
            this.argScaleB = null;
            this.argScaleMask = null;
        }
        this.canGenerateConstantLengthCompare = AMD64ArrayEqualsOp.canGenerateConstantLengthCompare(tool.target(), kindA, kindB, constLength, dynamicStrides, this.vectorSize);
        this.resultValue = result;
        this.arrayAValue = this.arrayAValueTemp = arrayA;
        this.offsetAValue = constOffsetA == 0 ? Value.ILLEGAL : offsetA;
        this.offsetAValueTemp = offsetA;
        this.arrayBValue = this.arrayBValueTemp = arrayB;
        this.offsetBValue = constOffsetB == 0 ? Value.ILLEGAL : offsetB;
        this.offsetBValueTemp = offsetB;
        this.arrayMaskValue = this.arrayMaskValueTemp = mask;
        this.lengthValue = this.lengthValueTemp = length;
        this.dynamicStridesValue = this.dynamicStrideValueTemp = dynamicStrides;
        this.tempXMM = kindA == JavaKind.Float ? tool.newVariable(LIRKind.value((PlatformKind)AMD64Kind.SINGLE)) : (kindA == JavaKind.Double ? tool.newVariable(LIRKind.value((PlatformKind)AMD64Kind.DOUBLE)) : Value.ILLEGAL);
        if (AMD64ArrayEqualsOp.supports(tool.target(), AMD64.CPUFeature.SSE4_1)) {
            LIRKind lirKind = LIRKind.value((PlatformKind)this.getVectorKind(JavaKind.Byte));
            this.vectorTemp = new Value[(this.withMask() ? 3 : 2) + (this.canGenerateConstantLengthCompare ? 1 : 0)];
            for (int i = 0; i < this.vectorTemp.length; ++i) {
                this.vectorTemp[i] = tool.newVariable(lirKind);
            }
        } else {
            this.vectorTemp = new Value[0];
        }
    }

    /**
     * Variant without explicit stride kinds: delegates to the full overload with {@code null} for
     * all three stride kinds and forwards {@code dynamicStrides} unchanged.
     */
    public static AMD64ArrayEqualsOp movParamsAndCreate(LIRGeneratorTool tool, int baseOffsetA, int baseOffsetB, int baseOffsetMask, Value result, Value arrayA, Value offsetA, Value arrayB, Value offsetB, Value mask, Value length, Value dynamicStrides, AMD64MacroAssembler.ExtendMode extendMode) {
        final JavaKind unspecifiedKind = null;
        return movParamsAndCreate(tool, unspecifiedKind, unspecifiedKind, unspecifiedKind, baseOffsetA, baseOffsetB, baseOffsetMask, result, arrayA, offsetA, arrayB, offsetB, mask, length, dynamicStrides, extendMode);
    }

    /**
     * Variant with explicit stride kinds and no dynamic-stride operand: delegates to the full
     * overload with {@code null} for {@code dynamicStrides}.
     */
    public static AMD64ArrayEqualsOp movParamsAndCreate(LIRGeneratorTool tool, JavaKind strideA, JavaKind strideB, JavaKind strideMask, int baseOffsetA, int baseOffsetB, int baseOffsetMask, Value result, Value arrayA, Value offsetA, Value arrayB, Value offsetB, Value mask, Value length, AMD64MacroAssembler.ExtendMode extendMode) {
        final Value noDynamicStrides = null;
        return movParamsAndCreate(tool, strideA, strideB, strideMask, baseOffsetA, baseOffsetB, baseOffsetMask, result, arrayA, offsetA, arrayB, offsetB, mask, length, noDynamicStrides, extendMode);
    }

    /**
     * Moves all operands into the fixed registers this op works on and creates the op.
     *
     * <p>{@code offsetA}, {@code offsetB}, {@code arrayMask} and {@code dynamicStrides} may be
     * {@code null}. A missing offset still reserves its register (typed as QWORD) so it can be
     * used as a temp; a missing mask or stride operand is represented by {@link Value#ILLEGAL}.
     * The array references are moved with {@code emitConvertNullToZero}, all other operands with
     * {@code emitMove}.
     *
     * @return the new op, not yet appended to the LIR
     */
    public static AMD64ArrayEqualsOp movParamsAndCreate(LIRGeneratorTool tool, JavaKind strideA, JavaKind strideB, JavaKind strideMask, int baseOffsetA, int baseOffsetB, int baseOffsetMask, Value result, Value arrayA, Value offsetA, Value arrayB, Value offsetB, Value arrayMask, Value length, Value dynamicStrides, AMD64MacroAssembler.ExtendMode extendMode) {
        final boolean hasOffsetA = offsetA != null;
        final boolean hasOffsetB = offsetB != null;
        final boolean hasMask = arrayMask != null;
        final boolean hasDynamicStrides = dynamicStrides != null;

        // Bind each operand to its fixed register.
        RegisterValue fixedArrayA = REG_ARRAY_A.asValue(arrayA.getValueKind());
        ValueKind<?> kindOffsetA;
        if (hasOffsetA) {
            kindOffsetA = offsetA.getValueKind();
        } else {
            kindOffsetA = LIRKind.value(AMD64Kind.QWORD);
        }
        RegisterValue fixedOffsetA = REG_OFFSET_A.asValue(kindOffsetA);
        RegisterValue fixedArrayB = REG_ARRAY_B.asValue(arrayB.getValueKind());
        ValueKind<?> kindOffsetB;
        if (hasOffsetB) {
            kindOffsetB = offsetB.getValueKind();
        } else {
            kindOffsetB = LIRKind.value(AMD64Kind.QWORD);
        }
        RegisterValue fixedOffsetB = REG_OFFSET_B.asValue(kindOffsetB);
        AllocatableValue fixedMask = Value.ILLEGAL;
        if (hasMask) {
            fixedMask = REG_MASK.asValue(arrayMask.getValueKind());
        }
        RegisterValue fixedLength = REG_LENGTH.asValue(length.getValueKind());
        AllocatableValue fixedStride = Value.ILLEGAL;
        if (hasDynamicStrides) {
            fixedStride = REG_STRIDE.asValue(dynamicStrides.getValueKind());
        }

        // Emit the moves into the fixed registers; optional operands are moved only when present.
        tool.emitConvertNullToZero(fixedArrayA, arrayA);
        tool.emitConvertNullToZero(fixedArrayB, arrayB);
        tool.emitMove(fixedLength, length);
        if (hasOffsetA) {
            tool.emitMove(fixedOffsetA, offsetA);
        }
        if (hasOffsetB) {
            tool.emitMove(fixedOffsetB, offsetB);
        }
        if (hasMask) {
            tool.emitMove(fixedMask, arrayMask);
        }
        if (hasDynamicStrides) {
            tool.emitMove(fixedStride, dynamicStrides);
        }

        // Compile-time constants the op can exploit.
        int knownOffsetA = constOffset(offsetA);
        int knownOffsetB = constOffset(offsetB);
        int knownLength = -1;
        if (LIRValueUtil.isJavaConstant(length)) {
            knownLength = LIRValueUtil.asJavaConstant(length).asInt();
        }
        return new AMD64ArrayEqualsOp(tool, strideA, strideB, strideMask, baseOffsetA, baseOffsetB, baseOffsetMask, result, fixedArrayA, fixedOffsetA, fixedArrayB, fixedOffsetB, fixedMask, fixedLength, fixedStride, extendMode, knownOffsetA, knownOffsetB, knownLength);
    }

    /**
     * Classifies an offset operand for constant folding.
     *
     * @param offset the offset operand, may be {@code null}
     * @return {@code 0} if {@code offset} is {@code null}; the constant's value if it is a Java
     *         constant that is exactly representable as an {@code int}; {@code -1} otherwise. Any
     *         negative result makes {@code loadBaseAddress} read the offset from its register.
     */
    private static int constOffset(Value offset) {
        if (offset == null) {
            return 0;
        }
        if (LIRValueUtil.isJavaConstant(offset)) {
            long value = LIRValueUtil.asJavaConstant(offset).asLong();
            // Require a lossless round-trip through int. Checking only the upper bound would let
            // a constant below Integer.MIN_VALUE be truncated into an arbitrary, possibly
            // non-negative, int that would then be used as a valid constant displacement.
            if (value == (int) value) {
                return (int) value;
            }
        }
        return -1;
    }

    /**
     * Returns whether the constant-length compare can be used: only when no dynamic stride
     * operand is present, the length is a known non-negative constant, and the public overload's
     * checks pass.
     */
    private static boolean canGenerateConstantLengthCompare(TargetDescription target, JavaKind kindA, JavaKind kindB, int constantLength, Value stride, AVXKind.AVXSize vectorSize) {
        if (!ValueUtil.isIllegal(stride)) {
            return false;
        }
        if (constantLength < 0) {
            return false;
        }
        return canGenerateConstantLengthCompare(target, kindA, kindB, constantLength, vectorSize);
    }

    /**
     * Returns whether a compare of {@code constantLength} elements can be emitted as the
     * constant-length vector compare.
     *
     * <p>Requires SSE4.1 and an integer {@code kindA}. If the two kinds differ, the length must
     * be at least the number of (widest) elements that fit into an XMM register. In all cases the
     * length must not exceed twice the number of elements that fit into a {@code vectorSize}
     * register.
     */
    public static boolean canGenerateConstantLengthCompare(TargetDescription target, JavaKind kindA, JavaKind kindB, int constantLength, AVXKind.AVXSize vectorSize) {
        final int widestElementBytes = Math.max(kindA.getByteCount(), kindB.getByteCount());
        final int elementsPerXmm = AVXKind.AVXSize.XMM.getBytes() / widestElementBytes;
        final int elementsPerVector = vectorSize.getBytes() / widestElementBytes;
        if (!supports(target, AMD64.CPUFeature.SSE4_1)) {
            return false;
        }
        if (!kindA.isNumericInteger()) {
            return false;
        }
        if (kindA != kindB && constantLength < elementsPerXmm) {
            return false;
        }
        return constantLength <= elementsPerVector * 2;
    }

    /** Returns whether a constant length was recorded (a non-negative {@code constLength}). */
    private boolean isLengthConstant() {
        return constLength >= 0;
    }

    /** Returns the recorded constant length; asserts that the length is actually constant. */
    private int constantLength() {
        assert isLengthConstant();
        return constLength;
    }

    /** Returns whether a mask array operand is present, i.e. {@code arrayMaskValue} is not illegal. */
    private boolean withMask() {
        final boolean maskAbsent = ValueUtil.isIllegal(arrayMaskValue);
        return !maskAbsent;
    }

    /** Returns whether a dynamic stride operand is present, i.e. {@code dynamicStridesValue} is not illegal. */
    private boolean withDynamicStrides() {
        final boolean stridesAbsent = ValueUtil.isIllegal(dynamicStridesValue);
        return !stridesAbsent;
    }

    /**
     * Emits the machine code for the array comparison.
     *
     * <p>First turns the array (and mask) registers into pointers to the first element to
     * compare. Then emits one of three variants: the constant-length compare, a jump table over
     * the stride combinations when dynamic strides are used, or a single compare specialised for
     * the constant scales. All variants end at the {@code done} label bound at the end.
     */
    @Override
    public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
        Register result = ValueUtil.asRegister((Value)this.resultValue);
        Label done = new Label();
        Register arrayA = ValueUtil.asRegister((Value)this.arrayAValue);
        Register arrayB = ValueUtil.asRegister((Value)this.arrayBValue);
        Register mask = this.withMask() ? ValueUtil.asRegister((Value)this.arrayMaskValue) : null;
        // Fold base offset and (constant or dynamic) offset into the array pointers.
        AMD64ArrayEqualsOp.loadBaseAddress(masm, arrayA, this.baseOffsetA, this.constOffsetA, this.offsetAValue);
        AMD64ArrayEqualsOp.loadBaseAddress(masm, arrayB, this.baseOffsetB, this.constOffsetB, this.offsetBValue);
        if (this.withMask()) {
            // The mask only gets its base offset applied.
            masm.leaq(mask, new AMD64Address(mask, this.baseOffsetMask));
        }
        if (this.canGenerateConstantLengthCompare) {
            this.emitConstantLengthArrayCompareBytes(masm, result);
        } else {
            Register length = ValueUtil.asRegister((Value)this.lengthValue);
            // The offset-A register has been consumed by loadBaseAddress above and now serves as scratch.
            Register tmp = ValueUtil.asRegister((Value)this.offsetAValueTemp);
            if (this.withDynamicStrides()) {
                assert (this.elementKind.isNumericInteger());
                // One label per jump-table entry (indices 0..8).
                Label[] variants = new Label[9];
                for (int i = 0; i < variants.length; ++i) {
                    variants[i] = new Label();
                }
                // Dispatch on the dynamic stride value to the matching variant.
                AMD64ControlFlow.RangeTableSwitchOp.emitJumpTable(crb, masm, tmp, ValueUtil.asRegister((Value)this.dynamicStridesValue), 0, 8, Arrays.stream(variants));
                // Equal strides share the Times1/Times1 code: the 4/4 entry falls through two
                // length doublings, the 2/2 entry through one, before reaching the 1/1 compare.
                masm.align(crb.target.wordSize * 2);
                masm.bind(variants[AMD64StrideUtil.getDirectStubCallIndex(AMD64Address.Scale.Times4, AMD64Address.Scale.Times4)]);
                masm.shll(length, 1);
                masm.align(crb.target.wordSize * 2);
                masm.bind(variants[AMD64StrideUtil.getDirectStubCallIndex(AMD64Address.Scale.Times2, AMD64Address.Scale.Times2)]);
                masm.shll(length, 1);
                masm.align(crb.target.wordSize * 2);
                masm.bind(variants[AMD64StrideUtil.getDirectStubCallIndex(AMD64Address.Scale.Times1, AMD64Address.Scale.Times1)]);
                this.emitArrayCompare(crb, masm, AMD64Address.Scale.Times1, AMD64Address.Scale.Times1, AMD64Address.Scale.Times1, result, arrayA, arrayB, mask, length, done, false);
                masm.jmp(done);
                // Mixed strides: code is emitted only for scaleA > scaleB. The mirrored entry
                // (scaleB, scaleA) swaps the two array pointers and falls into the same code.
                for (AMD64Address.Scale scaleA : new AMD64Address.Scale[]{AMD64Address.Scale.Times1, AMD64Address.Scale.Times2, AMD64Address.Scale.Times4}) {
                    for (AMD64Address.Scale scaleB : new AMD64Address.Scale[]{AMD64Address.Scale.Times1, AMD64Address.Scale.Times2, AMD64Address.Scale.Times4}) {
                        if (scaleA.log2 <= scaleB.log2) continue;
                        masm.align(crb.target.wordSize * 2);
                        masm.bind(variants[AMD64StrideUtil.getDirectStubCallIndex(scaleB, scaleA)]);
                        masm.movq(tmp, arrayA);
                        masm.movq(arrayA, arrayB);
                        masm.movq(arrayB, tmp);
                        masm.bind(variants[AMD64StrideUtil.getDirectStubCallIndex(scaleA, scaleB)]);
                        // The mask is addressed with scaleB here.
                        this.emitArrayCompare(crb, masm, scaleA, scaleB, scaleB, result, arrayA, arrayB, mask, length, done, false);
                        masm.jmp(done);
                    }
                }
            } else {
                // Constant strides: a single compare using the scales computed in the constructor.
                this.emitArrayCompare(crb, masm, this.argScaleA, this.argScaleB, this.argScaleMask, result, arrayA, arrayB, mask, length, done, true);
            }
        }
        masm.bind(done);
    }

    /**
     * Emits the two exits of a compare: binds {@code trueLabel}, sets {@code result} to 1 and
     * jumps to {@code done}; then binds {@code falseLabel} and clears {@code result}. The false
     * path does not jump; execution continues with whatever the caller emits next.
     *
     * @param shortJmp passed through to the jump to {@code done}
     */
    private static void emitReturnValue(AMD64MacroAssembler masm, Register result, Label trueLabel, Label falseLabel, Label done, boolean shortJmp) {
        masm.bind(trueLabel);
        masm.movl(result, 1);
        masm.jmp(done, shortJmp);
        masm.bind(falseLabel);
        // result = 0
        masm.xorl(result, result);
    }

    /**
     * Advances {@code array} in place so that it points at the first element to compare.
     *
     * <p>A non-negative {@code constantOffset} is folded into the displacement together with
     * {@code baseOffset}; otherwise the offset is read from the register holding
     * {@code dynamicOffset}.
     */
    private static void loadBaseAddress(AMD64MacroAssembler masm, Register array, int baseOffset, int constantOffset, Value dynamicOffset) {
        final AMD64Address start;
        if (constantOffset < 0) {
            start = new AMD64Address(array, ValueUtil.asRegister(dynamicOffset), AMD64Address.Scale.Times1, baseOffset);
        } else {
            start = new AMD64Address(array, constantOffset + baseOffset);
        }
        masm.leaq(array, start);
    }

    /**
     * Emits a complete compare for one combination of scales and writes the outcome into
     * {@code result} (1 for equal, 0 for different).
     *
     * <p>The length is first copied into {@code result}, which the stages below use as their
     * tail counter. With SSE4.1 a vectorised stage runs first. It is followed by the 8-byte and
     * tail compares when all three scales are identical, or by the element-wise compare
     * otherwise.
     */
    private void emitArrayCompare(CompilationResultBuilder crb, AMD64MacroAssembler masm, AMD64Address.Scale scaleA, AMD64Address.Scale scaleB, AMD64Address.Scale scaleMask, Register result, Register array1, Register array2, Register mask, Register length, Label done, boolean shortJmp) {
        final Label equal = new Label();
        final Label different = new Label();
        final boolean mixedScales = scaleA != scaleB || scaleA != scaleMask;

        masm.movl(result, length);
        if (masm.supports(AMD64.CPUFeature.SSE4_1)) {
            emitVectorCompare(crb, masm, scaleA, scaleB, scaleMask, result, array1, array2, mask, length, equal, different);
        }
        if (mixedScales) {
            emitDifferentKindsElementWiseCompare(crb, masm, scaleA, scaleB, scaleMask, result, array1, array2, mask, length, equal, different);
        } else {
            emit8ByteCompare(crb, masm, scaleA, scaleB, scaleMask, result, array1, array2, mask, length, equal, different);
            emitTailCompares(masm, scaleA, scaleB, scaleMask, result, array1, array2, mask, length, equal, different);
        }
        emitReturnValue(masm, result, equal, different, done, shortJmp);
    }

    /**
     * Emits the vectorised part of the compare (requires SSE4.1).
     *
     * <p>On entry {@code result} and {@code length} both hold the element count. Whole vectors
     * are compared in a loop, followed by one more vector load addressed relative to the end of
     * the looped region for the remaining elements. If fewer elements than one vector are
     * present, control reaches the end of this code with {@code length} and {@code result} both
     * holding the remaining element count, so the caller's scalar stages can continue.
     */
    private void emitVectorCompare(CompilationResultBuilder crb, AMD64MacroAssembler masm, AMD64Address.Scale scaleA, AMD64Address.Scale scaleB, AMD64Address.Scale scaleMask, Register result, Register arrayA, Register arrayB, Register mask, Register length, Label trueLabel, Label falseLabel) {
        assert (masm.supports(AMD64.CPUFeature.SSE4_1));
        AMD64Address.Scale maxScale = AMD64ArrayEqualsOp.max(scaleA, scaleB);
        Register vector1 = ValueUtil.asRegister((Value)this.vectorTemp[0]);
        Register vector2 = ValueUtil.asRegister((Value)this.vectorTemp[1]);
        Register vector3 = this.withMask() ? ValueUtil.asRegister((Value)this.vectorTemp[2]) : null;
        int elementsPerVector = AMD64ArrayEqualsOp.getElementsPerVector(this.vectorSize, maxScale);
        Label loop = new Label();
        Label compareTail = new Label();
        boolean requiresNaNCheck = this.elementKind.isNumericFloat();
        Label loopCheck = new Label();
        Label nanCheck = new Label();
        // result = number of elements left over after the whole vectors (tail count).
        masm.andl(result, elementsPerVector - 1);
        // length = number of elements covered by whole vectors; skip the loop if there are none.
        masm.andlAndJcc(length, ~(elementsPerVector - 1), AMD64Assembler.ConditionFlag.Zero, compareTail, false);
        // Point the arrays (and mask) past the looped region and index with a negative count
        // that runs up to zero.
        masm.leaq(arrayA, new AMD64Address(arrayA, length, scaleA, 0));
        masm.leaq(arrayB, new AMD64Address(arrayB, length, scaleB, 0));
        if (this.withMask()) {
            masm.leaq(mask, new AMD64Address(mask, length, scaleMask, 0));
        }
        masm.negq(length);
        masm.align(crb.target.wordSize * 2);
        masm.bind(loop);
        this.pmovSZx(masm, this.vectorSize, vector1, maxScale, arrayA, length, 0, scaleA);
        this.pmovSZx(masm, this.vectorSize, vector2, maxScale, arrayB, length, 0, scaleB);
        if (this.withMask()) {
            // OR the mask into the elements of array A before comparing.
            this.pmovSZx(masm, this.vectorSize, vector3, maxScale, mask, length, 0, scaleMask);
            AMD64MacroAssembler.por(masm, this.vectorSize, vector1, vector3);
        }
        AMD64ArrayEqualsOp.emitVectorCmp(masm, vector1, vector2, this.vectorSize);
        // A bitwise mismatch is final for integer kinds; float kinds get an element-wise re-check.
        masm.jcc(AMD64Assembler.ConditionFlag.NotZero, requiresNaNCheck ? nanCheck : falseLabel, requiresNaNCheck);
        masm.bind(loopCheck);
        masm.addqAndJcc(length, elementsPerVector, AMD64Assembler.ConditionFlag.NotZero, loop, true);
        // No leftover elements: the arrays are equal.
        masm.testlAndJcc(result, result, AMD64Assembler.ConditionFlag.Zero, trueLabel, false);
        if (requiresNaNCheck) {
            assert (!this.withMask());
            Label unalignedCheck = new Label();
            // Out-of-line slow path for the loop: skipped on the normal path, entered via nanCheck.
            masm.jmpb(unalignedCheck);
            masm.bind(nanCheck);
            this.emitFloatCompareWithinRange(crb, masm, scaleA, scaleB, arrayA, arrayB, length, 0, falseLabel, elementsPerVector);
            masm.jmpb(loopCheck);
            masm.bind(unalignedCheck);
        }
        // Final vector for the leftover elements, addressed by the tail count minus one vector
        // width; presumably it overlaps elements the loop has already compared - TODO confirm
        // against AMD64MacroAssembler.pmovSZx's displacement handling.
        this.pmovSZx(masm, this.vectorSize, vector1, maxScale, arrayA, result, -this.vectorSize.getBytes(), scaleA);
        this.pmovSZx(masm, this.vectorSize, vector2, maxScale, arrayB, result, -this.vectorSize.getBytes(), scaleB);
        if (this.withMask()) {
            this.pmovSZx(masm, this.vectorSize, vector3, maxScale, mask, result, -this.vectorSize.getBytes(), scaleMask);
            AMD64MacroAssembler.por(masm, this.vectorSize, vector1, vector3);
        }
        AMD64ArrayEqualsOp.emitVectorCmp(masm, vector1, vector2, this.vectorSize);
        if (requiresNaNCheck) {
            assert (!this.withMask());
            masm.jcc(AMD64Assembler.ConditionFlag.Zero, trueLabel);
            this.emitFloatCompareWithinRange(crb, masm, scaleA, scaleB, arrayA, arrayB, result, -this.vectorSize.getBytes(), falseLabel, elementsPerVector);
        } else {
            masm.jcc(AMD64Assembler.ConditionFlag.NotZero, falseLabel);
        }
        masm.jmp(trueLabel);
        // Fewer elements than one vector: hand the remaining count back in length for the
        // scalar stages emitted by the caller.
        masm.bind(compareTail);
        masm.movl(length, result);
    }

    /**
     * Returns how many elements of the given scale fit into a vector of size {@code vSize},
     * i.e. the vector's byte size shifted right by {@code maxScale.log2}.
     */
    private static int getElementsPerVector(AVXKind.AVXSize vSize, AMD64Address.Scale maxScale) {
        final int vectorBytes = vSize.getBytes();
        final int shift = maxScale.log2;
        return vectorBytes >> shift;
    }

    /** Vector load without an index register; forwards to the indexed overload with {@link Register#None}. */
    private void pmovSZx(AMD64MacroAssembler asm, AVXKind.AVXSize size, Register dst, AMD64Address.Scale maxScale, Register src, int displacement, AMD64Address.Scale scale) {
        final Register noIndex = Register.None;
        pmovSZx(asm, size, dst, maxScale, src, noIndex, displacement, scale);
    }

    /**
     * Vector load from {@code src}/{@code index}/{@code displacement}; forwards to
     * {@code AMD64MacroAssembler.pmovSZx} with this op's extend mode, {@code maxScale} as the
     * destination scale and {@code scale} as the source scale.
     */
    private void pmovSZx(AMD64MacroAssembler asm, AVXKind.AVXSize size, Register dst, AMD64Address.Scale maxScale, Register src, Register index, int displacement, AMD64Address.Scale scale) {
        final AMD64MacroAssembler.ExtendMode mode = extendMode;
        AMD64MacroAssembler.pmovSZx(asm, size, dst, mode, maxScale, src, scale, index, displacement);
    }

    /**
     * Emits a bitwise equality check of two vector registers: XORs {@code vector2} into
     * {@code vector1} (overwriting {@code vector1}) and then tests {@code vector1}. Callers branch
     * on {@code NotZero} to their mismatch path.
     */
    private static void emitVectorCmp(AMD64MacroAssembler masm, Register vector1, Register vector2, AVXKind.AVXSize size) {
        AMD64MacroAssembler.pxor(masm, size, vector1, vector2);
        AMD64MacroAssembler.ptest(masm, size, vector1);
    }

    /**
     * Emits a compare in 8-byte steps for arrays that share one scale (also used by the mask).
     *
     * <p>Mirrors the structure of the vector compare: a loop over whole 8-byte groups, then one
     * more 8-byte load addressed relative to the end of the looped region for leftover elements.
     * If fewer elements than one 8-byte group are present, control reaches the end of this code
     * with {@code length} and {@code result} both holding the remaining element count.
     */
    private void emit8ByteCompare(CompilationResultBuilder crb, AMD64MacroAssembler masm, AMD64Address.Scale scaleA, AMD64Address.Scale scaleB, AMD64Address.Scale scaleMask, Register result, Register arrayA, Register arrayB, Register mask, Register length, Label trueLabel, Label falseLabel) {
        assert (scaleA == scaleB && scaleA == scaleMask);
        Label loop = new Label();
        Label compareTail = new Label();
        // Number of elements in one 8-byte group.
        int elementsPerVector = 8 >> scaleA.log2;
        boolean requiresNaNCheck = this.elementKind.isNumericFloat();
        Label loopCheck = new Label();
        Label nanCheck = new Label();
        // The offset-A temp register serves as scratch for the loaded 8 bytes.
        Register temp = ValueUtil.asRegister((Value)this.offsetAValueTemp);
        // result = leftover element count; length = elements covered by whole 8-byte groups.
        masm.andl(result, elementsPerVector - 1);
        masm.andlAndJcc(length, ~(elementsPerVector - 1), AMD64Assembler.ConditionFlag.Zero, compareTail, false);
        // Point past the looped region and index with a negative count that runs up to zero.
        masm.leaq(arrayA, new AMD64Address(arrayA, length, scaleA, 0));
        masm.leaq(arrayB, new AMD64Address(arrayB, length, scaleB, 0));
        if (this.withMask()) {
            masm.leaq(mask, new AMD64Address(mask, length, scaleMask, 0));
        }
        masm.negq(length);
        masm.align(crb.target.wordSize * 2);
        masm.bind(loop);
        masm.movq(temp, new AMD64Address(arrayA, length, scaleA, 0));
        if (this.withMask()) {
            // OR the mask into the bytes of array A before comparing.
            masm.orq(temp, new AMD64Address(mask, length, scaleMask, 0));
        }
        // A bitwise mismatch is final for integer kinds; float kinds get an element-wise re-check.
        masm.cmpqAndJcc(temp, new AMD64Address(arrayB, length, scaleB, 0), AMD64Assembler.ConditionFlag.NotEqual, requiresNaNCheck ? nanCheck : falseLabel, requiresNaNCheck);
        masm.bind(loopCheck);
        masm.addqAndJcc(length, elementsPerVector, AMD64Assembler.ConditionFlag.NotZero, loop, true);
        // No leftover elements: the arrays are equal.
        masm.testlAndJcc(result, result, AMD64Assembler.ConditionFlag.Zero, trueLabel, false);
        if (requiresNaNCheck) {
            assert (!this.withMask());
            Label unalignedCheck = new Label();
            // Out-of-line slow path for the loop: skipped on the normal path, entered via nanCheck.
            masm.jmpb(unalignedCheck);
            masm.bind(nanCheck);
            // Re-check every element of the mismatching 8-byte group individually. For 8-byte
            // elements the bitwise compare inside emitFloatCompare is skipped.
            for (int offset = 0; offset < 8; offset += scaleA.value) {
                this.emitFloatCompare(masm, scaleA, scaleB, arrayA, arrayB, length, offset, falseLabel, scaleA.value == 8);
            }
            masm.jmpb(loopCheck);
            masm.bind(unalignedCheck);
        }
        // Final 8-byte load ending at the last leftover element (index = tail count, displacement -8).
        masm.movq(temp, new AMD64Address(arrayA, result, scaleA, -8));
        if (requiresNaNCheck) {
            assert (!this.withMask());
            masm.cmpqAndJcc(temp, new AMD64Address(arrayB, result, scaleB, -8), AMD64Assembler.ConditionFlag.Equal, trueLabel, false);
            for (int offset = 0; offset < 8; offset += scaleA.value) {
                this.emitFloatCompare(masm, scaleA, scaleB, arrayA, arrayB, result, -8 + offset, falseLabel, scaleA.value == 8);
            }
        } else {
            if (this.withMask()) {
                masm.orq(temp, new AMD64Address(mask, result, scaleMask, -8));
            }
            masm.cmpqAndJcc(temp, new AMD64Address(arrayB, result, scaleB, -8), AMD64Assembler.ConditionFlag.NotEqual, falseLabel, true);
        }
        masm.jmpb(trueLabel);
        // Fewer elements than one 8-byte group: hand the remaining count back in length.
        masm.bind(compareTail);
        masm.movl(length, result);
    }

    /**
     * Emits the compares for the last elements that do not fill an 8-byte group, for arrays that
     * share one scale: at most one 4-byte, one 2-byte and one 1-byte compare, each selected by
     * testing a bit of the remaining element count in {@code result}.
     *
     * <p>Nothing is emitted for 8-byte elements. A mismatch jumps to {@code falseLabel}. When all
     * emitted compares pass, execution either jumps to {@code trueLabel} or falls out of the end
     * of this code, where the caller binds {@code trueLabel} next. The {@code length} register is
     * used as scratch by the 2-byte and 1-byte compares.
     */
    private void emitTailCompares(AMD64MacroAssembler masm, AMD64Address.Scale scaleA, AMD64Address.Scale scaleB, AMD64Address.Scale scaleMask, Register result, Register arrayA, Register arrayB, Register mask, Register length, Label trueLabel, Label falseLabel) {
        assert (scaleA == scaleB && scaleA == scaleMask);
        Label compare2Bytes = new Label();
        Label compare1Byte = new Label();
        Register temp = ValueUtil.asRegister((Value)this.offsetAValueTemp);
        if (scaleA.value <= 4) {
            // 4-byte step: taken only if the bit for 4 bytes' worth of elements is set in the count.
            masm.testlAndJcc(result, 4 >> scaleA.log2, AMD64Assembler.ConditionFlag.Zero, compare2Bytes, true);
            masm.movl(temp, new AMD64Address(arrayA, 0));
            if (this.elementKind == JavaKind.Float) {
                assert (!this.withMask());
                // Bitwise equal is sufficient; otherwise defer to the float compare (bitwise
                // part skipped, it was just done).
                masm.cmplAndJcc(temp, new AMD64Address(arrayB, 0), AMD64Assembler.ConditionFlag.Equal, trueLabel, true);
                this.emitFloatCompare(masm, scaleA, scaleB, arrayA, arrayB, Register.None, 0, falseLabel, true);
                masm.jmpb(trueLabel);
            } else {
                if (this.withMask()) {
                    masm.orl(temp, new AMD64Address(mask, 0));
                }
                masm.cmplAndJcc(temp, new AMD64Address(arrayB, 0), AMD64Assembler.ConditionFlag.NotEqual, falseLabel, true);
            }
            if (scaleA.value <= 2) {
                // Step over the 4 bytes just compared.
                masm.leaq(arrayA, new AMD64Address(arrayA, 4));
                masm.leaq(arrayB, new AMD64Address(arrayB, 4));
                if (this.withMask()) {
                    masm.leaq(mask, new AMD64Address(mask, 4));
                }
                // 2-byte step.
                masm.bind(compare2Bytes);
                masm.testlAndJcc(result, 2 >> scaleA.log2, AMD64Assembler.ConditionFlag.Zero, compare1Byte, true);
                masm.movzwl(temp, new AMD64Address(arrayA, 0));
                if (this.withMask()) {
                    // length is free to be used as scratch at this point.
                    masm.movzwl(length, new AMD64Address(mask, 0));
                    masm.orl(temp, length);
                }
                masm.movzwl(length, new AMD64Address(arrayB, 0));
                masm.cmplAndJcc(temp, length, AMD64Assembler.ConditionFlag.NotEqual, falseLabel, true);
                if (scaleA.value <= 1) {
                    // Step over the 2 bytes just compared.
                    masm.leaq(arrayA, new AMD64Address(arrayA, 2));
                    masm.leaq(arrayB, new AMD64Address(arrayB, 2));
                    if (this.withMask()) {
                        masm.leaq(mask, new AMD64Address(mask, 2));
                    }
                    // 1-byte step.
                    masm.bind(compare1Byte);
                    masm.testlAndJcc(result, 1, AMD64Assembler.ConditionFlag.Zero, trueLabel, true);
                    masm.movzbl(temp, new AMD64Address(arrayA, 0));
                    if (this.withMask()) {
                        masm.movzbl(length, new AMD64Address(mask, 0));
                        masm.orl(temp, length);
                    }
                    masm.movzbl(length, new AMD64Address(arrayB, 0));
                    masm.cmplAndJcc(temp, length, AMD64Assembler.ConditionFlag.NotEqual, falseLabel, true);
                } else {
                    // No 1-byte step for this scale; the label is still bound because it is a jump target.
                    masm.bind(compare1Byte);
                }
            } else {
                // No 2-byte step for this scale; the label is still bound because it is a jump target.
                masm.bind(compare2Bytes);
            }
        }
    }

    /**
     * Emits a scalar, element-by-element compare for arrays whose scales differ (integer element
     * kinds only). Each element is loaded with {@code movSZx} using this op's extend mode, so
     * values of different widths can be compared as 64-bit quantities.
     *
     * <p>The main loop handles two elements per iteration; a single leftover element is compared
     * afterwards. A mismatch jumps to {@code falseLabel}, success jumps to {@code trueLabel}.
     */
    private void emitDifferentKindsElementWiseCompare(CompilationResultBuilder crb, AMD64MacroAssembler masm, AMD64Address.Scale scaleA, AMD64Address.Scale scaleB, AMD64Address.Scale scaleMask, Register result, Register array1, Register array2, Register mask, Register length, Label trueLabel, Label falseLabel) {
        int i;
        assert (scaleA != scaleB || scaleA != scaleMask);
        assert (this.elementKind.isNumericInteger());
        Label loop = new Label();
        Label compareTail = new Label();
        int elementsPerLoopIteration = 2;
        // Both offset temp registers serve as scratch for the loaded elements.
        Register tmp1 = ValueUtil.asRegister((Value)this.offsetAValueTemp);
        Register tmp2 = ValueUtil.asRegister((Value)this.offsetBValueTemp);
        // result = leftover element count; length = elements covered by whole loop iterations.
        masm.andl(result, elementsPerLoopIteration - 1);
        masm.andlAndJcc(length, ~(elementsPerLoopIteration - 1), AMD64Assembler.ConditionFlag.Zero, compareTail, true);
        // Point past the looped region and index with a negative count that runs up to zero.
        masm.leaq(array1, new AMD64Address(array1, length, scaleA, 0));
        masm.leaq(array2, new AMD64Address(array2, length, scaleB, 0));
        if (this.withMask()) {
            masm.leaq(mask, new AMD64Address(mask, length, scaleMask, 0));
        }
        masm.negq(length);
        masm.align(crb.target.wordSize * 2);
        masm.bind(loop);
        // Unrolled body: compare element i of the current pair.
        for (i = 0; i < elementsPerLoopIteration; ++i) {
            AMD64MacroAssembler.movSZx(masm, scaleA, this.extendMode, tmp1, new AMD64Address(array1, length, scaleA, i << scaleA.log2));
            if (this.withMask()) {
                // OR the mask element into the element of the first array.
                AMD64MacroAssembler.movSZx(masm, scaleMask, this.extendMode, tmp2, new AMD64Address(mask, length, scaleMask, i << scaleMask.log2));
                masm.orq(tmp1, tmp2);
            }
            AMD64MacroAssembler.movSZx(masm, scaleB, this.extendMode, tmp2, new AMD64Address(array2, length, scaleB, i << scaleB.log2));
            masm.cmpqAndJcc(tmp1, tmp2, AMD64Assembler.ConditionFlag.NotEqual, falseLabel, true);
        }
        masm.addqAndJcc(length, elementsPerLoopIteration, AMD64Assembler.ConditionFlag.NotZero, loop, true);
        // length is zero here on both paths (loop finished, or loop skipped because it was zero).
        masm.bind(compareTail);
        masm.testlAndJcc(result, result, AMD64Assembler.ConditionFlag.Zero, trueLabel, true);
        // Leftover elements: at most elementsPerLoopIteration - 1, i.e. one with the current unroll factor.
        for (i = 0; i < elementsPerLoopIteration - 1; ++i) {
            AMD64MacroAssembler.movSZx(masm, scaleA, this.extendMode, tmp1, new AMD64Address(array1, length, scaleA, 0));
            if (this.withMask()) {
                AMD64MacroAssembler.movSZx(masm, scaleMask, this.extendMode, tmp2, new AMD64Address(mask, length, scaleMask, 0));
                masm.orq(tmp1, tmp2);
            }
            AMD64MacroAssembler.movSZx(masm, scaleB, this.extendMode, tmp2, new AMD64Address(array2, length, scaleB, 0));
            masm.cmpqAndJcc(tmp1, tmp2, AMD64Assembler.ConditionFlag.NotEqual, falseLabel, true);
            if (i < elementsPerLoopIteration - 2) {
                // More leftover elements may follow: advance the index and count down.
                masm.incrementq(length, 1);
                masm.decqAndJcc(result, AMD64Assembler.ConditionFlag.Zero, trueLabel, true);
                continue;
            }
            masm.jmpb(trueLabel);
        }
    }

    /**
     * Emits a NaN test for the floating point element located at {@code src}.
     *
     * <p>The element is loaded into the {@code tempXMM} register and compared with itself using
     * UCOMIS. A self-comparison is unordered (parity flag set) only when the value is NaN, so the
     * emitted jump on "no parity" is taken for every non-NaN value; execution falls through when
     * the value is NaN.
     *
     * @param masm assembler receiving the instructions
     * @param src address of the float or double element to test
     * @param branchIfNonNaN jump target used when the element is not NaN
     */
    private void emitNaNCheck(AMD64MacroAssembler masm, AMD64Address src, Label branchIfNonNaN) {
        assert this.elementKind.isNumericFloat();
        final Register scratch = ValueUtil.asRegister(this.tempXMM);
        final AMD64BaseAssembler.OperandSize width;
        if (this.elementKind == JavaKind.Float) {
            masm.movflt(scratch, src);
            width = AMD64BaseAssembler.OperandSize.PS;
        } else {
            masm.movdbl(scratch, src);
            width = AMD64BaseAssembler.OperandSize.PD;
        }
        // Compare the loaded value against itself; only NaN yields an unordered result.
        AMD64Assembler.SSEOp.UCOMIS.emit(masm, width, scratch, scratch);
        masm.jcc(AMD64Assembler.ConditionFlag.NoParity, branchIfNonNaN);
    }

    /**
     * Emits code checking that the two floating point elements at
     * {@code arrayA[index * scaleA + offset]} and {@code arrayB[index * scaleB + offset]} are either
     * bitwise identical or both NaN.
     *
     * <p>Unless {@code skipBitwiseCompare} is set, the raw bits are compared first (32 bit for
     * {@code Float}, 64 bit otherwise) and a match skips the NaN checks. Otherwise each element is
     * NaN-checked in turn, and any element that is not NaN causes a jump to {@code falseLabel}.
     *
     * @param masm assembler receiving the instructions
     * @param scaleA index scale applied for {@code arrayA}
     * @param scaleB index scale applied for {@code arrayB}
     * @param arrayA base register of the first region
     * @param arrayB base register of the second region
     * @param index index register shared by both addresses
     * @param offset constant displacement added to both addresses
     * @param falseLabel jump target used when the elements are considered different
     * @param skipBitwiseCompare when {@code true}, no bitwise comparison is emitted and only the
     *            NaN checks remain
     */
    private void emitFloatCompare(AMD64MacroAssembler masm, AMD64Address.Scale scaleA, AMD64Address.Scale scaleB, Register arrayA, Register arrayB, Register index, int offset, Label falseLabel, boolean skipBitwiseCompare) {
        final AMD64Address lhs = new AMD64Address(arrayA, index, scaleA, offset);
        final AMD64Address rhs = new AMD64Address(arrayB, index, scaleB, offset);
        final Label sameBits = new Label();

        if (!skipBitwiseCompare) {
            // Fast path: identical bit patterns need no NaN handling.
            final Register scratch = ValueUtil.asRegister(this.offsetAValueTemp);
            if (this.elementKind != JavaKind.Float) {
                masm.movq(scratch, lhs);
                masm.cmpqAndJcc(scratch, rhs, AMD64Assembler.ConditionFlag.Equal, sameBits, true);
            } else {
                masm.movl(scratch, lhs);
                masm.cmplAndJcc(scratch, rhs, AMD64Assembler.ConditionFlag.Equal, sameBits, true);
            }
        }

        // Bits differ (or were not compared): the elements only count as equal if both are NaN.
        emitNaNCheck(masm, lhs, falseLabel);
        emitNaNCheck(masm, rhs, falseLabel);

        masm.bind(sameBits);
    }

    /**
     * Emits a loop that applies {@link #emitFloatCompare} to {@code range} consecutive elements,
     * starting at the current value of {@code index}.
     *
     * <p>The {@code offsetBValueTemp} register serves as loop counter: it starts at {@code -range}
     * and is incremented until it reaches zero. {@code index} is advanced by one per iteration and
     * reduced by {@code range} again once the loop has completed, so it holds its initial value on
     * the fall-through path.
     *
     * @param crb compilation result builder, used for the target word size when aligning the loop
     * @param masm assembler receiving the instructions
     * @param scaleA index scale applied for {@code arrayA}
     * @param scaleB index scale applied for {@code arrayB}
     * @param arrayA base register of the first region
     * @param arrayB base register of the second region
     * @param index index register; modified inside the loop and restored afterwards
     * @param offset constant displacement added to both addresses
     * @param falseLabel jump target used when a pair of elements is considered different
     * @param range number of elements to compare; a value of 1 suppresses the bitwise comparison
     *            in the emitted element check
     */
    private void emitFloatCompareWithinRange(CompilationResultBuilder crb, AMD64MacroAssembler masm, AMD64Address.Scale scaleA, AMD64Address.Scale scaleB, Register arrayA, Register arrayB, Register index, int offset, Label falseLabel, int range) {
        assert this.elementKind.isNumericFloat();
        final Label nextElement = new Label();
        final Register counter = ValueUtil.asRegister(this.offsetBValueTemp);
        final boolean singleElement = range == 1;

        // counter = -range, counted upwards to zero
        masm.movq(counter, range);
        masm.negq(counter);

        // Align the loop head.
        masm.align(crb.target.wordSize * 2);
        masm.bind(nextElement);
        emitFloatCompare(masm, scaleA, scaleB, arrayA, arrayB, index, offset, falseLabel, singleElement);
        masm.incrementq(index, 1);
        masm.incqAndJcc(counter, AMD64Assembler.ConditionFlag.NotZero, nextElement, true);

        // All elements matched: undo the increments applied to the index register.
        masm.subq(index, range);
    }

    /**
     * Emits branch-free code comparing two memory regions whose element count is the compile-time
     * constant returned by {@code constantLength()}.
     *
     * <p>{@code result} is set to 1 first. The emitted code then XORs the contents of array A
     * (OR-ed with the mask contents if {@code withMask()} is true) with the contents of array B
     * and uses a conditional move to overwrite {@code result} with 0 when any XOR result is
     * non-zero. A constant length of 0 emits nothing beyond {@code result = 1}.
     *
     * <p>Two strategies are used, depending on whether the data fills at least one vector:
     * <ul>
     * <li>fewer elements than one vector holds: one or two general purpose register loads of 1, 2,
     * 4 or 8 bytes, where the second load is placed so that it ends at the last byte of the region
     * and may overlap the first;</li>
     * <li>otherwise: one or two vector loads via {@code pmovSZx}, the second one again placed so
     * that it ends at the end of the region.</li>
     * </ul>
     *
     * <p>The emitted code writes to the registers of {@code arrayAValue}, {@code arrayBValue} and
     * {@code lengthValue} (scalar path) or {@code arrayAValue} and the vector temporaries (vector
     * path), so their incoming values are not preserved.
     *
     * @param asm assembler receiving the instructions
     * @param result register receiving 1 if the regions match, 0 otherwise
     */
    private void emitConstantLengthArrayCompareBytes(AMD64MacroAssembler asm, Register result) {
        int elementsPerVector;
        // Assume equality; only a detected difference overwrites this value.
        asm.movl(result, 1);
        if (this.constantLength() == 0) {
            return;
        }
        AMD64Address.Scale maxScale = AMD64ArrayEqualsOp.max(this.argScaleA, this.argScaleB);
        Register arrayA = ValueUtil.asRegister((Value)this.arrayAValue);
        Register arrayB = ValueUtil.asRegister((Value)this.arrayBValue);
        Register mask = this.withMask() ? ValueUtil.asRegister((Value)this.arrayMaskValue) : null;
        Register vector1 = ValueUtil.asRegister((Value)this.vectorTemp[0]);
        Register vector2 = ValueUtil.asRegister((Value)this.vectorTemp[1]);
        Register vector3 = ValueUtil.asRegister((Value)this.vectorTemp[2]);
        Register vector4 = this.withMask() ? ValueUtil.asRegister((Value)this.vectorTemp[3]) : null;
        // The register of lengthValue is used as a scratch register in the scalar path below.
        Register tmp = ValueUtil.asRegister((Value)this.lengthValue);
        // At most two (possibly overlapping) vector loads are emitted, which bounds the length.
        GraalError.guarantee(this.constantLength() <= AMD64ArrayEqualsOp.getElementsPerVector(this.vectorSize, maxScale) * 2, "constant length too long for specialized arrayEquals!");
        AVXKind.AVXSize vSize = this.vectorSize;
        // Fall back to XMM sized vectors when the data does not fill a vector of the configured size.
        if (this.constantLength() < AMD64ArrayEqualsOp.getElementsPerVector(this.vectorSize, maxScale)) {
            vSize = AVXKind.AVXSize.XMM;
        }
        if ((elementsPerVector = AMD64ArrayEqualsOp.getElementsPerVector(vSize, maxScale)) > this.constantLength()) {
            // Scalar path: the region is shorter than one vector. All strides must be identical here.
            assert (this.argScaleA == this.argScaleB && this.argScaleA == this.argScaleMask);
            int byteLength = this.constantLength() << this.argScaleA.log2;
            // Load width: 1 byte below 2 bytes of data, 2 below 4, 4 below 8, otherwise 8.
            AMD64Address.Scale movScale = byteLength < 2 ? AMD64Address.Scale.Times1 : (byteLength < 4 ? AMD64Address.Scale.Times2 : (byteLength < 8 ? AMD64Address.Scale.Times4 : AMD64Address.Scale.Times8));
            AMD64MacroAssembler.movSZx(asm, movScale, this.extendMode, tmp, new AMD64Address(arrayA));
            if (this.withMask()) {
                AMD64ArrayEqualsOp.emitOrBytes(asm, tmp, new AMD64Address(mask, 0), movScale);
            }
            if (byteLength > movScale.value) {
                // One load does not cover the region: compare the head, then a second chunk that
                // ends exactly at the last byte (offset byteLength - movScale.value).
                AMD64ArrayEqualsOp.emitXorBytes(asm, tmp, new AMD64Address(arrayB), movScale);
                // The arrayA register is overwritten with the tail chunk from here on.
                AMD64MacroAssembler.movSZx(asm, movScale, this.extendMode, arrayA, new AMD64Address(arrayA, byteLength - movScale.value));
                if (this.withMask()) {
                    AMD64ArrayEqualsOp.emitOrBytes(asm, arrayA, new AMD64Address(mask, byteLength - movScale.value), movScale);
                }
                AMD64ArrayEqualsOp.emitXorBytes(asm, arrayA, new AMD64Address(arrayB, byteLength - movScale.value), movScale);
                // Zero the arrayB register before the flag-setting OR, because XOR itself changes
                // the flags; the zeroed register is the source of the conditional move.
                asm.xorq(arrayB, arrayB);
                // Combine both XOR results; the result is non-zero iff any compared bit differed.
                asm.orq(tmp, arrayA);
                asm.cmovl(AMD64Assembler.ConditionFlag.NotZero, result, arrayB);
            } else {
                // A single load covers the whole region. Zero the arrayA register first so that the
                // flags consumed by the conditional move come from the data XOR.
                asm.xorq(arrayA, arrayA);
                AMD64ArrayEqualsOp.emitXorBytes(asm, tmp, new AMD64Address(arrayB), movScale);
                asm.cmovl(AMD64Assembler.ConditionFlag.NotZero, result, arrayA);
            }
        } else {
            // Vector path: load the first vector of each region.
            // NOTE(review): pmovSZx presumably extends elements from the given stride to maxScale
            // while loading - confirm against its definition.
            this.pmovSZx(asm, vSize, vector1, maxScale, arrayA, 0, this.argScaleA);
            this.pmovSZx(asm, vSize, vector2, maxScale, arrayB, 0, this.argScaleB);
            if (this.withMask()) {
                this.pmovSZx(asm, vSize, vector4, maxScale, mask, 0, this.argScaleMask);
                AMD64MacroAssembler.por(asm, vSize, vector1, vector4);
            }
            AMD64MacroAssembler.pxor(asm, vSize, vector1, vector2);
            if (this.constantLength() > elementsPerVector) {
                // A second vector is needed; it is positioned to end at the end of the region
                // (measured in maxScale sized elements) and may overlap the first one.
                int endOffset = (this.constantLength() << maxScale.log2) - vSize.getBytes();
                this.pmovSZx(asm, vSize, vector3, maxScale, arrayA, endOffset, this.argScaleA);
                this.pmovSZx(asm, vSize, vector2, maxScale, arrayB, endOffset, this.argScaleB);
                if (this.withMask()) {
                    this.pmovSZx(asm, vSize, vector4, maxScale, mask, endOffset, this.argScaleMask);
                    AMD64MacroAssembler.por(asm, vSize, vector3, vector4);
                }
                AMD64MacroAssembler.pxor(asm, vSize, vector3, vector2);
                // Merge the differences of both vectors into vector1.
                AMD64MacroAssembler.por(asm, vSize, vector1, vector3);
            }
            // Zero the arrayA register before ptest so the flags tested by cmov stem from ptest.
            asm.xorq(arrayA, arrayA);
            AMD64MacroAssembler.ptest(asm, vSize, vector1);
            asm.cmovl(AMD64Assembler.ConditionFlag.NotZero, result, arrayA);
        }
    }

    /**
     * Emits {@code dst |= [src]} using the operand width that corresponds to {@code scale}.
     *
     * @param asm assembler receiving the instruction
     * @param dst destination register, also the first operand
     * @param src memory operand
     * @param scale selects the operand width, see {@link #getOperandSize}
     */
    private static void emitOrBytes(AMD64MacroAssembler asm, Register dst, AMD64Address src, AMD64Address.Scale scale) {
        final AMD64BaseAssembler.OperandSize width = getOperandSize(scale);
        AMD64Assembler.AMD64BinaryArithmetic.OR.getRMOpcode(width).emit(asm, width, dst, src);
    }

    /**
     * Emits {@code dst ^= [src]} using the operand width that corresponds to {@code scale}.
     *
     * @param asm assembler receiving the instruction
     * @param dst destination register, also the first operand
     * @param src memory operand
     * @param scale selects the operand width, see {@link #getOperandSize}
     */
    private static void emitXorBytes(AMD64MacroAssembler asm, Register dst, AMD64Address src, AMD64Address.Scale scale) {
        final AMD64BaseAssembler.OperandSize width = getOperandSize(scale);
        AMD64Assembler.AMD64BinaryArithmetic.XOR.getRMOpcode(width).emit(asm, width, dst, src);
    }

    /**
     * Maps an address scale to the general purpose operand size of the same width.
     *
     * @param size scale to translate: {@code Times1}, {@code Times2}, {@code Times4} or
     *            {@code Times8}
     * @return {@code BYTE}, {@code WORD}, {@code DWORD} or {@code QWORD} respectively
     * @throws IllegalStateException if {@code size} is none of the scales listed above
     */
    private static AMD64BaseAssembler.OperandSize getOperandSize(AMD64Address.Scale size) {
        switch (size) {
            case Times1:
                return AMD64BaseAssembler.OperandSize.BYTE;
            case Times2:
                return AMD64BaseAssembler.OperandSize.WORD;
            case Times4:
                return AMD64BaseAssembler.OperandSize.DWORD;
            case Times8:
                return AMD64BaseAssembler.OperandSize.QWORD;
            default:
                // Name the offending value so a failure can be diagnosed from the stack trace alone.
                throw new IllegalStateException("unsupported scale: " + size);
        }
    }

    /**
     * Returns the larger of two scales, compared by their {@code value} field.
     *
     * @param a first scale
     * @param b second scale
     * @return {@code a} if its value is strictly greater than that of {@code b}, otherwise
     *         {@code b} (so {@code b} is returned when both values are equal)
     */
    public static AMD64Address.Scale max(AMD64Address.Scale a, AMD64Address.Scale b) {
        if (a.value > b.value) {
            return a;
        }
        return b;
    }
}

