/*
 * Decompiled with CFR 0.152.
 */
package org.graalvm.compiler.lir.amd64;

import java.util.Objects;
import jdk.vm.ci.amd64.AMD64;
import jdk.vm.ci.amd64.AMD64Kind;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.RegisterValue;
import jdk.vm.ci.code.ValueUtil;
import jdk.vm.ci.meta.AllocatableValue;
import jdk.vm.ci.meta.JavaConstant;
import jdk.vm.ci.meta.JavaKind;
import jdk.vm.ci.meta.PlatformKind;
import jdk.vm.ci.meta.Value;
import org.graalvm.compiler.asm.Label;
import org.graalvm.compiler.asm.amd64.AMD64Address;
import org.graalvm.compiler.asm.amd64.AMD64Assembler;
import org.graalvm.compiler.asm.amd64.AMD64BaseAssembler;
import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler;
import org.graalvm.compiler.asm.amd64.AVXKind;
import org.graalvm.compiler.core.common.LIRKind;
import org.graalvm.compiler.debug.GraalError;
import org.graalvm.compiler.lir.ConstantValue;
import org.graalvm.compiler.lir.LIRInstruction;
import org.graalvm.compiler.lir.LIRInstructionClass;
import org.graalvm.compiler.lir.Opcode;
import org.graalvm.compiler.lir.amd64.AMD64ComplexVectorOp;
import org.graalvm.compiler.lir.asm.CompilationResultBuilder;
import org.graalvm.compiler.lir.gen.LIRGeneratorTool;

@Opcode(value="AMD64_ARRAY_INDEX_OF")
public final class AMD64ArrayIndexOfOp
extends AMD64ComplexVectorOp {
    // Instruction class descriptor for this op; handed to the superclass constructor.
    public static final LIRInstructionClass<AMD64ArrayIndexOfOp> TYPE = LIRInstructionClass.create(AMD64ArrayIndexOfOp.class);
    // Fixed registers that movParamsAndCreate moves the operands into before creating the op.
    private static final Register REG_ARRAY = AMD64.rsi;
    private static final Register REG_OFFSET = AMD64.rax;
    private static final Register REG_LENGTH = AMD64.rdx;
    private static final Register REG_FROM_INDEX = AMD64.rdi;
    private static final Register REG_SEARCH_VALUE_1 = AMD64.rcx;
    private static final Register REG_SEARCH_VALUE_2 = AMD64.r8;
    // Element kind of the searched array; the constructor asserts Byte, Char or Int.
    private final JavaKind valueKind;
    // Number of search values; the constructor asserts 1..4.
    private final int nValues;
    // When set, emitCode matches two adjacent array elements against search values 1 and 2.
    private final boolean findTwoConsecutive;
    // When set, array data is OR-ed with mask value(s) before it is compared.
    private final boolean withMask;
    // Vector kind returned by getVectorKind(valueKind); used for the vector temporaries.
    private final AMD64Kind vectorKind;
    // Address scale derived from the array index scale of valueKind.
    private final AMD64Address.Scale arrayIndexScale;
    // Displacement added to the array pointer when emitCode computes the base address.
    private final int arrayBaseOffset;
    // Array offset when it is a constant in [0, Integer.MAX_VALUE]; -1 otherwise (see movParamsAndCreate).
    private final int constOffset;
    // Result register; emitCode uses it as the running index and leaves the result in it.
    @LIRInstruction.Def(value={LIRInstruction.OperandFlag.REG})
    Value resultValue;
    // Input operands. The constructor also stores each of them in the matching *Tmp field below.
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
    Value arrayReg;
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
    Value offsetReg;
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
    Value lengthReg;
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
    Value fromIndexReg;
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
    Value searchValue1;
    // ILLEGAL when fewer than two search values are given.
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.ILLEGAL})
    Value searchValue2;
    // Temp aliases of the inputs above; emitCode overwrites some of these registers
    // (e.g. the array pointer and the from-index register).
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    Value arrayTmp;
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    Value offsetTmp;
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    Value lengthTmp;
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    Value fromIndexTmp;
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    Value searchValue1Tmp;
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.ILLEGAL})
    Value searchValue2Tmp;
    // Third and fourth search values: register or stack slot, ILLEGAL when absent.
    @LIRInstruction.Alive(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.STACK, LIRInstruction.OperandFlag.ILLEGAL})
    Value searchValue3;
    @LIRInstruction.Alive(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.STACK, LIRInstruction.OperandFlag.ILLEGAL})
    Value searchValue4;
    // nValues vector temporaries that receive the broadcast search values.
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    Value[] vectorCompareVal;
    // Four vector temporaries that receive the array data loaded for comparison.
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    Value[] vectorArray;

    private AMD64ArrayIndexOfOp(int arrayBaseOffset, JavaKind valueKind, boolean findTwoConsecutive, boolean withMask, int constOffset, int nValues, LIRGeneratorTool tool, Value result, Value arrayPtr, Value arrayOffset, Value arrayLength, Value fromIndex, Value searchValue1, Value searchValue2, Value searchValue3, Value searchValue4) {
        super(TYPE, tool, AVXKind.AVXSize.YMM);
        this.valueKind = valueKind;
        this.arrayIndexScale = Objects.requireNonNull(AMD64Address.Scale.fromInt(tool.getProviders().getMetaAccess().getArrayIndexScale(valueKind)));
        this.arrayBaseOffset = arrayBaseOffset;
        this.findTwoConsecutive = findTwoConsecutive;
        this.withMask = withMask;
        this.constOffset = constOffset;
        this.nValues = nValues;
        assert (0 < nValues && nValues <= 4);
        assert (valueKind == JavaKind.Byte || valueKind == JavaKind.Char || valueKind == JavaKind.Int);
        assert (AMD64ArrayIndexOfOp.supports(tool.target(), AMD64.CPUFeature.SSE2) || AMD64ArrayIndexOfOp.supports(tool.target(), AMD64.CPUFeature.AVX) || AMD64ArrayIndexOfOp.supports(tool.target(), AMD64.CPUFeature.AVX2));
        assert (withMask || !findTwoConsecutive || nValues == 2);
        assert (!withMask || !findTwoConsecutive || nValues == 4);
        assert (!withMask || findTwoConsecutive || nValues == 2);
        this.resultValue = result;
        this.arrayTmp = this.arrayReg = arrayPtr;
        this.offsetTmp = this.offsetReg = arrayOffset;
        this.lengthTmp = this.lengthReg = arrayLength;
        this.fromIndexTmp = this.fromIndexReg = fromIndex;
        this.searchValue1Tmp = this.searchValue1 = searchValue1;
        this.searchValue2Tmp = this.searchValue2 = searchValue2;
        this.searchValue3 = searchValue3;
        this.searchValue4 = searchValue4;
        this.vectorKind = this.getVectorKind(valueKind);
        this.vectorCompareVal = AMD64ArrayIndexOfOp.allocateVectorRegisters(tool, this.vectorKind, nValues);
        this.vectorArray = AMD64ArrayIndexOfOp.allocateVectorRegisters(tool, this.vectorKind, 4);
    }

    /**
     * Allocates {@code n} fresh variables of the given vector kind.
     *
     * @return an array of length {@code n} holding the new variables
     */
    private static Value[] allocateVectorRegisters(LIRGeneratorTool tool, AMD64Kind vectorKind, int n) {
        Value[] allocated = new Value[n];
        int slot = 0;
        while (slot < allocated.length) {
            allocated[slot] = tool.newVariable(LIRKind.value((PlatformKind) vectorKind));
            slot++;
        }
        return allocated;
    }

    /**
     * Converts every value of the array to its register, preserving order.
     */
    private static Register[] asRegisters(Value[] values) {
        Register[] result = new Register[values.length];
        int pos = 0;
        for (Value value : values) {
            result[pos++] = ValueUtil.asRegister(value);
        }
        return result;
    }

    /**
     * Moves the operands into the fixed registers this op works on and creates the op.
     *
     * <p>The array pointer, offset, length, from-index and the first two search values are moved
     * into fixed registers; a third and fourth search value are only made allocatable.
     *
     * @param searchValues the values to search for; at least one is required
     * @return the newly created op
     */
    public static AMD64ArrayIndexOfOp movParamsAndCreate(int arrayBaseOffset, JavaKind valueKind, boolean findTwoConsecutive, boolean withMask, LIRGeneratorTool tool, Value result, Value arrayPtr, Value arrayOffset, Value arrayLength, Value fromIndex, Value ... searchValues) {
        final int count = searchValues.length;

        // Fixed-register views of the operands.
        RegisterValue fixedArray = REG_ARRAY.asValue(arrayPtr.getValueKind());
        RegisterValue fixedOffset = REG_OFFSET.asValue(arrayOffset.getValueKind());
        RegisterValue fixedLength = REG_LENGTH.asValue(arrayLength.getValueKind());
        RegisterValue fixedFromIndex = REG_FROM_INDEX.asValue(fromIndex.getValueKind());
        RegisterValue fixedValue1 = REG_SEARCH_VALUE_1.asValue(searchValues[0].getValueKind());
        AllocatableValue fixedValue2 = Value.ILLEGAL;
        if (count > 1) {
            fixedValue2 = REG_SEARCH_VALUE_2.asValue(searchValues[1].getValueKind());
        }
        AllocatableValue value3 = Value.ILLEGAL;
        if (count > 2) {
            value3 = tool.asAllocatable(searchValues[2]);
        }
        AllocatableValue value4 = Value.ILLEGAL;
        if (count > 3) {
            value4 = tool.asAllocatable(searchValues[3]);
        }

        // Emit the moves into the fixed registers.
        tool.emitConvertNullToZero((AllocatableValue) fixedArray, arrayPtr);
        tool.emitMove((AllocatableValue) fixedOffset, arrayOffset);
        tool.emitMove((AllocatableValue) fixedLength, arrayLength);
        tool.emitMove((AllocatableValue) fixedFromIndex, fromIndex);
        tool.emitMove((AllocatableValue) fixedValue1, searchValues[0]);
        if (count > 1) {
            tool.emitMove(fixedValue2, searchValues[1]);
        }

        // A constant offset is only used when it fits into a non-negative int; -1 means "use the register".
        int constOffset = -1;
        if (AMD64ArrayIndexOfOp.isConstant(arrayOffset)) {
            long offset = AMD64ArrayIndexOfOp.asConstant(arrayOffset).asLong();
            if (offset >= 0L && offset <= Integer.MAX_VALUE) {
                constOffset = (int) offset;
            }
        }
        return new AMD64ArrayIndexOfOp(arrayBaseOffset, valueKind, findTwoConsecutive, withMask, constOffset, count, tool, result, (Value) fixedArray, (Value) fixedOffset, (Value) fixedLength, (Value) fixedFromIndex, (Value) fixedValue1, (Value) fixedValue2, (Value) value3, (Value) value4);
    }

    /**
     * Tells whether a constant array offset was recorded (any non-negative {@code constOffset}).
     */
    private boolean useConstantOffset() {
        return 0 <= this.constOffset;
    }

    /**
     * Emits the machine code of the index-of search.
     *
     * <p>The result register doubles as the running index. Depending on how many elements lie
     * between the from-index and the array length, the generated code takes the full-vector path
     * ({@code runVectorized}), a half-vector path (only when {@code supportsAVX2AndYMM()} holds),
     * a QWORD-sized path, or an element-by-element loop. On exit the result register holds the
     * index computed by the matching path, or -1 when nothing matched.
     */
    @Override
    public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler asm) {
        AMD64Address arrayAddr;
        // Vectors handled per bulk-loop iteration: 1 with a mask, otherwise 4, 2 or 1 for nValues 1, 2 or more.
        int nVectors = this.withMask ? 1 : (this.nValues == 1 ? 4 : (this.nValues == 2 ? 2 : 1));
        Register arrayPtr = ValueUtil.asRegister((Value)this.arrayReg);
        Register arrayLength = ValueUtil.asRegister((Value)this.lengthReg);
        // The result register is used as the running index throughout.
        Register index = ValueUtil.asRegister((Value)this.resultValue);
        // Only the first nValues entries are populated; the rest stay null.
        Value[] searchValue = new Value[]{this.nValues > 0 ? this.searchValue1 : null, this.nValues > 1 ? this.searchValue2 : null, this.nValues > 2 ? this.searchValue3 : null, this.nValues > 3 ? this.searchValue4 : null};
        Register[] vecCmp = AMD64ArrayIndexOfOp.asRegisters(this.vectorCompareVal);
        Register[] vecArray = AMD64ArrayIndexOfOp.asRegisters(this.vectorArray);
        Label ret = new Label();
        Label bulkVectorLoop = new Label();
        Label singleVectorLoop = new Label();
        Label[] vectorFound = new Label[]{new Label(), new Label(), new Label(), new Label()};
        Label runVectorized = new Label();
        Label qWordWise = new Label();
        Label elementWise = new Label();
        Label elementWiseLoop = new Label();
        Label elementWiseFound = new Label();
        Label elementWiseNotFound = new Label();
        Label skipBulkVectorLoop = new Label();
        Label bsfAdd = new Label();
        int vectorLength = this.vectorKind.getVectorLength();
        int bulkSize = vectorLength * nVectors;
        // arrayPtr = arrayPtr + offset + arrayBaseOffset (offset is the constant or the offset register).
        if (this.useConstantOffset()) {
            asm.leaq(arrayPtr, new AMD64Address(arrayPtr, this.constOffset + this.arrayBaseOffset));
        } else {
            asm.leaq(arrayPtr, new AMD64Address(arrayPtr, ValueUtil.asRegister((Value)this.offsetReg), AMD64Address.Scale.Times1, this.arrayBaseOffset));
        }
        // index = fromIndex + vectorLength (+ 1 when searching for two consecutive values).
        asm.leaq(index, new AMD64Address(ValueUtil.asRegister((Value)this.fromIndexReg), vectorLength + (this.findTwoConsecutive ? 1 : 0)));
        // From here on the from-index register is reused as the scratch/compare-result register.
        Register cmpResult = ValueUtil.asRegister((Value)this.fromIndexReg);
        // Broadcast every search value into its own vector register.
        for (int i = 0; i < this.nValues; ++i) {
            this.broadcastSearchValue(crb, asm, vecCmp[i], searchValue[i], cmpResult, vecArray[0]);
        }
        // A full vector fits when index <= arrayLength: take the vectorized path.
        asm.cmpqAndJcc(index, arrayLength, AMD64Assembler.ConditionFlag.LessEqual, runVectorized, false);
        if (this.supportsAVX2AndYMM()) {
            // Half-vector (XMM) path: step index back by half a vector and check that it fits.
            Label[] xmmFound = new Label[]{new Label()};
            asm.subq(index, vectorLength / 2);
            asm.cmpqAndJcc(index, arrayLength, AMD64Assembler.ConditionFlag.Greater, qWordWise, false);
            // Compare at index, then once more with index set to arrayLength.
            this.emitVectorCompare(asm, this.valueKind, AVXKind.AVXSize.XMM, 1, arrayPtr, index, vecCmp, vecArray, cmpResult, xmmFound, true);
            asm.movq(index, arrayLength);
            this.emitVectorCompare(asm, this.valueKind, AVXKind.AVXSize.XMM, 1, arrayPtr, index, vecCmp, vecArray, cmpResult, xmmFound, true);
            asm.jmp(elementWiseNotFound);
            asm.bind(xmmFound[0]);
            // Step index back by the amount added for this path; bsfAdd then adds the match position.
            asm.subq(index, vectorLength / 2 + (this.findTwoConsecutive ? 1 : 0));
            asm.jmp(bsfAdd);
        }
        asm.bind(qWordWise);
        // QWORD path: same scheme as above with 8-byte loads.
        int vectorLengthQWord = AVXKind.AVXSize.QWORD.getBytes() / this.valueKind.getByteCount();
        Label[] qWordFound = new Label[]{new Label()};
        asm.subq(index, vectorLengthQWord);
        asm.cmpqAndJcc(index, arrayLength, AMD64Assembler.ConditionFlag.Greater, elementWise, false);
        this.emitVectorCompare(asm, this.valueKind, AVXKind.AVXSize.QWORD, 1, arrayPtr, index, vecCmp, vecArray, cmpResult, qWordFound, true);
        asm.movq(index, arrayLength);
        this.emitVectorCompare(asm, this.valueKind, AVXKind.AVXSize.QWORD, 1, arrayPtr, index, vecCmp, vecArray, cmpResult, qWordFound, true);
        asm.jmpb(elementWiseNotFound);
        asm.bind(qWordFound[0]);
        asm.subq(index, vectorLengthQWord + (this.findTwoConsecutive ? 1 : 0));
        asm.jmp(bsfAdd);
        asm.bind(elementWise);
        // Element-wise path: step index back once more; nothing to search when index >= arrayLength.
        asm.subq(index, vectorLengthQWord);
        asm.cmpqAndJcc(index, arrayLength, AMD64Assembler.ConditionFlag.GreaterEqual, elementWiseNotFound, true);
        AMD64BaseAssembler.OperandSize valueSize = AMD64ArrayIndexOfOp.getOpSize(this.valueKind);
        if (this.findTwoConsecutive) {
            // Pack both search values into searchValue[0]: (searchValue[1] << elementBits) | searchValue[0].
            asm.shlq(ValueUtil.asRegister((Value)searchValue[1]), this.valueKind.getBitCount());
            asm.orq(ValueUtil.asRegister((Value)searchValue[0]), ValueUtil.asRegister((Value)searchValue[1]));
            if (this.withMask) {
                // Pack both masks into searchValue[1]: (searchValue[3] << elementBits) | searchValue[2].
                if (ValueUtil.isStackSlot((Value)searchValue[3])) {
                    AMD64MacroAssembler.movSZx(asm, valueSize, AMD64MacroAssembler.ExtendMode.ZERO_EXTEND, ValueUtil.asRegister((Value)searchValue[1]), (AMD64Address)crb.asAddress(searchValue[3]));
                } else {
                    asm.movq(ValueUtil.asRegister((Value)searchValue[1]), ValueUtil.asRegister((Value)searchValue[3]));
                }
                asm.shlq(ValueUtil.asRegister((Value)searchValue[1]), this.valueKind.getBitCount());
                if (ValueUtil.isStackSlot((Value)searchValue[2])) {
                    AMD64MacroAssembler.movSZx(asm, valueSize, AMD64MacroAssembler.ExtendMode.ZERO_EXTEND, cmpResult, (AMD64Address)crb.asAddress(searchValue[2]));
                    asm.orq(ValueUtil.asRegister((Value)searchValue[1]), cmpResult);
                } else {
                    asm.orq(ValueUtil.asRegister((Value)searchValue[1]), ValueUtil.asRegister((Value)searchValue[2]));
                }
            }
        }
        asm.bind(elementWiseLoop);
        if (this.findTwoConsecutive) {
            // Load the element pair starting one element before index with a double-width access.
            arrayAddr = new AMD64Address(arrayPtr, index, this.arrayIndexScale, -this.valueKind.getByteCount());
            if (this.withMask) {
                AMD64MacroAssembler.movSZx(asm, AMD64ArrayIndexOfOp.getDoubleOpSize(this.valueKind), AMD64MacroAssembler.ExtendMode.ZERO_EXTEND, cmpResult, arrayAddr);
                asm.orq(cmpResult, ValueUtil.asRegister((Value)searchValue[1]));
                asm.cmpqAndJcc(cmpResult, ValueUtil.asRegister((Value)searchValue[0]), AMD64Assembler.ConditionFlag.Equal, elementWiseFound, true);
            } else {
                asm.cmpAndJcc(AMD64ArrayIndexOfOp.getDoubleOpSize(this.valueKind), ValueUtil.asRegister((Value)searchValue[0]), arrayAddr, AMD64Assembler.ConditionFlag.Equal, elementWiseFound, true);
            }
        } else {
            int i;
            arrayAddr = new AMD64Address(arrayPtr, index, this.arrayIndexScale);
            boolean valuesOnStack = this.searchValuesOnStack(searchValue);
            if (this.withMask) {
                assert (!valuesOnStack);
                assert (this.nValues == 2);
                // element | mask (searchValue[1]) must equal searchValue[0].
                AMD64MacroAssembler.movSZx(asm, valueSize, AMD64MacroAssembler.ExtendMode.ZERO_EXTEND, cmpResult, arrayAddr);
                asm.orq(cmpResult, ValueUtil.asRegister((Value)searchValue[1]));
                asm.cmpqAndJcc(cmpResult, ValueUtil.asRegister((Value)searchValue[0]), AMD64Assembler.ConditionFlag.Equal, elementWiseFound, true);
            } else if (valuesOnStack) {
                // At least one search value lives in a stack slot: load the element once and compare it with each value.
                AMD64MacroAssembler.movSZx(asm, valueSize, AMD64MacroAssembler.ExtendMode.ZERO_EXTEND, cmpResult, arrayAddr);
                for (i = 0; i < this.nValues; ++i) {
                    if (ValueUtil.isStackSlot((Value)searchValue[i])) {
                        asm.cmpqAndJcc(cmpResult, (AMD64Address)crb.asAddress(searchValue[i]), AMD64Assembler.ConditionFlag.Equal, elementWiseFound, true);
                        continue;
                    }
                    asm.cmpqAndJcc(cmpResult, ValueUtil.asRegister((Value)searchValue[i]), AMD64Assembler.ConditionFlag.Equal, elementWiseFound, true);
                }
            } else {
                // All search values are in registers: compare each register directly with memory.
                for (i = 0; i < this.nValues; ++i) {
                    asm.cmpAndJcc(valueSize, ValueUtil.asRegister((Value)searchValue[i]), arrayAddr, AMD64Assembler.ConditionFlag.Equal, elementWiseFound, true);
                }
            }
        }
        // Next element; loop while index < arrayLength.
        asm.incrementq(index, 1);
        asm.cmpqAndJcc(index, arrayLength, AMD64Assembler.ConditionFlag.Less, elementWiseLoop, true);
        asm.bind(elementWiseNotFound);
        // Not found: index = 0, then decremented to -1 below.
        asm.xorq(index, index);
        if (this.findTwoConsecutive) {
            // The found path shares the decrement, which removes the +1 bias applied to index in this mode.
            asm.bind(elementWiseFound);
            asm.decrementq(index, 1);
        } else {
            asm.decrementq(index, 1);
            asm.bind(elementWiseFound);
        }
        asm.jmp(ret);
        asm.bind(runVectorized);
        // Vectorized path: first a single vector compare at the current index.
        this.emitVectorCompare(asm, this.valueKind, this.vectorSize, 1, arrayPtr, index, vecCmp, vecArray, cmpResult, vectorFound, false);
        // Align index: cmpResult = low 32 bits of arrayPtr, converted from bytes to elements;
        // index = ((index + cmpResult) & -vectorLength) - cmpResult.
        asm.movl(cmpResult, arrayPtr);
        if (this.valueKind.getByteCount() > 1) {
            asm.shrl(cmpResult, this.strideAsPowerOf2());
        }
        asm.addq(index, cmpResult);
        asm.andq(index, -vectorLength);
        asm.subq(index, cmpResult);
        // Advance by one bulk; skip the bulk loop when that already exceeds arrayLength.
        asm.addq(index, bulkSize);
        asm.cmpqAndJcc(index, arrayLength, AMD64Assembler.ConditionFlag.Greater, skipBulkVectorLoop, true);
        AMD64ArrayIndexOfOp.emitAlign(crb, asm);
        asm.bind(bulkVectorLoop);
        // Bulk loop: compare nVectors vectors per iteration while index <= arrayLength.
        this.emitVectorCompare(asm, this.valueKind, this.vectorSize, nVectors, arrayPtr, index, vecCmp, vecArray, cmpResult, vectorFound, false);
        asm.addq(index, bulkSize);
        asm.cmpqAndJcc(index, arrayLength, AMD64Assembler.ConditionFlag.LessEqual, bulkVectorLoop, true);
        asm.bind(skipBulkVectorLoop);
        if (nVectors == 1) {
            // Tail: one final compare with index set to arrayLength.
            asm.movq(index, arrayLength);
            this.emitVectorCompare(asm, this.valueKind, this.vectorSize, 1, arrayPtr, index, vecCmp, vecArray, cmpResult, vectorFound, true);
        } else {
            // Tail: undo the last bulk step, then advance one vector at a time, clamping index to arrayLength.
            asm.subq(index, bulkSize);
            AMD64ArrayIndexOfOp.emitAlign(crb, asm);
            asm.bind(singleVectorLoop);
            asm.addq(index, vectorLength);
            asm.cmpq(index, arrayLength);
            asm.cmovq(AMD64Assembler.ConditionFlag.Greater, index, arrayLength);
            this.emitVectorCompare(asm, this.valueKind, this.vectorSize, 1, arrayPtr, index, vecCmp, vecArray, cmpResult, vectorFound, true);
            asm.cmpqAndJcc(index, arrayLength, AMD64Assembler.ConditionFlag.Less, singleVectorLoop, true);
        }
        // Nothing matched in the vectorized path.
        asm.movl(index, -1);
        asm.jmpb(ret);
        // Match handlers: subtract the per-vector delta from index; the last handler falls through into bsfAdd.
        for (int i = 0; i < nVectors; ++i) {
            asm.bind(vectorFound[i]);
            asm.subq(index, this.getResultIndexDelta(i));
            if (i >= nVectors - 1) continue;
            asm.jmpb(bsfAdd);
        }
        asm.bind(bsfAdd);
        // Position of the lowest set bit of the compare result, divided by the element byte count, is added to index.
        asm.bsfq(cmpResult, cmpResult);
        if (this.valueKind.getByteCount() > 1) {
            asm.shrq(cmpResult, this.strideAsPowerOf2());
        }
        asm.addq(index, cmpResult);
        asm.bind(ret);
    }

    /**
     * Checks whether any of the first {@code nValues} search values is a stack slot.
     */
    private boolean searchValuesOnStack(Value[] searchValue) {
        int pos = 0;
        while (pos < this.nValues) {
            if (ValueUtil.isStackSlot(searchValue[pos])) {
                return true;
            }
            pos++;
        }
        return false;
    }

    /**
     * Amount subtracted from the index when vector {@code i} matched: {@code i + 1} vector
     * lengths, plus one when searching for two consecutive values.
     */
    private int getResultIndexDelta(int i) {
        int delta = (i + 1) * this.vectorKind.getVectorLength();
        if (this.findTwoConsecutive) {
            delta += 1;
        }
        return delta;
    }

    /**
     * Computes the (negative) byte displacement of a vector load relative to the index.
     *
     * @param i vector number; the load lies {@code i + 1} vector sizes before the index
     * @param j load number within the vector; when searching for two consecutive values, an
     *            additional {@code (j ^ 1)} elements are subtracted
     */
    private int getVectorOffset(int i, int j, AVXKind.AVXSize targetVectorSize) {
        int bytesBack = (i + 1) * targetVectorSize.getBytes();
        if (this.findTwoConsecutive) {
            bytesBack += (j ^ 1) * this.valueKind.getByteCount();
        }
        return -bytesBack;
    }

    /**
     * Moves a search value into {@code dst} and broadcasts it across the vector.
     *
     * @param tmpReg scratch register used when the value is not already in a register
     * @param tmpVector scratch vector register handed to the broadcast
     */
    private void broadcastSearchValue(CompilationResultBuilder crb, AMD64MacroAssembler asm, Register dst, Value srcVal, Register tmpReg, Register tmpVector) {
        Register scalar = AMD64ArrayIndexOfOp.asRegOrTmpReg(crb, asm, srcVal, tmpReg);
        // Scalar to vector register first, then replicate it.
        AMD64MacroAssembler.movdl(asm, dst, scalar);
        AMD64ArrayIndexOfOp.emitBroadcast(asm, this.valueKind, dst, tmpVector, this.vectorSize);
    }

    /**
     * Tells whether the value is a {@link ConstantValue}; asserts that such a value wraps a Java
     * constant.
     */
    private static boolean isConstant(Value val) {
        boolean constant = val instanceof ConstantValue;
        assert !constant || ((ConstantValue) val).isJavaConstant();
        return constant;
    }

    /**
     * Returns the Java constant wrapped by the value, which must be a {@link ConstantValue}.
     */
    private static JavaConstant asConstant(Value val) {
        ConstantValue constant = (ConstantValue) val;
        return constant.getJavaConstant();
    }

    /**
     * Returns a register holding the value. A register value is returned directly; a stack slot
     * or constant is first loaded into {@code tmpReg} with a 32-bit move.
     */
    private static Register asRegOrTmpReg(CompilationResultBuilder crb, AMD64MacroAssembler asm, Value val, Register tmpReg) {
        if (ValueUtil.isRegister(val)) {
            return ValueUtil.asRegister(val);
        }
        if (ValueUtil.isStackSlot(val)) {
            asm.movl(tmpReg, (AMD64Address) crb.asAddress(val));
        } else {
            // Anything that is neither register nor stack slot is expected to be a constant.
            assert AMD64ArrayIndexOfOp.isConstant(val);
            asm.movl(tmpReg, AMD64ArrayIndexOfOp.asConstant(val).asInt());
        }
        return tmpReg;
    }

    /**
     * Aligns the next emitted instruction to twice the target word size.
     */
    private static void emitAlign(CompilationResultBuilder crb, AMD64MacroAssembler asm) {
        int alignment = crb.target.wordSize * 2;
        asm.align(alignment);
    }

    /**
     * Emits the instructions that replicate the element already placed in {@code vecDst} across
     * the vector, choosing the sequence by the CPU features the assembler reports.
     *
     * @param kind element kind; Byte, Short, Char and Int are handled
     * @param vecTmp scratch vector register (only used by the byte shuffle variants)
     * @throws UnsupportedOperationException for any other element kind
     */
    private static void emitBroadcast(AMD64MacroAssembler asm, JavaKind kind, Register vecDst, Register vecTmp, AVXKind.AVXSize targetVectorSize) {
        switch (kind) {
            case Byte: {
                if (asm.supports(AMD64.CPUFeature.AVX2)) {
                    AMD64Assembler.VexRMOp.VPBROADCASTB.emit((AMD64Assembler)asm, targetVectorSize, vecDst, vecDst);
                } else if (asm.supports(AMD64.CPUFeature.AVX)) {
                    // Shuffle with an all-zero control vector.
                    AMD64Assembler.VexRVMOp.VPXOR.emit((AMD64Assembler)asm, targetVectorSize, vecTmp, vecTmp, vecTmp);
                    AMD64Assembler.VexRVMOp.VPSHUFB.emit((AMD64Assembler)asm, targetVectorSize, vecDst, vecDst, vecTmp);
                } else if (asm.supports(AMD64.CPUFeature.SSSE3)) {
                    asm.pxor(vecTmp, vecTmp);
                    asm.pshufb(vecDst, vecTmp);
                } else {
                    // Fallback: unpack twice, then shuffle doublewords.
                    asm.punpcklbw(vecDst, vecDst);
                    asm.punpcklbw(vecDst, vecDst);
                    asm.pshufd(vecDst, vecDst, 0);
                }
                return;
            }
            case Short:
            case Char: {
                if (asm.supports(AMD64.CPUFeature.AVX2)) {
                    AMD64Assembler.VexRMOp.VPBROADCASTW.emit((AMD64Assembler)asm, targetVectorSize, vecDst, vecDst);
                } else if (asm.supports(AMD64.CPUFeature.AVX)) {
                    AMD64Assembler.VexRMIOp.VPSHUFLW.emit((AMD64Assembler)asm, targetVectorSize, vecDst, vecDst, 0);
                    AMD64Assembler.VexRMIOp.VPSHUFD.emit((AMD64Assembler)asm, targetVectorSize, vecDst, vecDst, 0);
                } else {
                    asm.pshuflw(vecDst, vecDst, 0);
                    asm.pshufd(vecDst, vecDst, 0);
                }
                return;
            }
            case Int: {
                if (asm.supports(AMD64.CPUFeature.AVX2)) {
                    AMD64Assembler.VexRMOp.VPBROADCASTD.emit((AMD64Assembler)asm, targetVectorSize, vecDst, vecDst);
                } else if (asm.supports(AMD64.CPUFeature.AVX)) {
                    AMD64Assembler.VexRMIOp.VPSHUFD.emit((AMD64Assembler)asm, targetVectorSize, vecDst, vecDst, 0);
                } else {
                    asm.pshufd(vecDst, vecDst, 0);
                }
                return;
            }
            default: {
                throw new UnsupportedOperationException();
            }
        }
    }

    /**
     * Emits the loads, compares and found-checks for {@code nVectors} vectors that precede
     * {@code index} in the array.
     *
     * <p>First all required array vectors are loaded into {@code vecArray}; then, depending on
     * the search mode, they are compared against the broadcast values in {@code vecCmp}, the
     * compare result is moved to {@code cmpResult}, and a conditional jump to the matching entry
     * of {@code vectorFound} is emitted.
     *
     * @param vectorFound jump targets; the vector loaded closest to {@code index} uses entry 0
     * @param shortJmp forwarded to the conditional jump emitted by the found-check
     */
    private void emitVectorCompare(AMD64MacroAssembler asm, JavaKind kind, AVXKind.AVXSize targetVectorSize, int nVectors, Register arrayPtr, Register index, Register[] vecCmp, Register[] vecArray, Register cmpResult, Label[] vectorFound, boolean shortJmp) {
        // Phase 1: load the array data. With a mask only half of the values need a load.
        for (int vec = 0; vec < nVectors; vec++) {
            int first = vec * this.nValues;
            for (int k = 0; k < (this.withMask ? this.nValues / 2 : this.nValues); k++) {
                this.emitArrayLoad(asm, targetVectorSize, vecArray[first + k], arrayPtr, index, this.getVectorOffset(nVectors - (vec + 1), k, targetVectorSize));
            }
        }

        // Phase 2a: two consecutive values - each vector occupies a pair of registers.
        if (this.findTwoConsecutive) {
            for (int vec = 0; vec < nVectors; vec++) {
                int lo = 2 * vec;
                int hi = lo + 1;
                if (this.withMask) {
                    AMD64MacroAssembler.por(asm, targetVectorSize, vecArray[lo], vecCmp[2]);
                    AMD64MacroAssembler.por(asm, targetVectorSize, vecArray[hi], vecCmp[3]);
                }
                AMD64MacroAssembler.pcmpeq(asm, targetVectorSize, kind, vecArray[lo], vecCmp[0]);
                AMD64MacroAssembler.pcmpeq(asm, targetVectorSize, kind, vecArray[hi], vecCmp[1]);
                // Both halves have to match.
                AMD64MacroAssembler.pand(asm, targetVectorSize, vecArray[lo], vecArray[hi]);
                AMD64MacroAssembler.pmovmsk(asm, targetVectorSize, cmpResult, vecArray[lo]);
                AMD64ArrayIndexOfOp.emitVectorCompareCheckVectorFound(asm, targetVectorSize, cmpResult, vectorFound[nVectors - (vec + 1)], shortJmp);
            }
            return;
        }

        // Phase 2b: single value with mask.
        if (this.withMask) {
            assert this.nValues == 2 && nVectors == 1;
            AMD64MacroAssembler.por(asm, targetVectorSize, vecArray[0], vecCmp[1]);
            AMD64MacroAssembler.pcmpeq(asm, targetVectorSize, kind, vecArray[0], vecCmp[0]);
            AMD64MacroAssembler.pmovmsk(asm, targetVectorSize, cmpResult, vecArray[0]);
            AMD64ArrayIndexOfOp.emitVectorCompareCheckVectorFound(asm, targetVectorSize, cmpResult, vectorFound[0], shortJmp);
            return;
        }

        // Phase 2c: any of nValues values - compare each copy and OR the results together.
        for (int vec = 0; vec < nVectors; vec++) {
            int first = vec * this.nValues;
            for (int k = 0; k < this.nValues; k++) {
                AMD64MacroAssembler.pcmpeq(asm, targetVectorSize, kind, vecArray[first + k], vecCmp[k]);
                if ((k & 1) == 1) {
                    // Fold every odd result into its even predecessor.
                    AMD64MacroAssembler.por(asm, targetVectorSize, vecArray[first + k - 1], vecArray[first + k]);
                }
            }
            if (this.nValues > 2) {
                AMD64MacroAssembler.por(asm, targetVectorSize, vecArray[first], vecArray[first + 2]);
            }
            AMD64MacroAssembler.pmovmsk(asm, targetVectorSize, cmpResult, vecArray[first]);
            AMD64ArrayIndexOfOp.emitVectorCompareCheckVectorFound(asm, targetVectorSize, cmpResult, vectorFound[nVectors - (vec + 1)], shortJmp);
        }
    }

    /**
     * Emits the jump to {@code branchTarget} taken when the compare result is non-zero.
     *
     * <p>For DWORD and QWORD sizes the result is first AND-ed with 0xF or 0xFF respectively; for
     * XMM and YMM the whole register is tested. ZMM is not expected here.
     */
    private static void emitVectorCompareCheckVectorFound(AMD64MacroAssembler asm, AVXKind.AVXSize targetVectorSize, Register cmpResult, Label branchTarget, boolean shortJmp) {
        int relevantBits;
        switch (targetVectorSize) {
            case DWORD: {
                relevantBits = 0xF;
                break;
            }
            case QWORD: {
                relevantBits = 0xFF;
                break;
            }
            case XMM:
            case YMM: {
                asm.testlAndJcc(cmpResult, cmpResult, AMD64Assembler.ConditionFlag.NotZero, branchTarget, shortJmp);
                return;
            }
            case ZMM: {
                throw GraalError.shouldNotReachHere();
            }
            default: {
                return;
            }
        }
        asm.andlAndJcc(cmpResult, relevantBits, AMD64Assembler.ConditionFlag.NotZero, branchTarget, shortJmp);
    }

    /**
     * Emits a vector load of the given size from
     * {@code array + index * arrayIndexScale + displacement} into {@code vecDst}, using the
     * VEX-encoded move when the assembler reports AVX support and the legacy move otherwise.
     */
    private void emitArrayLoad(AMD64MacroAssembler asm, AVXKind.AVXSize targetVectorSize, Register vecDst, Register array, Register index, int displacement) {
        AMD64Address src = new AMD64Address(array, index, this.arrayIndexScale, displacement);
        boolean useVex = asm.supports(AMD64.CPUFeature.AVX);
        switch (targetVectorSize) {
            case DWORD: {
                if (useVex) {
                    AMD64Assembler.VexMoveOp.VMOVD.emit((AMD64Assembler)asm, AVXKind.AVXSize.DWORD, vecDst, src);
                } else {
                    asm.movdl(vecDst, src);
                }
                break;
            }
            case QWORD: {
                if (useVex) {
                    AMD64Assembler.VexMoveOp.VMOVQ.emit((AMD64Assembler)asm, AVXKind.AVXSize.QWORD, vecDst, src);
                } else {
                    asm.movdq(vecDst, src);
                }
                break;
            }
            case XMM:
            case YMM: {
                if (useVex) {
                    AMD64Assembler.VexMoveOp.VMOVDQU32.emit((AMD64Assembler)asm, targetVectorSize, vecDst, src);
                } else {
                    asm.movdqu(vecDst, src);
                }
                break;
            }
            case ZMM: {
                // There is no legacy-encoded load for this size.
                if (!useVex) {
                    throw GraalError.shouldNotReachHere();
                }
                AMD64Assembler.VexMoveOp.VMOVDQU64.emit((AMD64Assembler)asm, targetVectorSize, vecDst, src);
                break;
            }
        }
    }

    /**
     * Maps an element kind to the operand size of one element: BYTE for Byte, WORD for Short and
     * Char, DWORD for Int, and QWORD for every other kind.
     */
    private static AMD64BaseAssembler.OperandSize getOpSize(JavaKind kind) {
        switch (kind) {
            case Byte:
                return AMD64BaseAssembler.OperandSize.BYTE;
            case Short:
            case Char:
                return AMD64BaseAssembler.OperandSize.WORD;
            case Int:
                return AMD64BaseAssembler.OperandSize.DWORD;
            default:
                return AMD64BaseAssembler.OperandSize.QWORD;
        }
    }

    /**
     * Maps an element kind to the operand size covering two adjacent elements: WORD for Byte,
     * DWORD for Short and Char, QWORD otherwise (asserted to be Int).
     */
    private static AMD64BaseAssembler.OperandSize getDoubleOpSize(JavaKind kind) {
        switch (kind) {
            case Byte:
                return AMD64BaseAssembler.OperandSize.WORD;
            case Short:
            case Char:
                return AMD64BaseAssembler.OperandSize.DWORD;
            default:
                assert JavaKind.Int == kind;
                return AMD64BaseAssembler.OperandSize.QWORD;
        }
    }

    /**
     * Returns the number of trailing zero bits of the element byte count, used as the shift
     * amount that converts between byte and element units.
     */
    private int strideAsPowerOf2() {
        int elementBytes = this.valueKind.getByteCount();
        return Integer.numberOfTrailingZeros(elementBytes);
    }
}

