/*
 * Decompiled with CFR 0.152.
 */
package org.graalvm.compiler.asm.amd64;

import java.util.function.IntConsumer;
import java.util.function.Supplier;
import jdk.vm.ci.amd64.AMD64;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.TargetDescription;
import jdk.vm.ci.meta.JavaKind;
import org.graalvm.compiler.asm.Label;
import org.graalvm.compiler.asm.amd64.AMD64Address;
import org.graalvm.compiler.asm.amd64.AMD64Assembler;
import org.graalvm.compiler.asm.amd64.AMD64BaseAssembler;
import org.graalvm.compiler.asm.amd64.AVXKind;
import org.graalvm.compiler.core.common.NumUtil;
import org.graalvm.compiler.debug.GraalError;
import org.graalvm.compiler.options.OptionValues;

public class AMD64MacroAssembler
extends AMD64Assembler {
    /** Opcode byte identifying a direct call instruction (0xE8 == 232). */
    private static final int DIRECT_CALL_INSTRUCTION_CODE = 0xE8;
    /** Total length, in bytes, of a direct call instruction. */
    private static final int DIRECT_CALL_INSTRUCTION_SIZE = 5;

    /**
     * Creates a macro assembler for {@code target}; all set-up is delegated to the
     * {@link AMD64Assembler} constructor.
     *
     * @param target description of the machine code target
     */
    public AMD64MacroAssembler(TargetDescription target) {
        super(target);
    }

    /**
     * Creates a macro assembler for {@code target} with explicit option values; both
     * arguments are forwarded unchanged to the {@link AMD64Assembler} constructor.
     *
     * @param target description of the machine code target
     * @param optionValues option values handed to the superclass
     */
    public AMD64MacroAssembler(TargetDescription target, OptionValues optionValues) {
        super(target, optionValues);
    }

    /**
     * Creates a macro assembler for {@code target}; all three arguments are forwarded
     * unchanged to the {@link AMD64Assembler} constructor.
     *
     * @param target description of the machine code target
     * @param optionValues option values handed to the superclass
     * @param hasIntelJccErratum flag handed to the superclass; presumably enables the
     *            padding performed by {@code mitigateJCCErratum} — confirm in the superclass
     */
    public AMD64MacroAssembler(TargetDescription target, OptionValues optionValues, boolean hasIntelJccErratum) {
        super(target, optionValues, hasIntelJccErratum);
    }

    /** Decrements the 64-bit value in {@code reg} by one. */
    public final void decrementq(Register reg) {
        final int one = 1;
        decrementq(reg, one);
    }

    /**
     * Subtracts the constant {@code value} from the 64-bit register {@code reg}.
     * <p>
     * Nothing is emitted for 0, {@code decq} is used for 1, and negative amounts are
     * turned into an increment by the negated amount. {@code Integer.MIN_VALUE} cannot
     * be negated, so it is passed to {@code subq} as is.
     */
    public final void decrementq(Register reg, int value) {
        if (value == 0) {
            return;
        }
        if (value == 1) {
            decq(reg);
            return;
        }
        if (value > 0 || value == Integer.MIN_VALUE) {
            subq(reg, value);
            return;
        }
        // Negative and safely negatable: express as an increment.
        incrementq(reg, -value);
    }

    /**
     * Subtracts the constant {@code value} from the 64-bit memory operand {@code dst}.
     * <p>
     * Nothing is emitted for 0, {@code decq} is used for 1, and negative amounts are
     * turned into an increment by the negated amount. {@code Integer.MIN_VALUE} cannot
     * be negated, so it is passed to {@code subq} as is.
     */
    public final void decrementq(AMD64Address dst, int value) {
        if (value == 0) {
            return;
        }
        if (value == 1) {
            decq(dst);
            return;
        }
        if (value > 0 || value == Integer.MIN_VALUE) {
            subq(dst, value);
            return;
        }
        // Negative and safely negatable: express as an increment.
        incrementq(dst, -value);
    }

    /** Increments the 64-bit value in {@code reg} by one. */
    public final void incrementq(Register reg) {
        final int one = 1;
        incrementq(reg, one);
    }

    /**
     * Adds the constant {@code value} to the 64-bit register {@code reg}.
     * <p>
     * Nothing is emitted for 0, {@code incq} is used for 1, and negative amounts are
     * turned into a decrement by the negated amount. {@code Integer.MIN_VALUE} cannot
     * be negated, so it is passed to {@code addq} as is.
     */
    public void incrementq(Register reg, int value) {
        if (value == 0) {
            return;
        }
        if (value == 1) {
            incq(reg);
            return;
        }
        if (value > 0 || value == Integer.MIN_VALUE) {
            addq(reg, value);
            return;
        }
        // Negative and safely negatable: express as a decrement.
        decrementq(reg, -value);
    }

    /**
     * Adds the constant {@code value} to the 64-bit memory operand {@code dst}.
     * <p>
     * Nothing is emitted for 0, {@code incq} is used for 1, and negative amounts are
     * turned into a decrement by the negated amount. {@code Integer.MIN_VALUE} cannot
     * be negated, so it is passed to {@code addq} as is.
     */
    public final void incrementq(AMD64Address dst, int value) {
        if (value == 0) {
            return;
        }
        if (value == 1) {
            incq(dst);
            return;
        }
        if (value > 0 || value == Integer.MIN_VALUE) {
            addq(dst, value);
            return;
        }
        // Negative and safely negatable: express as a decrement.
        decrementq(dst, -value);
    }

    /** Moves a pointer-sized value from {@code src} into {@code dst}; forwards to {@code movq}. */
    public final void movptr(Register dst, AMD64Address src) {
        movq(dst, src);
    }

    /** Moves a pointer-sized value from {@code src} to the address {@code dst}; forwards to {@code movq}. */
    public final void movptr(AMD64Address dst, Register src) {
        movq(dst, src);
    }

    /** Writes the constant {@code src} to the address {@code dst}; forwards to {@code movslq}. */
    public final void movptr(AMD64Address dst, int src) {
        movslq(dst, src);
    }

    /** Compares two pointer-sized registers; forwards to {@code cmpq}. */
    public final void cmpptr(Register src1, Register src2) {
        cmpq(src1, src2);
    }

    /** Compares a pointer-sized register with a memory operand; forwards to {@code cmpq}. */
    public final void cmpptr(Register src1, AMD64Address src2) {
        cmpq(src1, src2);
    }

    /** Decrements the 32-bit value in {@code reg} by one. */
    public final void decrementl(Register reg) {
        final int one = 1;
        decrementl(reg, one);
    }

    /**
     * Subtracts the constant {@code value} from the 32-bit register {@code reg}.
     * <p>
     * Nothing is emitted for 0, {@code decl} is used for 1, and negative amounts are
     * turned into an increment by the negated amount. {@code Integer.MIN_VALUE} cannot
     * be negated, so it is passed to {@code subl} as is.
     */
    public final void decrementl(Register reg, int value) {
        if (value == 0) {
            return;
        }
        if (value == 1) {
            decl(reg);
            return;
        }
        if (value > 0 || value == Integer.MIN_VALUE) {
            subl(reg, value);
            return;
        }
        // Negative and safely negatable: express as an increment.
        incrementl(reg, -value);
    }

    /**
     * Subtracts the constant {@code value} from the 32-bit memory operand {@code dst}.
     * <p>
     * Nothing is emitted for 0, {@code decl} is used for 1, and negative amounts are
     * turned into an increment by the negated amount. {@code Integer.MIN_VALUE} cannot
     * be negated, so it is passed to {@code subl} as is.
     */
    public final void decrementl(AMD64Address dst, int value) {
        if (value == 0) {
            return;
        }
        if (value == 1) {
            decl(dst);
            return;
        }
        if (value > 0 || value == Integer.MIN_VALUE) {
            subl(dst, value);
            return;
        }
        // Negative and safely negatable: express as an increment.
        incrementl(dst, -value);
    }

    /**
     * Adds the constant {@code value} to the 32-bit register {@code reg}.
     * <p>
     * Nothing is emitted for 0, {@code incl} is used for 1, and negative amounts are
     * turned into a decrement by the negated amount. {@code Integer.MIN_VALUE} cannot
     * be negated, so it is passed to {@code addl} as is.
     */
    public final void incrementl(Register reg, int value) {
        if (value == 0) {
            return;
        }
        if (value == 1) {
            incl(reg);
            return;
        }
        if (value > 0 || value == Integer.MIN_VALUE) {
            addl(reg, value);
            return;
        }
        // Negative and safely negatable: express as a decrement.
        decrementl(reg, -value);
    }

    /**
     * Adds the constant {@code value} to the 32-bit memory operand {@code dst}.
     * <p>
     * Nothing is emitted for 0, {@code incl} is used for 1, and negative amounts are
     * turned into a decrement by the negated amount. {@code Integer.MIN_VALUE} cannot
     * be negated, so it is passed to {@code addl} as is.
     */
    public final void incrementl(AMD64Address dst, int value) {
        if (value == 0) {
            return;
        }
        if (value == 1) {
            incl(dst);
            return;
        }
        if (value > 0 || value == Integer.MIN_VALUE) {
            addl(dst, value);
            return;
        }
        // Negative and safely negatable: express as a decrement.
        decrementl(dst, -value);
    }

    /**
     * Register-to-register move between two XMM-category registers.
     * <p>
     * Emits {@code movaps} unless either operand is reported as an AVX-512 register by
     * {@code isAVX512Register}, in which case {@code VMOVAPS} is emitted instead.
     */
    public void movflt(Register dst, Register src) {
        assert (dst.getRegisterCategory().equals((Object)AMD64.XMM));
        assert (src.getRegisterCategory().equals((Object)AMD64.XMM));
        boolean touchesAvx512 = AMD64MacroAssembler.isAVX512Register(dst) || AMD64MacroAssembler.isAVX512Register(src);
        if (!touchesAvx512) {
            movaps(dst, src);
            return;
        }
        AMD64Assembler.VexMoveOp.VMOVAPS.emit((AMD64Assembler)this, AVXKind.AVXSize.XMM, dst, src);
    }

    /**
     * Moves from the address {@code src} into the XMM-category register {@code dst}.
     * <p>
     * Emits {@code movss}, or {@code VMOVSS} when {@code dst} is an AVX-512 register.
     */
    public void movflt(Register dst, AMD64Address src) {
        assert (dst.getRegisterCategory().equals((Object)AMD64.XMM));
        if (!AMD64MacroAssembler.isAVX512Register(dst)) {
            movss(dst, src);
            return;
        }
        AMD64Assembler.VexMoveOp.VMOVSS.emit((AMD64Assembler)this, AVXKind.AVXSize.XMM, dst, src);
    }

    /**
     * Moves from the XMM-category register {@code src} to the address {@code dst}.
     * <p>
     * Emits {@code movss}, or {@code VMOVSS} when {@code src} is an AVX-512 register.
     */
    public void movflt(AMD64Address dst, Register src) {
        assert (src.getRegisterCategory().equals((Object)AMD64.XMM));
        if (!AMD64MacroAssembler.isAVX512Register(src)) {
            movss(dst, src);
            return;
        }
        AMD64Assembler.VexMoveOp.VMOVSS.emit((AMD64Assembler)this, AVXKind.AVXSize.XMM, dst, src);
    }

    /**
     * Register-to-register move between two XMM-category registers.
     * <p>
     * Emits {@code movapd} unless either operand is reported as an AVX-512 register by
     * {@code isAVX512Register}, in which case {@code VMOVAPD} is emitted instead.
     */
    public void movdbl(Register dst, Register src) {
        assert (dst.getRegisterCategory().equals((Object)AMD64.XMM));
        assert (src.getRegisterCategory().equals((Object)AMD64.XMM));
        boolean touchesAvx512 = AMD64MacroAssembler.isAVX512Register(dst) || AMD64MacroAssembler.isAVX512Register(src);
        if (!touchesAvx512) {
            movapd(dst, src);
            return;
        }
        AMD64Assembler.VexMoveOp.VMOVAPD.emit((AMD64Assembler)this, AVXKind.AVXSize.XMM, dst, src);
    }

    /**
     * Moves from the address {@code src} into the XMM-category register {@code dst}.
     * <p>
     * Emits {@code movsd}, or {@code VMOVSD} when {@code dst} is an AVX-512 register.
     */
    public void movdbl(Register dst, AMD64Address src) {
        assert (dst.getRegisterCategory().equals((Object)AMD64.XMM));
        if (!AMD64MacroAssembler.isAVX512Register(dst)) {
            movsd(dst, src);
            return;
        }
        AMD64Assembler.VexMoveOp.VMOVSD.emit((AMD64Assembler)this, AVXKind.AVXSize.XMM, dst, src);
    }

    /**
     * Moves from the XMM-category register {@code src} to the address {@code dst}.
     * <p>
     * Emits {@code movsd}, or {@code VMOVSD} when {@code src} is an AVX-512 register.
     */
    public void movdbl(AMD64Address dst, Register src) {
        assert (src.getRegisterCategory().equals((Object)AMD64.XMM));
        if (!AMD64MacroAssembler.isAVX512Register(src)) {
            movsd(dst, src);
            return;
        }
        AMD64Assembler.VexMoveOp.VMOVSD.emit((AMD64Assembler)this, AVXKind.AVXSize.XMM, dst, src);
    }

    /**
     * Stores the 64-bit constant {@code src} at {@code dst}.
     * <p>
     * When {@code src} fits in an {@code int} a single QWORD {@code MOV} with an
     * immediate is emitted. Otherwise the constant is written as two 32-bit stores:
     * the low half at {@code dst} and the high half at {@code dst} displaced by 4.
     */
    public final void movlong(AMD64Address dst, long src) {
        if (!NumUtil.isInt(src)) {
            AMD64Address upperHalf = new AMD64Address(dst.getBase(), dst.getIndex(), dst.getScale(), dst.getDisplacement() + 4, dst.getDisplacementAnnotation(), dst.instructionStartPosition);
            // The narrowing cast keeps exactly the low 32 bits; no mask is needed.
            movl(dst, (int) src);
            movl(upperHalf, (int) (src >> 32));
            return;
        }
        AMD64Assembler.AMD64MIOp.MOV.emit((AMD64Assembler)this, AMD64BaseAssembler.OperandSize.QWORD, dst, (int)src);
    }

    /**
     * Materialises condition {@code cc} in {@code dst}: emits {@code setb} on
     * {@code dst}, then {@code movzbl} of {@code dst} onto itself.
     */
    public final void setl(AMD64Assembler.ConditionFlag cc, Register dst) {
        setb(cc, dst);
        movzbl(dst, dst);
    }

    /**
     * Materialises condition {@code cc} in {@code dst}: emits {@code setb} on
     * {@code dst}, then {@code movzbq} of {@code dst} onto itself.
     */
    public final void setq(AMD64Assembler.ConditionFlag cc, Register dst) {
        setb(cc, dst);
        movzbq(dst, dst);
    }

    /**
     * Emits a logarithm sequence: a constant load ({@code fldlg2} for base 10,
     * {@code fldln2} otherwise), the operand load via {@code trigPrologue},
     * {@code fyl2x}, and the result store via {@code trigEpilogue}.
     *
     * @param dest XMM register receiving the result
     * @param value XMM register holding the operand
     * @param base10 selects {@code fldlg2} when true, {@code fldln2} when false
     * @param tmp memory slot used to pass the value between the XMM register and the FPU sequence
     */
    public final void flog(Register dest, Register value, boolean base10, AMD64Address tmp) {
        if (!base10) {
            fldln2();
        } else {
            fldlg2();
        }
        trigPrologue(value, tmp);
        fyl2x();
        trigEpilogue(dest, tmp);
    }

    /**
     * Emits {@code fsin} bracketed by {@code trigPrologue(value, tmp)} and
     * {@code trigEpilogue(dest, tmp)}; {@code tmp} is the memory slot both helpers go through.
     */
    public final void fsin(Register dest, Register value, AMD64Address tmp) {
        trigPrologue(value, tmp);
        fsin();
        trigEpilogue(dest, tmp);
    }

    /**
     * Emits {@code fcos} bracketed by {@code trigPrologue(value, tmp)} and
     * {@code trigEpilogue(dest, tmp)}; {@code tmp} is the memory slot both helpers go through.
     */
    public final void fcos(Register dest, Register value, AMD64Address tmp) {
        trigPrologue(value, tmp);
        fcos();
        trigEpilogue(dest, tmp);
    }

    /**
     * Emits {@code fptan} followed by {@code fstp(0)}, bracketed by
     * {@code trigPrologue(value, tmp)} and {@code trigEpilogue(dest, tmp)}.
     */
    public final void ftan(Register dest, Register value, AMD64Address tmp) {
        trigPrologue(value, tmp);
        fptan();
        // NOTE(review): presumably discards the extra value fptan pushes — confirm.
        fstp(0);
        trigEpilogue(dest, tmp);
    }

    /** Emits {@code ffree(0)} followed by {@code fincstp}. */
    public final void fpop() {
        final int stackTop = 0;
        ffree(stackTop);
        fincstp();
    }

    /**
     * Spills the XMM register {@code value} to {@code tmp} with {@code movdbl} and then
     * loads it from there with {@code fldd}.
     */
    private void trigPrologue(Register value, AMD64Address tmp) {
        assert (value.getRegisterCategory().equals((Object)AMD64.XMM));
        movdbl(tmp, value);
        fldd(tmp);
    }

    /**
     * Stores the result to {@code tmp} with {@code fstpd} and then loads it into the
     * XMM register {@code dest} with {@code movdbl}.
     */
    private void trigEpilogue(Register dest, AMD64Address tmp) {
        assert (dest.getRegisterCategory().equals((Object)AMD64.XMM));
        fstpd(tmp);
        movdbl(dest, tmp);
    }

    /**
     * Pads the code stream ahead of a direct call.
     * <p>
     * The call displacement is aligned first (when {@code align} is set), then
     * {@code mitigateJCCErratum} is applied to the call itself. If that step inserted
     * padding, the alignment is redone because the call has moved.
     *
     * @param align whether the displacement should be aligned at all
     * @param prefixInstructionSize bytes that will be emitted between the current
     *            position and the call instruction
     */
    public void alignBeforeCall(boolean align, int prefixInstructionSize) {
        emitAlignmentForDirectCall(align, prefixInstructionSize);
        int erratumPadding = mitigateJCCErratum(position() + prefixInstructionSize, DIRECT_CALL_INSTRUCTION_SIZE);
        if (erratumPadding != 0) {
            emitAlignmentForDirectCall(align, prefixInstructionSize);
        }
    }

    /**
     * When {@code align} is set, emits enough {@code nop} bytes that the position of
     * the upcoming call's displacement becomes a multiple of 4.
     *
     * @param align no-op when false
     * @param additionalInstructionSize bytes that precede the call instruction and
     *            therefore shift the displacement position
     */
    private void emitAlignmentForDirectCall(boolean align, int additionalInstructionSize) {
        if (!align) {
            return;
        }
        int displacementPos = position() + getMachineCodeCallDisplacementOffset() + additionalInstructionSize;
        int misalignment = displacementPos % 4;
        if (misalignment != 0) {
            nop(4 - misalignment);
        }
    }

    /**
     * Emits an indirect call through {@code callReg} without the direct-call decoding mitigation.
     *
     * @return the code position at which the call instruction starts
     */
    public final int indirectCall(Register callReg) {
        final boolean mitigateDecodingAsDirectCall = false;
        return indirectCall(callReg, mitigateDecodingAsDirectCall);
    }

    /**
     * Emits an indirect call through {@code callReg}.
     * <p>
     * With {@code mitigateDecodingAsDirectCall} set, the method makes sure that the
     * {@code DIRECT_CALL_INSTRUCTION_SIZE} bytes ending at the end of the indirect call
     * do not start with {@code DIRECT_CALL_INSTRUCTION_CODE}, so that code inspecting
     * the bytes before the return address cannot mistake this call for a direct call.
     * If they would (or if that window would start before position 0), {@code nop}
     * padding is inserted ahead of the call; padding already inserted by
     * {@code mitigateJCCErratum} counts toward the amount needed.
     *
     * @param callReg register holding the call target
     * @param mitigateDecodingAsDirectCall whether to apply the padding and the final guarantee
     * @return the code position at which the call instruction starts
     */
    public final int indirectCall(Register callReg, boolean mitigateDecodingAsDirectCall) {
        int indirectCallSize = AMD64MacroAssembler.needsRex(callReg) ? 3 : 2;
        int insertedNops = mitigateJCCErratum(indirectCallSize);

        if (mitigateDecodingAsDirectCall) {
            // Where a direct call ending at the same return address would have to start.
            int directCallPos = position() - (DIRECT_CALL_INSTRUCTION_SIZE - indirectCallSize);
            if (directCallPos < 0 || getByte(directCallPos) == DIRECT_CALL_INSTRUCTION_CODE) {
                // Bytes already inserted as erratum padding need not be emitted again.
                int prefixNops = DIRECT_CALL_INSTRUCTION_SIZE - indirectCallSize - insertedNops;
                if (prefixNops > 0) {
                    nop(prefixNops);
                }
            }
        }

        int beforeCall = position();
        call(callReg);
        assert (beforeCall + indirectCallSize == position());

        if (mitigateDecodingAsDirectCall) {
            int directCallPos = position() - DIRECT_CALL_INSTRUCTION_SIZE;
            GraalError.guarantee(directCallPos >= 0 && getByte(directCallPos) != DIRECT_CALL_INSTRUCTION_CODE, "This indirect call can be decoded as a direct call.");
        }
        return beforeCall;
    }

    /**
     * Calls the absolute {@code address} by loading it into {@code scratch} with
     * {@code movq} and calling through that register.
     * <p>
     * The expected sequence length (13 bytes when {@code scratch} needs a REX prefix,
     * 12 otherwise) is handed to {@code mitigateJCCErratum} before anything is emitted.
     *
     * @return the code position at which the sequence starts
     */
    public final int directCall(long address, Register scratch) {
        final int sequenceLength;
        if (AMD64MacroAssembler.needsRex(scratch)) {
            sequenceLength = 13;
        } else {
            sequenceLength = 12;
        }
        mitigateJCCErratum(sequenceLength);
        final int sequenceStart = position();
        movq(scratch, address);
        call(scratch);
        assert (sequenceStart + sequenceLength == position());
        return sequenceStart;
    }

    /**
     * Jumps to the absolute {@code address} by loading it into {@code scratch} with
     * {@code movq} and jumping through that register via {@code jmpWithoutAlignment}.
     * <p>
     * The expected sequence length (13 bytes when {@code scratch} needs a REX prefix,
     * 12 otherwise) is handed to {@code mitigateJCCErratum} before anything is emitted.
     *
     * @return the code position at which the sequence starts
     */
    public final int directJmp(long address, Register scratch) {
        final int sequenceLength;
        if (AMD64MacroAssembler.needsRex(scratch)) {
            sequenceLength = 13;
        } else {
            sequenceLength = 12;
        }
        mitigateJCCErratum(sequenceLength);
        final int sequenceStart = position();
        movq(scratch, address);
        jmpWithoutAlignment(scratch);
        assert (sequenceStart + sequenceLength == position());
        return sequenceStart;
    }

    /**
     * Applies {@code mitigateJCCErratum} for an instruction of {@code prevOpInBytes}
     * bytes that will be immediately followed by a conditional jump.
     * <p>
     * The jump is sized as 2 bytes when a short jump is requested, and as 6 bytes when
     * the target is {@code null} or not yet bound. For a bound target the short form
     * is tried first; because padding can move the current position, the displacement
     * is recomputed afterwards and the long form is used if it no longer fits a byte.
     *
     * @param branchTarget jump target, may be {@code null}
     * @param isShortJmp whether the caller forces the short jump form
     * @param prevOpInBytes encoded size of the instruction preceding the jump
     */
    private void alignFusedPair(Label branchTarget, boolean isShortJmp, int prevOpInBytes) {
        assert (prevOpInBytes < 26) : "Fused pair may be longer than 0x20 bytes.";
        final int shortJccBytes = 2;
        final int longJccBytes = 6;

        if (branchTarget != null && isShortJmp) {
            mitigateJCCErratum(prevOpInBytes + shortJccBytes);
            return;
        }
        if (branchTarget == null || !branchTarget.isBound()) {
            mitigateJCCErratum(prevOpInBytes + longJccBytes);
            return;
        }

        // Bound target, jump form not forced: optimistically assume the short form.
        long distance = branchTarget.position() - (position() + prevOpInBytes);
        if (NumUtil.isByte(distance - 2L)) {
            mitigateJCCErratum(prevOpInBytes + shortJccBytes);
            // Padding may have been inserted, so the displacement has to be re-checked.
            distance = branchTarget.position() - (position() + prevOpInBytes);
            if (NumUtil.isByte(distance - 2L)) {
                return;
            }
        }
        mitigateJCCErratum(prevOpInBytes + longJccBytes);
    }

    /**
     * Emits {@code op} on a register and an immediate, directly followed by a
     * conditional jump, after letting {@code alignFusedPair} pad for the pair.
     *
     * @param applyBeforeFusedPair optional callback that receives the position of the
     *            first byte of {@code op} before it is emitted; may be {@code null}
     */
    private void applyMIOpAndJcc(AMD64Assembler.AMD64MIOp op, AMD64BaseAssembler.OperandSize size, Register src, int imm32, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp, boolean annotateImm, IntConsumer applyBeforeFusedPair) {
        // Predicted encoding size of op: prefix bytes, two fixed bytes, then the immediate.
        final int opLength = AMD64MacroAssembler.getPrefixInBytes(size, src, op.srcIsByte) + 2 + op.immediateSize(size);
        alignFusedPair(branchTarget, isShortJmp, opLength);
        final int pairStart = position();
        if (applyBeforeFusedPair != null) {
            applyBeforeFusedPair.accept(pairStart);
        }
        op.emit((AMD64Assembler)this, size, src, imm32, annotateImm);
        assert (pairStart + opLength == position());
        jcc(cc, branchTarget, isShortJmp);
        assert (ensureWithinBoundary(pairStart));
    }

    /**
     * Emits {@code op} on a memory operand and an immediate, directly followed by a
     * conditional jump, after letting {@code alignFusedPair} pad for the pair.
     *
     * @param applyBeforeFusedPair optional callback that receives the position of the
     *            first byte of {@code op} before it is emitted; may be {@code null}
     */
    private void applyMIOpAndJcc(AMD64Assembler.AMD64MIOp op, AMD64BaseAssembler.OperandSize size, AMD64Address src, int imm32, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp, boolean annotateImm, IntConsumer applyBeforeFusedPair) {
        // Predicted encoding size of op: prefix bytes, one fixed byte, the address, then the immediate.
        final int opLength = AMD64MacroAssembler.getPrefixInBytes(size, src) + 1 + addressInBytes(src) + op.immediateSize(size);
        alignFusedPair(branchTarget, isShortJmp, opLength);
        final int pairStart = position();
        if (applyBeforeFusedPair != null) {
            applyBeforeFusedPair.accept(pairStart);
        }
        op.emit((AMD64Assembler)this, size, src, imm32, annotateImm);
        assert (pairStart + opLength == position());
        jcc(cc, branchTarget, isShortJmp);
        assert (ensureWithinBoundary(pairStart));
    }

    /**
     * Emits {@code op} on two registers, directly followed by a conditional jump,
     * after letting {@code alignFusedPair} pad for the pair.
     *
     * @return the position at which the conditional jump starts
     */
    private int applyRMOpAndJcc(AMD64Assembler.AMD64RMOp op, AMD64BaseAssembler.OperandSize size, Register src1, Register src2, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        // Predicted encoding size of op: prefix bytes plus two fixed bytes.
        final int opLength = AMD64MacroAssembler.getPrefixInBytes(size, src1, op.dstIsByte, src2, op.srcIsByte) + 2;
        alignFusedPair(branchTarget, isShortJmp, opLength);
        final int pairStart = position();
        op.emit((AMD64Assembler)this, size, src1, src2);
        final int jccStart = position();
        assert (pairStart + opLength == jccStart);
        jcc(cc, branchTarget, isShortJmp);
        assert (ensureWithinBoundary(pairStart));
        return jccStart;
    }

    /**
     * Emits {@code op} on a register and a memory operand, directly followed by a
     * conditional jump, after letting {@code alignFusedPair} pad for the pair.
     *
     * @param applyBeforeFusedPair optional callback that receives the position of the
     *            first byte of {@code op} before it is emitted; may be {@code null}
     * @return the position at which the conditional jump starts
     */
    private int applyRMOpAndJcc(AMD64Assembler.AMD64RMOp op, AMD64BaseAssembler.OperandSize size, Register src1, AMD64Address src2, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp, IntConsumer applyBeforeFusedPair) {
        // Predicted encoding size of op: prefix bytes, one fixed byte, then the address.
        final int opLength = AMD64MacroAssembler.getPrefixInBytes(size, src1, op.dstIsByte, src2) + 1 + addressInBytes(src2);
        alignFusedPair(branchTarget, isShortJmp, opLength);
        final int pairStart = position();
        if (applyBeforeFusedPair != null) {
            applyBeforeFusedPair.accept(pairStart);
        }
        op.emit((AMD64Assembler)this, size, src1, src2);
        final int jccStart = position();
        assert (pairStart + opLength == jccStart);
        jcc(cc, branchTarget, isShortJmp);
        assert (ensureWithinBoundary(pairStart));
        return jccStart;
    }

    /**
     * Emits the single-operand {@code op} on {@code dst}, directly followed by a
     * conditional jump, after letting {@code alignFusedPair} pad for the pair.
     */
    public void applyMOpAndJcc(AMD64Assembler.AMD64MOp op, AMD64BaseAssembler.OperandSize size, Register dst, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        // Predicted encoding size of op: prefix bytes plus two fixed bytes.
        final int opLength = AMD64MacroAssembler.getPrefixInBytes(size, dst, op.srcIsByte) + 2;
        alignFusedPair(branchTarget, isShortJmp, opLength);
        final int pairStart = position();
        op.emit((AMD64Assembler)this, size, dst);
        assert (pairStart + opLength == position());
        jcc(cc, branchTarget, isShortJmp);
        assert (ensureWithinBoundary(pairStart));
    }

    /** Emits {@code TEST src, imm32} of the given size fused with a jump on {@code cc}. */
    public final void testAndJcc(AMD64BaseAssembler.OperandSize size, Register src, int imm32, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        final boolean annotateImm = false;
        applyMIOpAndJcc(AMD64Assembler.AMD64MIOp.TEST, size, src, imm32, cc, branchTarget, isShortJmp, annotateImm, null);
    }

    /** Emits a DWORD {@code TEST src, imm32} fused with a jump on {@code cc}. */
    public final void testlAndJcc(Register src, int imm32, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        final boolean annotateImm = false;
        applyMIOpAndJcc(AMD64Assembler.AMD64MIOp.TEST, AMD64BaseAssembler.OperandSize.DWORD, src, imm32, cc, branchTarget, isShortJmp, annotateImm, null);
    }

    /**
     * Emits {@code TEST [src], imm32} of the given size fused with a jump on {@code cc}.
     *
     * @param applyBeforeFusedPair optional callback receiving the start position of the pair
     */
    public final void testAndJcc(AMD64BaseAssembler.OperandSize size, AMD64Address src, int imm32, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp, IntConsumer applyBeforeFusedPair) {
        final boolean annotateImm = false;
        applyMIOpAndJcc(AMD64Assembler.AMD64MIOp.TEST, size, src, imm32, cc, branchTarget, isShortJmp, annotateImm, applyBeforeFusedPair);
    }

    /** Emits {@code TEST src1, src2} of the given size fused with a jump on {@code cc}. */
    public final void testAndJcc(AMD64BaseAssembler.OperandSize size, Register src1, Register src2, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        // The jcc position returned by the helper is not needed here.
        applyRMOpAndJcc(AMD64Assembler.AMD64RMOp.TEST, size, src1, src2, cc, branchTarget, isShortJmp);
    }

    /** Emits a DWORD {@code TEST src1, src2} fused with a jump on {@code cc}. */
    public final void testlAndJcc(Register src1, Register src2, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        // The jcc position returned by the helper is not needed here.
        applyRMOpAndJcc(AMD64Assembler.AMD64RMOp.TEST, AMD64BaseAssembler.OperandSize.DWORD, src1, src2, cc, branchTarget, isShortJmp);
    }

    /**
     * Emits a QWORD {@code TEST src1, src2} fused with a jump on {@code cc}.
     *
     * @return the position at which the conditional jump starts
     */
    public final int testqAndJcc(Register src1, Register src2, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        final int jccStart = applyRMOpAndJcc(AMD64Assembler.AMD64RMOp.TEST, AMD64BaseAssembler.OperandSize.QWORD, src1, src2, cc, branchTarget, isShortJmp);
        return jccStart;
    }

    /** Emits {@code TEST src1, [src2]} of the given size fused with a jump on {@code cc}; no pre-emit callback. */
    public final void testAndJcc(AMD64BaseAssembler.OperandSize size, Register src1, AMD64Address src2, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        // The jcc position returned by the helper is not needed here.
        applyRMOpAndJcc(AMD64Assembler.AMD64RMOp.TEST, size, src1, src2, cc, branchTarget, isShortJmp, null);
    }

    /**
     * Emits {@code TEST src1, [src2]} of the given size fused with a jump on {@code cc}.
     *
     * @param applyBeforeFusedPair optional callback receiving the start position of the pair
     */
    public final void testAndJcc(AMD64BaseAssembler.OperandSize size, Register src1, AMD64Address src2, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp, IntConsumer applyBeforeFusedPair) {
        // The jcc position returned by the helper is not needed here.
        applyRMOpAndJcc(AMD64Assembler.AMD64RMOp.TEST, size, src1, src2, cc, branchTarget, isShortJmp, applyBeforeFusedPair);
    }

    /** Emits a BYTE {@code TESTB src1, src2} fused with a jump on {@code cc}. */
    public final void testbAndJcc(Register src1, Register src2, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        // The jcc position returned by the helper is not needed here.
        applyRMOpAndJcc(AMD64Assembler.AMD64RMOp.TESTB, AMD64BaseAssembler.OperandSize.BYTE, src1, src2, cc, branchTarget, isShortJmp);
    }

    /** Emits a BYTE {@code TESTB src1, [src2]} fused with a jump on {@code cc}; no pre-emit callback. */
    public final void testbAndJcc(Register src1, AMD64Address src2, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        // The jcc position returned by the helper is not needed here.
        applyRMOpAndJcc(AMD64Assembler.AMD64RMOp.TESTB, AMD64BaseAssembler.OperandSize.BYTE, src1, src2, cc, branchTarget, isShortJmp, null);
    }

    /**
     * Emits {@code CMP src, imm32} of the given size fused with a jump on {@code cc}.
     * The opcode variant is selected by whether {@code imm32} fits in a byte.
     */
    public final void cmpAndJcc(AMD64BaseAssembler.OperandSize size, Register src, int imm32, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        AMD64Assembler.AMD64MIOp cmp = AMD64Assembler.AMD64BinaryArithmetic.CMP.getMIOpcode(size, NumUtil.isByte(imm32));
        applyMIOpAndJcc(cmp, size, src, imm32, cc, branchTarget, isShortJmp, false, null);
    }

    /**
     * Emits {@code CMP src, imm32} of the given size fused with a jump on {@code cc}.
     * The opcode variant is selected by whether {@code imm32} fits in a byte.
     *
     * @param annotateImm forwarded to the opcode's {@code emit}
     * @param applyBeforeFusedPair optional callback receiving the start position of the pair
     */
    public final void cmpAndJcc(AMD64BaseAssembler.OperandSize size, Register src, int imm32, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp, boolean annotateImm, IntConsumer applyBeforeFusedPair) {
        AMD64Assembler.AMD64MIOp cmp = AMD64Assembler.AMD64BinaryArithmetic.CMP.getMIOpcode(size, NumUtil.isByte(imm32));
        applyMIOpAndJcc(cmp, size, src, imm32, cc, branchTarget, isShortJmp, annotateImm, applyBeforeFusedPair);
    }

    /**
     * Emits a DWORD {@code CMP src, imm32} fused with a jump on {@code cc}.
     * The opcode variant is selected by whether {@code imm32} fits in a byte.
     */
    public final void cmplAndJcc(Register src, int imm32, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        AMD64Assembler.AMD64MIOp cmp = AMD64Assembler.AMD64BinaryArithmetic.CMP.getMIOpcode(AMD64BaseAssembler.OperandSize.DWORD, NumUtil.isByte(imm32));
        applyMIOpAndJcc(cmp, AMD64BaseAssembler.OperandSize.DWORD, src, imm32, cc, branchTarget, isShortJmp, false, null);
    }

    /**
     * Emits a QWORD {@code CMP src, imm32} fused with a jump on {@code cc}.
     * The opcode variant is selected by whether {@code imm32} fits in a byte.
     */
    public final void cmpqAndJcc(Register src, int imm32, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        AMD64Assembler.AMD64MIOp cmp = AMD64Assembler.AMD64BinaryArithmetic.CMP.getMIOpcode(AMD64BaseAssembler.OperandSize.QWORD, NumUtil.isByte(imm32));
        applyMIOpAndJcc(cmp, AMD64BaseAssembler.OperandSize.QWORD, src, imm32, cc, branchTarget, isShortJmp, false, null);
    }

    /**
     * Emits {@code CMP [src], imm32} of the given size fused with a jump on {@code cc}.
     * The opcode variant is selected by whether {@code imm32} fits in a byte.
     */
    public final void cmpAndJcc(AMD64BaseAssembler.OperandSize size, AMD64Address src, int imm32, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        AMD64Assembler.AMD64MIOp cmp = AMD64Assembler.AMD64BinaryArithmetic.CMP.getMIOpcode(size, NumUtil.isByte(imm32));
        applyMIOpAndJcc(cmp, size, src, imm32, cc, branchTarget, isShortJmp, false, null);
    }

    /**
     * Emits {@code CMP [src], imm32} of the given size fused with a jump on {@code cc}.
     * The opcode variant is selected by whether {@code imm32} fits in a byte.
     *
     * @param annotateImm forwarded to the opcode's {@code emit}
     * @param applyBeforeFusedPair optional callback receiving the start position of the pair
     */
    public final void cmpAndJcc(AMD64BaseAssembler.OperandSize size, AMD64Address src, int imm32, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp, boolean annotateImm, IntConsumer applyBeforeFusedPair) {
        AMD64Assembler.AMD64MIOp cmp = AMD64Assembler.AMD64BinaryArithmetic.CMP.getMIOpcode(size, NumUtil.isByte(imm32));
        applyMIOpAndJcc(cmp, size, src, imm32, cc, branchTarget, isShortJmp, annotateImm, applyBeforeFusedPair);
    }

    /** Emits {@code CMP src1, src2} of the given size fused with a jump on {@code cc}. */
    public final void cmpAndJcc(AMD64BaseAssembler.OperandSize size, Register src1, Register src2, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        AMD64Assembler.AMD64RMOp cmp = AMD64Assembler.AMD64BinaryArithmetic.CMP.getRMOpcode(size);
        applyRMOpAndJcc(cmp, size, src1, src2, cc, branchTarget, isShortJmp);
    }

    /** Emits {@code CMP src1, [src2]} of the given size fused with a jump on {@code cc}; no pre-emit callback. */
    public final void cmpAndJcc(AMD64BaseAssembler.OperandSize size, Register src1, AMD64Address src2, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        AMD64Assembler.AMD64RMOp cmp = AMD64Assembler.AMD64BinaryArithmetic.CMP.getRMOpcode(size);
        applyRMOpAndJcc(cmp, size, src1, src2, cc, branchTarget, isShortJmp, null);
    }

    /** Emits a DWORD {@code CMP src1, src2} fused with a jump on {@code cc}. */
    public final void cmplAndJcc(Register src1, Register src2, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        AMD64Assembler.AMD64RMOp cmp = AMD64Assembler.AMD64BinaryArithmetic.CMP.getRMOpcode(AMD64BaseAssembler.OperandSize.DWORD);
        applyRMOpAndJcc(cmp, AMD64BaseAssembler.OperandSize.DWORD, src1, src2, cc, branchTarget, isShortJmp);
    }

    /**
     * Emits a QWORD {@code CMP src1, src2} fused with a jump on {@code cc}.
     *
     * @return the position at which the conditional jump starts
     */
    public final int cmpqAndJcc(Register src1, Register src2, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        AMD64Assembler.AMD64RMOp cmp = AMD64Assembler.AMD64BinaryArithmetic.CMP.getRMOpcode(AMD64BaseAssembler.OperandSize.QWORD);
        return applyRMOpAndJcc(cmp, AMD64BaseAssembler.OperandSize.QWORD, src1, src2, cc, branchTarget, isShortJmp);
    }

    /**
     * Emits {@code CMP src1, [src2]} of the given size fused with a jump on {@code cc}.
     *
     * @param applyBeforeFusedPair optional callback receiving the start position of the pair
     */
    public final void cmpAndJcc(AMD64BaseAssembler.OperandSize size, Register src1, AMD64Address src2, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp, IntConsumer applyBeforeFusedPair) {
        AMD64Assembler.AMD64RMOp cmp = AMD64Assembler.AMD64BinaryArithmetic.CMP.getRMOpcode(size);
        applyRMOpAndJcc(cmp, size, src1, src2, cc, branchTarget, isShortJmp, applyBeforeFusedPair);
    }

    /** Emits a DWORD {@code CMP src1, [src2]} fused with a jump on {@code cc}; no pre-emit callback. */
    public final void cmplAndJcc(Register src1, AMD64Address src2, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        AMD64Assembler.AMD64RMOp cmp = AMD64Assembler.AMD64BinaryArithmetic.CMP.getRMOpcode(AMD64BaseAssembler.OperandSize.DWORD);
        applyRMOpAndJcc(cmp, AMD64BaseAssembler.OperandSize.DWORD, src1, src2, cc, branchTarget, isShortJmp, null);
    }

    /**
     * Emits a QWORD {@code CMP src1, [src2]} fused with a jump on {@code cc}; no pre-emit callback.
     *
     * @return the position at which the conditional jump starts
     */
    public final int cmpqAndJcc(Register src1, AMD64Address src2, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        AMD64Assembler.AMD64RMOp cmp = AMD64Assembler.AMD64BinaryArithmetic.CMP.getRMOpcode(AMD64BaseAssembler.OperandSize.QWORD);
        return applyRMOpAndJcc(cmp, AMD64BaseAssembler.OperandSize.QWORD, src1, src2, cc, branchTarget, isShortJmp, null);
    }

    /**
     * Emits {@code CMP src1, [address]} fused with a (never short) jump on {@code cc},
     * where the address is obtained from {@code src2} only after padding is done.
     * <p>
     * The size of the compare is predicted from a placeholder address taken at the
     * current position, and the assertion after emission checks that the address
     * actually supplied encodes to that same size.
     *
     * @param src2 supplier invoked once, after {@code alignFusedPair}, to obtain the memory operand
     */
    public final void cmpAndJcc(AMD64BaseAssembler.OperandSize size, Register src1, Supplier<AMD64Address> src2, AMD64Assembler.ConditionFlag cc, Label branchTarget) {
        final AMD64Address sizingAddress = getPlaceholder(position());
        final AMD64Assembler.AMD64RMOp cmp = AMD64Assembler.AMD64BinaryArithmetic.CMP.getRMOpcode(size);
        final int opLength = AMD64MacroAssembler.getPrefixInBytes(size, src1, cmp.dstIsByte, sizingAddress) + 1 + addressInBytes(sizingAddress);
        alignFusedPair(branchTarget, false, opLength);
        final int pairStart = position();
        // Resolve the real operand only now that the final position is known.
        final AMD64Address operand = src2.get();
        cmp.emit((AMD64Assembler)this, size, src1, operand);
        assert (pairStart + opLength == position());
        jcc(cc, branchTarget, false);
        assert (ensureWithinBoundary(pairStart));
    }

    /**
     * Emits a DWORD {@code AND dst, imm32} fused with a jump on {@code cc}.
     * The opcode variant is selected by whether {@code imm32} fits in a byte.
     */
    public final void andlAndJcc(Register dst, int imm32, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        AMD64Assembler.AMD64MIOp and = AMD64Assembler.AMD64BinaryArithmetic.AND.getMIOpcode(AMD64BaseAssembler.OperandSize.DWORD, NumUtil.isByte(imm32));
        applyMIOpAndJcc(and, AMD64BaseAssembler.OperandSize.DWORD, dst, imm32, cc, branchTarget, isShortJmp, false, null);
    }

    /**
     * Emits a QWORD {@code ADD dst, imm32} fused with a jump on {@code cc}.
     * The opcode variant is selected by whether {@code imm32} fits in a byte.
     */
    public final void addqAndJcc(Register dst, int imm32, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        AMD64Assembler.AMD64MIOp add = AMD64Assembler.AMD64BinaryArithmetic.ADD.getMIOpcode(AMD64BaseAssembler.OperandSize.QWORD, NumUtil.isByte(imm32));
        applyMIOpAndJcc(add, AMD64BaseAssembler.OperandSize.QWORD, dst, imm32, cc, branchTarget, isShortJmp, false, null);
    }

    /** Emits a DWORD {@code SUB dst, src} fused with a jump on {@code cc}. */
    public final void sublAndJcc(Register dst, Register src, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        AMD64Assembler.AMD64RMOp sub = AMD64Assembler.AMD64BinaryArithmetic.SUB.getRMOpcode(AMD64BaseAssembler.OperandSize.DWORD);
        applyRMOpAndJcc(sub, AMD64BaseAssembler.OperandSize.DWORD, dst, src, cc, branchTarget, isShortJmp);
    }

    /** Emits a QWORD {@code SUB dst, src} fused with a jump on {@code cc}. */
    public final void subqAndJcc(Register dst, Register src, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        AMD64Assembler.AMD64RMOp sub = AMD64Assembler.AMD64BinaryArithmetic.SUB.getRMOpcode(AMD64BaseAssembler.OperandSize.QWORD);
        applyRMOpAndJcc(sub, AMD64BaseAssembler.OperandSize.QWORD, dst, src, cc, branchTarget, isShortJmp);
    }

    /**
     * Emits a DWORD {@code SUB dst, imm32} fused with a jump on {@code cc}.
     * The opcode variant is selected by whether {@code imm32} fits in a byte.
     */
    public final void sublAndJcc(Register dst, int imm32, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        AMD64Assembler.AMD64MIOp sub = AMD64Assembler.AMD64BinaryArithmetic.SUB.getMIOpcode(AMD64BaseAssembler.OperandSize.DWORD, NumUtil.isByte(imm32));
        applyMIOpAndJcc(sub, AMD64BaseAssembler.OperandSize.DWORD, dst, imm32, cc, branchTarget, isShortJmp, false, null);
    }

    /**
     * Emits a QWORD {@code SUB dst, imm32} fused with a jump on {@code cc}.
     * The opcode variant is selected by whether {@code imm32} fits in a byte.
     */
    public final void subqAndJcc(Register dst, int imm32, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        AMD64Assembler.AMD64MIOp sub = AMD64Assembler.AMD64BinaryArithmetic.SUB.getMIOpcode(AMD64BaseAssembler.OperandSize.QWORD, NumUtil.isByte(imm32));
        applyMIOpAndJcc(sub, AMD64BaseAssembler.OperandSize.QWORD, dst, imm32, cc, branchTarget, isShortJmp, false, null);
    }

    /** Emits a QWORD {@code INC dst} fused with a jump on {@code cc}. */
    public final void incqAndJcc(Register dst, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        final AMD64BaseAssembler.OperandSize qword = AMD64BaseAssembler.OperandSize.QWORD;
        applyMOpAndJcc(AMD64Assembler.AMD64MOp.INC, qword, dst, cc, branchTarget, isShortJmp);
    }

    /** Emits a QWORD {@code DEC dst} fused with a jump on {@code cc}. */
    public final void decqAndJcc(Register dst, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        final AMD64BaseAssembler.OperandSize qword = AMD64BaseAssembler.OperandSize.QWORD;
        applyMOpAndJcc(AMD64Assembler.AMD64MOp.DEC, qword, dst, cc, branchTarget, isShortJmp);
    }

    /**
     * Emits a DWORD {@code XOR dst, imm32} fused with a jump on {@code cc}.
     * The opcode variant is selected by whether {@code imm32} fits in a byte.
     */
    public final void xorlAndJcc(Register dst, int imm32, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        AMD64Assembler.AMD64MIOp xor = AMD64Assembler.AMD64BinaryArithmetic.XOR.getMIOpcode(AMD64BaseAssembler.OperandSize.DWORD, NumUtil.isByte(imm32));
        applyMIOpAndJcc(xor, AMD64BaseAssembler.OperandSize.DWORD, dst, imm32, cc, branchTarget, isShortJmp, false, null);
    }

    /**
     * Loads from {@code src} into {@code dst} with sign or zero extension; the source
     * width is taken from {@code operandSize} by converting its byte count to a
     * {@code Scale}.
     */
    public static void movSZx(AMD64MacroAssembler asm, AMD64BaseAssembler.OperandSize operandSize, ExtendMode extendMode, Register dst, AMD64Address src) {
        AMD64Address.Scale sourceWidth = AMD64Address.Scale.fromInt(operandSize.getBytes());
        AMD64MacroAssembler.movSZx(asm, sourceWidth, extendMode, dst, src);
    }

    /**
     * Loads a value of width {@code scaleSrc} from {@code src} into {@code dst}.
     * <p>
     * For 1-, 2- and 4-byte sources the sign-extending move is used when
     * {@code extendMode} is {@code SIGN_EXTEND}; any other mode selects
     * {@code movzbq}, {@code movzwq} or {@code movl} respectively. 8-byte sources are
     * loaded with {@code movq} regardless of the mode.
     *
     * @throws IllegalStateException for any other scale
     */
    public static void movSZx(AMD64MacroAssembler asm, AMD64Address.Scale scaleSrc, ExtendMode extendMode, Register dst, AMD64Address src) {
        final boolean signExtend = extendMode == ExtendMode.SIGN_EXTEND;
        switch (scaleSrc) {
            case Times1:
                if (signExtend) {
                    asm.movsbq(dst, src);
                } else {
                    asm.movzbq(dst, src);
                }
                return;
            case Times2:
                if (signExtend) {
                    asm.movswq(dst, src);
                } else {
                    asm.movzwq(dst, src);
                }
                return;
            case Times4:
                if (signExtend) {
                    asm.movslq(dst, src);
                } else {
                    asm.movl(dst, src);
                }
                return;
            case Times8:
                asm.movq(dst, src);
                return;
            default:
                throw new IllegalStateException();
        }
    }

    /**
     * Vector load-and-extend from the memory at {@code src + displacement}, without an
     * index register; forwards to the overload that accepts an index, passing {@code null}.
     */
    public static void pmovSZx(AMD64MacroAssembler asm, AVXKind.AVXSize size, ExtendMode extendMode, Register dst, AMD64Address.Scale scaleDst, Register src, AMD64Address.Scale scaleSrc, int displacement) {
        final Register noIndex = null;
        AMD64MacroAssembler.pmovSZx(asm, size, dst, extendMode, scaleDst, src, scaleSrc, noIndex, displacement);
    }

    /**
     * Vector load-and-extend from an address built out of {@code src}, an optional
     * {@code index} scaled by {@code scaleSrc}, and {@code displacement} after it has
     * been passed through {@code scaleDisplacement}.
     *
     * @param size must be {@code XMM} or {@code YMM}
     * @param index index register, or {@code null} for a base-plus-displacement address
     */
    public static void pmovSZx(AMD64MacroAssembler asm, AVXKind.AVXSize size, Register dst, ExtendMode extendMode, AMD64Address.Scale scaleDst, Register src, AMD64Address.Scale scaleSrc, Register index, int displacement) {
        assert (size == AVXKind.AVXSize.XMM || size == AVXKind.AVXSize.YMM);
        final int scaledDisplacement = AMD64MacroAssembler.scaleDisplacement(scaleDst, scaleSrc, displacement);
        final AMD64Address address;
        if (index == null) {
            address = new AMD64Address(src, scaledDisplacement);
        } else {
            address = new AMD64Address(src, index, scaleSrc, scaledDisplacement);
        }
        AMD64MacroAssembler.pmovSZx(asm, size, extendMode, dst, scaleDst, address, scaleSrc);
    }

    /**
     * Loads a vector from {@code src} into {@code dst}, widening elements from
     * {@code scaleSrc} to {@code scaleDst}.
     * <p>
     * When the widths are equal a plain {@code movdqu} is emitted. Otherwise the
     * source must be narrower, and the AVX or SSE load-and-extend helper is chosen by
     * {@code isAVX(asm)}.
     */
    public static void pmovSZx(AMD64MacroAssembler asm, AVXKind.AVXSize size, ExtendMode extendMode, Register dst, AMD64Address.Scale scaleDst, AMD64Address src, AMD64Address.Scale scaleSrc) {
        if (scaleSrc.value >= scaleDst.value) {
            // Narrowing is not supported; only the same-width case is valid here.
            assert (scaleSrc.value == scaleDst.value);
            AMD64MacroAssembler.movdqu(asm, size, dst, src);
            return;
        }
        if (AMD64MacroAssembler.isAVX(asm)) {
            AMD64MacroAssembler.loadAndExtendAVX(asm, size, extendMode, dst, scaleDst, src, scaleSrc);
            return;
        }
        AMD64MacroAssembler.loadAndExtendSSE(asm, extendMode, dst, scaleDst, src, scaleSrc);
    }

    /**
     * Register-to-register counterpart of the load-and-extend helper: widens the elements of
     * {@code src} into {@code dst} when the source scale is smaller than the destination scale,
     * and otherwise emits a plain {@code movdqu} register move.
     *
     * @param asm assembler to emit into
     * @param size vector size (only consumed by the AVX encodings and the plain move)
     * @param extendMode whether elements are sign- or zero-extended
     * @param dst destination register
     * @param scaleDst element scale of the destination
     * @param src source register
     * @param scaleSrc element scale of the source; asserted not to exceed {@code scaleDst}
     */
    public static void pmovSZx(AMD64MacroAssembler asm, AVXKind.AVXSize size, ExtendMode extendMode, Register dst, AMD64Address.Scale scaleDst, Register src, AMD64Address.Scale scaleSrc) {
        boolean widening = scaleSrc.value < scaleDst.value;
        if (!widening) {
            // No extension requested: the scales must match and a plain move suffices.
            assert (scaleSrc.value == scaleDst.value);
            movdqu(asm, size, dst, src);
            return;
        }
        if (isAVX(asm)) {
            AMD64Assembler.VexRMOp extendOp = getAVXLoadAndExtendOp(scaleDst, scaleSrc, extendMode);
            extendOp.emit(asm, size, dst, src);
            return;
        }
        loadAndExtendSSE(asm, extendMode, dst, scaleDst, src, scaleSrc);
    }

    /**
     * Emits {@code VPMOVMSKB} when AVX is supported and the legacy {@code pmovmskb} otherwise.
     * {@code size} is only used by the AVX encoding.
     */
    public static void pmovmsk(AMD64MacroAssembler asm, AVXKind.AVXSize size, Register dst, Register src) {
        if (!isAVX(asm)) {
            asm.pmovmskb(dst, src);
            return;
        }
        AMD64Assembler.VexRMOp.VPMOVMSKB.emit(asm, size, dst, src);
    }

    /**
     * Loads {@code dst} from memory: {@code VMOVDQU32} with AVX, legacy {@code movdqu} without.
     * {@code size} is only used by the AVX encoding.
     */
    public static void movdqu(AMD64MacroAssembler asm, AVXKind.AVXSize size, Register dst, AMD64Address src) {
        if (!isAVX(asm)) {
            asm.movdqu(dst, src);
            return;
        }
        AMD64Assembler.VexMoveOp.VMOVDQU32.emit(asm, size, dst, src);
    }

    /**
     * Stores {@code src} to memory: {@code VMOVDQU32} with AVX, legacy {@code movdqu} without.
     * {@code size} is only used by the AVX encoding.
     */
    public static void movdqu(AMD64MacroAssembler asm, AVXKind.AVXSize size, AMD64Address dst, Register src) {
        if (!isAVX(asm)) {
            asm.movdqu(dst, src);
            return;
        }
        AMD64Assembler.VexMoveOp.VMOVDQU32.emit(asm, size, dst, src);
    }

    /**
     * Register-to-register move: {@code VMOVDQU32} with AVX, legacy {@code movdqu} without.
     * {@code size} is only used by the AVX encoding.
     */
    public static void movdqu(AMD64MacroAssembler asm, AVXKind.AVXSize size, Register dst, Register src) {
        if (!isAVX(asm)) {
            asm.movdqu(dst, src);
            return;
        }
        AMD64Assembler.VexMoveOp.VMOVDQU32.emit(asm, size, dst, src);
    }

    /**
     * Packed compare-equal of two registers, with the element width taken from the byte value of
     * {@code elementStride}.
     */
    public static void pcmpeq(AMD64MacroAssembler asm, AVXKind.AVXSize vectorSize, AMD64Address.Scale elementStride, Register dst, Register src) {
        int elementBytes = elementStride.value;
        pcmpeq(asm, vectorSize, elementBytes, dst, src);
    }

    /**
     * Packed compare-equal of two registers, with the element width taken from
     * {@code elementKind.getByteCount()}.
     */
    public static void pcmpeq(AMD64MacroAssembler asm, AVXKind.AVXSize vectorSize, JavaKind elementKind, Register dst, Register src) {
        int elementBytes = elementKind.getByteCount();
        pcmpeq(asm, vectorSize, elementBytes, dst, src);
    }

    /**
     * Dispatches a register/register packed compare-equal to the byte, word or dword variant.
     *
     * @param elementSize element width in bytes; only 1, 2 and 4 are handled
     * @throws UnsupportedOperationException for any other element width
     */
    private static void pcmpeq(AMD64MacroAssembler asm, AVXKind.AVXSize vectorSize, int elementSize, Register dst, Register src) {
        if (elementSize == 1) {
            pcmpeqb(asm, vectorSize, dst, src);
        } else if (elementSize == 2) {
            pcmpeqw(asm, vectorSize, dst, src);
        } else if (elementSize == 4) {
            pcmpeqd(asm, vectorSize, dst, src);
        } else {
            throw new UnsupportedOperationException();
        }
    }

    /**
     * Byte-wise packed compare-equal of {@code dst} and {@code src} into {@code dst}: emits
     * {@code VPCMPEQB} with operands (dst, src, dst) under AVX, legacy {@code pcmpeqb} otherwise.
     * {@code size} is only used by the AVX encoding.
     */
    public static void pcmpeqb(AMD64MacroAssembler asm, AVXKind.AVXSize size, Register dst, Register src) {
        if (!isAVX(asm)) {
            asm.pcmpeqb(dst, src);
            return;
        }
        AMD64Assembler.VexRVMOp.VPCMPEQB.emit(asm, size, dst, src, dst);
    }

    /**
     * Word-wise packed compare-equal of {@code dst} and {@code src} into {@code dst}: emits
     * {@code VPCMPEQW} with operands (dst, src, dst) under AVX, legacy {@code pcmpeqw} otherwise.
     * {@code vectorSize} is only used by the AVX encoding.
     */
    public static void pcmpeqw(AMD64MacroAssembler asm, AVXKind.AVXSize vectorSize, Register dst, Register src) {
        if (!isAVX(asm)) {
            asm.pcmpeqw(dst, src);
            return;
        }
        AMD64Assembler.VexRVMOp.VPCMPEQW.emit(asm, vectorSize, dst, src, dst);
    }

    /**
     * Dword-wise packed compare-equal of {@code dst} and {@code src} into {@code dst}: emits
     * {@code VPCMPEQD} with operands (dst, src, dst) under AVX, legacy {@code pcmpeqd} otherwise.
     * {@code vectorSize} is only used by the AVX encoding.
     */
    public static void pcmpeqd(AMD64MacroAssembler asm, AVXKind.AVXSize vectorSize, Register dst, Register src) {
        if (!isAVX(asm)) {
            asm.pcmpeqd(dst, src);
            return;
        }
        AMD64Assembler.VexRVMOp.VPCMPEQD.emit(asm, vectorSize, dst, src, dst);
    }

    /**
     * Packed compare-equal of a register against a memory operand, with the element width taken
     * from the byte value of {@code elementStride}.
     */
    public static void pcmpeq(AMD64MacroAssembler asm, AVXKind.AVXSize size, AMD64Address.Scale elementStride, Register dst, AMD64Address src) {
        int elementBytes = elementStride.value;
        pcmpeq(asm, size, elementBytes, dst, src);
    }

    /**
     * Packed compare-equal of a register against a memory operand, with the element width taken
     * from {@code elementKind.getByteCount()}.
     */
    public static void pcmpeq(AMD64MacroAssembler asm, AVXKind.AVXSize size, JavaKind elementKind, Register dst, AMD64Address src) {
        int elementBytes = elementKind.getByteCount();
        pcmpeq(asm, size, elementBytes, dst, src);
    }

    /**
     * Dispatches a register/memory packed compare-equal to the byte, word or dword variant.
     *
     * @param elementSize element width in bytes; only 1, 2 and 4 are handled
     * @throws UnsupportedOperationException for any other element width
     */
    private static void pcmpeq(AMD64MacroAssembler asm, AVXKind.AVXSize vectorSize, int elementSize, Register dst, AMD64Address src) {
        if (elementSize == 1) {
            pcmpeqb(asm, vectorSize, dst, src);
        } else if (elementSize == 2) {
            pcmpeqw(asm, vectorSize, dst, src);
        } else if (elementSize == 4) {
            pcmpeqd(asm, vectorSize, dst, src);
        } else {
            throw new UnsupportedOperationException();
        }
    }

    /**
     * Byte-wise packed compare-equal of {@code dst} against a memory operand, result in
     * {@code dst}: {@code VPCMPEQB} under AVX, legacy {@code pcmpeqb} otherwise. {@code size} is
     * only used by the AVX encoding.
     */
    public static void pcmpeqb(AMD64MacroAssembler asm, AVXKind.AVXSize size, Register dst, AMD64Address src) {
        if (!isAVX(asm)) {
            asm.pcmpeqb(dst, src);
            return;
        }
        AMD64Assembler.VexRVMOp.VPCMPEQB.emit(asm, size, dst, dst, src);
    }

    /**
     * Word-wise packed compare-equal of {@code dst} against a memory operand, result in
     * {@code dst}: {@code VPCMPEQW} under AVX, legacy {@code pcmpeqw} otherwise. {@code size} is
     * only used by the AVX encoding.
     */
    public static void pcmpeqw(AMD64MacroAssembler asm, AVXKind.AVXSize size, Register dst, AMD64Address src) {
        if (!isAVX(asm)) {
            asm.pcmpeqw(dst, src);
            return;
        }
        AMD64Assembler.VexRVMOp.VPCMPEQW.emit(asm, size, dst, dst, src);
    }

    /**
     * Dword-wise packed compare-equal of {@code dst} against a memory operand, result in
     * {@code dst}: {@code VPCMPEQD} under AVX, legacy {@code pcmpeqd} otherwise. {@code size} is
     * only used by the AVX encoding.
     */
    public static void pcmpeqd(AMD64MacroAssembler asm, AVXKind.AVXSize size, Register dst, AMD64Address src) {
        if (!isAVX(asm)) {
            asm.pcmpeqd(dst, src);
            return;
        }
        AMD64Assembler.VexRVMOp.VPCMPEQD.emit(asm, size, dst, dst, src);
    }

    /**
     * Byte-wise packed compare-greater-than of {@code dst} against {@code src}, result in
     * {@code dst}: {@code VPCMPGTB} with operands (dst, dst, src) under AVX, legacy
     * {@code pcmpgtb} otherwise. {@code size} is only used by the AVX encoding.
     */
    public static void pcmpgtb(AMD64MacroAssembler asm, AVXKind.AVXSize size, Register dst, Register src) {
        if (!isAVX(asm)) {
            asm.pcmpgtb(dst, src);
            return;
        }
        AMD64Assembler.VexRVMOp.VPCMPGTB.emit(asm, size, dst, dst, src);
    }

    /**
     * Dword-wise packed compare-greater-than of {@code dst} against {@code src}, result in
     * {@code dst}: {@code VPCMPGTD} with operands (dst, dst, src) under AVX, legacy
     * {@code pcmpgtd} otherwise. {@code size} is only used by the AVX encoding.
     */
    public static void pcmpgtd(AMD64MacroAssembler asm, AVXKind.AVXSize size, Register dst, Register src) {
        if (!isAVX(asm)) {
            asm.pcmpgtd(dst, src);
            return;
        }
        AMD64Assembler.VexRVMOp.VPCMPGTD.emit(asm, size, dst, dst, src);
    }

    /**
     * Converts a displacement given in destination-scale units into source-scale units.
     * <p>
     * When the source scale is smaller than the destination scale, the displacement is shifted
     * right by the difference of the two {@code log2} values; the bits shifted out are asserted to
     * be zero. When the scales are equal the displacement is returned unchanged. A source scale
     * larger than the destination scale is rejected by assertion.
     *
     * @return the displacement to use with the source operand
     */
    private static int scaleDisplacement(AMD64Address.Scale scaleDst, AMD64Address.Scale scaleSrc, int displacement) {
        if (scaleSrc.value >= scaleDst.value) {
            assert (scaleSrc.value == scaleDst.value);
            return displacement;
        }
        int shift = scaleDst.log2 - scaleSrc.log2;
        int droppedBitsMask = (1 << shift) - 1;
        assert ((displacement & droppedBitsMask) == 0);
        return displacement >> shift;
    }

    /**
     * Emits the AVX load-and-extend instruction chosen by {@code getAVXLoadAndExtendOp} for the
     * given scale pair and extend mode, reading from the memory operand {@code src}.
     */
    public static void loadAndExtendAVX(AMD64MacroAssembler asm, AVXKind.AVXSize size, ExtendMode extendMode, Register dst, AMD64Address.Scale scaleDst, AMD64Address src, AMD64Address.Scale scaleSrc) {
        AMD64Assembler.VexRMOp extendOp = getAVXLoadAndExtendOp(scaleDst, scaleSrc, extendMode);
        extendOp.emit(asm, size, dst, src);
    }

    /**
     * Picks the {@code VPMOVSX*}/{@code VPMOVZX*} opcode for a (source scale, destination scale)
     * pair. {@link ExtendMode#SIGN_EXTEND} selects the SX form; any other mode selects the ZX
     * form.
     * <p>
     * Supported pairs: Times1 to Times2/Times4/Times8, Times2 to Times4/Times8, and Times4 (which
     * always maps to the DQ form regardless of {@code scaleDst}). Every other combination ends in
     * {@code GraalError.shouldNotReachHere()}.
     */
    private static AMD64Assembler.VexRMOp getAVXLoadAndExtendOp(AMD64Address.Scale scaleDst, AMD64Address.Scale scaleSrc, ExtendMode extendMode) {
        boolean signed = extendMode == ExtendMode.SIGN_EXTEND;
        switch (scaleSrc) {
            case Times1:
                switch (scaleDst) {
                    case Times2:
                        return signed ? AMD64Assembler.VexRMOp.VPMOVSXBW : AMD64Assembler.VexRMOp.VPMOVZXBW;
                    case Times4:
                        return signed ? AMD64Assembler.VexRMOp.VPMOVSXBD : AMD64Assembler.VexRMOp.VPMOVZXBD;
                    case Times8:
                        return signed ? AMD64Assembler.VexRMOp.VPMOVSXBQ : AMD64Assembler.VexRMOp.VPMOVZXBQ;
                    default:
                        throw GraalError.shouldNotReachHere();
                }
            case Times2:
                switch (scaleDst) {
                    case Times4:
                        return signed ? AMD64Assembler.VexRMOp.VPMOVSXWD : AMD64Assembler.VexRMOp.VPMOVZXWD;
                    case Times8:
                        return signed ? AMD64Assembler.VexRMOp.VPMOVSXWQ : AMD64Assembler.VexRMOp.VPMOVZXWQ;
                    default:
                        throw GraalError.shouldNotReachHere();
                }
            case Times4:
                return signed ? AMD64Assembler.VexRMOp.VPMOVSXDQ : AMD64Assembler.VexRMOp.VPMOVZXDQ;
            default:
                throw GraalError.shouldNotReachHere();
        }
    }

    /**
     * Emits the SSE {@code pmovsx*}/{@code pmovzx*} instruction that loads from memory and widens
     * elements from {@code scaleSrc} to {@code scaleDst}. {@link ExtendMode#SIGN_EXTEND} selects
     * the sx form; any other mode selects the zx form.
     * <p>
     * Supported pairs: Times1 to Times2/Times4/Times8, Times2 to Times4/Times8, and Times4 (which
     * always emits the dq form regardless of {@code scaleDst}). Every other combination ends in
     * {@code GraalError.shouldNotReachHere()}.
     */
    public static void loadAndExtendSSE(AMD64MacroAssembler asm, ExtendMode extendMode, Register dst, AMD64Address.Scale scaleDst, AMD64Address src, AMD64Address.Scale scaleSrc) {
        boolean signed = extendMode == ExtendMode.SIGN_EXTEND;
        switch (scaleSrc) {
            case Times1:
                switch (scaleDst) {
                    case Times2:
                        if (signed) {
                            asm.pmovsxbw(dst, src);
                            return;
                        }
                        asm.pmovzxbw(dst, src);
                        return;
                    case Times4:
                        if (signed) {
                            asm.pmovsxbd(dst, src);
                            return;
                        }
                        asm.pmovzxbd(dst, src);
                        return;
                    case Times8:
                        if (signed) {
                            asm.pmovsxbq(dst, src);
                            return;
                        }
                        asm.pmovzxbq(dst, src);
                        return;
                    default:
                        throw GraalError.shouldNotReachHere();
                }
            case Times2:
                switch (scaleDst) {
                    case Times4:
                        if (signed) {
                            asm.pmovsxwd(dst, src);
                            return;
                        }
                        asm.pmovzxwd(dst, src);
                        return;
                    case Times8:
                        if (signed) {
                            asm.pmovsxwq(dst, src);
                            return;
                        }
                        asm.pmovzxwq(dst, src);
                        return;
                    default:
                        throw GraalError.shouldNotReachHere();
                }
            case Times4:
                if (signed) {
                    asm.pmovsxdq(dst, src);
                    return;
                }
                asm.pmovzxdq(dst, src);
                return;
            default:
                throw GraalError.shouldNotReachHere();
        }
    }

    /**
     * Register-source variant of the SSE extend: emits the {@code pmovsx*}/{@code pmovzx*}
     * instruction that widens elements of {@code src} from {@code scaleSrc} to {@code scaleDst}.
     * {@link ExtendMode#SIGN_EXTEND} selects the sx form; any other mode selects the zx form.
     * <p>
     * Supported pairs: Times1 to Times2/Times4/Times8, Times2 to Times4/Times8, and Times4 (which
     * always emits the dq form regardless of {@code scaleDst}). Every other combination ends in
     * {@code GraalError.shouldNotReachHere()}.
     */
    public static void loadAndExtendSSE(AMD64MacroAssembler asm, ExtendMode extendMode, Register dst, AMD64Address.Scale scaleDst, Register src, AMD64Address.Scale scaleSrc) {
        boolean signed = extendMode == ExtendMode.SIGN_EXTEND;
        switch (scaleSrc) {
            case Times1:
                switch (scaleDst) {
                    case Times2:
                        if (signed) {
                            asm.pmovsxbw(dst, src);
                            return;
                        }
                        asm.pmovzxbw(dst, src);
                        return;
                    case Times4:
                        if (signed) {
                            asm.pmovsxbd(dst, src);
                            return;
                        }
                        asm.pmovzxbd(dst, src);
                        return;
                    case Times8:
                        if (signed) {
                            asm.pmovsxbq(dst, src);
                            return;
                        }
                        asm.pmovzxbq(dst, src);
                        return;
                    default:
                        throw GraalError.shouldNotReachHere();
                }
            case Times2:
                switch (scaleDst) {
                    case Times4:
                        if (signed) {
                            asm.pmovsxwd(dst, src);
                            return;
                        }
                        asm.pmovzxwd(dst, src);
                        return;
                    case Times8:
                        if (signed) {
                            asm.pmovsxwq(dst, src);
                            return;
                        }
                        asm.pmovzxwq(dst, src);
                        return;
                    default:
                        throw GraalError.shouldNotReachHere();
                }
            case Times4:
                if (signed) {
                    asm.pmovsxdq(dst, src);
                    return;
                }
                asm.pmovzxdq(dst, src);
                return;
            default:
                throw GraalError.shouldNotReachHere();
        }
    }

    /**
     * Emits {@code VPACKUSWB} with operands (dst, dst, src) under AVX and the legacy
     * {@code packuswb} otherwise. {@code size} is only used by the AVX encoding.
     */
    public static void packuswb(AMD64MacroAssembler asm, AVXKind.AVXSize size, Register dst, Register src) {
        if (!isAVX(asm)) {
            asm.packuswb(dst, src);
            return;
        }
        AMD64Assembler.VexRVMOp.VPACKUSWB.emit(asm, size, dst, dst, src);
    }

    /**
     * Emits {@code VPACKUSDW} with operands (dst, dst, src) under AVX and the legacy
     * {@code packusdw} otherwise. {@code size} is only used by the AVX encoding.
     */
    public static void packusdw(AMD64MacroAssembler asm, AVXKind.AVXSize size, Register dst, Register src) {
        if (!isAVX(asm)) {
            asm.packusdw(dst, src);
            return;
        }
        AMD64Assembler.VexRVMOp.VPACKUSDW.emit(asm, size, dst, dst, src);
    }

    /**
     * Two-operand form of {@code palignr}: uses {@code dst} as both the destination and the first
     * source of the three-operand variant.
     */
    public static void palignr(AMD64MacroAssembler asm, AVXKind.AVXSize size, Register dst, Register src, int imm8) {
        final Register firstSource = dst;
        palignr(asm, size, dst, firstSource, src, imm8);
    }

    /**
     * Three-operand {@code palignr}. Under AVX this is a single {@code VPALIGNR}; without AVX
     * {@code src1} is first copied into {@code dst} (unless they are already the same register)
     * and the legacy two-operand {@code palignr} is applied with {@code src2}.
     * <p>
     * NOTE(review): in the non-AVX path, if {@code dst} is the same register as {@code src2} but
     * not {@code src1}, the copy appears to overwrite {@code src2} before it is consumed. Callers
     * presumably avoid that aliasing; confirm.
     */
    public static void palignr(AMD64MacroAssembler asm, AVXKind.AVXSize size, Register dst, Register src1, Register src2, int imm8) {
        if (isAVX(asm)) {
            AMD64Assembler.VexRVMIOp.VPALIGNR.emit(asm, size, dst, src1, src2, imm8);
            return;
        }
        boolean alreadyInDst = dst.equals(src1);
        if (!alreadyInDst) {
            asm.movdqu(dst, src1);
        }
        asm.palignr(dst, src2, imm8);
    }

    /**
     * Two-operand form of {@code pand}: uses {@code dst} as both the destination and the first
     * source of the three-operand variant.
     */
    public static void pand(AMD64MacroAssembler asm, AVXKind.AVXSize size, Register dst, Register src) {
        final Register firstSource = dst;
        pand(asm, size, dst, firstSource, src);
    }

    /**
     * Three-operand bitwise AND: {@code dst = src1 & src2}.
     * <p>
     * Under AVX this is a single {@code VPAND}. Without AVX the destructive two-operand
     * {@code pand} is used, and the operand that has to be combined into {@code dst} is chosen so
     * that no source is overwritten before it is read, even when {@code dst} aliases
     * {@code src2}.
     *
     * @param asm assembler to emit into
     * @param size vector size; only used by the AVX encoding
     * @param dst destination register
     * @param src1 first source register
     * @param src2 second source register
     */
    public static void pand(AMD64MacroAssembler asm, AVXKind.AVXSize size, Register dst, Register src1, Register src2) {
        if (AMD64MacroAssembler.isAVX(asm)) {
            AMD64Assembler.VexRVMOp.VPAND.emit((AMD64Assembler)asm, size, dst, src1, src2);
            return;
        }
        if (dst.equals(src1)) {
            asm.pand(dst, src2);
        } else if (dst.equals(src2)) {
            // dst already holds src2. Copying src1 into it first would destroy src2 and yield
            // src1 & src1; AND is commutative, so combine src1 into dst directly instead.
            asm.pand(dst, src1);
        } else {
            asm.movdqu(dst, src1);
            asm.pand(dst, src2);
        }
    }

    /**
     * Bitwise AND of {@code dst} with a memory operand, result in {@code dst}: {@code VPAND}
     * under AVX, legacy {@code pand} otherwise. {@code size} is only used by the AVX encoding.
     */
    public static void pand(AMD64MacroAssembler asm, AVXKind.AVXSize size, Register dst, AMD64Address src) {
        if (!isAVX(asm)) {
            asm.pand(dst, src);
            return;
        }
        AMD64Assembler.VexRVMOp.VPAND.emit(asm, size, dst, dst, src);
    }

    /**
     * Bitwise AND of {@code dst} with a memory operand. Under AVX the memory operand is used
     * directly by {@code VPAND}; without AVX it is first loaded into {@code tmp} with
     * {@code movdqu} and then combined with the register form of {@code pand}.
     *
     * @param tmp scratch register, written only in the non-AVX path
     */
    public static void pandU(AMD64MacroAssembler asm, AVXKind.AVXSize size, Register dst, AMD64Address src, Register tmp) {
        if (isAVX(asm)) {
            AMD64Assembler.VexRVMOp.VPAND.emit(asm, size, dst, dst, src);
            return;
        }
        asm.movdqu(tmp, src);
        asm.pand(dst, tmp);
    }

    /**
     * Emits {@code VPANDN} with operands (dst, dst, src) under AVX and the legacy {@code pandn}
     * otherwise. {@code size} is only used by the AVX encoding.
     */
    public static void pandn(AMD64MacroAssembler asm, AVXKind.AVXSize size, Register dst, Register src) {
        if (!isAVX(asm)) {
            asm.pandn(dst, src);
            return;
        }
        AMD64Assembler.VexRVMOp.VPANDN.emit(asm, size, dst, dst, src);
    }

    /**
     * Emits {@code VPOR} with operands (dst, dst, src) under AVX and the legacy {@code por}
     * otherwise. {@code size} is only used by the AVX encoding.
     */
    public static void por(AMD64MacroAssembler asm, AVXKind.AVXSize size, Register dst, Register src) {
        if (!isAVX(asm)) {
            asm.por(dst, src);
            return;
        }
        AMD64Assembler.VexRVMOp.VPOR.emit(asm, size, dst, dst, src);
    }

    /**
     * Two-operand form of {@code pxor}: uses {@code dst} as both the destination and the first
     * source of the three-operand variant.
     */
    public static void pxor(AMD64MacroAssembler asm, AVXKind.AVXSize size, Register dst, Register src) {
        final Register firstSource = dst;
        pxor(asm, size, dst, firstSource, src);
    }

    /**
     * Three-operand bitwise XOR: {@code dst = src1 ^ src2}.
     * <p>
     * Under AVX this is a single {@code VPXOR}. Without AVX the destructive two-operand
     * {@code pxor} is used, and the operand that has to be combined into {@code dst} is chosen so
     * that no source is overwritten before it is read, even when {@code dst} aliases
     * {@code src2}.
     *
     * @param asm assembler to emit into
     * @param size vector size; only used by the AVX encoding
     * @param dst destination register
     * @param src1 first source register
     * @param src2 second source register
     */
    public static void pxor(AMD64MacroAssembler asm, AVXKind.AVXSize size, Register dst, Register src1, Register src2) {
        if (AMD64MacroAssembler.isAVX(asm)) {
            AMD64Assembler.VexRVMOp.VPXOR.emit((AMD64Assembler)asm, size, dst, src1, src2);
            return;
        }
        if (dst.equals(src1)) {
            asm.pxor(dst, src2);
        } else if (dst.equals(src2)) {
            // dst already holds src2. Copying src1 into it first would destroy src2 and yield
            // src1 ^ src1 (all zeros); XOR is commutative, so combine src1 into dst directly.
            asm.pxor(dst, src1);
        } else {
            asm.movdqu(dst, src1);
            asm.pxor(dst, src2);
        }
    }

    /**
     * In-place form of {@code psllw}: shifts {@code dst} by {@code imm8}, using {@code dst} as
     * its own source in the three-operand variant.
     */
    public static void psllw(AMD64MacroAssembler asm, AVXKind.AVXSize size, Register dst, int imm8) {
        final Register source = dst;
        psllw(asm, size, dst, source, imm8);
    }

    /**
     * Emits {@code VPSLLW dst, src, imm8} under AVX. Without AVX, {@code src} is copied into
     * {@code dst} (unless they are the same register) and the legacy in-place {@code psllw} is
     * applied. {@code size} is only used by the AVX encoding.
     */
    public static void psllw(AMD64MacroAssembler asm, AVXKind.AVXSize size, Register dst, Register src, int imm8) {
        if (isAVX(asm)) {
            AMD64Assembler.VexShiftOp.VPSLLW.emit(asm, size, dst, src, imm8);
            return;
        }
        boolean inPlace = dst.equals(src);
        if (!inPlace) {
            asm.movdqu(dst, src);
        }
        asm.psllw(dst, imm8);
    }

    /**
     * In-place form of {@code psrlw}: shifts {@code dst} by {@code imm8}, using {@code dst} as
     * its own source in the three-operand variant.
     */
    public static void psrlw(AMD64MacroAssembler asm, AVXKind.AVXSize size, Register dst, int imm8) {
        final Register source = dst;
        psrlw(asm, size, dst, source, imm8);
    }

    /**
     * Emits {@code VPSRLW dst, src, imm8} under AVX. Without AVX, {@code src} is copied into
     * {@code dst} (unless they are the same register) and the legacy in-place {@code psrlw} is
     * applied. {@code size} is only used by the AVX encoding.
     */
    public static void psrlw(AMD64MacroAssembler asm, AVXKind.AVXSize size, Register dst, Register src, int imm8) {
        if (isAVX(asm)) {
            AMD64Assembler.VexShiftOp.VPSRLW.emit(asm, size, dst, src, imm8);
            return;
        }
        boolean inPlace = dst.equals(src);
        if (!inPlace) {
            asm.movdqu(dst, src);
        }
        asm.psrlw(dst, imm8);
    }

    /**
     * In-place form of {@code pslld}: shifts {@code dst} by {@code imm8}, using {@code dst} as
     * its own source in the three-operand variant.
     */
    public static void pslld(AMD64MacroAssembler asm, AVXKind.AVXSize size, Register dst, int imm8) {
        final Register source = dst;
        pslld(asm, size, dst, source, imm8);
    }

    /**
     * Emits {@code VPSLLD dst, src, imm8} under AVX. Without AVX, {@code src} is copied into
     * {@code dst} (unless they are the same register) and the legacy in-place {@code pslld} is
     * applied. {@code size} is only used by the AVX encoding.
     */
    public static void pslld(AMD64MacroAssembler asm, AVXKind.AVXSize size, Register dst, Register src, int imm8) {
        if (isAVX(asm)) {
            AMD64Assembler.VexShiftOp.VPSLLD.emit(asm, size, dst, src, imm8);
            return;
        }
        boolean inPlace = dst.equals(src);
        if (!inPlace) {
            asm.movdqu(dst, src);
        }
        asm.pslld(dst, imm8);
    }

    /**
     * In-place form of {@code psrld}: shifts {@code dst} by {@code imm8}, using {@code dst} as
     * its own source in the three-operand variant.
     */
    public static void psrld(AMD64MacroAssembler asm, AVXKind.AVXSize size, Register dst, int imm8) {
        final Register source = dst;
        psrld(asm, size, dst, source, imm8);
    }

    /**
     * Emits {@code VPSRLD dst, src, imm8} under AVX. Without AVX, {@code src} is copied into
     * {@code dst} (unless they are the same register) and the legacy in-place {@code psrld} is
     * applied. {@code size} is only used by the AVX encoding.
     */
    public static void psrld(AMD64MacroAssembler asm, AVXKind.AVXSize size, Register dst, Register src, int imm8) {
        if (isAVX(asm)) {
            AMD64Assembler.VexShiftOp.VPSRLD.emit(asm, size, dst, src, imm8);
            return;
        }
        boolean inPlace = dst.equals(src);
        if (!inPlace) {
            asm.movdqu(dst, src);
        }
        asm.psrld(dst, imm8);
    }

    /**
     * Emits {@code VPSHUFB} with operands (dst, dst, src) under AVX and the legacy
     * {@code pshufb} otherwise. {@code size} is only used by the AVX encoding.
     */
    public static void pshufb(AMD64MacroAssembler asm, AVXKind.AVXSize size, Register dst, Register src) {
        if (!isAVX(asm)) {
            asm.pshufb(dst, src);
            return;
        }
        AMD64Assembler.VexRVMOp.VPSHUFB.emit(asm, size, dst, dst, src);
    }

    /**
     * Memory-operand variant: emits {@code VPSHUFB} with operands (dst, dst, src) under AVX and
     * the legacy {@code pshufb} otherwise. {@code size} is only used by the AVX encoding.
     */
    public static void pshufb(AMD64MacroAssembler asm, AVXKind.AVXSize size, Register dst, AMD64Address src) {
        if (!isAVX(asm)) {
            asm.pshufb(dst, src);
            return;
        }
        AMD64Assembler.VexRVMOp.VPSHUFB.emit(asm, size, dst, dst, src);
    }

    /**
     * Single-operand form of {@code ptest}: tests {@code dst} against itself.
     */
    public static void ptest(AMD64MacroAssembler asm, AVXKind.AVXSize size, Register dst) {
        final Register self = dst;
        ptest(asm, size, dst, self);
    }

    /**
     * Emits {@code VPTEST dst, src} under AVX and the legacy {@code ptest} otherwise.
     * {@code size} is only used by the AVX encoding.
     */
    public static void ptest(AMD64MacroAssembler asm, AVXKind.AVXSize size, Register dst, Register src) {
        if (!isAVX(asm)) {
            asm.ptest(dst, src);
            return;
        }
        AMD64Assembler.VexRMOp.VPTEST.emit(asm, size, dst, src);
    }

    /**
     * Tests {@code dst} against a memory operand. Under AVX the memory operand is used directly
     * by {@code VPTEST}; without AVX it is first loaded into {@code tmp} with {@code movdqu} and
     * then tested with the register form of {@code ptest}.
     *
     * @param tmp scratch register, written only in the non-AVX path
     */
    public static void ptestU(AMD64MacroAssembler asm, AVXKind.AVXSize size, Register dst, AMD64Address src, Register tmp) {
        if (isAVX(asm)) {
            AMD64Assembler.VexRMOp.VPTEST.emit(asm, size, dst, src);
            return;
        }
        asm.movdqu(tmp, src);
        asm.ptest(dst, tmp);
    }

    /**
     * Emits the VEX-encoded {@code MOVLHPS} at XMM size with operands (dst, dst, src) under AVX
     * and the legacy {@code movlhps} otherwise.
     */
    public static void movlhps(AMD64MacroAssembler asm, Register dst, Register src) {
        if (!isAVX(asm)) {
            asm.movlhps(dst, src);
            return;
        }
        AMD64Assembler.VexRVMOp.MOVLHPS.emit(asm, AVXKind.AVXSize.XMM, dst, dst, src);
    }

    /**
     * Register-to-register dword move: {@code VMOVD} at DWORD size under AVX, legacy
     * {@code movdl} otherwise.
     */
    public static void movdl(AMD64MacroAssembler asm, Register dst, Register src) {
        if (!isAVX(asm)) {
            asm.movdl(dst, src);
            return;
        }
        AMD64Assembler.VexMoveOp.VMOVD.emit(asm, AVXKind.AVXSize.DWORD, dst, src);
    }

    /**
     * Dword load from memory: {@code VMOVD} at DWORD size under AVX, legacy {@code movdl}
     * otherwise.
     */
    public static void movdl(AMD64MacroAssembler asm, Register dst, AMD64Address src) {
        if (!isAVX(asm)) {
            asm.movdl(dst, src);
            return;
        }
        AMD64Assembler.VexMoveOp.VMOVD.emit(asm, AVXKind.AVXSize.DWORD, dst, src);
    }

    /**
     * Reports whether the assembler's target supports the {@code AVX} CPU feature.
     *
     * @return the result of {@code asm.supports(AMD64.CPUFeature.AVX)}
     */
    public static boolean isAVX(AMD64MacroAssembler asm) {
        final boolean avxSupported = asm.supports(AMD64.CPUFeature.AVX);
        return avxSupported;
    }

    /**
     * Reports whether the feature set of the given architecture contains the {@code AVX} CPU
     * feature.
     *
     * @return {@code true} if {@code arch.getFeatures()} contains {@code AMD64.CPUFeature.AVX}
     */
    public static boolean isAVX(AMD64 arch) {
        final boolean avxSupported = arch.getFeatures().contains(AMD64.CPUFeature.AVX);
        return avxSupported;
    }

    /**
     * Selects how narrower elements are widened by the extend helpers in this class: the helpers
     * compare against {@link #SIGN_EXTEND} and pick the sign-extending instruction when it
     * matches, and the zero-extending instruction otherwise.
     */
    public static enum ExtendMode {
        /** Widen with the zero-extending variants (e.g. {@code movz*}, {@code pmovzx*}). */
        ZERO_EXTEND,
        /** Widen with the sign-extending variants (e.g. {@code movs*}, {@code pmovsx*}). */
        SIGN_EXTEND;

    }
}

