/*
 * Decompiled with CFR 0.152.
 */
package jdk.graal.compiler.lir.aarch64;

import jdk.graal.compiler.asm.Label;
import jdk.graal.compiler.asm.aarch64.AArch64Address;
import jdk.graal.compiler.asm.aarch64.AArch64Assembler;
import jdk.graal.compiler.asm.aarch64.AArch64MacroAssembler;
import jdk.graal.compiler.debug.GraalError;
import jdk.graal.compiler.lir.LIRInstruction;
import jdk.graal.compiler.lir.LIRInstructionClass;
import jdk.graal.compiler.lir.SyncPort;
import jdk.graal.compiler.lir.SyncPorts;
import jdk.graal.compiler.lir.aarch64.AArch64LIRInstruction;
import jdk.graal.compiler.lir.asm.CompilationResultBuilder;
import jdk.vm.ci.aarch64.AArch64;
import jdk.vm.ci.aarch64.AArch64Kind;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.ValueUtil;
import jdk.vm.ci.meta.Value;

/**
 * LIR instruction that emits an inline AArch64 routine multiplying two multi-word integers stored
 * as {@code int} arrays ({@code x} with {@code xlen} elements, {@code y} with {@code ylen}
 * elements) and writing the product into {@code z}.
 *
 * <p>The emitted code is a port of the HotSpot routines referenced by the {@link SyncPort}
 * annotations below. It works on 64-bit chunks: two adjacent 32-bit array elements are loaded
 * with one 64-bit load and rotated by 32 bits so that the element at the higher index ends up in
 * the low half (presumably because the arrays store the most significant word first, as
 * {@code java.math.BigInteger} magnitudes do - confirm against the caller).
 *
 * <p>The routine only reads the six input registers; all intermediate state lives in the fixed
 * temporary registers listed in {@link #temps} plus the two scratch registers handed out by the
 * macro assembler.
 */
@SyncPorts(value={@SyncPort(from="https://github.com/openjdk/jdk/blob/0a3a925ad88921d387aa851157f54ac0054d347b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp#L4626-L4664", sha1="9c106817eae54d0e6783c1442b26fee08bc7a07a"), @SyncPort(from="https://github.com/openjdk/jdk/blob/0a3a925ad88921d387aa851157f54ac0054d347b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp#L3113-L3122", sha1="376de6fbb2caccaac53c4aa934ce96f8f0dc7f18"), @SyncPort(from="https://github.com/openjdk/jdk/blob/0a3a925ad88921d387aa851157f54ac0054d347b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp#L3310-L3620", sha1="dfdfc5113a04698da12c5cb29bc78ced09a2eb63")})
public final class AArch64BigIntegerMultiplyToLenOp
extends AArch64LIRInstruction {
    /** Instruction-class descriptor passed to the superclass constructor. */
    public static final LIRInstructionClass<AArch64BigIntegerMultiplyToLenOp> TYPE = LIRInstructionClass.create(AArch64BigIntegerMultiplyToLenOp.class);
    // NOTE(review): all six inputs are @Alive register operands; this is understood to keep them
    // live (and distinct from the @Temp registers) for the whole instruction - confirm against
    // the LIRInstruction operand-mode documentation.

    /** Base address for the element accesses into {@code x} (64-bit value, checked in emitCode). */
    @LIRInstruction.Alive(value={LIRInstruction.OperandFlag.REG})
    private Value xValue;
    /** Number of 32-bit elements in {@code x} (32-bit value, checked in emitCode). */
    @LIRInstruction.Alive(value={LIRInstruction.OperandFlag.REG})
    private Value xlenValue;
    /** Base address for the element accesses into {@code y} (64-bit value, checked in emitCode). */
    @LIRInstruction.Alive(value={LIRInstruction.OperandFlag.REG})
    private Value yValue;
    /** Number of 32-bit elements in {@code y} (32-bit value, checked in emitCode). */
    @LIRInstruction.Alive(value={LIRInstruction.OperandFlag.REG})
    private Value ylenValue;
    /** Base address for the element accesses into the result {@code z} (64-bit value). */
    @LIRInstruction.Alive(value={LIRInstruction.OperandFlag.REG})
    private Value zValue;
    /** Length of {@code z}; used as the initial store index into {@code z} (32-bit value). */
    @LIRInstruction.Alive(value={LIRInstruction.OperandFlag.REG})
    private Value zlenValue;
    /**
     * Fixed registers overwritten by the emitted code. Must stay in sync with the register list
     * passed to {@code multiplyToLen} in {@code emitCode}.
     */
    @LIRInstruction.Temp
    protected Value[] temps;

    /**
     * Creates the instruction for the given operands and reserves the thirteen fixed temporary
     * registers r10-r17 and r19-r23.
     *
     * @param xValue register holding the base address of {@code x}
     * @param xlenValue register holding the element count of {@code x}
     * @param yValue register holding the base address of {@code y}
     * @param ylenValue register holding the element count of {@code y}
     * @param zValue register holding the base address of the result {@code z}
     * @param zlenValue register holding the length of {@code z}
     */
    public AArch64BigIntegerMultiplyToLenOp(Value xValue, Value xlenValue, Value yValue, Value ylenValue, Value zValue, Value zlenValue) {
        super((LIRInstructionClass<? extends AArch64LIRInstruction>)TYPE);
        this.xValue = xValue;
        this.xlenValue = xlenValue;
        this.yValue = yValue;
        this.ylenValue = ylenValue;
        this.zValue = zValue;
        this.zlenValue = zlenValue;
        // r18 is deliberately absent from this list (presumably because it is the
        // platform-reserved register on AArch64 - confirm).
        this.temps = new Value[]{AArch64.r10.asValue(), AArch64.r11.asValue(), AArch64.r12.asValue(), AArch64.r13.asValue(), AArch64.r14.asValue(), AArch64.r15.asValue(), AArch64.r16.asValue(), AArch64.r17.asValue(), AArch64.r19.asValue(), AArch64.r20.asValue(), AArch64.r21.asValue(), AArch64.r22.asValue(), AArch64.r23.asValue()};
    }

    /**
     * Validates the operand kinds (addresses must be {@code QWORD}, lengths {@code DWORD}) and
     * then emits the multiplication routine using the fixed temporaries r10-r17 and r19-r23, in
     * the same order as they appear in {@link #temps}.
     *
     * @param crb compilation result builder (not used by this method)
     * @param masm assembler that receives the emitted instructions
     */
    @Override
    public void emitCode(CompilationResultBuilder crb, AArch64MacroAssembler masm) {
        GraalError.guarantee(this.xValue.getPlatformKind().equals((Object)AArch64Kind.QWORD), "Invalid xValue kind: %s", (Object)this.xValue);
        GraalError.guarantee(this.xlenValue.getPlatformKind().equals((Object)AArch64Kind.DWORD), "Invalid xlenValue kind: %s", (Object)this.xlenValue);
        GraalError.guarantee(this.yValue.getPlatformKind().equals((Object)AArch64Kind.QWORD), "Invalid yValue kind: %s", (Object)this.yValue);
        GraalError.guarantee(this.ylenValue.getPlatformKind().equals((Object)AArch64Kind.DWORD), "Invalid ylenValue kind: %s", (Object)this.ylenValue);
        GraalError.guarantee(this.zValue.getPlatformKind().equals((Object)AArch64Kind.QWORD), "Invalid zValue kind: %s", (Object)this.zValue);
        GraalError.guarantee(this.zlenValue.getPlatformKind().equals((Object)AArch64Kind.DWORD), "Invalid zlenValue kind: %s", (Object)this.zlenValue);
        Register x = ValueUtil.asRegister((Value)this.xValue);
        Register xlen = ValueUtil.asRegister((Value)this.xlenValue);
        Register y = ValueUtil.asRegister((Value)this.yValue);
        Register ylen = ValueUtil.asRegister((Value)this.ylenValue);
        Register z = ValueUtil.asRegister((Value)this.zValue);
        Register zlen = ValueUtil.asRegister((Value)this.zlenValue);
        // The temporaries are passed positionally as tmp1..tmp13; keep this list identical to the
        // one used to build `temps` in the constructor.
        AArch64BigIntegerMultiplyToLenOp.multiplyToLen(masm, x, xlen, y, ylen, z, zlen, AArch64.r10, AArch64.r11, AArch64.r12, AArch64.r13, AArch64.r14, AArch64.r15, AArch64.r16, AArch64.r17, AArch64.r19, AArch64.r20, AArch64.r21, AArch64.r22, AArch64.r23);
    }

    /**
     * Emits a 128-bit accumulation: adds {@code src1} and then {@code src2} to the 128-bit value
     * {@code destHi:destLo}, propagating the carry out of the low word each time. The low word of
     * the result stays in {@code destLo}; the high word is written to {@code finalDestHi}.
     * {@code destHi} itself receives the intermediate high word after the first addition.
     *
     * @param masm assembler that receives the emitted instructions
     * @param finalDestHi register receiving the final high 64 bits
     * @param destHi register holding the high 64 bits on entry
     * @param destLo register holding the low 64 bits on entry and on exit
     * @param src1 first 64-bit addend
     * @param src2 second 64-bit addend
     */
    private static void add2WithCarry(AArch64MacroAssembler masm, Register finalDestHi, Register destHi, Register destLo, Register src1, Register src2) {
        // destHi:destLo += src1
        masm.adds(64, destLo, destLo, src1);
        masm.adc(64, destHi, destHi, AArch64.zr);
        // finalDestHi:destLo = destHi:destLo + src2
        masm.adds(64, destLo, destLo, src2);
        masm.adc(64, finalDestHi, destHi, AArch64.zr);
    }

    /**
     * Emits the first pass of the multiplication: the trailing chunk of {@code x} (two 32-bit
     * elements, or just {@code x[0]} if only one is left) is multiplied with every chunk of
     * {@code y}, walking {@code y} from its end towards index 0, and the 64-bit partial products
     * are stored into {@code z} while the running carry is kept in {@code carry}.
     *
     * <p>Roughly, per iteration: {@code carry:product = xAtXstart * yAtIdx + carry},
     * {@code kdx -= 2}, store {@code product} at {@code z + 4 * kdx}.
     *
     * <p>On exit {@code xstart} has been decremented by one, {@code idx} is negative, and
     * {@code kdx} has been decremented by two per iteration. Both macro-assembler scratch
     * registers are used.
     *
     * @param masm assembler that receives the emitted instructions
     * @param x base address of {@code x} (read only)
     * @param xstart 32-bit index into {@code x}; decremented by one here
     * @param xAtXstart receives the current chunk of {@code x}
     * @param y base address of {@code y} (read only)
     * @param yAtIdx receives the current chunk of {@code y}
     * @param z base address of {@code z} (read only)
     * @param carry running 64-bit carry (read and updated)
     * @param product receives the low 64 bits of each partial product
     * @param idx 32-bit count of remaining {@code y} elements on entry (overwritten)
     * @param kdx 32-bit store index into {@code z} (overwritten)
     */
    private static void multiply64x64Loop(AArch64MacroAssembler masm, Register x, Register xstart, Register xAtXstart, Register y, Register yAtIdx, Register z, Register carry, Register product, Register idx, Register kdx) {
        Label labelFirstLoop = new Label();
        Label labelFirstLoopExit = new Label();
        Label labelOneX = new Label();
        Label labelOneY = new Label();
        Label labelMultiply = new Label();
        try (AArch64MacroAssembler.ScratchRegister sr1 = masm.getScratchRegister();
             AArch64MacroAssembler.ScratchRegister sr2 = masm.getScratchRegister();){
            Register rscratch1 = sr1.getRegister();
            Register rscratch2 = sr2.getRegister();
            // Step xstart back by one more element; if that goes negative only x[0] is left and
            // it is loaded as a single 32-bit word at labelOneX.
            masm.subs(32, xstart, xstart, 1);
            masm.branchConditionally(AArch64Assembler.ConditionFlag.MI, labelOneX);
            // Load the two 32-bit elements x[xstart], x[xstart + 1] as one 64-bit word and swap
            // the halves so that x[xstart + 1] becomes the low half. Note: unlike the other
            // indexed accesses this one uses the non-extending register-offset form.
            masm.loadAddress(rscratch1, AArch64Address.createRegisterOffsetAddress(32, x, xstart, true));
            masm.ldr(64, xAtXstart, AArch64Address.createBaseRegisterOnlyAddress(64, rscratch1));
            masm.ror(64, xAtXstart, xAtXstart, 32L);
            masm.bind(labelFirstLoop);
            // Consume up to two elements of y per iteration: exit when none are left, take the
            // single-element path (labelOneY) when exactly one is left.
            masm.subs(32, idx, idx, 1);
            masm.branchConditionally(AArch64Assembler.ConditionFlag.MI, labelFirstLoopExit);
            masm.subs(32, idx, idx, 1);
            masm.branchConditionally(AArch64Assembler.ConditionFlag.MI, labelOneY);
            // 64-bit load of y[idx], y[idx + 1] with the halves swapped as above.
            masm.loadAddress(rscratch1, AArch64Address.createExtendedRegisterOffsetAddress(32, y, idx, true, AArch64Assembler.ExtendType.UXTW));
            masm.ldr(64, yAtIdx, AArch64Address.createBaseRegisterOnlyAddress(64, rscratch1));
            masm.ror(64, yAtIdx, yAtIdx, 32L);
            masm.bind(labelMultiply);
            // carry:product = xAtXstart * yAtIdx + carry (rscratch1 holds the high product half).
            masm.umulh(64, rscratch1, xAtXstart, yAtIdx);
            masm.mul(64, product, xAtXstart, yAtIdx);
            masm.adds(64, product, product, carry);
            masm.adc(64, carry, rscratch1, AArch64.zr);
            // Move the store index back by two elements, swap the halves back into array order
            // and store the 64-bit result at z + 4 * kdx.
            masm.sub(32, kdx, kdx, 2);
            masm.ror(64, product, product, 32L);
            masm.loadAddress(rscratch2, AArch64Address.createExtendedRegisterOffsetAddress(32, z, kdx, true, AArch64Assembler.ExtendType.UXTW));
            masm.str(64, product, AArch64Address.createBaseRegisterOnlyAddress(64, rscratch2));
            masm.jmp(labelFirstLoop);
            masm.bind(labelOneY);
            // Only y[0] is left: load it as a 32-bit word.
            masm.ldr(32, yAtIdx, AArch64Address.createBaseRegisterOnlyAddress(32, y));
            masm.jmp(labelMultiply);
            masm.bind(labelOneX);
            // Only x[0] is left: load it as a 32-bit word and enter the loop.
            masm.ldr(32, xAtXstart, AArch64Address.createBaseRegisterOnlyAddress(32, x));
            masm.jmp(labelFirstLoop);
            masm.bind(labelFirstLoopExit);
        }
    }

    /**
     * Emits the inner multiply-accumulate loop used for every chunk of {@code x} after the first:
     * {@code z[i..] += productHi * y[i..] + carry}, walking downwards from index {@code idx}.
     * The main loop handles four 32-bit elements (two 64-bit words) per iteration; the tail
     * handles a remaining pair of elements and then a remaining single element.
     *
     * <p>On exit {@code carry} holds the carry out of the lowest processed index. Both
     * macro-assembler scratch registers are used, and {@code rscratch1} is reused to hold data as
     * well as addresses.
     *
     * @param masm assembler that receives the emitted instructions
     * @param y base address of {@code y} (read only)
     * @param z base address of the slice of {@code z} being accumulated into (read only)
     * @param carry running 64-bit carry (read and updated)
     * @param carry2 temporary for the high half of the second product
     * @param idx 32-bit count of {@code y} elements still to process (overwritten)
     * @param jdx temporary receiving the number of four-element groups, {@code idx >> 2}
     * @param yzAtIdx1 temporary for loaded {@code y} data
     * @param yzAtIdx2 temporary for loaded {@code y} / {@code z} data
     * @param tmp temporary for the low half of the second product
     * @param tmp3 temporary for the low result word
     * @param tmp4 temporary for the high result word
     * @param tmp6 temporary holding the address {@code z + 4 * idx} in the main loop
     * @param productHi the current (already half-swapped) chunk of {@code x}; read only
     */
    private static void multiply128x128Loop(AArch64MacroAssembler masm, Register y, Register z, Register carry, Register carry2, Register idx, Register jdx, Register yzAtIdx1, Register yzAtIdx2, Register tmp, Register tmp3, Register tmp4, Register tmp6, Register productHi) {
        Label labelThirdLoop = new Label();
        Label labelThirdLoopExit = new Label();
        Label labelPostThirdLoopDone = new Label();
        Label labelCheck1 = new Label();
        try (AArch64MacroAssembler.ScratchRegister sr1 = masm.getScratchRegister();
             AArch64MacroAssembler.ScratchRegister sr2 = masm.getScratchRegister();){
            Register rscratch1 = sr1.getRegister();
            Register rscratch2 = sr2.getRegister();
            // jdx = number of complete four-element groups.
            masm.lsr(32, jdx, idx, 2L);
            masm.bind(labelThirdLoop);
            masm.subs(32, jdx, jdx, 1);
            masm.branchConditionally(AArch64Assembler.ConditionFlag.MI, labelThirdLoopExit);
            masm.sub(32, idx, idx, 4);
            // Load y[idx .. idx + 3] as two 64-bit words; yzAtIdx2 gets the word at the lower
            // address, yzAtIdx1 the one at the higher address.
            masm.loadAddress(rscratch1, AArch64Address.createExtendedRegisterOffsetAddress(32, y, idx, true, AArch64Assembler.ExtendType.UXTW));
            masm.ldp(64, yzAtIdx2, yzAtIdx1, AArch64Address.createPairBaseRegisterOnlyAddress(64, rscratch1));
            // tmp6 = z + 4 * idx; kept for the store at the end of the iteration.
            masm.loadAddress(tmp6, AArch64Address.createExtendedRegisterOffsetAddress(32, z, idx, true, AArch64Assembler.ExtendType.UXTW));
            masm.ror(64, yzAtIdx1, yzAtIdx1, 32L);
            masm.ror(64, yzAtIdx2, yzAtIdx2, 32L);
            // Load the matching z[idx .. idx + 3]: rscratch2 = lower address, rscratch1 = higher.
            masm.ldp(64, rscratch2, rscratch1, AArch64Address.createPairBaseRegisterOnlyAddress(64, tmp6));
            // tmp4:tmp3 = productHi * yzAtIdx1
            masm.mul(64, tmp3, productHi, yzAtIdx1);
            masm.umulh(64, tmp4, productHi, yzAtIdx1);
            masm.ror(64, rscratch1, rscratch1, 32L);
            masm.ror(64, rscratch2, rscratch2, 32L);
            // carry2:tmp = productHi * yzAtIdx2
            masm.mul(64, tmp, productHi, yzAtIdx2);
            masm.umulh(64, carry2, productHi, yzAtIdx2);
            // Sum both products, the incoming carry and the two z words into carry:tmp4:tmp3.
            // The adds/adc/adcs pairs below rely on the carry flag set by the instruction
            // immediately before them, so their order must not change.
            masm.adds(64, tmp3, tmp3, carry);
            masm.adc(64, tmp4, tmp4, AArch64.zr);
            masm.adds(64, tmp3, tmp3, rscratch1);
            masm.adcs(64, tmp4, tmp4, tmp);
            masm.adc(64, carry, carry2, AArch64.zr);
            masm.adds(64, tmp4, tmp4, rscratch2);
            masm.adc(64, carry, carry, AArch64.zr);
            // Swap the halves back into array order and store both words to z + 4 * idx.
            masm.ror(64, tmp3, tmp3, 32L);
            masm.ror(64, tmp4, tmp4, 32L);
            masm.stp(64, tmp4, tmp3, AArch64Address.createPairBaseRegisterOnlyAddress(64, tmp6));
            masm.jmp(labelThirdLoop);
            masm.bind(labelThirdLoopExit);
            // Tail: 0 to 3 elements remain.
            masm.and(32, idx, idx, 3L);
            masm.cbz(32, idx, labelPostThirdLoopDone);
            // Fewer than two remaining elements -> skip the pair handling.
            masm.subs(32, idx, idx, 2);
            masm.branchConditionally(AArch64Assembler.ConditionFlag.MI, labelCheck1);
            // Handle one pair: tmp4:tmp3 = productHi * (y[idx], y[idx + 1]).
            masm.loadAddress(rscratch1, AArch64Address.createExtendedRegisterOffsetAddress(32, y, idx, true, AArch64Assembler.ExtendType.UXTW));
            masm.ldr(64, yzAtIdx1, AArch64Address.createBaseRegisterOnlyAddress(64, rscratch1));
            masm.ror(64, yzAtIdx1, yzAtIdx1, 32L);
            masm.mul(64, tmp3, productHi, yzAtIdx1);
            masm.umulh(64, tmp4, productHi, yzAtIdx1);
            // rscratch1 = z + 4 * idx; it is still needed for the store below.
            masm.loadAddress(rscratch1, AArch64Address.createExtendedRegisterOffsetAddress(32, z, idx, true, AArch64Assembler.ExtendType.UXTW));
            masm.ldr(64, yzAtIdx2, AArch64Address.createBaseRegisterOnlyAddress(64, rscratch1));
            masm.ror(64, yzAtIdx2, yzAtIdx2, 32L);
            // carry:tmp3 = tmp4:tmp3 + carry + z word
            AArch64BigIntegerMultiplyToLenOp.add2WithCarry(masm, carry, tmp4, tmp3, carry, yzAtIdx2);
            masm.ror(64, tmp3, tmp3, 32L);
            masm.str(64, tmp3, AArch64Address.createBaseRegisterOnlyAddress(64, rscratch1));
            masm.bind(labelCheck1);
            // A single element remains only if the remaining count was odd; after the mask and
            // decrement idx is 0 in that case and negative otherwise.
            masm.and(32, idx, idx, 1L);
            masm.subs(32, idx, idx, 1);
            masm.branchConditionally(AArch64Assembler.ConditionFlag.MI, labelPostThirdLoopDone);
            // carry2:tmp3 = y[idx] * productHi (y[idx] loaded as a 32-bit word).
            masm.ldr(32, tmp4, AArch64Address.createExtendedRegisterOffsetAddress(32, y, idx, true, AArch64Assembler.ExtendType.UXTW));
            masm.mul(64, tmp3, tmp4, productHi);
            masm.umulh(64, carry2, tmp4, productHi);
            masm.ldr(32, tmp4, AArch64Address.createExtendedRegisterOffsetAddress(32, z, idx, true, AArch64Assembler.ExtendType.UXTW));
            // carry2:tmp3 += z[idx] + carry
            AArch64BigIntegerMultiplyToLenOp.add2WithCarry(masm, carry2, carry2, tmp3, tmp4, carry);
            // Only the low 32 bits go back to z[idx]; the remaining bits of carry2:tmp3 (shifted
            // right by 32) become the new carry.
            masm.str(32, tmp3, AArch64Address.createExtendedRegisterOffsetAddress(32, z, idx, true, AArch64Assembler.ExtendType.UXTW));
            masm.extr(64, carry, carry2, tmp3, 32);
            masm.bind(labelPostThirdLoopDone);
        }
    }

    /**
     * Emits the complete multiplication routine. The first chunk of {@code x} is multiplied with
     * all of {@code y} by {@code multiply64x64Loop}; every further chunk of {@code x} is then
     * multiplied with {@code y} and accumulated into {@code z} by {@code multiply128x128Loop}.
     * Nothing is emitted for the case {@code xlen == 0} other than the early exit branch.
     *
     * <p>Register roles: {@code tmp1} = idx / jdx, {@code tmp2} = kdx (later reused as a data
     * temporary), {@code tmp3} = xstart, {@code tmp4} = yAtIdx, {@code tmp5} = carry,
     * {@code tmp7} = productHi, {@code tmp8} = product, {@code tmp9} = xAtXstart,
     * {@code tmp10} = address of the current slice of {@code z}; {@code tmp6},
     * {@code tmp11}, {@code tmp12} and {@code tmp13} are plain temporaries for the inner loop.
     *
     * @param masm assembler that receives the emitted instructions
     * @param x base address of {@code x} (read only)
     * @param xlen 32-bit element count of {@code x} (read only)
     * @param y base address of {@code y} (read only)
     * @param ylen 32-bit element count of {@code y} (read only)
     * @param z base address of the result (read only; the memory it points to is written)
     * @param zlen 32-bit length of {@code z} (read only)
     * @param tmp1 temporary register, overwritten (likewise {@code tmp2} .. {@code tmp13})
     */
    static void multiplyToLen(AArch64MacroAssembler masm, Register x, Register xlen, Register y, Register ylen, Register z, Register zlen, Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register tmp6, Register tmp7, Register tmp8, Register tmp9, Register tmp10, Register tmp11, Register tmp12, Register tmp13) {
        Register idx = tmp1;
        Register kdx = tmp2;
        Register xstart = tmp3;
        Register yAtIdx = tmp4;
        Register carry = tmp5;
        Register productHi = tmp7;
        Register product = tmp8;
        Register xAtXstart = tmp9;
        Label labelDone = new Label();
        Label labelSecondLoop = new Label();
        Label labelCarry = new Label();
        Label labelLastX = new Label();
        Label labelThirdLoopPrologue = new Label();
        // idx = ylen, kdx = zlen, carry = 0, xstart = xlen - 1; nothing to do if x is empty.
        masm.mov(32, idx, ylen);
        masm.mov(32, kdx, zlen);
        masm.mov(64, carry, AArch64.zr);
        masm.mov(32, xstart, xlen);
        masm.subs(32, xstart, xstart, 1);
        masm.branchConditionally(AArch64Assembler.ConditionFlag.MI, labelDone);
        // First pass: trailing chunk of x times all of y, stored into z.
        AArch64BigIntegerMultiplyToLenOp.multiply64x64Loop(masm, x, xstart, xAtXstart, y, yAtIdx, z, carry, product, idx, kdx);
        // Write the carry left over from the first pass into the one or two 32-bit slots just
        // below the last stored product (none if kdx is already 0).
        masm.cbz(32, kdx, labelSecondLoop);
        masm.sub(32, kdx, kdx, 1);
        masm.cbz(32, kdx, labelCarry);
        masm.str(32, carry, AArch64Address.createExtendedRegisterOffsetAddress(32, z, kdx, true, AArch64Assembler.ExtendType.UXTW));
        masm.lsr(64, carry, carry, 32L);
        masm.sub(32, kdx, kdx, 1);
        masm.bind(labelCarry);
        masm.str(32, carry, AArch64Address.createExtendedRegisterOffsetAddress(32, z, kdx, true, AArch64Assembler.ExtendType.UXTW));
        // jdx aliases idx (both are tmp1); newZ points at the slice of z for the current x chunk.
        Register jdx = tmp1;
        Register newZ = tmp10;
        masm.bind(labelSecondLoop);
        // Outer loop over the remaining chunks of x, from high index to low.
        masm.mov(64, carry, AArch64.zr);
        masm.mov(32, jdx, ylen);
        masm.subs(32, xstart, xstart, 1);
        masm.branchConditionally(AArch64Assembler.ConditionFlag.MI, labelDone);
        // newZ = z + 4 * xstart + 4
        masm.loadAddress(newZ, AArch64Address.createExtendedRegisterOffsetAddress(32, z, xstart, true, AArch64Assembler.ExtendType.UXTW));
        masm.add(64, newZ, newZ, 4);
        // If only x[0] is left, load it as a single 32-bit word at labelLastX.
        masm.subs(32, xstart, xstart, 1);
        masm.branchConditionally(AArch64Assembler.ConditionFlag.MI, labelLastX);
        // productHi = x[xstart], x[xstart + 1] loaded as 64 bits with the halves swapped.
        masm.loadAddress(tmp13, AArch64Address.createExtendedRegisterOffsetAddress(32, x, xstart, true, AArch64Assembler.ExtendType.UXTW));
        masm.ldr(64, productHi, AArch64Address.createBaseRegisterOnlyAddress(64, tmp13));
        masm.ror(64, productHi, productHi, 32L);
        masm.bind(labelThirdLoopPrologue);
        // Inner loop. Several registers are reused under different roles here: tmp2 (kdx, no
        // longer needed) and product/xAtXstart serve as data temporaries, tmp13 as tmp3.
        AArch64BigIntegerMultiplyToLenOp.multiply128x128Loop(masm, y, newZ, carry, tmp11, jdx, tmp12, product, tmp2, xAtXstart, tmp13, tmp4, tmp6, productHi);
        // Store the low 32 bits of the carry at z[xstart + 1] and, unless xstart is negative
        // (last chunk was the single word x[0]), the high 32 bits at z[xstart].
        masm.add(32, xstart, xstart, 1);
        masm.str(32, carry, AArch64Address.createExtendedRegisterOffsetAddress(32, z, xstart, true, AArch64Assembler.ExtendType.UXTW));
        masm.subs(32, xstart, xstart, 1);
        masm.branchConditionally(AArch64Assembler.ConditionFlag.MI, labelDone);
        masm.lsr(64, carry, carry, 32L);
        masm.str(32, carry, AArch64Address.createExtendedRegisterOffsetAddress(32, z, xstart, true, AArch64Assembler.ExtendType.UXTW));
        masm.jmp(labelSecondLoop);
        masm.bind(labelLastX);
        masm.ldr(32, productHi, AArch64Address.createBaseRegisterOnlyAddress(32, x));
        masm.jmp(labelThirdLoopPrologue);
        masm.bind(labelDone);
    }
}

