/*
 * Decompiled with CFR 0.152.
 */
package jdk.graal.compiler.lir.aarch64;

import jdk.graal.compiler.asm.Label;
import jdk.graal.compiler.asm.aarch64.AArch64ASIMDAssembler;
import jdk.graal.compiler.asm.aarch64.AArch64Address;
import jdk.graal.compiler.asm.aarch64.AArch64Assembler;
import jdk.graal.compiler.asm.aarch64.AArch64MacroAssembler;
import jdk.graal.compiler.core.aarch64.AArch64LIRGenerator;
import jdk.graal.compiler.debug.GraalError;
import jdk.graal.compiler.lir.LIRInstruction;
import jdk.graal.compiler.lir.LIRInstructionClass;
import jdk.graal.compiler.lir.Opcode;
import jdk.graal.compiler.lir.SyncPort;
import jdk.graal.compiler.lir.SyncPorts;
import jdk.graal.compiler.lir.aarch64.AArch64ComplexVectorOp;
import jdk.graal.compiler.lir.aarch64.AArch64LIRInstruction;
import jdk.graal.compiler.lir.asm.CompilationResultBuilder;
import jdk.vm.ci.aarch64.AArch64;
import jdk.vm.ci.code.CodeUtil;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.ValueUtil;
import jdk.vm.ci.meta.AllocatableValue;
import jdk.vm.ci.meta.Value;

@Opcode(value="AARCH64_COUNT_POSITIVES")
@SyncPorts(value={@SyncPort(from="https://github.com/openjdk/jdk/blob/0a3a925ad88921d387aa851157f54ac0054d347b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp#L5045-L5114", sha1="ce54a7cf2fcfe7ccb8f6604c038887fc1c4ebce1"), @SyncPort(from="https://github.com/openjdk/jdk/blob/0a3a925ad88921d387aa851157f54ac0054d347b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp#L4955-L5121", sha1="3b4e6edb4372e8babb009763c2d05961348dd723")})
public final class AArch64CountPositivesOp
extends AArch64ComplexVectorOp {
    public static final LIRInstructionClass<AArch64CountPositivesOp> TYPE = LIRInstructionClass.create(AArch64CountPositivesOp.class);
    @LIRInstruction.Def(value={LIRInstruction.OperandFlag.REG})
    private Value resultValue;
    @LIRInstruction.Alive(value={LIRInstruction.OperandFlag.REG})
    private Value arrayValue;
    @LIRInstruction.Alive(value={LIRInstruction.OperandFlag.REG})
    private Value lengthValue;
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value arrayTempValue;
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value lengthTempValue;
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value[] temp;
    private final int vmPageSize;
    private final int softwarePrefetchHintDistance;
    private static final long UPPER_BIT_MASK = -9187201950435737472L;
    private static final int LARGE_LOOP_SIZE = 64;

    public AArch64CountPositivesOp(AArch64LIRGenerator tool, AllocatableValue resultValue, AllocatableValue arrayValue, AllocatableValue lengthValue, int vmPageSize, int softwarePrefetchHintDistance) {
        super((LIRInstructionClass<? extends AArch64LIRInstruction>)TYPE);
        this.resultValue = resultValue;
        this.arrayValue = arrayValue;
        this.lengthValue = lengthValue;
        this.vmPageSize = vmPageSize;
        this.softwarePrefetchHintDistance = softwarePrefetchHintDistance;
        this.arrayTempValue = tool.newVariable(arrayValue.getValueKind());
        this.lengthTempValue = tool.newVariable(lengthValue.getValueKind());
        this.temp = new Value[]{AArch64.r3.asValue(), AArch64.r4.asValue(), AArch64.r5.asValue(), AArch64.r6.asValue(), AArch64.r7.asValue(), AArch64.r10.asValue(), AArch64.v0.asValue(), AArch64.v1.asValue(), AArch64.v2.asValue(), AArch64.v3.asValue()};
    }

    @Override
    public void emitCode(CompilationResultBuilder crb, AArch64MacroAssembler masm) {
        try (AArch64MacroAssembler.ScratchRegister scratchReg1 = masm.getScratchRegister();
             AArch64MacroAssembler.ScratchRegister scratchReg2 = masm.getScratchRegister();){
            Register rscratch1 = scratchReg1.getRegister();
            Register rscratch2 = scratchReg2.getRegister();
            int wordSize = crb.target.wordSize;
            Register result = ValueUtil.asRegister((Value)this.resultValue);
            Register ary1 = ValueUtil.asRegister((Value)this.arrayTempValue);
            Register len = ValueUtil.asRegister((Value)this.lengthTempValue);
            masm.mov(64, ary1, ValueUtil.asRegister((Value)this.arrayValue));
            masm.mov(32, len, ValueUtil.asRegister((Value)this.lengthValue));
            Label labelLoop = new Label();
            Label labelEnd = new Label();
            Label labelStub = new Label();
            Label labelStubLong = new Label();
            Label labelSetResult = new Label();
            Label labelDone = new Label();
            masm.mov(32, result, len);
            masm.compare(32, len, 0);
            masm.branchConditionally(AArch64Assembler.ConditionFlag.LE, labelDone);
            masm.compare(32, len, 4 * wordSize);
            masm.branchConditionally(AArch64Assembler.ConditionFlag.GE, labelStubLong);
            if (this.vmPageSize > 0) {
                GraalError.guarantee(CodeUtil.isPowerOf2((int)this.vmPageSize), "vmPageSize is not power of 2: %d", (Object)this.vmPageSize);
                int shift = 64 - CodeUtil.log2((int)this.vmPageSize);
                masm.lsl(64, rscratch1, ary1, shift);
                masm.mov(rscratch2, 4L * (long)wordSize << shift);
                masm.adds(64, rscratch2, rscratch1, rscratch2);
                masm.branchConditionally(AArch64Assembler.ConditionFlag.HS, labelStub);
            }
            masm.subs(64, len, len, wordSize);
            masm.branchConditionally(AArch64Assembler.ConditionFlag.LT, labelEnd);
            masm.bind(labelLoop);
            masm.ldr(64, rscratch1, AArch64Address.createImmediateAddress(64, AArch64Address.AddressingMode.IMMEDIATE_POST_INDEXED, ary1, wordSize));
            masm.tst(64, rscratch1, -9187201950435737472L);
            masm.branchConditionally(AArch64Assembler.ConditionFlag.NE, labelSetResult);
            masm.subs(32, len, len, wordSize);
            masm.branchConditionally(AArch64Assembler.ConditionFlag.GE, labelLoop);
            masm.compare(32, len, -wordSize);
            masm.branchConditionally(AArch64Assembler.ConditionFlag.EQ, labelDone);
            masm.bind(labelEnd);
            masm.ldr(64, rscratch1, AArch64Address.createBaseRegisterOnlyAddress(64, ary1));
            masm.sub(64, rscratch2, AArch64.zr, len, AArch64Assembler.ShiftType.LSL, 3);
            masm.lsl(64, rscratch1, rscratch1, rscratch2);
            masm.tst(64, rscratch1, -9187201950435737472L);
            masm.branchConditionally(AArch64Assembler.ConditionFlag.NE, labelSetResult);
            masm.jmp(labelDone);
            masm.bind(labelStub);
            this.emitStub(masm, result, ary1, len, rscratch1, rscratch2, labelStubLong, labelDone);
            GraalError.guarantee(labelStubLong.isBound(), "labelStubLong should be bound");
            masm.jmp(labelDone);
            masm.bind(labelSetResult);
            masm.add(32, len, len, wordSize);
            masm.sub(32, result, result, len);
            masm.bind(labelDone);
        }
    }

    private void emitStub(AArch64MacroAssembler masm, Register result, Register ary1, Register len, Register rscratch1, Register rscratch2, Label labelStubLong, Label labelDone) {
        Label labelRetAdjust = new Label();
        Label labelRetAdjust16 = new Label();
        Label labelRetAdjustLong = new Label();
        Label labelRetNoPop = new Label();
        Label labelAligned = new Label();
        Label labelLoop16 = new Label();
        Label labelCheck16 = new Label();
        Label labelLargeLoop = new Label();
        Label labelPostLoop16 = new Label();
        Label labelLenOver8 = new Label();
        Label labelPostLoop16LoadTail = new Label();
        Register tmp1 = AArch64.r3;
        Register tmp2 = AArch64.r4;
        Register tmp3 = AArch64.r5;
        Register tmp4 = AArch64.r6;
        Register tmp5 = AArch64.r7;
        Register tmp6 = AArch64.r10;
        Register vtmp0 = AArch64.v0;
        Register vtmp1 = AArch64.v1;
        Register vtmp2 = AArch64.v2;
        Register vtmp3 = AArch64.v3;
        masm.compare(32, len, 15);
        masm.branchConditionally(AArch64Assembler.ConditionFlag.GT, labelStubLong);
        masm.add(64, ary1, ary1, len);
        masm.subs(32, len, len, 8);
        masm.branchConditionally(AArch64Assembler.ConditionFlag.GT, labelLenOver8);
        masm.ldr(64, rscratch2, AArch64Address.createImmediateAddress(64, AArch64Address.AddressingMode.IMMEDIATE_SIGNED_UNSCALED, ary1, -8));
        masm.sub(64, rscratch1, AArch64.zr, len, AArch64Assembler.ShiftType.LSL, 3);
        masm.lsr(64, rscratch2, rscratch2, rscratch1);
        masm.tst(64, rscratch2, -9187201950435737472L);
        masm.csel(32, result, AArch64.zr, result, AArch64Assembler.ConditionFlag.NE);
        masm.jmp(labelDone);
        masm.bind(labelLenOver8);
        masm.ldp(64, rscratch1, rscratch2, AArch64Address.createImmediateAddress(64, AArch64Address.AddressingMode.IMMEDIATE_PAIR_SIGNED_SCALED, ary1, -16));
        masm.sub(32, len, len, 8);
        masm.tst(64, rscratch2, -9187201950435737472L);
        masm.branchConditionally(AArch64Assembler.ConditionFlag.NE, labelRetNoPop);
        masm.sub(64, rscratch2, AArch64.zr, len, AArch64Assembler.ShiftType.LSL, 3);
        masm.lsr(64, rscratch1, rscratch1, rscratch2);
        masm.tst(64, rscratch1, -9187201950435737472L);
        masm.bind(labelRetNoPop);
        masm.csel(32, result, AArch64.zr, result, AArch64Assembler.ConditionFlag.NE);
        masm.jmp(labelDone);
        masm.bind(labelStubLong);
        masm.and(64, rscratch2, ary1, 15L);
        masm.cbz(64, rscratch2, labelAligned);
        masm.ldp(64, tmp6, tmp1, AArch64Address.createPairBaseRegisterOnlyAddress(64, ary1));
        masm.mov(tmp5, 16);
        masm.sub(64, rscratch1, tmp5, rscratch2);
        masm.add(64, ary1, ary1, rscratch1);
        masm.orr(64, tmp6, tmp6, tmp1);
        masm.tst(64, tmp6, -9187201950435737472L);
        masm.branchConditionally(AArch64Assembler.ConditionFlag.NE, labelRetAdjust);
        masm.sub(32, len, len, rscratch1);
        masm.bind(labelAligned);
        masm.compare(32, len, 64);
        masm.branchConditionally(AArch64Assembler.ConditionFlag.LT, labelCheck16);
        masm.ldp(64, tmp6, tmp1, AArch64Address.createImmediateAddress(64, AArch64Address.AddressingMode.IMMEDIATE_PAIR_POST_INDEXED, ary1, 16));
        masm.sub(32, len, len, 16);
        masm.orr(64, tmp6, tmp6, tmp1);
        masm.tst(64, tmp6, -9187201950435737472L);
        masm.branchConditionally(AArch64Assembler.ConditionFlag.NE, labelRetAdjust16);
        masm.compare(32, len, 64);
        masm.branchConditionally(AArch64Assembler.ConditionFlag.LT, labelCheck16);
        masm.bind(labelLargeLoop);
        if (this.softwarePrefetchHintDistance >= 0) {
            masm.prfm(AArch64Address.createImmediateAddress(64, AArch64Address.AddressingMode.IMMEDIATE_UNSIGNED_SCALED, ary1, this.softwarePrefetchHintDistance & 0xFFFFFFF8), AArch64Assembler.PrefetchMode.PLDL1KEEP);
        }
        masm.neon.ld1MultipleVVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vtmp0, vtmp1, vtmp2, vtmp3, AArch64Address.createStructureImmediatePostIndexAddress(AArch64ASIMDAssembler.ASIMDInstruction.LD1_MULTIPLE_4R, AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, ary1, 64));
        masm.sub(32, len, len, 64);
        masm.neon.orrVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vtmp0, vtmp0, vtmp1);
        masm.neon.orrVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vtmp2, vtmp2, vtmp3);
        masm.neon.orrVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vtmp0, vtmp0, vtmp2);
        masm.neon.sminpVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vtmp0, vtmp0, vtmp2);
        masm.neon.ushrVVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vtmp0, vtmp0, 7);
        AArch64CountPositivesOp.cbnzVector(masm, AArch64ASIMDAssembler.ElementSize.Byte, vtmp0, vtmp0, tmp5, false, labelRetAdjustLong);
        masm.compare(32, len, 64);
        masm.branchConditionally(AArch64Assembler.ConditionFlag.GE, labelLargeLoop);
        masm.bind(labelCheck16);
        masm.compare(32, len, 16);
        masm.branchConditionally(AArch64Assembler.ConditionFlag.LT, labelPostLoop16);
        masm.bind(labelLoop16);
        masm.ldp(64, tmp2, tmp3, AArch64Address.createImmediateAddress(64, AArch64Address.AddressingMode.IMMEDIATE_PAIR_POST_INDEXED, ary1, 16));
        masm.sub(32, len, len, 16);
        masm.orr(64, tmp2, tmp2, tmp3);
        masm.tst(64, tmp2, -9187201950435737472L);
        masm.branchConditionally(AArch64Assembler.ConditionFlag.NE, labelRetAdjust16);
        masm.compare(32, len, 16);
        masm.branchConditionally(AArch64Assembler.ConditionFlag.GE, labelLoop16);
        masm.bind(labelPostLoop16);
        masm.compare(32, len, 8);
        masm.branchConditionally(AArch64Assembler.ConditionFlag.LE, labelPostLoop16LoadTail);
        masm.ldr(64, tmp3, AArch64Address.createImmediateAddress(64, AArch64Address.AddressingMode.IMMEDIATE_POST_INDEXED, ary1, 8));
        masm.tst(64, tmp3, -9187201950435737472L);
        masm.branchConditionally(AArch64Assembler.ConditionFlag.NE, labelRetAdjust);
        masm.sub(32, len, len, 8);
        masm.bind(labelPostLoop16LoadTail);
        masm.cbz(32, len, labelDone);
        masm.ldr(64, tmp1, AArch64Address.createBaseRegisterOnlyAddress(64, ary1));
        masm.mov(tmp2, 64);
        masm.sub(64, tmp4, tmp2, len, AArch64Assembler.ShiftType.LSL, 3);
        masm.lsl(64, tmp1, tmp1, tmp4);
        masm.tst(64, tmp1, -9187201950435737472L);
        masm.branchConditionally(AArch64Assembler.ConditionFlag.NE, labelRetAdjust);
        masm.jmp(labelDone);
        masm.bind(labelRetAdjustLong);
        masm.add(32, len, len, 48);
        masm.bind(labelRetAdjust16);
        masm.add(32, len, len, 16);
        masm.bind(labelRetAdjust);
        masm.sub(32, result, result, len);
    }
}

