/*
 * Decompiled with CFR 0.152.
 */
package jdk.graal.compiler.lir.aarch64;

import jdk.graal.compiler.asm.Label;
import jdk.graal.compiler.asm.aarch64.AArch64ASIMDAssembler;
import jdk.graal.compiler.asm.aarch64.AArch64Address;
import jdk.graal.compiler.asm.aarch64.AArch64Assembler;
import jdk.graal.compiler.asm.aarch64.AArch64MacroAssembler;
import jdk.graal.compiler.core.common.LIRKind;
import jdk.graal.compiler.lir.LIRInstruction;
import jdk.graal.compiler.lir.LIRInstructionClass;
import jdk.graal.compiler.lir.Opcode;
import jdk.graal.compiler.lir.aarch64.AArch64LIRInstruction;
import jdk.graal.compiler.lir.asm.CompilationResultBuilder;
import jdk.graal.compiler.lir.gen.LIRGeneratorTool;
import jdk.vm.ci.aarch64.AArch64;
import jdk.vm.ci.aarch64.AArch64Kind;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.ValueUtil;
import jdk.vm.ci.meta.AllocatableValue;
import jdk.vm.ci.meta.PlatformKind;
import jdk.vm.ci.meta.Value;

/**
 * LIR instruction that emits an AArch64 routine narrowing {@code len} 16-bit elements read from
 * {@code src} into single bytes written to {@code dst}.
 *
 * <p>Contract of the emitted code, as established by the instruction sequence below:
 * <ul>
 * <li>The result register receives the sign-extended {@code len} when every element fits in a
 * byte (value at most {@code 0xFF}).</li>
 * <li>The result register is {@code 0} when {@code len} is zero or when an element larger than
 * {@code 0xFF} is encountered. Bytes stored before the offending element was found are left in
 * {@code dst}.</li>
 * </ul>
 *
 * <p>Inputs of at least {@link #CHUNK_ELEMENT_COUNT} elements go through a SIMD loop; shorter
 * inputs, and any chunk in which the SIMD loop detects an oversized element, go through an
 * element-by-element loop.
 *
 * <p>NOTE(review): the opcode and class name suggest this backs the UTF-16 to Latin-1 string
 * compression intrinsic; the callers are not visible from this file.
 */
@Opcode("AArch64_STRING_COMPRESS")
public final class AArch64StringUTF16CompressOp extends AArch64LIRInstruction {
    public static final LIRInstructionClass<AArch64StringUTF16CompressOp> TYPE = LIRInstructionClass.create(AArch64StringUTF16CompressOp.class);

    /** Number of 16-bit source elements consumed by one SIMD iteration. */
    private static final int CHUNK_ELEMENT_COUNT = 16;
    /** Source bytes consumed by one SIMD iteration (two bytes per element). */
    private static final int CHUNK_SOURCE_BYTES = CHUNK_ELEMENT_COUNT * 2;
    /** Destination bytes produced by one SIMD iteration (one byte per element). */
    private static final int CHUNK_DEST_BYTES = CHUNK_ELEMENT_COUNT;
    /** Number of elements held by each of the two vector registers that make up a chunk. */
    private static final int HALF_CHUNK_ELEMENT_COUNT = CHUNK_ELEMENT_COUNT / 2;
    /** Source bytes held by each of the two vector registers that make up a chunk. */
    private static final int HALF_CHUNK_SOURCE_BYTES = CHUNK_SOURCE_BYTES / 2;
    /** Destination bytes produced from one half chunk. */
    private static final int HALF_CHUNK_DEST_BYTES = CHUNK_DEST_BYTES / 2;
    /** Largest element value that can be stored in a single byte. */
    private static final int MAX_BYTE_VALUE = 0xFF;

    /** Receives the outcome described in the class comment. */
    @LIRInstruction.Def({LIRInstruction.OperandFlag.REG})
    protected AllocatableValue resultValue;
    /** 32-bit element count. */
    @LIRInstruction.Use({LIRInstruction.OperandFlag.REG})
    protected AllocatableValue len;
    /** 64-bit address of the first source element; copied into {@link #temp1}, never modified. */
    @LIRInstruction.Alive({LIRInstruction.OperandFlag.REG})
    protected AllocatableValue src;
    /** 64-bit address of the first destination byte; copied into {@link #temp2}, never modified. */
    @LIRInstruction.Alive({LIRInstruction.OperandFlag.REG})
    protected AllocatableValue dst;
    /** Running source address. */
    @LIRInstruction.Temp({LIRInstruction.OperandFlag.REG})
    protected AllocatableValue temp1;
    /** Running destination address. */
    @LIRInstruction.Temp({LIRInstruction.OperandFlag.REG})
    protected AllocatableValue temp2;
    /** General-purpose scratch used by the SIMD path to inspect vector contents. */
    @LIRInstruction.Temp({LIRInstruction.OperandFlag.REG})
    protected AllocatableValue temp3;
    /** First half (lower addresses) of the chunk currently processed by the SIMD path. */
    @LIRInstruction.Temp({LIRInstruction.OperandFlag.REG})
    protected AllocatableValue vectorTemp1;
    /** Second half (higher addresses) of the chunk currently processed by the SIMD path. */
    @LIRInstruction.Temp({LIRInstruction.OperandFlag.REG})
    protected AllocatableValue vectorTemp2;
    /** Vector scratch used for the range check and for the narrowed output. */
    @LIRInstruction.Temp({LIRInstruction.OperandFlag.REG})
    protected AllocatableValue vectorTemp3;

    /**
     * Creates the instruction and allocates the temporaries it needs.
     *
     * @param tool generator used to allocate the three 64-bit and three SIMD temporaries
     * @param src source address; must be of kind {@code QWORD}
     * @param dst destination address; must be of kind {@code QWORD}
     * @param len element count; must be of kind {@code DWORD}
     * @param result outcome register; must be of kind {@code DWORD}
     */
    public AArch64StringUTF16CompressOp(LIRGeneratorTool tool, AllocatableValue src, AllocatableValue dst, AllocatableValue len, AllocatableValue result) {
        super(TYPE);
        assert hasKind(result, AArch64Kind.DWORD) : result;
        assert hasKind(len, AArch64Kind.DWORD) : len;
        assert hasKind(src, AArch64Kind.QWORD) : src;
        assert hasKind(dst, AArch64Kind.QWORD) : dst;

        this.resultValue = result;
        this.len = len;
        this.src = src;
        this.dst = dst;

        LIRKind wordKind = LIRKind.value(AArch64Kind.QWORD);
        temp1 = tool.newVariable(wordKind);
        temp2 = tool.newVariable(wordKind);
        temp3 = tool.newVariable(wordKind);

        LIRKind simdKind = LIRKind.value(tool.target().arch.getLargestStorableKind(AArch64.SIMD));
        vectorTemp1 = tool.newVariable(simdKind);
        vectorTemp2 = tool.newVariable(simdKind);
        vectorTemp3 = tool.newVariable(simdKind);
    }

    /** Tells whether {@code value} has the platform kind {@code expected}. */
    private static boolean hasKind(AllocatableValue value, AArch64Kind expected) {
        return value.getPlatformKind().equals(expected);
    }

    /**
     * Emits the complete routine: the zero-length early exit, the dispatch between the scalar and
     * the SIMD implementation, and both implementations themselves.
     */
    @Override
    public void emitCode(CompilationResultBuilder crb, AArch64MacroAssembler masm) {
        Register result = ValueUtil.asRegister(resultValue);
        Register srcCursor = ValueUtil.asRegister(temp1);
        Register dstCursor = ValueUtil.asRegister(temp2);

        Label vectorPath = new Label();
        Label scalarPath = new Label();
        Label exit = new Label();

        // result starts out as the sign-extended length; a zero length leaves immediately with 0.
        masm.sxt(64, 32, result, ValueUtil.asRegister(len));
        masm.cbz(32, result, exit);

        // Work on copies so the incoming src/dst registers stay untouched.
        masm.mov(64, srcCursor, ValueUtil.asRegister(src));
        masm.mov(64, dstCursor, ValueUtil.asRegister(dst));

        // Only inputs holding at least one full chunk take the SIMD route.
        masm.compare(64, result, CHUNK_ELEMENT_COUNT);
        masm.branchConditionally(AArch64Assembler.ConditionFlag.GE, vectorPath);

        // The scalar code is also the landing point for chunks the SIMD loop could not compress.
        masm.bind(scalarPath);
        emitScalar(masm, exit, srcCursor, dstCursor, result);
        masm.jmp(exit);

        masm.bind(vectorPath);
        emitSIMD(masm, scalarPath, exit, srcCursor, dstCursor, result);

        masm.bind(exit);
    }

    /**
     * Emits the element-by-element loop. It runs {@code result} iterations, jumping to
     * {@code done} with {@code result} unchanged when all of them succeed. When an element above
     * {@link #MAX_BYTE_VALUE} is loaded, {@code result} is zeroed and control falls out of the
     * emitted sequence, so the caller is responsible for what follows.
     *
     * @param masm assembler receiving the code
     * @param done target taken after the last element was stored successfully
     * @param srcAddress running source address, advanced by two bytes per element
     * @param destAddress running destination address, advanced by one byte per element
     * @param result holds the iteration count on entry and the outcome on exit
     */
    private static void emitScalar(AArch64MacroAssembler masm, Label done, Register srcAddress, Register destAddress, Register result) {
        Label loopHead = new Label();
        Label notCompressible = new Label();

        try (AArch64MacroAssembler.ScratchRegister firstScratch = masm.getScratchRegister();
             AArch64MacroAssembler.ScratchRegister secondScratch = masm.getScratchRegister()) {
            Register element = firstScratch.getRegister();
            Register remaining = secondScratch.getRegister();

            AArch64Address nextSourceElement = AArch64Address.createImmediateAddress(16, AArch64Address.AddressingMode.IMMEDIATE_POST_INDEXED, srcAddress, 2);
            AArch64Address nextDestByte = AArch64Address.createImmediateAddress(8, AArch64Address.AddressingMode.IMMEDIATE_POST_INDEXED, destAddress, 1);

            // Count down on a copy so result survives as the success value.
            masm.mov(64, remaining, result);

            masm.align(16);
            masm.bind(loopHead);
            masm.ldr(16, element, nextSourceElement);
            masm.compare(64, element, MAX_BYTE_VALUE);
            masm.branchConditionally(AArch64Assembler.ConditionFlag.GT, notCompressible);
            masm.str(8, element, nextDestByte);
            masm.subs(64, remaining, remaining, 1);
            masm.branchConditionally(AArch64Assembler.ConditionFlag.GT, loopHead);
            masm.jmp(done);

            masm.bind(notCompressible);
            masm.mov(64, result, AArch64.zr);
        }
    }

    /**
     * Emits the SIMD loop, which narrows {@link #CHUNK_ELEMENT_COUNT} elements per iteration.
     *
     * <p>When the element count is not a multiple of the chunk size, the final iteration is
     * positioned so that it ends exactly at the end of the source, which means it processes again
     * some elements already handled by the previous iteration.
     *
     * <p>When a chunk contains an element that does not fit in a byte, the cursors are rewound to
     * the half chunk containing the first such element and control is handed to the scalar loop
     * with an iteration count of {@link #HALF_CHUNK_ELEMENT_COUNT}. The scalar loop then reaches
     * the offending element within that half and zeroes {@code result}.
     *
     * @param masm assembler receiving the code
     * @param scalarImpl entry of the scalar loop, used as the fallback for failing chunks
     * @param done target taken once the whole input was narrowed successfully
     * @param srcChunkAddress running source address
     * @param destChunkAddress running destination address
     * @param result element count on entry; overwritten only on the failure path
     */
    private void emitSIMD(AArch64MacroAssembler masm, Label scalarImpl, Label done, Register srcChunkAddress, Register destChunkAddress, Register result) {
        Register firstHalfV = ValueUtil.asRegister(vectorTemp1);
        Register secondHalfV = ValueUtil.asRegister(vectorTemp2);
        Register scratchV = ValueUtil.asRegister(vectorTemp3);
        Register scratch = ValueUtil.asRegister(temp3);

        Label chunkLoop = new Label();
        Label chunkNotCompressible = new Label();
        Label firstHalfNotCompressible = new Label();

        try (AArch64MacroAssembler.ScratchRegister firstScratch = masm.getScratchRegister();
             AArch64MacroAssembler.ScratchRegister secondScratch = masm.getScratchRegister()) {
            Register lastChunkAddress = firstScratch.getRegister();
            Register endOfSrcAddress = secondScratch.getRegister();

            AArch64Address nextSourceChunk = AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_PAIR_POST_INDEXED, srcChunkAddress, CHUNK_SOURCE_BYTES);
            AArch64Address nextDestChunk = AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_POST_INDEXED, destChunkAddress, CHUNK_DEST_BYTES);

            // end = src + 2 * count; lastChunk = start of the final full chunk.
            masm.add(64, endOfSrcAddress, srcChunkAddress, result, AArch64Assembler.ShiftType.LSL, 1);
            masm.sub(64, lastChunkAddress, endOfSrcAddress, CHUNK_SOURCE_BYTES);

            masm.align(16);
            masm.bind(chunkLoop);
            masm.fldp(128, firstHalfV, secondHalfV, nextSourceChunk);

            // OR both halves together so a single test covers all elements of the chunk.
            masm.neon.orrVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, scratchV, firstHalfV, secondHalfV);
            emitBranchIfAnyHighByteSet(masm, scratchV, scratchV, scratch, chunkNotCompressible);

            // Narrow both halves into one register: first half low, second half high.
            masm.neon.xtnVV(AArch64ASIMDAssembler.ElementSize.Byte, scratchV, firstHalfV);
            masm.neon.xtn2VV(AArch64ASIMDAssembler.ElementSize.Byte, scratchV, secondHalfV);
            masm.fstr(128, scratchV, nextDestChunk);

            // Keep going while at least one more full chunk lies ahead.
            masm.cmp(64, srcChunkAddress, lastChunkAddress);
            masm.branchConditionally(AArch64Assembler.ConditionFlag.LO, chunkLoop);

            // Finished once the source cursor has reached the end.
            masm.cmp(64, srcChunkAddress, endOfSrcAddress);
            masm.branchConditionally(AArch64Assembler.ConditionFlag.HS, done);

            // Tail: move both cursors back by the overshoot (source bytes, halved for the
            // destination) so the final iteration covers exactly the last full chunk.
            masm.sub(64, srcChunkAddress, srcChunkAddress, lastChunkAddress);
            masm.sub(64, destChunkAddress, destChunkAddress, srcChunkAddress, AArch64Assembler.ShiftType.LSR, 1);
            masm.mov(64, srcChunkAddress, lastChunkAddress);
            masm.jmp(chunkLoop);
        }

        masm.bind(chunkNotCompressible);
        // The scalar loop takes its iteration count from result: let it scan one half chunk.
        masm.mov(result, HALF_CHUNK_ELEMENT_COUNT);
        emitBranchIfAnyHighByteSet(masm, firstHalfV, scratchV, scratch, firstHalfNotCompressible);

        // First half is fine: store it and resume at the start of the second half.
        masm.sub(64, srcChunkAddress, srcChunkAddress, HALF_CHUNK_SOURCE_BYTES);
        masm.neon.xtnVV(AArch64ASIMDAssembler.ElementSize.Byte, scratchV, firstHalfV);
        masm.fstr(64, scratchV, AArch64Address.createImmediateAddress(64, AArch64Address.AddressingMode.IMMEDIATE_POST_INDEXED, destChunkAddress, HALF_CHUNK_DEST_BYTES));
        masm.jmp(scalarImpl);

        // Offending element is in the first half: resume at the start of the chunk.
        masm.bind(firstHalfNotCompressible);
        masm.sub(64, srcChunkAddress, srcChunkAddress, CHUNK_SOURCE_BYTES);
        masm.jmp(scalarImpl);
    }

    /**
     * Emits a branch to {@code target} that is taken when any 16-bit element of {@code source}
     * has a non-zero upper byte.
     *
     * <p>UZP2 with byte elements and both inputs equal to {@code source} collects the
     * odd-numbered bytes, i.e. the upper byte of every 16-bit element, into the low 64 bits of
     * {@code vectorScratch}; those bits are then moved to {@code scalarScratch} and tested.
     *
     * @param masm assembler receiving the code
     * @param source vector register to inspect
     * @param vectorScratch vector register overwritten with the de-interleaved bytes; may be the
     *            same register as {@code source}
     * @param scalarScratch general-purpose register overwritten with the gathered upper bytes
     * @param target label to branch to when an upper byte is set
     */
    private static void emitBranchIfAnyHighByteSet(AArch64MacroAssembler masm, Register source, Register vectorScratch, Register scalarScratch, Label target) {
        masm.neon.uzp2VVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vectorScratch, source, source);
        masm.neon.umovGX(AArch64ASIMDAssembler.ElementSize.DoubleWord, scalarScratch, vectorScratch, 0);
        masm.cbnz(64, scalarScratch, target);
    }
}

