From 109e2cfbf7974389cfe6af15b9601426b29f745b Mon Sep 17 00:00:00 2001
From: herengui
Date: Wed, 30 Aug 2023 17:04:20 +0800
Subject: [PATCH 1001/1001] Add new upstream LoongArch support

Signed-off-by: herengui
---
 .../llvm/BinaryFormat/ELFRelocs/LoongArch.def |  102 +
 include/llvm/IR/IntrinsicsLoongArch.td        | 3619 ++
 lib/Target/LoongArch/AsmParser/CMakeLists.txt |   13 +
 .../AsmParser/LoongArchAsmParser.cpp          | 2269 +++
 lib/Target/LoongArch/CMakeLists.txt           |   55 +
 .../LoongArch/Disassembler/CMakeLists.txt     |   11 +
 .../Disassembler/LoongArchDisassembler.cpp    |  938 ++
 lib/Target/LoongArch/LoongArch.h              |   37 +
 lib/Target/LoongArch/LoongArch.td             |  104 +
 lib/Target/LoongArch/LoongArch32InstrInfo.td  |  737 ++
 lib/Target/LoongArch/LoongArchAsmPrinter.cpp  |  647 ++
 lib/Target/LoongArch/LoongArchAsmPrinter.h    |  138 +
 lib/Target/LoongArch/LoongArchCCState.cpp     |  165 +
 lib/Target/LoongArch/LoongArchCCState.h       |  165 +
 lib/Target/LoongArch/LoongArchCallingConv.td  |  292 +
 .../LoongArch/LoongArchExpandPseudo.cpp       | 2438 +++
 .../LoongArch/LoongArchFrameLowering.cpp      |  561 ++
 lib/Target/LoongArch/LoongArchFrameLowering.h |   71 +
 .../LoongArch/LoongArchISelDAGToDAG.cpp       |  878 ++
 lib/Target/LoongArch/LoongArchISelDAGToDAG.h  |  151 +
 .../LoongArch/LoongArchISelLowering.cpp       | 8208 +++++++++++++++++
 lib/Target/LoongArch/LoongArchISelLowering.h  |  557 ++
 lib/Target/LoongArch/LoongArchInstrFormats.td |  790 ++
 lib/Target/LoongArch/LoongArchInstrInfo.cpp   | 1041 +++
 lib/Target/LoongArch/LoongArchInstrInfo.h     |  245 +
 lib/Target/LoongArch/LoongArchInstrInfo.td    | 1882 ++++
 lib/Target/LoongArch/LoongArchInstrInfoF.td   |  629 ++
 .../LoongArch/LoongArchLASXInstrFormats.td    |  448 +
 .../LoongArch/LoongArchLASXInstrInfo.td       | 5644 ++++++++++++
 .../LoongArch/LoongArchLSXInstrFormats.td     |  449 +
 lib/Target/LoongArch/LoongArchLSXInstrInfo.td | 5904 ++++++
 lib/Target/LoongArch/LoongArchMCInstLower.cpp |  342 +
 lib/Target/LoongArch/LoongArchMCInstLower.h   |   55 +
 .../LoongArch/LoongArchMachineFunction.cpp    |   51 +
 .../LoongArch/LoongArchMachineFunction.h      |   98 +
 .../LoongArch/LoongArchModuleISelDAGToDAG.cpp |   53 +
 .../LoongArch/LoongArchRegisterInfo.cpp       |  355 +
 lib/Target/LoongArch/LoongArchRegisterInfo.h  |   80 +
 lib/Target/LoongArch/LoongArchRegisterInfo.td |  373 +
 lib/Target/LoongArch/LoongArchSubtarget.cpp   |  112 +
 lib/Target/LoongArch/LoongArchSubtarget.h     |  145 +
 .../LoongArch/LoongArchTargetMachine.cpp      |  190 +
 lib/Target/LoongArch/LoongArchTargetMachine.h |   68 +
 .../LoongArch/LoongArchTargetObjectFile.cpp   |   26 +
 .../LoongArch/LoongArchTargetObjectFile.h     |   24 +
 .../LoongArch/LoongArchTargetStreamer.h       |  130 +
 .../LoongArchTargetTransformInfo.cpp          |  330 +
 .../LoongArch/LoongArchTargetTransformInfo.h  |   91 +
 .../LoongArch/MCTargetDesc/CMakeLists.txt     |   21 +
 .../MCTargetDesc/LoongArchABIInfo.cpp         |  113 +
 .../LoongArch/MCTargetDesc/LoongArchABIInfo.h |   91 +
 .../LoongArchAnalyzeImmediate.cpp             |   64 +
 .../MCTargetDesc/LoongArchAnalyzeImmediate.h  |   29 +
 .../MCTargetDesc/LoongArchAsmBackend.cpp      |  324 +
 .../MCTargetDesc/LoongArchAsmBackend.h        |   90 +
 .../MCTargetDesc/LoongArchBaseInfo.h          |  128 +
 .../MCTargetDesc/LoongArchELFObjectWriter.cpp |  209 +
 .../MCTargetDesc/LoongArchELFStreamer.cpp     |  138 +
 .../MCTargetDesc/LoongArchELFStreamer.h       |   53 +
 .../MCTargetDesc/LoongArchFixupKinds.h        |  136 +
 .../MCTargetDesc/LoongArchInstPrinter.cpp     |  246 +
 .../MCTargetDesc/LoongArchInstPrinter.h       |  119 +
 .../MCTargetDesc/LoongArchMCAsmInfo.cpp       |   59 +
 .../MCTargetDesc/LoongArchMCAsmInfo.h         |   34 +
 .../MCTargetDesc/LoongArchMCCodeEmitter.cpp   |  521 ++
 .../MCTargetDesc/LoongArchMCCodeEmitter.h     |  146 +
 .../MCTargetDesc/LoongArchMCExpr.cpp          |  134 +
 .../LoongArch/MCTargetDesc/LoongArchMCExpr.h  |   98 +
 .../MCTargetDesc/LoongArchMCTargetDesc.cpp    |  187 +
 .../MCTargetDesc/LoongArchMCTargetDesc.h      |   68 +
 .../MCTargetDesc/LoongArchTargetStreamer.cpp  |  322 +
 .../LoongArch/TargetInfo/CMakeLists.txt       |    9 +
 .../TargetInfo/LoongArchTargetInfo.cpp        |   34 +
 .../TargetInfo/LoongArchTargetInfo.h          |   21 +
 test/CodeGen/LoongArch/atomic-operand-imm0.ll |   17 +
 test/CodeGen/LoongArch/atomic_16_8.ll         |  809 ++
 test/CodeGen/LoongArch/atomic_64_32.ll        |  327 +
 test/CodeGen/LoongArch/bss.ll                 |    5 +
 test/CodeGen/LoongArch/bstrins_d.ll           |   53 +
 test/CodeGen/LoongArch/bstrins_w.ll           |   28 +
 test/CodeGen/LoongArch/bstrpick_d.ll          |   64 +
 test/CodeGen/LoongArch/bstrpick_w.ll          |   18 +
 .../LoongArch/builtins-loongarch-base.ll      |  752 ++
 test/CodeGen/LoongArch/const-mult.ll          |  245 +
 test/CodeGen/LoongArch/disable-tail-calls.ll  |   94 +
 test/CodeGen/LoongArch/divrem.ll              |   68 +
 test/CodeGen/LoongArch/e_flags.ll             |   15 +
 test/CodeGen/LoongArch/eliminateFI.ll         |  106 +
 .../CodeGen/LoongArch/emergency-spill-slot.ll |  103 +
 test/CodeGen/LoongArch/fcopysign.ll           |   17 +
 test/CodeGen/LoongArch/frame-info.ll          |  132 +
 test/CodeGen/LoongArch/fsel.ll                |   47 +
 test/CodeGen/LoongArch/immediate.ll           | 2542 +++
 .../CodeGen/LoongArch/inlineasm/extra-code.ll |    8 +
 .../inlineasm/floating-point-in-gpr.ll        |   31 +
 .../non-native-value-type-registers-error.ll  |    8 +
 .../non-native-value-type-registers.ll        |   42 +
 test/CodeGen/LoongArch/inlineasm/preld.ll     |    8 +
 test/CodeGen/LoongArch/jirl-verify.ll         |   22 +
 test/CodeGen/LoongArch/lasx/VExtend.ll        |   54 +
 .../CodeGen/LoongArch/lasx/imm_vector_lasx.ll |  176 +
 test/CodeGen/LoongArch/lasx/inline-asm.ll     |   55 +
 test/CodeGen/LoongArch/lasx/lasxvclr.ll       |   46 +
 test/CodeGen/LoongArch/lasx/logic-lasx.ll     |  130 +
 test/CodeGen/LoongArch/lasx/set-lasx.ll       |   38 +
 test/CodeGen/LoongArch/lasx/vext2xv.ll        |   65 +
 test/CodeGen/LoongArch/lasx/xvadda.ll         |   62 +
 test/CodeGen/LoongArch/lasx/xvaddsub.ll       |   98 +
 test/CodeGen/LoongArch/lasx/xvhadd.ll         |   21 +
 test/CodeGen/LoongArch/ldptr.ll               |   70 +
 test/CodeGen/LoongArch/lit.local.cfg          |    3 +
 test/CodeGen/LoongArch/logic-op.ll            |  171 +
 test/CodeGen/LoongArch/lshr.ll                |   12 +
 test/CodeGen/LoongArch/lsx/imm_vector_lsx.ll  |  176 +
 test/CodeGen/LoongArch/lsx/inline-asm.ll      |   34 +
 test/CodeGen/LoongArch/lsx/logic-lsx.ll       |  132 +
 test/CodeGen/LoongArch/lsx/lsxvclr.ll         |   50 +
 test/CodeGen/LoongArch/lsx/set-lsx.ll         |   38 +
 test/CodeGen/LoongArch/lsx/vadda.ll           |   62 +
 test/CodeGen/LoongArch/lu12i.ll               |    7 +
 test/CodeGen/LoongArch/named-register.ll      |   29 +
 test/CodeGen/LoongArch/nomerge.ll             |   35 +
 test/CodeGen/LoongArch/noti32.ll              |  119 +
 .../LoongArch/peephole-load-store-addi.ll     |  101 +
 test/CodeGen/LoongArch/signext.ll             |   37 +
 test/CodeGen/LoongArch/stptr.ll               |   52 +
 test/CodeGen/LoongArch/tailcall-R.ll          |   62 +
 test/CodeGen/LoongArch/tailcall-check.ll      |  155 +
 test/CodeGen/LoongArch/tailcall-mem.ll        |   35 +
 test/CodeGen/LoongArch/tailcall.ll            |   13 +
 .../LoongArch/target-feature-double.ll        |   31 +
 .../CodeGen/LoongArch/target-feature-float.ll |   33 +
 test/CodeGen/LoongArch/thread-pointer.ll      |    9 +
 test/CodeGen/LoongArch/trunc.ll               |  108 +
 test/CodeGen/LoongArch/unalignment.ll         |   72 +
 test/MC/Disassembler/LoongArch/lit.local.cfg  |    3 +
 test/MC/Disassembler/LoongArch/simd.txt       | 1361 +++
 test/MC/LoongArch/aligned-nops.s              |   25 +
 test/MC/LoongArch/atomic-error.s              |    7 +
 test/MC/LoongArch/atomic.s                    |   12 +
 test/MC/LoongArch/fixups-expr.s               |   40 +
 test/MC/LoongArch/invalid.s                   |   50 +
 test/MC/LoongArch/lit.local.cfg               |    3 +
 test/MC/LoongArch/macro-la.s                  |   35 +
 test/MC/LoongArch/macro-li.s                  |  773 ++
 test/MC/LoongArch/target-abi-valid.s          |   28 +
 test/MC/LoongArch/unaligned-nops.s            |    5 +
 test/MC/LoongArch/valid_12imm.s               |   33 +
 test/MC/LoongArch/valid_4operands.s           |   53 +
 test/MC/LoongArch/valid_bigimm.s              |   33 +
 test/MC/LoongArch/valid_branch.s              |  155 +
 test/MC/LoongArch/valid_float.s               |  297 +
 test/MC/LoongArch/valid_integer.s             |  369 +
 test/MC/LoongArch/valid_memory.s              |  405 +
 test/MC/LoongArch/valid_priv.s                |  125 +
 test/Object/LoongArch/lit.local.cfg           |    2 +
 .../llvm-readobj/ELF/loongarch-eflags.test    |  103 +
 .../ELF/reloc-types-loongarch64.test          |  195 +
 test/tools/obj2yaml/ELF/loongarch-eflags.yaml |   51 +
 .../lib/Target/LoongArch/AsmParser/BUILD.gn   |   24 +
 .../llvm/lib/Target/LoongArch/BUILD.gn        |  102 +
 .../Target/LoongArch/Disassembler/BUILD.gn    |   23 +
 .../Target/LoongArch/MCTargetDesc/BUILD.gn    |   74 +
 .../lib/Target/LoongArch/TargetInfo/BUILD.gn  |    9 +
 164 files changed, 57452 insertions(+)
 create mode 100644 include/llvm/BinaryFormat/ELFRelocs/LoongArch.def
 create mode 100644 include/llvm/IR/IntrinsicsLoongArch.td
 create mode 100644 lib/Target/LoongArch/AsmParser/CMakeLists.txt
 create mode 100644 lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
 create mode 100644 lib/Target/LoongArch/CMakeLists.txt
 create mode 100644 lib/Target/LoongArch/Disassembler/CMakeLists.txt
 create mode 100644 lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp
 create mode 100644 lib/Target/LoongArch/LoongArch.h
 create mode 100644 lib/Target/LoongArch/LoongArch.td
 create mode 100644 lib/Target/LoongArch/LoongArch32InstrInfo.td
 create mode 100644 lib/Target/LoongArch/LoongArchAsmPrinter.cpp
 create mode 100644 lib/Target/LoongArch/LoongArchAsmPrinter.h
 create mode 100644 lib/Target/LoongArch/LoongArchCCState.cpp
 create mode 100644 lib/Target/LoongArch/LoongArchCCState.h
 create mode 100644 lib/Target/LoongArch/LoongArchCallingConv.td
 create mode 100644 lib/Target/LoongArch/LoongArchExpandPseudo.cpp
 create mode 100644 lib/Target/LoongArch/LoongArchFrameLowering.cpp
 create mode 100644 lib/Target/LoongArch/LoongArchFrameLowering.h
 create mode 100644 lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
 create mode 100644 lib/Target/LoongArch/LoongArchISelDAGToDAG.h
 create mode 100644 lib/Target/LoongArch/LoongArchISelLowering.cpp
 create mode 100644 lib/Target/LoongArch/LoongArchISelLowering.h
 create mode 100644 lib/Target/LoongArch/LoongArchInstrFormats.td
 create mode 100644 lib/Target/LoongArch/LoongArchInstrInfo.cpp
 create mode 100644 lib/Target/LoongArch/LoongArchInstrInfo.h
 create mode 100644 lib/Target/LoongArch/LoongArchInstrInfo.td
 create mode 100644 lib/Target/LoongArch/LoongArchInstrInfoF.td
 create mode 100644 lib/Target/LoongArch/LoongArchLASXInstrFormats.td
 create mode 100644 lib/Target/LoongArch/LoongArchLASXInstrInfo.td
 create mode 100644 lib/Target/LoongArch/LoongArchLSXInstrFormats.td
 create mode 100644 lib/Target/LoongArch/LoongArchLSXInstrInfo.td
 create mode 100644 lib/Target/LoongArch/LoongArchMCInstLower.cpp
 create mode 100644 lib/Target/LoongArch/LoongArchMCInstLower.h
 create mode 100644 lib/Target/LoongArch/LoongArchMachineFunction.cpp
 create mode 100644 lib/Target/LoongArch/LoongArchMachineFunction.h
 create mode 100644 lib/Target/LoongArch/LoongArchModuleISelDAGToDAG.cpp
 create mode 100644 lib/Target/LoongArch/LoongArchRegisterInfo.cpp
 create mode 100644 lib/Target/LoongArch/LoongArchRegisterInfo.h
 create mode 100644 lib/Target/LoongArch/LoongArchRegisterInfo.td
 create mode 100644 lib/Target/LoongArch/LoongArchSubtarget.cpp
 create mode 100644 lib/Target/LoongArch/LoongArchSubtarget.h
 create mode 100644 lib/Target/LoongArch/LoongArchTargetMachine.cpp
 create mode 100644 lib/Target/LoongArch/LoongArchTargetMachine.h
 create mode 100644 lib/Target/LoongArch/LoongArchTargetObjectFile.cpp
 create mode 100644 lib/Target/LoongArch/LoongArchTargetObjectFile.h
 create mode 100644 lib/Target/LoongArch/LoongArchTargetStreamer.h
 create mode 100644 lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
 create mode 100644 lib/Target/LoongArch/LoongArchTargetTransformInfo.h
 create mode 100644 lib/Target/LoongArch/MCTargetDesc/CMakeLists.txt
 create mode 100644 lib/Target/LoongArch/MCTargetDesc/LoongArchABIInfo.cpp
 create mode 100644 lib/Target/LoongArch/MCTargetDesc/LoongArchABIInfo.h
 create mode 100644 lib/Target/LoongArch/MCTargetDesc/LoongArchAnalyzeImmediate.cpp
 create mode 100644 lib/Target/LoongArch/MCTargetDesc/LoongArchAnalyzeImmediate.h
 create mode 100644 lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
 create mode 100644 lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h
 create mode 100644 lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h
 create mode 100644 lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp
 create mode 100644 lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp
 create mode 100644 lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.h
 create mode 100644 lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h
 create mode 100644 lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp
 create mode 100644 lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h
 create mode 100644 lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp
 create mode 100644 lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.h
 create mode 100644 lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
 create mode 100644 lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.h
 create mode 100644 lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp
 create mode 100644 lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h
 create mode 100644 lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp
 create mode 100644 lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h
 create mode 100644 lib/Target/LoongArch/MCTargetDesc/LoongArchTargetStreamer.cpp
 create mode 100644 lib/Target/LoongArch/TargetInfo/CMakeLists.txt
 create mode 100644 lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.cpp
 create mode 100644 lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.h
 create mode 100644 test/CodeGen/LoongArch/atomic-operand-imm0.ll
 create mode 100644 test/CodeGen/LoongArch/atomic_16_8.ll
 create mode 100644 test/CodeGen/LoongArch/atomic_64_32.ll
 create mode 100644 test/CodeGen/LoongArch/bss.ll
 create mode 100644 test/CodeGen/LoongArch/bstrins_d.ll
 create mode 100644 test/CodeGen/LoongArch/bstrins_w.ll
 create mode 100644 test/CodeGen/LoongArch/bstrpick_d.ll
 create mode 100644 test/CodeGen/LoongArch/bstrpick_w.ll
 create mode 100644 test/CodeGen/LoongArch/builtins-loongarch-base.ll
 create mode 100644 test/CodeGen/LoongArch/const-mult.ll
 create mode 100644 test/CodeGen/LoongArch/disable-tail-calls.ll
 create mode 100644 test/CodeGen/LoongArch/divrem.ll
 create mode 100644 test/CodeGen/LoongArch/e_flags.ll
 create mode 100644 test/CodeGen/LoongArch/eliminateFI.ll
 create mode 100644 test/CodeGen/LoongArch/emergency-spill-slot.ll
 create mode 100644 test/CodeGen/LoongArch/fcopysign.ll
 create mode 100644 test/CodeGen/LoongArch/frame-info.ll
 create mode 100644 test/CodeGen/LoongArch/fsel.ll
 create mode 100644 test/CodeGen/LoongArch/immediate.ll
 create mode 100644 test/CodeGen/LoongArch/inlineasm/extra-code.ll
 create mode 100644 test/CodeGen/LoongArch/inlineasm/floating-point-in-gpr.ll
 create mode 100644 test/CodeGen/LoongArch/inlineasm/non-native-value-type-registers-error.ll
 create mode 100644 test/CodeGen/LoongArch/inlineasm/non-native-value-type-registers.ll
 create mode 100644 test/CodeGen/LoongArch/inlineasm/preld.ll
 create mode 100644 test/CodeGen/LoongArch/jirl-verify.ll
 create mode 100644 test/CodeGen/LoongArch/lasx/VExtend.ll
 create mode 100644 test/CodeGen/LoongArch/lasx/imm_vector_lasx.ll
 create mode 100644 test/CodeGen/LoongArch/lasx/inline-asm.ll
 create mode 100644 test/CodeGen/LoongArch/lasx/lasxvclr.ll
 create mode 100644 test/CodeGen/LoongArch/lasx/logic-lasx.ll
 create mode 100644 test/CodeGen/LoongArch/lasx/set-lasx.ll
 create mode 100644 test/CodeGen/LoongArch/lasx/vext2xv.ll
 create mode 100644 test/CodeGen/LoongArch/lasx/xvadda.ll
 create mode 100644 test/CodeGen/LoongArch/lasx/xvaddsub.ll
 create mode 100644 test/CodeGen/LoongArch/lasx/xvhadd.ll
 create mode 100644 test/CodeGen/LoongArch/ldptr.ll
 create mode 100644 test/CodeGen/LoongArch/lit.local.cfg
 create mode 100644 test/CodeGen/LoongArch/logic-op.ll
 create mode 100644 test/CodeGen/LoongArch/lshr.ll
 create mode 100644 test/CodeGen/LoongArch/lsx/imm_vector_lsx.ll
 create mode 100644 test/CodeGen/LoongArch/lsx/inline-asm.ll
 create mode 100644 test/CodeGen/LoongArch/lsx/logic-lsx.ll
 create mode 100644 test/CodeGen/LoongArch/lsx/lsxvclr.ll
 create mode 100644 test/CodeGen/LoongArch/lsx/set-lsx.ll
 create mode 100644 test/CodeGen/LoongArch/lsx/vadda.ll
 create mode 100644 test/CodeGen/LoongArch/lu12i.ll
 create mode 100644 test/CodeGen/LoongArch/named-register.ll
 create mode 100644 test/CodeGen/LoongArch/nomerge.ll
 create mode 100644 test/CodeGen/LoongArch/noti32.ll
 create mode 100644 test/CodeGen/LoongArch/peephole-load-store-addi.ll
 create mode 100644 test/CodeGen/LoongArch/signext.ll
 create mode 100644 test/CodeGen/LoongArch/stptr.ll
 create mode 100644 test/CodeGen/LoongArch/tailcall-R.ll
 create mode 100644 test/CodeGen/LoongArch/tailcall-check.ll
 create mode 100644 test/CodeGen/LoongArch/tailcall-mem.ll
 create mode 100644 test/CodeGen/LoongArch/tailcall.ll
 create mode 100644 test/CodeGen/LoongArch/target-feature-double.ll
 create mode 100644 test/CodeGen/LoongArch/target-feature-float.ll
 create mode 100644 test/CodeGen/LoongArch/thread-pointer.ll
 create mode 100644 test/CodeGen/LoongArch/trunc.ll
 create mode 100644 test/CodeGen/LoongArch/unalignment.ll
 create mode 100644 test/MC/Disassembler/LoongArch/lit.local.cfg
 create mode 100644 test/MC/Disassembler/LoongArch/simd.txt
 create mode 100644 test/MC/LoongArch/aligned-nops.s
 create mode 100644 test/MC/LoongArch/atomic-error.s
 create mode 100644 test/MC/LoongArch/atomic.s
 create mode 100644 test/MC/LoongArch/fixups-expr.s
 create mode 100644 test/MC/LoongArch/invalid.s
 create mode 100644 test/MC/LoongArch/lit.local.cfg
 create mode 100644 test/MC/LoongArch/macro-la.s
 create mode 100644 test/MC/LoongArch/macro-li.s
 create mode 100644 test/MC/LoongArch/target-abi-valid.s
 create mode 100644 test/MC/LoongArch/unaligned-nops.s
 create mode 100644 test/MC/LoongArch/valid_12imm.s
 create mode 100644 test/MC/LoongArch/valid_4operands.s
 create mode 100644 test/MC/LoongArch/valid_bigimm.s
 create mode 100644 test/MC/LoongArch/valid_branch.s
 create mode 100644 test/MC/LoongArch/valid_float.s
 create mode 100644 test/MC/LoongArch/valid_integer.s
 create mode 100644 test/MC/LoongArch/valid_memory.s
 create mode 100644 test/MC/LoongArch/valid_priv.s
 create mode 100644 test/Object/LoongArch/lit.local.cfg
 create mode 100644 test/tools/llvm-readobj/ELF/loongarch-eflags.test
 create mode 100644 test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test
 create mode 100644 test/tools/obj2yaml/ELF/loongarch-eflags.yaml
 create mode 100644 utils/gn/secondary/llvm/lib/Target/LoongArch/AsmParser/BUILD.gn
 create mode 100644 utils/gn/secondary/llvm/lib/Target/LoongArch/BUILD.gn
 create mode 100644 utils/gn/secondary/llvm/lib/Target/LoongArch/Disassembler/BUILD.gn
 create mode 100644 utils/gn/secondary/llvm/lib/Target/LoongArch/MCTargetDesc/BUILD.gn
 create mode 100644 utils/gn/secondary/llvm/lib/Target/LoongArch/TargetInfo/BUILD.gn

diff --git a/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def b/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def
new file mode 100644
index 00000000..6699e732
--- /dev/null
+++ b/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def
@@ -0,0 +1,102 @@
+
+#ifndef ELF_RELOC
+#error "ELF_RELOC must be defined"
+#endif
+
+ELF_RELOC(R_LARCH_NONE, 0)
+ELF_RELOC(R_LARCH_32, 1)
+ELF_RELOC(R_LARCH_64, 2)
+ELF_RELOC(R_LARCH_RELATIVE, 3)
+ELF_RELOC(R_LARCH_COPY, 4)
+ELF_RELOC(R_LARCH_JUMP_SLOT, 5)
+ELF_RELOC(R_LARCH_TLS_DTPMOD32, 6)
+ELF_RELOC(R_LARCH_TLS_DTPMOD64, 7)
+ELF_RELOC(R_LARCH_TLS_DTPREL32, 8)
+ELF_RELOC(R_LARCH_TLS_DTPREL64, 9)
+ELF_RELOC(R_LARCH_TLS_TPREL32, 10)
+ELF_RELOC(R_LARCH_TLS_TPREL64, 11)
+ELF_RELOC(R_LARCH_IRELATIVE, 12)
+
+ELF_RELOC(R_LARCH_MARK_LA, 20)
+ELF_RELOC(R_LARCH_MARK_PCREL, 21)
+
+ELF_RELOC(R_LARCH_SOP_PUSH_PCREL, 22)
+
+ELF_RELOC(R_LARCH_SOP_PUSH_ABSOLUTE, 23)
+
+ELF_RELOC(R_LARCH_SOP_PUSH_DUP, 24)
+ELF_RELOC(R_LARCH_SOP_PUSH_GPREL, 25)
+ELF_RELOC(R_LARCH_SOP_PUSH_TLS_TPREL, 26)
+ELF_RELOC(R_LARCH_SOP_PUSH_TLS_GOT, 27)
+ELF_RELOC(R_LARCH_SOP_PUSH_TLS_GD, 28)
+ELF_RELOC(R_LARCH_SOP_PUSH_PLT_PCREL, 29)
+
+ELF_RELOC(R_LARCH_SOP_ASSERT, 30)
+ELF_RELOC(R_LARCH_SOP_NOT, 31)
+ELF_RELOC(R_LARCH_SOP_SUB, 32)
+ELF_RELOC(R_LARCH_SOP_SL, 33)
+ELF_RELOC(R_LARCH_SOP_SR, 34)
+ELF_RELOC(R_LARCH_SOP_ADD, 35)
+ELF_RELOC(R_LARCH_SOP_AND, 36)
+ELF_RELOC(R_LARCH_SOP_IF_ELSE, 37)
+ELF_RELOC(R_LARCH_SOP_POP_32_S_10_5, 38)
+ELF_RELOC(R_LARCH_SOP_POP_32_U_10_12, 39)
+ELF_RELOC(R_LARCH_SOP_POP_32_S_10_12, 40)
+ELF_RELOC(R_LARCH_SOP_POP_32_S_10_16, 41)
+ELF_RELOC(R_LARCH_SOP_POP_32_S_10_16_S2, 42)
+ELF_RELOC(R_LARCH_SOP_POP_32_S_5_20, 43)
+ELF_RELOC(R_LARCH_SOP_POP_32_S_0_5_10_16_S2, 44)
+ELF_RELOC(R_LARCH_SOP_POP_32_S_0_10_10_16_S2, 45)
+ELF_RELOC(R_LARCH_SOP_POP_32_U, 46)
+
+ELF_RELOC(R_LARCH_ADD8, 47)
+ELF_RELOC(R_LARCH_ADD16, 48)
+ELF_RELOC(R_LARCH_ADD24, 49)
+ELF_RELOC(R_LARCH_ADD32, 50)
+ELF_RELOC(R_LARCH_ADD64, 51)
+ELF_RELOC(R_LARCH_SUB8, 52)
+ELF_RELOC(R_LARCH_SUB16, 53)
+ELF_RELOC(R_LARCH_SUB24, 54)
+ELF_RELOC(R_LARCH_SUB32, 55)
+ELF_RELOC(R_LARCH_SUB64, 56)
+
+ELF_RELOC(R_LARCH_GNU_VTINHERIT, 57)
+ELF_RELOC(R_LARCH_GNU_VTENTRY, 58)
+
+ELF_RELOC(R_LARCH_B16, 64)
+ELF_RELOC(R_LARCH_B21, 65)
+ELF_RELOC(R_LARCH_B26, 66)
+ELF_RELOC(R_LARCH_ABS_HI20, 67)
+ELF_RELOC(R_LARCH_ABS_LO12, 68)
+ELF_RELOC(R_LARCH_ABS64_LO20, 69)
+ELF_RELOC(R_LARCH_ABS64_HI12, 70)
+ELF_RELOC(R_LARCH_PCALA_HI20, 71)
+ELF_RELOC(R_LARCH_PCALA_LO12, 72)
+ELF_RELOC(R_LARCH_PCALA64_LO20, 73)
+ELF_RELOC(R_LARCH_PCALA64_HI12, 74)
+ELF_RELOC(R_LARCH_GOT_PC_HI20, 75)
+ELF_RELOC(R_LARCH_GOT_PC_LO12, 76)
+ELF_RELOC(R_LARCH_GOT64_PC_LO20, 77)
+ELF_RELOC(R_LARCH_GOT64_PC_HI12, 78)
+ELF_RELOC(R_LARCH_GOT_HI20, 79)
+ELF_RELOC(R_LARCH_GOT_LO12, 80)
+ELF_RELOC(R_LARCH_GOT64_LO20, 81)
+ELF_RELOC(R_LARCH_GOT64_HI12, 82)
+ELF_RELOC(R_LARCH_TLS_LE_HI20, 83)
+ELF_RELOC(R_LARCH_TLS_LE_LO12, 84)
+ELF_RELOC(R_LARCH_TLS_LE64_LO20, 85)
+ELF_RELOC(R_LARCH_TLS_LE64_HI12, 86)
+ELF_RELOC(R_LARCH_TLS_IE_PC_HI20, 87)
+ELF_RELOC(R_LARCH_TLS_IE_PC_LO12, 88)
+ELF_RELOC(R_LARCH_TLS_IE64_PC_LO20, 89)
+ELF_RELOC(R_LARCH_TLS_IE64_PC_HI12, 90)
+ELF_RELOC(R_LARCH_TLS_IE_HI20, 91)
+ELF_RELOC(R_LARCH_TLS_IE_LO12, 92)
+ELF_RELOC(R_LARCH_TLS_IE64_LO20, 93)
+ELF_RELOC(R_LARCH_TLS_IE64_HI12, 94)
+ELF_RELOC(R_LARCH_TLS_LD_PC_HI20, 95)
+ELF_RELOC(R_LARCH_TLS_LD_HI20, 96)
+ELF_RELOC(R_LARCH_TLS_GD_PC_HI20, 97)
+ELF_RELOC(R_LARCH_TLS_GD_HI20, 98)
+ELF_RELOC(R_LARCH_32_PCREL, 99)
+ELF_RELOC(R_LARCH_RELAX, 100)
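The .def above is an X-macro list. A minimal sketch (not part of this patch) of how LLVM-style headers typically consume it: the includer defines ELF_RELOC first, so the same table can expand into an enum, a name table, and so on.

// C++ sketch: expand the relocation list into an enum, the pattern
// used by llvm/include/llvm/BinaryFormat/ELF.h for other targets.
#include <cstdint>

namespace llvm {
namespace ELF {

enum : uint32_t {
#define ELF_RELOC(name, value) name = value,
#include "llvm/BinaryFormat/ELFRelocs/LoongArch.def"
#undef ELF_RELOC
};

} // namespace ELF
} // namespace llvm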
diff --git a/include/llvm/IR/IntrinsicsLoongArch.td b/include/llvm/IR/IntrinsicsLoongArch.td
new file mode 100644
index 00000000..6e70173f
--- /dev/null
+++ b/include/llvm/IR/IntrinsicsLoongArch.td
@@ -0,0 +1,3619 @@
+//===- IntrinsicsLoongArch.td - Defines LoongArch intrinsics -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the LoongArch-specific intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+let TargetPrefix = "loongarch" in { // All intrinsics start with "llvm.loongarch.".
+
+//===----------------------------------------------------------------------===//
+// LoongArch LSX
+
+def int_loongarch_lsx_vclo_b : GCCBuiltin<"__builtin_lsx_vclo_b">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vclo_h : GCCBuiltin<"__builtin_lsx_vclo_h">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vclo_w : GCCBuiltin<"__builtin_lsx_vclo_w">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vclo_d : GCCBuiltin<"__builtin_lsx_vclo_d">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vflogb_s : GCCBuiltin<"__builtin_lsx_vflogb_s">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vflogb_d : GCCBuiltin<"__builtin_lsx_vflogb_d">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vpickve2gr_b : GCCBuiltin<"__builtin_lsx_vpickve2gr_b">,
+ Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vpickve2gr_h : GCCBuiltin<"__builtin_lsx_vpickve2gr_h">,
+ Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vpickve2gr_w : GCCBuiltin<"__builtin_lsx_vpickve2gr_w">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vpickve2gr_d : GCCBuiltin<"__builtin_lsx_vpickve2gr_d">,
+ Intrinsic<[llvm_i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vpickve2gr_bu : GCCBuiltin<"__builtin_lsx_vpickve2gr_bu">,
+ Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vpickve2gr_hu : GCCBuiltin<"__builtin_lsx_vpickve2gr_hu">,
+ Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vpickve2gr_wu : GCCBuiltin<"__builtin_lsx_vpickve2gr_wu">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vpickve2gr_du : GCCBuiltin<"__builtin_lsx_vpickve2gr_du">,
+ Intrinsic<[llvm_i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vreplvei_b : GCCBuiltin<"__builtin_lsx_vreplvei_b">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vreplvei_h : GCCBuiltin<"__builtin_lsx_vreplvei_h">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vreplvei_w : GCCBuiltin<"__builtin_lsx_vreplvei_w">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vreplvei_d : GCCBuiltin<"__builtin_lsx_vreplvei_d">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vmskltz_b : GCCBuiltin<"__builtin_lsx_vmskltz_b">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmskltz_h : GCCBuiltin<"__builtin_lsx_vmskltz_h">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmskltz_w : GCCBuiltin<"__builtin_lsx_vmskltz_w">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmskltz_d : GCCBuiltin<"__builtin_lsx_vmskltz_d">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfmadd_s : GCCBuiltin<"__builtin_lsx_vfmadd_s">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfmadd_d : GCCBuiltin<"__builtin_lsx_vfmadd_d">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfmsub_s : GCCBuiltin<"__builtin_lsx_vfmsub_s">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfmsub_d : GCCBuiltin<"__builtin_lsx_vfmsub_d">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfnmadd_s : GCCBuiltin<"__builtin_lsx_vfnmadd_s">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfnmadd_d : GCCBuiltin<"__builtin_lsx_vfnmadd_d">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfnmsub_s : GCCBuiltin<"__builtin_lsx_vfnmsub_s">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfnmsub_d : GCCBuiltin<"__builtin_lsx_vfnmsub_d">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfcmp_caf_s : GCCBuiltin<"__builtin_lsx_vfcmp_caf_s">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfcmp_caf_d : GCCBuiltin<"__builtin_lsx_vfcmp_caf_d">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfcmp_cor_s : GCCBuiltin<"__builtin_lsx_vfcmp_cor_s">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfcmp_cor_d : GCCBuiltin<"__builtin_lsx_vfcmp_cor_d">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfcmp_cun_s : GCCBuiltin<"__builtin_lsx_vfcmp_cun_s">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfcmp_cun_d : GCCBuiltin<"__builtin_lsx_vfcmp_cun_d">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfcmp_cune_s : GCCBuiltin<"__builtin_lsx_vfcmp_cune_s">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfcmp_cune_d : GCCBuiltin<"__builtin_lsx_vfcmp_cune_d">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfcmp_cueq_s : GCCBuiltin<"__builtin_lsx_vfcmp_cueq_s">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfcmp_cueq_d : GCCBuiltin<"__builtin_lsx_vfcmp_cueq_d">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfcmp_ceq_s : GCCBuiltin<"__builtin_lsx_vfcmp_ceq_s">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfcmp_ceq_d : GCCBuiltin<"__builtin_lsx_vfcmp_ceq_d">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfcmp_cne_s : GCCBuiltin<"__builtin_lsx_vfcmp_cne_s">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfcmp_cne_d : GCCBuiltin<"__builtin_lsx_vfcmp_cne_d">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfcmp_clt_s : GCCBuiltin<"__builtin_lsx_vfcmp_clt_s">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfcmp_clt_d : GCCBuiltin<"__builtin_lsx_vfcmp_clt_d">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfcmp_cult_s : GCCBuiltin<"__builtin_lsx_vfcmp_cult_s">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfcmp_cult_d : GCCBuiltin<"__builtin_lsx_vfcmp_cult_d">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfcmp_cle_s : GCCBuiltin<"__builtin_lsx_vfcmp_cle_s">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfcmp_cle_d : GCCBuiltin<"__builtin_lsx_vfcmp_cle_d">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfcmp_cule_s : GCCBuiltin<"__builtin_lsx_vfcmp_cule_s">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfcmp_cule_d : GCCBuiltin<"__builtin_lsx_vfcmp_cule_d">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfcmp_saf_s : GCCBuiltin<"__builtin_lsx_vfcmp_saf_s">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfcmp_saf_d : GCCBuiltin<"__builtin_lsx_vfcmp_saf_d">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfcmp_sor_s : GCCBuiltin<"__builtin_lsx_vfcmp_sor_s">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfcmp_sor_d : GCCBuiltin<"__builtin_lsx_vfcmp_sor_d">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfcmp_sun_s : GCCBuiltin<"__builtin_lsx_vfcmp_sun_s">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfcmp_sun_d : GCCBuiltin<"__builtin_lsx_vfcmp_sun_d">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfcmp_sune_s : GCCBuiltin<"__builtin_lsx_vfcmp_sune_s">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfcmp_sune_d : GCCBuiltin<"__builtin_lsx_vfcmp_sune_d">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfcmp_sueq_s : GCCBuiltin<"__builtin_lsx_vfcmp_sueq_s">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfcmp_sueq_d : GCCBuiltin<"__builtin_lsx_vfcmp_sueq_d">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfcmp_seq_s : GCCBuiltin<"__builtin_lsx_vfcmp_seq_s">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfcmp_seq_d : GCCBuiltin<"__builtin_lsx_vfcmp_seq_d">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfcmp_sne_s : GCCBuiltin<"__builtin_lsx_vfcmp_sne_s">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfcmp_sne_d : GCCBuiltin<"__builtin_lsx_vfcmp_sne_d">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfcmp_slt_s : GCCBuiltin<"__builtin_lsx_vfcmp_slt_s">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfcmp_slt_d : GCCBuiltin<"__builtin_lsx_vfcmp_slt_d">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfcmp_sult_s : GCCBuiltin<"__builtin_lsx_vfcmp_sult_s">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfcmp_sult_d : GCCBuiltin<"__builtin_lsx_vfcmp_sult_d">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfcmp_sle_s : GCCBuiltin<"__builtin_lsx_vfcmp_sle_s">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfcmp_sle_d : GCCBuiltin<"__builtin_lsx_vfcmp_sle_d">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfcmp_sule_s : GCCBuiltin<"__builtin_lsx_vfcmp_sule_s">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfcmp_sule_d : GCCBuiltin<"__builtin_lsx_vfcmp_sule_d">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
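In the vfcmp_* names above, the c* condition codes are the quiet (non-signaling) comparisons and the s* codes the signaling ones; each lane of the result is all-ones when the predicate holds and all-zeros otherwise, which is why these float compares return integer vector types. A minimal usage sketch, assuming clang's __builtin_lsx_* builtins are available (e.g. under -mlsx); the helper name is hypothetical:

// C++ sketch using clang vector extensions (an assumption, not patch code).
typedef float v4f32 __attribute__((vector_size(16)));
typedef int v4i32 __attribute__((vector_size(16)));

// Each result lane is 0xFFFFFFFF when a[i] < b[i], else 0.
v4i32 less_than_mask(v4f32 a, v4f32 b) {
  return __builtin_lsx_vfcmp_clt_s(a, b); // quiet "compare less than"
}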
+
+def int_loongarch_lsx_vbitsel_v : GCCBuiltin<"__builtin_lsx_vbitsel_v">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vshuf_b : GCCBuiltin<"__builtin_lsx_vshuf_b">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vldrepl_b : GCCBuiltin<"__builtin_lsx_vldrepl_b">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
+def int_loongarch_lsx_vldrepl_h : GCCBuiltin<"__builtin_lsx_vldrepl_h">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
+def int_loongarch_lsx_vldrepl_w : GCCBuiltin<"__builtin_lsx_vldrepl_w">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
+def int_loongarch_lsx_vldrepl_d : GCCBuiltin<"__builtin_lsx_vldrepl_d">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
+
+def int_loongarch_lsx_vstelm_b : GCCBuiltin<"__builtin_lsx_vstelm_b">,
+ Intrinsic<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>;
+def int_loongarch_lsx_vstelm_h : GCCBuiltin<"__builtin_lsx_vstelm_h">,
+ Intrinsic<[], [llvm_v8i16_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>;
+def int_loongarch_lsx_vstelm_w : GCCBuiltin<"__builtin_lsx_vstelm_w">,
+ Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>;
+def int_loongarch_lsx_vstelm_d : GCCBuiltin<"__builtin_lsx_vstelm_d">,
+ Intrinsic<[], [llvm_v2i64_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>;
+
+def int_loongarch_lsx_vldx : GCCBuiltin<"__builtin_lsx_vldx">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i64_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+def int_loongarch_lsx_vstx : GCCBuiltin<"__builtin_lsx_vstx">,
+ Intrinsic<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i64_ty],
+ [IntrArgMemOnly]>;
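The vldrepl_*/vstelm_* declarations above are the element-replicating load and the single-element store, while vldx/vstx are the register-indexed whole-vector load/store. A hedged usage sketch, again assuming clang's LSX builtins and that the offset and lane-index operands must be integer constants:

// C++ sketch (assumed builtin surface, not patch code).
typedef int v4i32 __attribute__((vector_size(16)));

void splat_then_store_lane(int *src, int *dst) {
  // vldrepl.w: load one 32-bit element at (src + 0) and replicate it
  // into all four lanes.
  v4i32 v = __builtin_lsx_vldrepl_w(src, 0);
  // vstelm.w: store lane 2 of v to (dst + 0).
  __builtin_lsx_vstelm_w(v, dst, 0, 2);
}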
GCCBuiltin<"__builtin_lsx_vaddwev_q_du">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vsubwev_d_wu : GCCBuiltin<"__builtin_lsx_vsubwev_d_wu">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsubwev_w_hu : GCCBuiltin<"__builtin_lsx_vsubwev_w_hu">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsubwev_h_bu : GCCBuiltin<"__builtin_lsx_vsubwev_h_bu">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsubwev_q_du : GCCBuiltin<"__builtin_lsx_vsubwev_q_du">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vaddwod_d_wu : GCCBuiltin<"__builtin_lsx_vaddwod_d_wu">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vaddwod_w_hu : GCCBuiltin<"__builtin_lsx_vaddwod_w_hu">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vaddwod_h_bu : GCCBuiltin<"__builtin_lsx_vaddwod_h_bu">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vaddwod_q_du : GCCBuiltin<"__builtin_lsx_vaddwod_q_du">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vsubwod_d_wu : GCCBuiltin<"__builtin_lsx_vsubwod_d_wu">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsubwod_w_hu : GCCBuiltin<"__builtin_lsx_vsubwod_w_hu">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsubwod_h_bu : GCCBuiltin<"__builtin_lsx_vsubwod_h_bu">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsubwod_q_du : GCCBuiltin<"__builtin_lsx_vsubwod_q_du">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vaddwev_d_wu_w : GCCBuiltin<"__builtin_lsx_vaddwev_d_wu_w">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vaddwev_w_hu_h : GCCBuiltin<"__builtin_lsx_vaddwev_w_hu_h">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vaddwev_h_bu_b : GCCBuiltin<"__builtin_lsx_vaddwev_h_bu_b">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vaddwev_q_du_d : GCCBuiltin<"__builtin_lsx_vaddwev_q_du_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vaddwod_d_wu_w : GCCBuiltin<"__builtin_lsx_vaddwod_d_wu_w">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vaddwod_w_hu_h : GCCBuiltin<"__builtin_lsx_vaddwod_w_hu_h">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vaddwod_h_bu_b : GCCBuiltin<"__builtin_lsx_vaddwod_h_bu_b">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vaddwod_q_du_d : GCCBuiltin<"__builtin_lsx_vaddwod_q_du_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vhaddw_qu_du : GCCBuiltin<"__builtin_lsx_vhaddw_qu_du">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; +def int_loongarch_lsx_vhsubw_qu_du : GCCBuiltin<"__builtin_lsx_vhsubw_qu_du">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, 
+
+def int_loongarch_lsx_vhaddw_qu_du : GCCBuiltin<"__builtin_lsx_vhaddw_qu_du">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vhsubw_qu_du : GCCBuiltin<"__builtin_lsx_vhsubw_qu_du">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vhaddw_q_d : GCCBuiltin<"__builtin_lsx_vhaddw_q_d">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vhsubw_q_d : GCCBuiltin<"__builtin_lsx_vhsubw_q_d">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vmuh_b : GCCBuiltin<"__builtin_lsx_vmuh_b">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmuh_h : GCCBuiltin<"__builtin_lsx_vmuh_h">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmuh_w : GCCBuiltin<"__builtin_lsx_vmuh_w">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmuh_d : GCCBuiltin<"__builtin_lsx_vmuh_d">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vmuh_bu : GCCBuiltin<"__builtin_lsx_vmuh_bu">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmuh_hu : GCCBuiltin<"__builtin_lsx_vmuh_hu">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmuh_wu : GCCBuiltin<"__builtin_lsx_vmuh_wu">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmuh_du : GCCBuiltin<"__builtin_lsx_vmuh_du">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vmulwev_d_w : GCCBuiltin<"__builtin_lsx_vmulwev_d_w">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmulwev_w_h : GCCBuiltin<"__builtin_lsx_vmulwev_w_h">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmulwev_h_b : GCCBuiltin<"__builtin_lsx_vmulwev_h_b">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmulwev_q_d : GCCBuiltin<"__builtin_lsx_vmulwev_q_d">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vmulwod_d_w : GCCBuiltin<"__builtin_lsx_vmulwod_d_w">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmulwod_w_h : GCCBuiltin<"__builtin_lsx_vmulwod_w_h">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmulwod_h_b : GCCBuiltin<"__builtin_lsx_vmulwod_h_b">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmulwod_q_d : GCCBuiltin<"__builtin_lsx_vmulwod_q_d">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vmulwev_d_wu : GCCBuiltin<"__builtin_lsx_vmulwev_d_wu">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmulwev_w_hu : GCCBuiltin<"__builtin_lsx_vmulwev_w_hu">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmulwev_h_bu : GCCBuiltin<"__builtin_lsx_vmulwev_h_bu">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmulwev_q_du : GCCBuiltin<"__builtin_lsx_vmulwev_q_du">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vmulwod_d_wu : GCCBuiltin<"__builtin_lsx_vmulwod_d_wu">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
GCCBuiltin<"__builtin_lsx_vmulwod_w_hu">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmulwod_h_bu : GCCBuiltin<"__builtin_lsx_vmulwod_h_bu">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmulwod_q_du : GCCBuiltin<"__builtin_lsx_vmulwod_q_du">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vmulwev_d_wu_w : GCCBuiltin<"__builtin_lsx_vmulwev_d_wu_w">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmulwev_w_hu_h : GCCBuiltin<"__builtin_lsx_vmulwev_w_hu_h">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmulwev_h_bu_b : GCCBuiltin<"__builtin_lsx_vmulwev_h_bu_b">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmulwev_q_du_d : GCCBuiltin<"__builtin_lsx_vmulwev_q_du_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vmulwod_d_wu_w : GCCBuiltin<"__builtin_lsx_vmulwod_d_wu_w">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmulwod_w_hu_h : GCCBuiltin<"__builtin_lsx_vmulwod_w_hu_h">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmulwod_h_bu_b : GCCBuiltin<"__builtin_lsx_vmulwod_h_bu_b">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmulwod_q_du_d : GCCBuiltin<"__builtin_lsx_vmulwod_q_du_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vmaddwev_d_w : GCCBuiltin<"__builtin_lsx_vmaddwev_d_w">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmaddwev_w_h : GCCBuiltin<"__builtin_lsx_vmaddwev_w_h">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmaddwev_h_b : GCCBuiltin<"__builtin_lsx_vmaddwev_h_b">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmaddwev_q_d : GCCBuiltin<"__builtin_lsx_vmaddwev_q_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vmaddwod_d_w : GCCBuiltin<"__builtin_lsx_vmaddwod_d_w">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmaddwod_w_h : GCCBuiltin<"__builtin_lsx_vmaddwod_w_h">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmaddwod_h_b : GCCBuiltin<"__builtin_lsx_vmaddwod_h_b">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmaddwod_q_d : GCCBuiltin<"__builtin_lsx_vmaddwod_q_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vmaddwev_d_wu : GCCBuiltin<"__builtin_lsx_vmaddwev_d_wu">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmaddwev_w_hu : GCCBuiltin<"__builtin_lsx_vmaddwev_w_hu">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmaddwev_h_bu : GCCBuiltin<"__builtin_lsx_vmaddwev_h_bu">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v16i8_ty, 
+
+def int_loongarch_lsx_vmaddwev_d_w : GCCBuiltin<"__builtin_lsx_vmaddwev_d_w">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmaddwev_w_h : GCCBuiltin<"__builtin_lsx_vmaddwev_w_h">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmaddwev_h_b : GCCBuiltin<"__builtin_lsx_vmaddwev_h_b">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmaddwev_q_d : GCCBuiltin<"__builtin_lsx_vmaddwev_q_d">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vmaddwod_d_w : GCCBuiltin<"__builtin_lsx_vmaddwod_d_w">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmaddwod_w_h : GCCBuiltin<"__builtin_lsx_vmaddwod_w_h">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmaddwod_h_b : GCCBuiltin<"__builtin_lsx_vmaddwod_h_b">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmaddwod_q_d : GCCBuiltin<"__builtin_lsx_vmaddwod_q_d">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vmaddwev_d_wu : GCCBuiltin<"__builtin_lsx_vmaddwev_d_wu">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmaddwev_w_hu : GCCBuiltin<"__builtin_lsx_vmaddwev_w_hu">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmaddwev_h_bu : GCCBuiltin<"__builtin_lsx_vmaddwev_h_bu">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmaddwev_q_du : GCCBuiltin<"__builtin_lsx_vmaddwev_q_du">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vmaddwod_d_wu : GCCBuiltin<"__builtin_lsx_vmaddwod_d_wu">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmaddwod_w_hu : GCCBuiltin<"__builtin_lsx_vmaddwod_w_hu">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmaddwod_h_bu : GCCBuiltin<"__builtin_lsx_vmaddwod_h_bu">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmaddwod_q_du : GCCBuiltin<"__builtin_lsx_vmaddwod_q_du">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vmaddwev_d_wu_w : GCCBuiltin<"__builtin_lsx_vmaddwev_d_wu_w">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmaddwev_w_hu_h : GCCBuiltin<"__builtin_lsx_vmaddwev_w_hu_h">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmaddwev_h_bu_b : GCCBuiltin<"__builtin_lsx_vmaddwev_h_bu_b">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmaddwev_q_du_d : GCCBuiltin<"__builtin_lsx_vmaddwev_q_du_d">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vmaddwod_d_wu_w : GCCBuiltin<"__builtin_lsx_vmaddwod_d_wu_w">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmaddwod_w_hu_h : GCCBuiltin<"__builtin_lsx_vmaddwod_w_hu_h">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmaddwod_h_bu_b : GCCBuiltin<"__builtin_lsx_vmaddwod_h_bu_b">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmaddwod_q_du_d : GCCBuiltin<"__builtin_lsx_vmaddwod_q_du_d">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
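The vmaddw* forms above are the accumulate versions of vmulw*, which is why they take the destination as an extra first operand. A scalar sketch of the assumed vmaddwev.d.w semantics:

// C++ reference sketch (illustrative, not patch code).
#include <cstdint>

void vmaddwev_d_w_ref(int64_t acc[2], const int32_t a[4], const int32_t b[4]) {
  for (int i = 0; i < 2; ++i)
    acc[i] += (int64_t)a[2 * i] * (int64_t)b[2 * i]; // accumulate even lanes
}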
GCCBuiltin<"__builtin_lsx_vsrlrn_w_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vsrarn_b_h : GCCBuiltin<"__builtin_lsx_vsrarn_b_h">, + Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsrarn_h_w : GCCBuiltin<"__builtin_lsx_vsrarn_h_w">, + Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsrarn_w_d : GCCBuiltin<"__builtin_lsx_vsrarn_w_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vssrln_b_h : GCCBuiltin<"__builtin_lsx_vssrln_b_h">, + Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrln_h_w : GCCBuiltin<"__builtin_lsx_vssrln_h_w">, + Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrln_w_d : GCCBuiltin<"__builtin_lsx_vssrln_w_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vssran_b_h : GCCBuiltin<"__builtin_lsx_vssran_b_h">, + Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssran_h_w : GCCBuiltin<"__builtin_lsx_vssran_h_w">, + Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssran_w_d : GCCBuiltin<"__builtin_lsx_vssran_w_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vssrlrn_b_h : GCCBuiltin<"__builtin_lsx_vssrlrn_b_h">, + Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrlrn_h_w : GCCBuiltin<"__builtin_lsx_vssrlrn_h_w">, + Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrlrn_w_d : GCCBuiltin<"__builtin_lsx_vssrlrn_w_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vssrarn_b_h : GCCBuiltin<"__builtin_lsx_vssrarn_b_h">, + Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrarn_h_w : GCCBuiltin<"__builtin_lsx_vssrarn_h_w">, + Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrarn_w_d : GCCBuiltin<"__builtin_lsx_vssrarn_w_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vssrln_bu_h : GCCBuiltin<"__builtin_lsx_vssrln_bu_h">, + Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrln_hu_w : GCCBuiltin<"__builtin_lsx_vssrln_hu_w">, + Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrln_wu_d : GCCBuiltin<"__builtin_lsx_vssrln_wu_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vssran_bu_h : GCCBuiltin<"__builtin_lsx_vssran_bu_h">, + Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssran_hu_w : GCCBuiltin<"__builtin_lsx_vssran_hu_w">, + Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssran_wu_d : GCCBuiltin<"__builtin_lsx_vssran_wu_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vssrlrn_bu_h : GCCBuiltin<"__builtin_lsx_vssrlrn_bu_h">, + Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrlrn_hu_w : 
GCCBuiltin<"__builtin_lsx_vssrlrn_hu_w">, + Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrlrn_wu_d : GCCBuiltin<"__builtin_lsx_vssrlrn_wu_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vssrarn_bu_h : GCCBuiltin<"__builtin_lsx_vssrarn_bu_h">, + Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrarn_hu_w : GCCBuiltin<"__builtin_lsx_vssrarn_hu_w">, + Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrarn_wu_d : GCCBuiltin<"__builtin_lsx_vssrarn_wu_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vandn_v : GCCBuiltin<"__builtin_lsx_vandn_v">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vorn_v : GCCBuiltin<"__builtin_lsx_vorn_v">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfrstp_b : GCCBuiltin<"__builtin_lsx_vfrstp_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; +def int_loongarch_lsx_vfrstp_h : GCCBuiltin<"__builtin_lsx_vfrstp_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + +def int_loongarch_lsx_vadd_q : GCCBuiltin<"__builtin_lsx_vadd_q">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsub_q : GCCBuiltin<"__builtin_lsx_vsub_q">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vsigncov_b : GCCBuiltin<"__builtin_lsx_vsigncov_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; +def int_loongarch_lsx_vsigncov_h : GCCBuiltin<"__builtin_lsx_vsigncov_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; +def int_loongarch_lsx_vsigncov_w : GCCBuiltin<"__builtin_lsx_vsigncov_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; +def int_loongarch_lsx_vsigncov_d : GCCBuiltin<"__builtin_lsx_vsigncov_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + +def int_loongarch_lsx_vfcvt_h_s : GCCBuiltin<"__builtin_lsx_vfcvt_h_s">, + Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfcvt_s_d : GCCBuiltin<"__builtin_lsx_vfcvt_s_d">, + Intrinsic<[llvm_v4f32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vffint_s_l : GCCBuiltin<"__builtin_lsx_vffint_s_l">, + Intrinsic<[llvm_v4f32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; +def int_loongarch_lsx_vftint_w_d : GCCBuiltin<"__builtin_lsx_vftint_w_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vftintrz_w_d : GCCBuiltin<"__builtin_lsx_vftintrz_w_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; +def int_loongarch_lsx_vftintrp_w_d : GCCBuiltin<"__builtin_lsx_vftintrp_w_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; +def int_loongarch_lsx_vftintrm_w_d : GCCBuiltin<"__builtin_lsx_vftintrm_w_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; +def int_loongarch_lsx_vftintrne_w_d : GCCBuiltin<"__builtin_lsx_vftintrne_w_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vbsrl_v : 
GCCBuiltin<"__builtin_lsx_vbsrl_v">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vbsll_v : GCCBuiltin<"__builtin_lsx_vbsll_v">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfrstpi_b : GCCBuiltin<"__builtin_lsx_vfrstpi_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfrstpi_h : GCCBuiltin<"__builtin_lsx_vfrstpi_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vneg_b : GCCBuiltin<"__builtin_lsx_vneg_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vneg_h : GCCBuiltin<"__builtin_lsx_vneg_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vneg_w : GCCBuiltin<"__builtin_lsx_vneg_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vneg_d : GCCBuiltin<"__builtin_lsx_vneg_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vmskgez_b : GCCBuiltin<"__builtin_lsx_vmskgez_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmsknz_b : GCCBuiltin<"__builtin_lsx_vmsknz_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfrintrm_s : GCCBuiltin<"__builtin_lsx_vfrintrm_s">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfrintrm_d : GCCBuiltin<"__builtin_lsx_vfrintrm_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfrintrp_s : GCCBuiltin<"__builtin_lsx_vfrintrp_s">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfrintrp_d : GCCBuiltin<"__builtin_lsx_vfrintrp_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfrintrz_s : GCCBuiltin<"__builtin_lsx_vfrintrz_s">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfrintrz_d : GCCBuiltin<"__builtin_lsx_vfrintrz_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfrintrne_s : GCCBuiltin<"__builtin_lsx_vfrintrne_s">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfrintrne_d : GCCBuiltin<"__builtin_lsx_vfrintrne_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vffinth_d_w : GCCBuiltin<"__builtin_lsx_vffinth_d_w">, + Intrinsic<[llvm_v2f64_ty], [llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vffintl_d_w : GCCBuiltin<"__builtin_lsx_vffintl_d_w">, + Intrinsic<[llvm_v2f64_ty], [llvm_v4i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vftintrm_w_s : GCCBuiltin<"__builtin_lsx_vftintrm_w_s">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vftintrm_l_d : GCCBuiltin<"__builtin_lsx_vftintrm_l_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vftintrp_w_s : GCCBuiltin<"__builtin_lsx_vftintrp_w_s">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vftintrp_l_d : GCCBuiltin<"__builtin_lsx_vftintrp_l_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vftintrz_w_s : GCCBuiltin<"__builtin_lsx_vftintrz_w_s">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vftintrz_l_d : GCCBuiltin<"__builtin_lsx_vftintrz_l_d">, + Intrinsic<[llvm_v2i64_ty], 
+def int_loongarch_lsx_vfrintrm_s : GCCBuiltin<"__builtin_lsx_vfrintrm_s">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfrintrm_d : GCCBuiltin<"__builtin_lsx_vfrintrm_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfrintrp_s : GCCBuiltin<"__builtin_lsx_vfrintrp_s">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfrintrp_d : GCCBuiltin<"__builtin_lsx_vfrintrp_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfrintrz_s : GCCBuiltin<"__builtin_lsx_vfrintrz_s">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfrintrz_d : GCCBuiltin<"__builtin_lsx_vfrintrz_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfrintrne_s : GCCBuiltin<"__builtin_lsx_vfrintrne_s">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfrintrne_d : GCCBuiltin<"__builtin_lsx_vfrintrne_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vffinth_d_w : GCCBuiltin<"__builtin_lsx_vffinth_d_w">,
+  Intrinsic<[llvm_v2f64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vffintl_d_w : GCCBuiltin<"__builtin_lsx_vffintl_d_w">,
+  Intrinsic<[llvm_v2f64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vftintrm_w_s : GCCBuiltin<"__builtin_lsx_vftintrm_w_s">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vftintrm_l_d : GCCBuiltin<"__builtin_lsx_vftintrm_l_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vftintrp_w_s : GCCBuiltin<"__builtin_lsx_vftintrp_w_s">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vftintrp_l_d : GCCBuiltin<"__builtin_lsx_vftintrp_l_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vftintrz_w_s : GCCBuiltin<"__builtin_lsx_vftintrz_w_s">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vftintrz_l_d : GCCBuiltin<"__builtin_lsx_vftintrz_l_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vftintrne_w_s : GCCBuiltin<"__builtin_lsx_vftintrne_w_s">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vftintrne_l_d : GCCBuiltin<"__builtin_lsx_vftintrne_l_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vftinth_l_s : GCCBuiltin<"__builtin_lsx_vftinth_l_s">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vftintl_l_s : GCCBuiltin<"__builtin_lsx_vftintl_l_s">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vftintrmh_l_s : GCCBuiltin<"__builtin_lsx_vftintrmh_l_s">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vftintrml_l_s : GCCBuiltin<"__builtin_lsx_vftintrml_l_s">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vftintrph_l_s : GCCBuiltin<"__builtin_lsx_vftintrph_l_s">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vftintrpl_l_s : GCCBuiltin<"__builtin_lsx_vftintrpl_l_s">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vftintrzh_l_s : GCCBuiltin<"__builtin_lsx_vftintrzh_l_s">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vftintrzl_l_s : GCCBuiltin<"__builtin_lsx_vftintrzl_l_s">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vftintrneh_l_s : GCCBuiltin<"__builtin_lsx_vftintrneh_l_s">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vftintrnel_l_s : GCCBuiltin<"__builtin_lsx_vftintrnel_l_s">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vexth_d_w : GCCBuiltin<"__builtin_lsx_vexth_d_w">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vexth_w_h : GCCBuiltin<"__builtin_lsx_vexth_w_h">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vexth_h_b : GCCBuiltin<"__builtin_lsx_vexth_h_b">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vexth_q_d : GCCBuiltin<"__builtin_lsx_vexth_q_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vexth_du_wu : GCCBuiltin<"__builtin_lsx_vexth_du_wu">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vexth_wu_hu : GCCBuiltin<"__builtin_lsx_vexth_wu_hu">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vexth_hu_bu : GCCBuiltin<"__builtin_lsx_vexth_hu_bu">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vexth_qu_du : GCCBuiltin<"__builtin_lsx_vexth_qu_du">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvexth_du_wu : GCCBuiltin<"__builtin_lasx_xvexth_du_wu">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvexth_wu_hu : GCCBuiltin<"__builtin_lasx_xvexth_wu_hu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvexth_hu_bu : GCCBuiltin<"__builtin_lasx_xvexth_hu_bu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvexth_qu_du : GCCBuiltin<"__builtin_lasx_xvexth_qu_du">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>;
+
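+// vsllwil widens each low-half element to twice its width, then shifts
+// it left by the immediate; the *_u forms zero-extend instead.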
+def int_loongarch_lsx_vsllwil_d_w : GCCBuiltin<"__builtin_lsx_vsllwil_d_w">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsllwil_w_h : GCCBuiltin<"__builtin_lsx_vsllwil_w_h">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsllwil_h_b : GCCBuiltin<"__builtin_lsx_vsllwil_h_b">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vextl_q_d : GCCBuiltin<"__builtin_lsx_vextl_q_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vsllwil_du_wu : GCCBuiltin<"__builtin_lsx_vsllwil_du_wu">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsllwil_wu_hu : GCCBuiltin<"__builtin_lsx_vsllwil_wu_hu">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsllwil_hu_bu : GCCBuiltin<"__builtin_lsx_vsllwil_hu_bu">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vextl_qu_du : GCCBuiltin<"__builtin_lsx_vextl_qu_du">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vbitclri_b : GCCBuiltin<"__builtin_lsx_vbitclri_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vbitclri_h : GCCBuiltin<"__builtin_lsx_vbitclri_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vbitclri_w : GCCBuiltin<"__builtin_lsx_vbitclri_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vbitclri_d : GCCBuiltin<"__builtin_lsx_vbitclri_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vbitseti_b : GCCBuiltin<"__builtin_lsx_vbitseti_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vbitseti_h : GCCBuiltin<"__builtin_lsx_vbitseti_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vbitseti_w : GCCBuiltin<"__builtin_lsx_vbitseti_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vbitseti_d : GCCBuiltin<"__builtin_lsx_vbitseti_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vbitrevi_b : GCCBuiltin<"__builtin_lsx_vbitrevi_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vbitrevi_h : GCCBuiltin<"__builtin_lsx_vbitrevi_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vbitrevi_w : GCCBuiltin<"__builtin_lsx_vbitrevi_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vbitrevi_d : GCCBuiltin<"__builtin_lsx_vbitrevi_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
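+// The *ni forms shift right by an immediate, narrow to half-width
+// elements, and pack the results of both vector operands into one vector.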
+def int_loongarch_lsx_vssrlrni_b_h : GCCBuiltin<"__builtin_lsx_vssrlrni_b_h">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vssrlrni_h_w : GCCBuiltin<"__builtin_lsx_vssrlrni_h_w">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vssrlrni_w_d : GCCBuiltin<"__builtin_lsx_vssrlrni_w_d">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vssrlrni_d_q : GCCBuiltin<"__builtin_lsx_vssrlrni_d_q">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vsrani_b_h : GCCBuiltin<"__builtin_lsx_vsrani_b_h">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsrani_h_w : GCCBuiltin<"__builtin_lsx_vsrani_h_w">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsrani_w_d : GCCBuiltin<"__builtin_lsx_vsrani_w_d">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsrani_d_q : GCCBuiltin<"__builtin_lsx_vsrani_d_q">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vextrins_b : GCCBuiltin<"__builtin_lsx_vextrins_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vextrins_h : GCCBuiltin<"__builtin_lsx_vextrins_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vextrins_w : GCCBuiltin<"__builtin_lsx_vextrins_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vextrins_d : GCCBuiltin<"__builtin_lsx_vextrins_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vbitseli_b : GCCBuiltin<"__builtin_lsx_vbitseli_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vandi_b : GCCBuiltin<"__builtin_lsx_vandi_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vori_b : GCCBuiltin<"__builtin_lsx_vori_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vxori_b : GCCBuiltin<"__builtin_lsx_vxori_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vnori_b : GCCBuiltin<"__builtin_lsx_vnori_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vldi : GCCBuiltin<"__builtin_lsx_vldi">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vpermi_w : GCCBuiltin<"__builtin_lsx_vpermi_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+
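+// Saturating add/subtract; the adds carry Commutative so the backend may
+// freely reorder their operands.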
+def int_loongarch_lsx_vsadd_b : GCCBuiltin<"__builtin_lsx_vsadd_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lsx_vsadd_h : GCCBuiltin<"__builtin_lsx_vsadd_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lsx_vsadd_w : GCCBuiltin<"__builtin_lsx_vsadd_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lsx_vsadd_d : GCCBuiltin<"__builtin_lsx_vsadd_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+  [Commutative, IntrNoMem]>;
+
+def int_loongarch_lsx_vssub_b : GCCBuiltin<"__builtin_lsx_vssub_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vssub_h : GCCBuiltin<"__builtin_lsx_vssub_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vssub_w : GCCBuiltin<"__builtin_lsx_vssub_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vssub_d : GCCBuiltin<"__builtin_lsx_vssub_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vsadd_bu : GCCBuiltin<"__builtin_lsx_vsadd_bu">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lsx_vsadd_hu : GCCBuiltin<"__builtin_lsx_vsadd_hu">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lsx_vsadd_wu : GCCBuiltin<"__builtin_lsx_vsadd_wu">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lsx_vsadd_du : GCCBuiltin<"__builtin_lsx_vsadd_du">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+  [Commutative, IntrNoMem]>;
+
+def int_loongarch_lsx_vssub_bu : GCCBuiltin<"__builtin_lsx_vssub_bu">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vssub_hu : GCCBuiltin<"__builtin_lsx_vssub_hu">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vssub_wu : GCCBuiltin<"__builtin_lsx_vssub_wu">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vssub_du : GCCBuiltin<"__builtin_lsx_vssub_du">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vhaddw_h_b : GCCBuiltin<"__builtin_lsx_vhaddw_h_b">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vhaddw_w_h : GCCBuiltin<"__builtin_lsx_vhaddw_w_h">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vhaddw_d_w : GCCBuiltin<"__builtin_lsx_vhaddw_d_w">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vhsubw_h_b : GCCBuiltin<"__builtin_lsx_vhsubw_h_b">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vhsubw_w_h : GCCBuiltin<"__builtin_lsx_vhsubw_w_h">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vhsubw_d_w : GCCBuiltin<"__builtin_lsx_vhsubw_d_w">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vhaddw_hu_bu : GCCBuiltin<"__builtin_lsx_vhaddw_hu_bu">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vhaddw_wu_hu : GCCBuiltin<"__builtin_lsx_vhaddw_wu_hu">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vhaddw_du_wu : GCCBuiltin<"__builtin_lsx_vhaddw_du_wu">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vhsubw_hu_bu : GCCBuiltin<"__builtin_lsx_vhsubw_hu_bu">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vhsubw_wu_hu : GCCBuiltin<"__builtin_lsx_vhsubw_wu_hu">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vhsubw_du_wu : GCCBuiltin<"__builtin_lsx_vhsubw_du_wu">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+
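+// vadda adds the absolute values of the operands; vabsd computes the
+// absolute difference.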
+def int_loongarch_lsx_vadda_b : GCCBuiltin<"__builtin_lsx_vadda_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lsx_vadda_h : GCCBuiltin<"__builtin_lsx_vadda_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lsx_vadda_w : GCCBuiltin<"__builtin_lsx_vadda_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lsx_vadda_d : GCCBuiltin<"__builtin_lsx_vadda_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+  [Commutative, IntrNoMem]>;
+
+def int_loongarch_lsx_vabsd_b : GCCBuiltin<"__builtin_lsx_vabsd_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vabsd_h : GCCBuiltin<"__builtin_lsx_vabsd_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vabsd_w : GCCBuiltin<"__builtin_lsx_vabsd_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vabsd_d : GCCBuiltin<"__builtin_lsx_vabsd_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vabsd_bu : GCCBuiltin<"__builtin_lsx_vabsd_bu">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vabsd_hu : GCCBuiltin<"__builtin_lsx_vabsd_hu">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vabsd_wu : GCCBuiltin<"__builtin_lsx_vabsd_wu">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vabsd_du : GCCBuiltin<"__builtin_lsx_vabsd_du">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vavg_b : GCCBuiltin<"__builtin_lsx_vavg_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lsx_vavg_h : GCCBuiltin<"__builtin_lsx_vavg_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lsx_vavg_w : GCCBuiltin<"__builtin_lsx_vavg_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lsx_vavg_d : GCCBuiltin<"__builtin_lsx_vavg_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+  [Commutative, IntrNoMem]>;
+
+def int_loongarch_lsx_vavg_bu : GCCBuiltin<"__builtin_lsx_vavg_bu">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lsx_vavg_hu : GCCBuiltin<"__builtin_lsx_vavg_hu">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lsx_vavg_wu : GCCBuiltin<"__builtin_lsx_vavg_wu">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lsx_vavg_du : GCCBuiltin<"__builtin_lsx_vavg_du">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+  [Commutative, IntrNoMem]>;
+
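+// vavg truncates the halved sum; vavgr adds one before halving
+// (rounded average).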
+def int_loongarch_lsx_vavgr_b : GCCBuiltin<"__builtin_lsx_vavgr_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lsx_vavgr_h : GCCBuiltin<"__builtin_lsx_vavgr_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lsx_vavgr_w : GCCBuiltin<"__builtin_lsx_vavgr_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lsx_vavgr_d : GCCBuiltin<"__builtin_lsx_vavgr_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+  [Commutative, IntrNoMem]>;
+
+def int_loongarch_lsx_vavgr_bu : GCCBuiltin<"__builtin_lsx_vavgr_bu">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lsx_vavgr_hu : GCCBuiltin<"__builtin_lsx_vavgr_hu">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lsx_vavgr_wu : GCCBuiltin<"__builtin_lsx_vavgr_wu">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lsx_vavgr_du : GCCBuiltin<"__builtin_lsx_vavgr_du">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+  [Commutative, IntrNoMem]>;
+
+def int_loongarch_lsx_vsrlr_b : GCCBuiltin<"__builtin_lsx_vsrlr_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsrlr_h : GCCBuiltin<"__builtin_lsx_vsrlr_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsrlr_w : GCCBuiltin<"__builtin_lsx_vsrlr_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsrlr_d : GCCBuiltin<"__builtin_lsx_vsrlr_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vsrar_b : GCCBuiltin<"__builtin_lsx_vsrar_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsrar_h : GCCBuiltin<"__builtin_lsx_vsrar_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsrar_w : GCCBuiltin<"__builtin_lsx_vsrar_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsrar_d : GCCBuiltin<"__builtin_lsx_vsrar_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfmax_s : GCCBuiltin<"__builtin_lsx_vfmax_s">,
+  Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfmax_d : GCCBuiltin<"__builtin_lsx_vfmax_d">,
+  Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfmin_s : GCCBuiltin<"__builtin_lsx_vfmin_s">,
+  Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfmin_d : GCCBuiltin<"__builtin_lsx_vfmin_d">,
+  Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfmaxa_s : GCCBuiltin<"__builtin_lsx_vfmaxa_s">,
+  Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfmaxa_d : GCCBuiltin<"__builtin_lsx_vfmaxa_d">,
+  Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfmina_s : GCCBuiltin<"__builtin_lsx_vfmina_s">,
+  Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfmina_d : GCCBuiltin<"__builtin_lsx_vfmina_d">,
+  Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfclass_s : GCCBuiltin<"__builtin_lsx_vfclass_s">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfclass_d : GCCBuiltin<"__builtin_lsx_vfclass_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfrecip_s : GCCBuiltin<"__builtin_lsx_vfrecip_s">,
+  Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfrecip_d : GCCBuiltin<"__builtin_lsx_vfrecip_d">,
+  Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfrsqrt_s : GCCBuiltin<"__builtin_lsx_vfrsqrt_s">,
+  Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfrsqrt_d : GCCBuiltin<"__builtin_lsx_vfrsqrt_d">,
+  Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+
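+// vfcvtl/vfcvth convert the low/high half of the source vector to the
+// next wider floating-point format.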
+def int_loongarch_lsx_vfcvtl_s_h : GCCBuiltin<"__builtin_lsx_vfcvtl_s_h">,
+  Intrinsic<[llvm_v4f32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfcvtl_d_s : GCCBuiltin<"__builtin_lsx_vfcvtl_d_s">,
+  Intrinsic<[llvm_v2f64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfcvth_s_h : GCCBuiltin<"__builtin_lsx_vfcvth_s_h">,
+  Intrinsic<[llvm_v4f32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfcvth_d_s : GCCBuiltin<"__builtin_lsx_vfcvth_d_s">,
+  Intrinsic<[llvm_v2f64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vftint_w_s : GCCBuiltin<"__builtin_lsx_vftint_w_s">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vftint_l_d : GCCBuiltin<"__builtin_lsx_vftint_l_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vftint_wu_s : GCCBuiltin<"__builtin_lsx_vftint_wu_s">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vftint_lu_d : GCCBuiltin<"__builtin_lsx_vftint_lu_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vsrlri_b : GCCBuiltin<"__builtin_lsx_vsrlri_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsrlri_h : GCCBuiltin<"__builtin_lsx_vsrlri_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsrlri_w : GCCBuiltin<"__builtin_lsx_vsrlri_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsrlri_d : GCCBuiltin<"__builtin_lsx_vsrlri_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vsrari_b : GCCBuiltin<"__builtin_lsx_vsrari_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsrari_h : GCCBuiltin<"__builtin_lsx_vsrari_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsrari_w : GCCBuiltin<"__builtin_lsx_vsrari_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsrari_d : GCCBuiltin<"__builtin_lsx_vsrari_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
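+// vsat saturates each element to the signed (or, for the *_u forms,
+// unsigned) range selected by the immediate bit width.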
+def int_loongarch_lsx_vsat_b : GCCBuiltin<"__builtin_lsx_vsat_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsat_h : GCCBuiltin<"__builtin_lsx_vsat_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsat_w : GCCBuiltin<"__builtin_lsx_vsat_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsat_d : GCCBuiltin<"__builtin_lsx_vsat_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vsat_bu : GCCBuiltin<"__builtin_lsx_vsat_bu">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsat_hu : GCCBuiltin<"__builtin_lsx_vsat_hu">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsat_wu : GCCBuiltin<"__builtin_lsx_vsat_wu">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsat_du : GCCBuiltin<"__builtin_lsx_vsat_du">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vsrlni_b_h : GCCBuiltin<"__builtin_lsx_vsrlni_b_h">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsrlni_h_w : GCCBuiltin<"__builtin_lsx_vsrlni_h_w">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsrlni_w_d : GCCBuiltin<"__builtin_lsx_vsrlni_w_d">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsrlni_d_q : GCCBuiltin<"__builtin_lsx_vsrlni_d_q">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vssrlni_b_h : GCCBuiltin<"__builtin_lsx_vssrlni_b_h">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vssrlni_h_w : GCCBuiltin<"__builtin_lsx_vssrlni_h_w">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vssrlni_w_d : GCCBuiltin<"__builtin_lsx_vssrlni_w_d">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vssrlni_d_q : GCCBuiltin<"__builtin_lsx_vssrlni_d_q">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vssrlrni_bu_h : GCCBuiltin<"__builtin_lsx_vssrlrni_bu_h">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vssrlrni_hu_w : GCCBuiltin<"__builtin_lsx_vssrlrni_hu_w">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vssrlrni_wu_d : GCCBuiltin<"__builtin_lsx_vssrlrni_wu_d">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vssrlrni_du_q : GCCBuiltin<"__builtin_lsx_vssrlrni_du_q">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vsrarni_b_h : GCCBuiltin<"__builtin_lsx_vsrarni_b_h">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsrarni_h_w : GCCBuiltin<"__builtin_lsx_vsrarni_h_w">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsrarni_w_d : GCCBuiltin<"__builtin_lsx_vsrarni_w_d">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsrarni_d_q : GCCBuiltin<"__builtin_lsx_vsrarni_d_q">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vssrani_b_h : GCCBuiltin<"__builtin_lsx_vssrani_b_h">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vssrani_h_w : GCCBuiltin<"__builtin_lsx_vssrani_h_w">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vssrani_w_d : GCCBuiltin<"__builtin_lsx_vssrani_w_d">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vssrani_d_q : GCCBuiltin<"__builtin_lsx_vssrani_d_q">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vssrani_bu_h : GCCBuiltin<"__builtin_lsx_vssrani_bu_h">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vssrani_hu_w : GCCBuiltin<"__builtin_lsx_vssrani_hu_w">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vssrani_wu_d : GCCBuiltin<"__builtin_lsx_vssrani_wu_d">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vssrani_du_q : GCCBuiltin<"__builtin_lsx_vssrani_du_q">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vssrarni_b_h : GCCBuiltin<"__builtin_lsx_vssrarni_b_h">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vssrarni_h_w : GCCBuiltin<"__builtin_lsx_vssrarni_h_w">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vssrarni_w_d : GCCBuiltin<"__builtin_lsx_vssrarni_w_d">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vssrarni_d_q : GCCBuiltin<"__builtin_lsx_vssrarni_d_q">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vssrarni_bu_h : GCCBuiltin<"__builtin_lsx_vssrarni_bu_h">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vssrarni_hu_w : GCCBuiltin<"__builtin_lsx_vssrarni_hu_w">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vssrarni_wu_d : GCCBuiltin<"__builtin_lsx_vssrarni_wu_d">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vssrarni_du_q : GCCBuiltin<"__builtin_lsx_vssrarni_du_q">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vssrlni_bu_h : GCCBuiltin<"__builtin_lsx_vssrlni_bu_h">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vssrlni_hu_w : GCCBuiltin<"__builtin_lsx_vssrlni_hu_w">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vssrlni_wu_d : GCCBuiltin<"__builtin_lsx_vssrlni_wu_d">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vssrlni_du_q : GCCBuiltin<"__builtin_lsx_vssrlni_du_q">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
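+// Integer comparisons set each result element to all ones when the
+// predicate holds and to zero otherwise; the *_u forms compare unsigned.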
GCCBuiltin<"__builtin_lsx_vsle_bu">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsle_hu : GCCBuiltin<"__builtin_lsx_vsle_hu">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsle_wu : GCCBuiltin<"__builtin_lsx_vsle_wu">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsle_du : GCCBuiltin<"__builtin_lsx_vsle_du">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vslt_b : GCCBuiltin<"__builtin_lsx_vslt_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vslt_h : GCCBuiltin<"__builtin_lsx_vslt_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vslt_w : GCCBuiltin<"__builtin_lsx_vslt_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vslt_d : GCCBuiltin<"__builtin_lsx_vslt_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vslt_bu : GCCBuiltin<"__builtin_lsx_vslt_bu">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vslt_hu : GCCBuiltin<"__builtin_lsx_vslt_hu">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vslt_wu : GCCBuiltin<"__builtin_lsx_vslt_wu">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vslt_du : GCCBuiltin<"__builtin_lsx_vslt_du">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vadd_b : GCCBuiltin<"__builtin_lsx_vadd_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [Commutative, IntrNoMem]>; +def int_loongarch_lsx_vadd_h : GCCBuiltin<"__builtin_lsx_vadd_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [Commutative, IntrNoMem]>; +def int_loongarch_lsx_vadd_w : GCCBuiltin<"__builtin_lsx_vadd_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [Commutative, IntrNoMem]>; +def int_loongarch_lsx_vadd_d : GCCBuiltin<"__builtin_lsx_vadd_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [Commutative, IntrNoMem]>; + +def int_loongarch_lsx_vsub_b : GCCBuiltin<"__builtin_lsx_vsub_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsub_h : GCCBuiltin<"__builtin_lsx_vsub_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsub_w : GCCBuiltin<"__builtin_lsx_vsub_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsub_d : GCCBuiltin<"__builtin_lsx_vsub_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vmax_b : GCCBuiltin<"__builtin_lsx_vmax_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmax_h : GCCBuiltin<"__builtin_lsx_vmax_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmax_w : GCCBuiltin<"__builtin_lsx_vmax_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmax_d : GCCBuiltin<"__builtin_lsx_vmax_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vmin_b : GCCBuiltin<"__builtin_lsx_vmin_b">, 
+def int_loongarch_lsx_vmin_b : GCCBuiltin<"__builtin_lsx_vmin_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmin_h : GCCBuiltin<"__builtin_lsx_vmin_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmin_w : GCCBuiltin<"__builtin_lsx_vmin_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmin_d : GCCBuiltin<"__builtin_lsx_vmin_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vmax_bu : GCCBuiltin<"__builtin_lsx_vmax_bu">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmax_hu : GCCBuiltin<"__builtin_lsx_vmax_hu">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmax_wu : GCCBuiltin<"__builtin_lsx_vmax_wu">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmax_du : GCCBuiltin<"__builtin_lsx_vmax_du">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vmin_bu : GCCBuiltin<"__builtin_lsx_vmin_bu">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmin_hu : GCCBuiltin<"__builtin_lsx_vmin_hu">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmin_wu : GCCBuiltin<"__builtin_lsx_vmin_wu">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmin_du : GCCBuiltin<"__builtin_lsx_vmin_du">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vmul_b : GCCBuiltin<"__builtin_lsx_vmul_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmul_h : GCCBuiltin<"__builtin_lsx_vmul_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmul_w : GCCBuiltin<"__builtin_lsx_vmul_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmul_d : GCCBuiltin<"__builtin_lsx_vmul_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vmadd_b : GCCBuiltin<"__builtin_lsx_vmadd_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
+  [IntrNoMem]>;
+def int_loongarch_lsx_vmadd_h : GCCBuiltin<"__builtin_lsx_vmadd_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty],
+  [IntrNoMem]>;
+def int_loongarch_lsx_vmadd_w : GCCBuiltin<"__builtin_lsx_vmadd_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
+  [IntrNoMem]>;
+def int_loongarch_lsx_vmadd_d : GCCBuiltin<"__builtin_lsx_vmadd_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
+  [IntrNoMem]>;
+
+def int_loongarch_lsx_vmsub_b : GCCBuiltin<"__builtin_lsx_vmsub_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
+  [IntrNoMem]>;
+def int_loongarch_lsx_vmsub_h : GCCBuiltin<"__builtin_lsx_vmsub_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty],
+  [IntrNoMem]>;
+def int_loongarch_lsx_vmsub_w : GCCBuiltin<"__builtin_lsx_vmsub_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
+  [IntrNoMem]>;
+def int_loongarch_lsx_vmsub_d : GCCBuiltin<"__builtin_lsx_vmsub_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
+  [IntrNoMem]>;
+
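+// Element-wise division and remainder; the *_bu/_hu/_wu/_du forms
+// operate on unsigned elements.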
+def int_loongarch_lsx_vdiv_b : GCCBuiltin<"__builtin_lsx_vdiv_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vdiv_h : GCCBuiltin<"__builtin_lsx_vdiv_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vdiv_w : GCCBuiltin<"__builtin_lsx_vdiv_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vdiv_d : GCCBuiltin<"__builtin_lsx_vdiv_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vmod_b : GCCBuiltin<"__builtin_lsx_vmod_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmod_h : GCCBuiltin<"__builtin_lsx_vmod_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmod_w : GCCBuiltin<"__builtin_lsx_vmod_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmod_d : GCCBuiltin<"__builtin_lsx_vmod_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vdiv_bu : GCCBuiltin<"__builtin_lsx_vdiv_bu">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vdiv_hu : GCCBuiltin<"__builtin_lsx_vdiv_hu">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vdiv_wu : GCCBuiltin<"__builtin_lsx_vdiv_wu">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vdiv_du : GCCBuiltin<"__builtin_lsx_vdiv_du">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vsll_b : GCCBuiltin<"__builtin_lsx_vsll_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsll_h : GCCBuiltin<"__builtin_lsx_vsll_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsll_w : GCCBuiltin<"__builtin_lsx_vsll_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsll_d : GCCBuiltin<"__builtin_lsx_vsll_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vsrl_b : GCCBuiltin<"__builtin_lsx_vsrl_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsrl_h : GCCBuiltin<"__builtin_lsx_vsrl_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsrl_w : GCCBuiltin<"__builtin_lsx_vsrl_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsrl_d : GCCBuiltin<"__builtin_lsx_vsrl_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vbitclr_b : GCCBuiltin<"__builtin_lsx_vbitclr_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vbitclr_h : GCCBuiltin<"__builtin_lsx_vbitclr_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vbitclr_w : GCCBuiltin<"__builtin_lsx_vbitclr_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vbitclr_d : GCCBuiltin<"__builtin_lsx_vbitclr_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vbitset_b : GCCBuiltin<"__builtin_lsx_vbitset_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vbitset_h : GCCBuiltin<"__builtin_lsx_vbitset_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vbitset_w : GCCBuiltin<"__builtin_lsx_vbitset_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vbitset_d : GCCBuiltin<"__builtin_lsx_vbitset_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vpackev_b : GCCBuiltin<"__builtin_lsx_vpackev_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vpackev_h : GCCBuiltin<"__builtin_lsx_vpackev_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vpackev_w : GCCBuiltin<"__builtin_lsx_vpackev_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vpackev_d : GCCBuiltin<"__builtin_lsx_vpackev_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vpackod_b : GCCBuiltin<"__builtin_lsx_vpackod_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vpackod_h : GCCBuiltin<"__builtin_lsx_vpackod_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vpackod_w : GCCBuiltin<"__builtin_lsx_vpackod_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vpackod_d : GCCBuiltin<"__builtin_lsx_vpackod_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vilvl_b : GCCBuiltin<"__builtin_lsx_vilvl_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vilvl_h : GCCBuiltin<"__builtin_lsx_vilvl_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vilvl_w : GCCBuiltin<"__builtin_lsx_vilvl_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vilvl_d : GCCBuiltin<"__builtin_lsx_vilvl_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vilvh_b : GCCBuiltin<"__builtin_lsx_vilvh_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vilvh_h : GCCBuiltin<"__builtin_lsx_vilvh_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vilvh_w : GCCBuiltin<"__builtin_lsx_vilvh_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vilvh_d : GCCBuiltin<"__builtin_lsx_vilvh_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vpickev_b : GCCBuiltin<"__builtin_lsx_vpickev_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vpickev_h : GCCBuiltin<"__builtin_lsx_vpickev_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vpickev_w : GCCBuiltin<"__builtin_lsx_vpickev_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vpickev_d : GCCBuiltin<"__builtin_lsx_vpickev_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
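+// The untyped bitwise ops (.v suffix) act on the full 128 bits and are
+// declared on v16i8 by convention.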
+def int_loongarch_lsx_vand_v : GCCBuiltin<"__builtin_lsx_vand_v">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vor_v : GCCBuiltin<"__builtin_lsx_vor_v">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vbitrev_b : GCCBuiltin<"__builtin_lsx_vbitrev_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vbitrev_h : GCCBuiltin<"__builtin_lsx_vbitrev_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vbitrev_w : GCCBuiltin<"__builtin_lsx_vbitrev_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vbitrev_d : GCCBuiltin<"__builtin_lsx_vbitrev_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vmod_bu : GCCBuiltin<"__builtin_lsx_vmod_bu">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmod_hu : GCCBuiltin<"__builtin_lsx_vmod_hu">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmod_wu : GCCBuiltin<"__builtin_lsx_vmod_wu">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmod_du : GCCBuiltin<"__builtin_lsx_vmod_du">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vpickod_b : GCCBuiltin<"__builtin_lsx_vpickod_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vpickod_h : GCCBuiltin<"__builtin_lsx_vpickod_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vpickod_w : GCCBuiltin<"__builtin_lsx_vpickod_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vpickod_d : GCCBuiltin<"__builtin_lsx_vpickod_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vreplve_b : GCCBuiltin<"__builtin_lsx_vreplve_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vreplve_h : GCCBuiltin<"__builtin_lsx_vreplve_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vreplve_w : GCCBuiltin<"__builtin_lsx_vreplve_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vreplve_d : GCCBuiltin<"__builtin_lsx_vreplve_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vsra_b : GCCBuiltin<"__builtin_lsx_vsra_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsra_h : GCCBuiltin<"__builtin_lsx_vsra_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsra_w : GCCBuiltin<"__builtin_lsx_vsra_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsra_d : GCCBuiltin<"__builtin_lsx_vsra_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vxor_v : GCCBuiltin<"__builtin_lsx_vxor_v">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vnor_v : GCCBuiltin<"__builtin_lsx_vnor_v">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfadd_s : GCCBuiltin<"__builtin_lsx_vfadd_s">,
+  Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfadd_d : GCCBuiltin<"__builtin_lsx_vfadd_d">,
+  Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfsub_s : GCCBuiltin<"__builtin_lsx_vfsub_s">,
+  Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfsub_d : GCCBuiltin<"__builtin_lsx_vfsub_d">,
+  Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfmul_s : GCCBuiltin<"__builtin_lsx_vfmul_s">,
+  Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfmul_d : GCCBuiltin<"__builtin_lsx_vfmul_d">,
+  Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+
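+// vshuf selects elements from the two data operands according to
+// per-element indices in the control vector (see the LSX manual for the
+// exact lane encoding).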
+def int_loongarch_lsx_vshuf_h : GCCBuiltin<"__builtin_lsx_vshuf_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty],
+  [IntrNoMem]>;
+def int_loongarch_lsx_vshuf_w : GCCBuiltin<"__builtin_lsx_vshuf_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
+  [IntrNoMem]>;
+def int_loongarch_lsx_vshuf_d : GCCBuiltin<"__builtin_lsx_vshuf_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
+  [IntrNoMem]>;
+
+def int_loongarch_lsx_vseqi_b : GCCBuiltin<"__builtin_lsx_vseqi_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vseqi_h : GCCBuiltin<"__builtin_lsx_vseqi_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vseqi_w : GCCBuiltin<"__builtin_lsx_vseqi_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vseqi_d : GCCBuiltin<"__builtin_lsx_vseqi_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vslei_b : GCCBuiltin<"__builtin_lsx_vslei_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vslei_h : GCCBuiltin<"__builtin_lsx_vslei_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vslei_w : GCCBuiltin<"__builtin_lsx_vslei_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vslei_d : GCCBuiltin<"__builtin_lsx_vslei_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vslei_bu : GCCBuiltin<"__builtin_lsx_vslei_bu">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vslei_hu : GCCBuiltin<"__builtin_lsx_vslei_hu">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vslei_wu : GCCBuiltin<"__builtin_lsx_vslei_wu">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vslei_du : GCCBuiltin<"__builtin_lsx_vslei_du">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vslti_b : GCCBuiltin<"__builtin_lsx_vslti_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vslti_h : GCCBuiltin<"__builtin_lsx_vslti_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vslti_w : GCCBuiltin<"__builtin_lsx_vslti_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vslti_d : GCCBuiltin<"__builtin_lsx_vslti_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vslti_bu : GCCBuiltin<"__builtin_lsx_vslti_bu">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vslti_hu : GCCBuiltin<"__builtin_lsx_vslti_hu">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vslti_wu : GCCBuiltin<"__builtin_lsx_vslti_wu">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vslti_du : GCCBuiltin<"__builtin_lsx_vslti_du">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vaddi_bu : GCCBuiltin<"__builtin_lsx_vaddi_bu">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lsx_vaddi_hu : GCCBuiltin<"__builtin_lsx_vaddi_hu">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lsx_vaddi_wu : GCCBuiltin<"__builtin_lsx_vaddi_wu">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lsx_vaddi_du : GCCBuiltin<"__builtin_lsx_vaddi_du">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty],
+  [Commutative, IntrNoMem]>;
+
+def int_loongarch_lsx_vsubi_bu : GCCBuiltin<"__builtin_lsx_vsubi_bu">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsubi_hu : GCCBuiltin<"__builtin_lsx_vsubi_hu">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsubi_wu : GCCBuiltin<"__builtin_lsx_vsubi_wu">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsubi_du : GCCBuiltin<"__builtin_lsx_vsubi_du">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vmaxi_b : GCCBuiltin<"__builtin_lsx_vmaxi_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmaxi_h : GCCBuiltin<"__builtin_lsx_vmaxi_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmaxi_w : GCCBuiltin<"__builtin_lsx_vmaxi_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmaxi_d : GCCBuiltin<"__builtin_lsx_vmaxi_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vmini_b : GCCBuiltin<"__builtin_lsx_vmini_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmini_h : GCCBuiltin<"__builtin_lsx_vmini_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmini_w : GCCBuiltin<"__builtin_lsx_vmini_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmini_d : GCCBuiltin<"__builtin_lsx_vmini_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vmaxi_bu : GCCBuiltin<"__builtin_lsx_vmaxi_bu">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmaxi_hu : GCCBuiltin<"__builtin_lsx_vmaxi_hu">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmaxi_wu : GCCBuiltin<"__builtin_lsx_vmaxi_wu">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmaxi_du : GCCBuiltin<"__builtin_lsx_vmaxi_du">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vmini_bu : GCCBuiltin<"__builtin_lsx_vmini_bu">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmini_hu : GCCBuiltin<"__builtin_lsx_vmini_hu">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmini_wu : GCCBuiltin<"__builtin_lsx_vmini_wu">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vmini_du : GCCBuiltin<"__builtin_lsx_vmini_du">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vclz_b : GCCBuiltin<"__builtin_lsx_vclz_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vclz_h : GCCBuiltin<"__builtin_lsx_vclz_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vclz_w : GCCBuiltin<"__builtin_lsx_vclz_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vclz_d : GCCBuiltin<"__builtin_lsx_vclz_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vpcnt_b : GCCBuiltin<"__builtin_lsx_vpcnt_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vpcnt_h : GCCBuiltin<"__builtin_lsx_vpcnt_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vpcnt_w : GCCBuiltin<"__builtin_lsx_vpcnt_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vpcnt_d : GCCBuiltin<"__builtin_lsx_vpcnt_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfsqrt_s : GCCBuiltin<"__builtin_lsx_vfsqrt_s">,
+  Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfsqrt_d : GCCBuiltin<"__builtin_lsx_vfsqrt_d">,
+  Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vfrint_s : GCCBuiltin<"__builtin_lsx_vfrint_s">,
+  Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfrint_d : GCCBuiltin<"__builtin_lsx_vfrint_d">,
+  Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vffint_s_w : GCCBuiltin<"__builtin_lsx_vffint_s_w">,
+  Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vffint_d_l : GCCBuiltin<"__builtin_lsx_vffint_d_l">,
+  Intrinsic<[llvm_v2f64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vffint_s_wu : GCCBuiltin<"__builtin_lsx_vffint_s_wu">,
+  Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vffint_d_lu : GCCBuiltin<"__builtin_lsx_vffint_d_lu">,
+  Intrinsic<[llvm_v2f64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vftintrz_wu_s : GCCBuiltin<"__builtin_lsx_vftintrz_wu_s">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vftintrz_lu_d : GCCBuiltin<"__builtin_lsx_vftintrz_lu_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vreplgr2vr_b : GCCBuiltin<"__builtin_lsx_vreplgr2vr_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vreplgr2vr_h : GCCBuiltin<"__builtin_lsx_vreplgr2vr_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vreplgr2vr_w : GCCBuiltin<"__builtin_lsx_vreplgr2vr_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vreplgr2vr_d : GCCBuiltin<"__builtin_lsx_vreplgr2vr_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_i64_ty], [IntrNoMem]>;
+
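+// vreplgr2vr above broadcasts a GPR into every lane (the _d form takes an
+// i64, the narrower forms an i32); vinsgr2vr below inserts a GPR into the
+// lane selected by the trailing immediate index. Illustrative IR use:
+//   %v = call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 %x)
+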
+def int_loongarch_lsx_vinsgr2vr_b : GCCBuiltin<"__builtin_lsx_vinsgr2vr_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty],
+  [IntrNoMem]>;
+def int_loongarch_lsx_vinsgr2vr_h : GCCBuiltin<"__builtin_lsx_vinsgr2vr_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty, llvm_i32_ty],
+  [IntrNoMem]>;
+def int_loongarch_lsx_vinsgr2vr_w : GCCBuiltin<"__builtin_lsx_vinsgr2vr_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty, llvm_i32_ty],
+  [IntrNoMem]>;
+def int_loongarch_lsx_vinsgr2vr_d : GCCBuiltin<"__builtin_lsx_vinsgr2vr_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i64_ty, llvm_i32_ty],
+  [IntrNoMem]>;
+
+def int_loongarch_lsx_vfdiv_s : GCCBuiltin<"__builtin_lsx_vfdiv_s">,
+  Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vfdiv_d : GCCBuiltin<"__builtin_lsx_vfdiv_d">,
+  Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vslli_b : GCCBuiltin<"__builtin_lsx_vslli_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vslli_h : GCCBuiltin<"__builtin_lsx_vslli_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vslli_w : GCCBuiltin<"__builtin_lsx_vslli_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vslli_d : GCCBuiltin<"__builtin_lsx_vslli_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vsrli_b : GCCBuiltin<"__builtin_lsx_vsrli_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsrli_h : GCCBuiltin<"__builtin_lsx_vsrli_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsrli_w : GCCBuiltin<"__builtin_lsx_vsrli_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsrli_d : GCCBuiltin<"__builtin_lsx_vsrli_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vsrai_b : GCCBuiltin<"__builtin_lsx_vsrai_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsrai_h : GCCBuiltin<"__builtin_lsx_vsrai_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsrai_w : GCCBuiltin<"__builtin_lsx_vsrai_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vsrai_d : GCCBuiltin<"__builtin_lsx_vsrai_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vshuf4i_b : GCCBuiltin<"__builtin_lsx_vshuf4i_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vshuf4i_h : GCCBuiltin<"__builtin_lsx_vshuf4i_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vshuf4i_w : GCCBuiltin<"__builtin_lsx_vshuf4i_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vshuf4i_d : GCCBuiltin<"__builtin_lsx_vshuf4i_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vrotr_b : GCCBuiltin<"__builtin_lsx_vrotr_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vrotr_h : GCCBuiltin<"__builtin_lsx_vrotr_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vrotr_w : GCCBuiltin<"__builtin_lsx_vrotr_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vrotr_d : GCCBuiltin<"__builtin_lsx_vrotr_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vrotri_b : GCCBuiltin<"__builtin_lsx_vrotri_b">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vrotri_h : GCCBuiltin<"__builtin_lsx_vrotri_h">,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vrotri_w : GCCBuiltin<"__builtin_lsx_vrotri_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_vrotri_d : GCCBuiltin<"__builtin_lsx_vrotri_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_vld : GCCBuiltin<"__builtin_lsx_vld">,
+  Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty],
+  [IntrReadMem, IntrArgMemOnly]>;
+
+def int_loongarch_lsx_vst : GCCBuiltin<"__builtin_lsx_vst">,
+  Intrinsic<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i32_ty],
+  [IntrArgMemOnly]>;
+
+def int_loongarch_lsx_bz_v : GCCBuiltin<"__builtin_lsx_bz_v">,
+  Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_bnz_v : GCCBuiltin<"__builtin_lsx_bnz_v">,
+  Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_bz_b : GCCBuiltin<"__builtin_lsx_bz_b">,
+  Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_bz_h : GCCBuiltin<"__builtin_lsx_bz_h">,
+  Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_bz_w : GCCBuiltin<"__builtin_lsx_bz_w">,
+  Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_bz_d : GCCBuiltin<"__builtin_lsx_bz_d">,
+  Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lsx_bnz_b : GCCBuiltin<"__builtin_lsx_bnz_b">,
+  Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lsx_bnz_h : GCCBuiltin<"__builtin_lsx_bnz_h">,
+  Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+def int_loongarch_lsx_bnz_w : GCCBuiltin<"__builtin_lsx_bnz_w">,
+  Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+def int_loongarch_lsx_bnz_d : GCCBuiltin<"__builtin_lsx_bnz_d">,
+  Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty], [IntrNoMem]>;
+
+//===----------------------------------------------------------------------===//
+// LoongArch LASX
+
+def int_loongarch_lasx_xvfmadd_s : GCCBuiltin<"__builtin_lasx_xvfmadd_s">,
+  Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
+  [IntrNoMem]>;
+def int_loongarch_lasx_xvfmadd_d : GCCBuiltin<"__builtin_lasx_xvfmadd_d">,
+  Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
+  [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfmsub_s : GCCBuiltin<"__builtin_lasx_xvfmsub_s">,
+  Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
+  [IntrNoMem]>;
+def int_loongarch_lasx_xvfmsub_d : GCCBuiltin<"__builtin_lasx_xvfmsub_d">,
+  Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
+  [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfnmadd_s : GCCBuiltin<"__builtin_lasx_xvfnmadd_s">,
+  Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
+  [IntrNoMem]>;
+def int_loongarch_lasx_xvfnmadd_d : GCCBuiltin<"__builtin_lasx_xvfnmadd_d">,
+  Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
+  [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfnmsub_s : GCCBuiltin<"__builtin_lasx_xvfnmsub_s">,
+  Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
+  [IntrNoMem]>;
+def int_loongarch_lasx_xvfnmsub_d : GCCBuiltin<"__builtin_lasx_xvfnmsub_d">,
+  Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
+  [IntrNoMem]>;
+
+def int_loongarch_lasx_xvclo_b : GCCBuiltin<"__builtin_lasx_xvclo_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvclo_h : GCCBuiltin<"__builtin_lasx_xvclo_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvclo_w : GCCBuiltin<"__builtin_lasx_xvclo_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvclo_d : GCCBuiltin<"__builtin_lasx_xvclo_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvflogb_s : GCCBuiltin<"__builtin_lasx_xvflogb_s">,
+  Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvflogb_d : GCCBuiltin<"__builtin_lasx_xvflogb_d">,
+  Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvpickve2gr_w : GCCBuiltin<"__builtin_lasx_xvpickve2gr_w">,
+  Intrinsic<[llvm_i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvpickve2gr_d : GCCBuiltin<"__builtin_lasx_xvpickve2gr_d">,
+  Intrinsic<[llvm_i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvpickve2gr_wu : GCCBuiltin<"__builtin_lasx_xvpickve2gr_wu">,
+  Intrinsic<[llvm_i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvpickve2gr_du : GCCBuiltin<"__builtin_lasx_xvpickve2gr_du">,
+  Intrinsic<[llvm_i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvmskltz_b : GCCBuiltin<"__builtin_lasx_xvmskltz_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmskltz_h : GCCBuiltin<"__builtin_lasx_xvmskltz_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmskltz_w : GCCBuiltin<"__builtin_lasx_xvmskltz_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmskltz_d : GCCBuiltin<"__builtin_lasx_xvmskltz_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfcmp_caf_s : GCCBuiltin<"__builtin_lasx_xvfcmp_caf_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfcmp_caf_d : GCCBuiltin<"__builtin_lasx_xvfcmp_caf_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfcmp_cor_s : GCCBuiltin<"__builtin_lasx_xvfcmp_cor_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfcmp_cor_d : GCCBuiltin<"__builtin_lasx_xvfcmp_cor_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfcmp_cun_s : GCCBuiltin<"__builtin_lasx_xvfcmp_cun_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfcmp_cun_d : GCCBuiltin<"__builtin_lasx_xvfcmp_cun_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfcmp_cune_s : GCCBuiltin<"__builtin_lasx_xvfcmp_cune_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfcmp_cune_d : GCCBuiltin<"__builtin_lasx_xvfcmp_cune_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfcmp_cueq_s : GCCBuiltin<"__builtin_lasx_xvfcmp_cueq_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfcmp_cueq_d : GCCBuiltin<"__builtin_lasx_xvfcmp_cueq_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfcmp_ceq_s : GCCBuiltin<"__builtin_lasx_xvfcmp_ceq_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfcmp_ceq_d : GCCBuiltin<"__builtin_lasx_xvfcmp_ceq_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfcmp_cne_s : GCCBuiltin<"__builtin_lasx_xvfcmp_cne_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfcmp_cne_d : GCCBuiltin<"__builtin_lasx_xvfcmp_cne_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfcmp_clt_s : GCCBuiltin<"__builtin_lasx_xvfcmp_clt_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfcmp_clt_d : GCCBuiltin<"__builtin_lasx_xvfcmp_clt_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfcmp_cult_s : GCCBuiltin<"__builtin_lasx_xvfcmp_cult_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfcmp_cult_d : GCCBuiltin<"__builtin_lasx_xvfcmp_cult_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfcmp_cle_s : GCCBuiltin<"__builtin_lasx_xvfcmp_cle_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfcmp_cle_d : GCCBuiltin<"__builtin_lasx_xvfcmp_cle_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfcmp_cule_s : GCCBuiltin<"__builtin_lasx_xvfcmp_cule_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfcmp_cule_d : GCCBuiltin<"__builtin_lasx_xvfcmp_cule_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfcmp_saf_s : GCCBuiltin<"__builtin_lasx_xvfcmp_saf_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfcmp_saf_d : GCCBuiltin<"__builtin_lasx_xvfcmp_saf_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfcmp_sor_s : GCCBuiltin<"__builtin_lasx_xvfcmp_sor_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfcmp_sor_d : GCCBuiltin<"__builtin_lasx_xvfcmp_sor_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfcmp_sun_s : GCCBuiltin<"__builtin_lasx_xvfcmp_sun_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfcmp_sun_d : GCCBuiltin<"__builtin_lasx_xvfcmp_sun_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfcmp_sune_s : GCCBuiltin<"__builtin_lasx_xvfcmp_sune_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfcmp_sune_d : GCCBuiltin<"__builtin_lasx_xvfcmp_sune_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfcmp_sueq_s : GCCBuiltin<"__builtin_lasx_xvfcmp_sueq_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfcmp_sueq_d : GCCBuiltin<"__builtin_lasx_xvfcmp_sueq_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfcmp_seq_s : GCCBuiltin<"__builtin_lasx_xvfcmp_seq_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfcmp_seq_d : GCCBuiltin<"__builtin_lasx_xvfcmp_seq_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfcmp_sne_s : GCCBuiltin<"__builtin_lasx_xvfcmp_sne_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfcmp_sne_d : GCCBuiltin<"__builtin_lasx_xvfcmp_sne_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfcmp_slt_s : GCCBuiltin<"__builtin_lasx_xvfcmp_slt_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfcmp_slt_d : GCCBuiltin<"__builtin_lasx_xvfcmp_slt_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfcmp_sult_s : GCCBuiltin<"__builtin_lasx_xvfcmp_sult_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfcmp_sult_d : GCCBuiltin<"__builtin_lasx_xvfcmp_sult_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfcmp_sle_s : GCCBuiltin<"__builtin_lasx_xvfcmp_sle_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfcmp_sle_d : GCCBuiltin<"__builtin_lasx_xvfcmp_sle_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfcmp_sule_s : GCCBuiltin<"__builtin_lasx_xvfcmp_sule_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfcmp_sule_d : GCCBuiltin<"__builtin_lasx_xvfcmp_sule_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvbitsel_v : GCCBuiltin<"__builtin_lasx_xvbitsel_v">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvshuf_b : GCCBuiltin<"__builtin_lasx_xvshuf_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvldrepl_b : GCCBuiltin<"__builtin_lasx_xvldrepl_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
+def int_loongarch_lasx_xvldrepl_h : GCCBuiltin<"__builtin_lasx_xvldrepl_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
+def int_loongarch_lasx_xvldrepl_w : GCCBuiltin<"__builtin_lasx_xvldrepl_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
+def int_loongarch_lasx_xvldrepl_d : GCCBuiltin<"__builtin_lasx_xvldrepl_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
+
+def int_loongarch_lasx_xvstelm_b : GCCBuiltin<"__builtin_lasx_xvstelm_b">,
+  Intrinsic<[], [llvm_v32i8_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>;
+def int_loongarch_lasx_xvstelm_h : GCCBuiltin<"__builtin_lasx_xvstelm_h">,
+  Intrinsic<[], [llvm_v16i16_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>;
+def int_loongarch_lasx_xvstelm_w : GCCBuiltin<"__builtin_lasx_xvstelm_w">,
+  Intrinsic<[], [llvm_v8i32_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>;
+def int_loongarch_lasx_xvstelm_d : GCCBuiltin<"__builtin_lasx_xvstelm_d">,
+  Intrinsic<[], [llvm_v4i64_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>;
+
+def int_loongarch_lasx_xvldx : GCCBuiltin<"__builtin_lasx_xvldx">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i64_ty],
+  [IntrReadMem, IntrArgMemOnly]>;
+
+def int_loongarch_lasx_xvstx : GCCBuiltin<"__builtin_lasx_xvstx">,
+  Intrinsic<[], [llvm_v32i8_ty, llvm_ptr_ty, llvm_i64_ty],
+  [IntrArgMemOnly]>;
+
+def int_loongarch_lasx_xvaddwev_d_w : GCCBuiltin<"__builtin_lasx_xvaddwev_d_w">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvaddwev_w_h : GCCBuiltin<"__builtin_lasx_xvaddwev_w_h">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvaddwev_h_b : GCCBuiltin<"__builtin_lasx_xvaddwev_h_b">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvaddwev_q_d : GCCBuiltin<"__builtin_lasx_xvaddwev_q_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsubwev_d_w : GCCBuiltin<"__builtin_lasx_xvsubwev_d_w">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsubwev_w_h : GCCBuiltin<"__builtin_lasx_xvsubwev_w_h">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsubwev_h_b : GCCBuiltin<"__builtin_lasx_xvsubwev_h_b">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsubwev_q_d : GCCBuiltin<"__builtin_lasx_xvsubwev_q_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvaddwod_d_w : GCCBuiltin<"__builtin_lasx_xvaddwod_d_w">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvaddwod_w_h : GCCBuiltin<"__builtin_lasx_xvaddwod_w_h">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvaddwod_h_b : GCCBuiltin<"__builtin_lasx_xvaddwod_h_b">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvaddwod_q_d : GCCBuiltin<"__builtin_lasx_xvaddwod_q_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsubwod_d_w : GCCBuiltin<"__builtin_lasx_xvsubwod_d_w">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsubwod_w_h : GCCBuiltin<"__builtin_lasx_xvsubwod_w_h">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsubwod_h_b : GCCBuiltin<"__builtin_lasx_xvsubwod_h_b">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsubwod_q_d : GCCBuiltin<"__builtin_lasx_xvsubwod_q_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvaddwev_d_wu : GCCBuiltin<"__builtin_lasx_xvaddwev_d_wu">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvaddwev_w_hu : GCCBuiltin<"__builtin_lasx_xvaddwev_w_hu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvaddwev_h_bu : GCCBuiltin<"__builtin_lasx_xvaddwev_h_bu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvaddwev_q_du : GCCBuiltin<"__builtin_lasx_xvaddwev_q_du">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
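+
+// The *wev/*wod intrinsics in this block operate on the even- and odd-indexed
+// source lanes respectively, widening each to the next larger element type
+// (a 'u' suffix marks zero- rather than sign-extension). Illustrative IR use:
+//   %w = call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> %a,
+//                                                         <8 x i32> %b)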
+
+def int_loongarch_lasx_xvsubwev_d_wu : GCCBuiltin<"__builtin_lasx_xvsubwev_d_wu">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsubwev_w_hu : GCCBuiltin<"__builtin_lasx_xvsubwev_w_hu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsubwev_h_bu : GCCBuiltin<"__builtin_lasx_xvsubwev_h_bu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsubwev_q_du : GCCBuiltin<"__builtin_lasx_xvsubwev_q_du">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvaddwod_d_wu : GCCBuiltin<"__builtin_lasx_xvaddwod_d_wu">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvaddwod_w_hu : GCCBuiltin<"__builtin_lasx_xvaddwod_w_hu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvaddwod_h_bu : GCCBuiltin<"__builtin_lasx_xvaddwod_h_bu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvaddwod_q_du : GCCBuiltin<"__builtin_lasx_xvaddwod_q_du">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsubwod_d_wu : GCCBuiltin<"__builtin_lasx_xvsubwod_d_wu">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsubwod_w_hu : GCCBuiltin<"__builtin_lasx_xvsubwod_w_hu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsubwod_h_bu : GCCBuiltin<"__builtin_lasx_xvsubwod_h_bu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsubwod_q_du : GCCBuiltin<"__builtin_lasx_xvsubwod_q_du">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvaddwev_d_wu_w : GCCBuiltin<"__builtin_lasx_xvaddwev_d_wu_w">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvaddwev_w_hu_h : GCCBuiltin<"__builtin_lasx_xvaddwev_w_hu_h">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvaddwev_h_bu_b : GCCBuiltin<"__builtin_lasx_xvaddwev_h_bu_b">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvaddwev_q_du_d : GCCBuiltin<"__builtin_lasx_xvaddwev_q_du_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvaddwod_d_wu_w : GCCBuiltin<"__builtin_lasx_xvaddwod_d_wu_w">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvaddwod_w_hu_h : GCCBuiltin<"__builtin_lasx_xvaddwod_w_hu_h">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvaddwod_h_bu_b : GCCBuiltin<"__builtin_lasx_xvaddwod_h_bu_b">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvaddwod_q_du_d : GCCBuiltin<"__builtin_lasx_xvaddwod_q_du_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
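+// In the mixed forms above (e.g. xvaddwev_d_wu_w) the first source operand is
+// treated as unsigned and the second as signed, matching the double suffix.
+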
+def int_loongarch_lasx_xvhaddw_qu_du : GCCBuiltin<"__builtin_lasx_xvhaddw_qu_du">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvhsubw_qu_du : GCCBuiltin<"__builtin_lasx_xvhsubw_qu_du">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvhaddw_q_d : GCCBuiltin<"__builtin_lasx_xvhaddw_q_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvhsubw_q_d : GCCBuiltin<"__builtin_lasx_xvhsubw_q_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvmuh_b : GCCBuiltin<"__builtin_lasx_xvmuh_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmuh_h : GCCBuiltin<"__builtin_lasx_xvmuh_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmuh_w : GCCBuiltin<"__builtin_lasx_xvmuh_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmuh_d : GCCBuiltin<"__builtin_lasx_xvmuh_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvmuh_bu : GCCBuiltin<"__builtin_lasx_xvmuh_bu">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmuh_hu : GCCBuiltin<"__builtin_lasx_xvmuh_hu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmuh_wu : GCCBuiltin<"__builtin_lasx_xvmuh_wu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmuh_du : GCCBuiltin<"__builtin_lasx_xvmuh_du">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvmulwev_d_w : GCCBuiltin<"__builtin_lasx_xvmulwev_d_w">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmulwev_w_h : GCCBuiltin<"__builtin_lasx_xvmulwev_w_h">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmulwev_h_b : GCCBuiltin<"__builtin_lasx_xvmulwev_h_b">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmulwev_q_d : GCCBuiltin<"__builtin_lasx_xvmulwev_q_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvmulwod_d_w : GCCBuiltin<"__builtin_lasx_xvmulwod_d_w">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmulwod_w_h : GCCBuiltin<"__builtin_lasx_xvmulwod_w_h">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmulwod_h_b : GCCBuiltin<"__builtin_lasx_xvmulwod_h_b">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmulwod_q_d : GCCBuiltin<"__builtin_lasx_xvmulwod_q_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvmulwev_d_wu : GCCBuiltin<"__builtin_lasx_xvmulwev_d_wu">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmulwev_w_hu : GCCBuiltin<"__builtin_lasx_xvmulwev_w_hu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmulwev_h_bu : GCCBuiltin<"__builtin_lasx_xvmulwev_h_bu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmulwev_q_du : GCCBuiltin<"__builtin_lasx_xvmulwev_q_du">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvmulwod_d_wu : GCCBuiltin<"__builtin_lasx_xvmulwod_d_wu">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmulwod_w_hu : GCCBuiltin<"__builtin_lasx_xvmulwod_w_hu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmulwod_h_bu : GCCBuiltin<"__builtin_lasx_xvmulwod_h_bu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmulwod_q_du : GCCBuiltin<"__builtin_lasx_xvmulwod_q_du">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvmulwev_d_wu_w : GCCBuiltin<"__builtin_lasx_xvmulwev_d_wu_w">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmulwev_w_hu_h : GCCBuiltin<"__builtin_lasx_xvmulwev_w_hu_h">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmulwev_h_bu_b : GCCBuiltin<"__builtin_lasx_xvmulwev_h_bu_b">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmulwev_q_du_d : GCCBuiltin<"__builtin_lasx_xvmulwev_q_du_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvmulwod_d_wu_w : GCCBuiltin<"__builtin_lasx_xvmulwod_d_wu_w">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmulwod_w_hu_h : GCCBuiltin<"__builtin_lasx_xvmulwod_w_hu_h">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmulwod_h_bu_b : GCCBuiltin<"__builtin_lasx_xvmulwod_h_bu_b">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmulwod_q_du_d : GCCBuiltin<"__builtin_lasx_xvmulwod_q_du_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvmaddwev_d_w : GCCBuiltin<"__builtin_lasx_xvmaddwev_d_w">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmaddwev_w_h : GCCBuiltin<"__builtin_lasx_xvmaddwev_w_h">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmaddwev_h_b : GCCBuiltin<"__builtin_lasx_xvmaddwev_h_b">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmaddwev_q_d : GCCBuiltin<"__builtin_lasx_xvmaddwev_q_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvmaddwod_d_w : GCCBuiltin<"__builtin_lasx_xvmaddwod_d_w">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmaddwod_w_h : GCCBuiltin<"__builtin_lasx_xvmaddwod_w_h">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmaddwod_h_b : GCCBuiltin<"__builtin_lasx_xvmaddwod_h_b">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmaddwod_q_d : GCCBuiltin<"__builtin_lasx_xvmaddwod_q_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
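+// The widening multiply-accumulate (xvmaddw*) forms take the accumulator as
+// their first operand and return the updated accumulator. Illustrative:
+//   %acc1 = call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(
+//               <4 x i64> %acc0, <8 x i32> %a, <8 x i32> %b)
+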
+def int_loongarch_lasx_xvmaddwev_d_wu : GCCBuiltin<"__builtin_lasx_xvmaddwev_d_wu">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmaddwev_w_hu : GCCBuiltin<"__builtin_lasx_xvmaddwev_w_hu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmaddwev_h_bu : GCCBuiltin<"__builtin_lasx_xvmaddwev_h_bu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmaddwev_q_du : GCCBuiltin<"__builtin_lasx_xvmaddwev_q_du">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvmaddwod_d_wu : GCCBuiltin<"__builtin_lasx_xvmaddwod_d_wu">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmaddwod_w_hu : GCCBuiltin<"__builtin_lasx_xvmaddwod_w_hu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmaddwod_h_bu : GCCBuiltin<"__builtin_lasx_xvmaddwod_h_bu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmaddwod_q_du : GCCBuiltin<"__builtin_lasx_xvmaddwod_q_du">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvmaddwev_d_wu_w : GCCBuiltin<"__builtin_lasx_xvmaddwev_d_wu_w">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmaddwev_w_hu_h : GCCBuiltin<"__builtin_lasx_xvmaddwev_w_hu_h">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmaddwev_h_bu_b : GCCBuiltin<"__builtin_lasx_xvmaddwev_h_bu_b">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmaddwev_q_du_d : GCCBuiltin<"__builtin_lasx_xvmaddwev_q_du_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvmaddwod_d_wu_w : GCCBuiltin<"__builtin_lasx_xvmaddwod_d_wu_w">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmaddwod_w_hu_h : GCCBuiltin<"__builtin_lasx_xvmaddwod_w_hu_h">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmaddwod_h_bu_b : GCCBuiltin<"__builtin_lasx_xvmaddwod_h_bu_b">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmaddwod_q_du_d : GCCBuiltin<"__builtin_lasx_xvmaddwod_q_du_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsrln_b_h : GCCBuiltin<"__builtin_lasx_xvsrln_b_h">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrln_h_w : GCCBuiltin<"__builtin_lasx_xvsrln_h_w">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrln_w_d : GCCBuiltin<"__builtin_lasx_xvsrln_w_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsran_b_h : GCCBuiltin<"__builtin_lasx_xvsran_b_h">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsran_h_w : GCCBuiltin<"__builtin_lasx_xvsran_h_w">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsran_w_d : GCCBuiltin<"__builtin_lasx_xvsran_w_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
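+
+// The narrowing shifts (srln/sran and their rounding 'r' and saturating 's'
+// variants) shift each wider element right, logically or arithmetically, and
+// truncate to the next smaller element type. Illustrative IR use:
+//   %n = call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> %a,
+//                                                       <16 x i16> %b)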
+
+def int_loongarch_lasx_xvsrlrn_b_h : GCCBuiltin<"__builtin_lasx_xvsrlrn_b_h">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrlrn_h_w : GCCBuiltin<"__builtin_lasx_xvsrlrn_h_w">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrlrn_w_d : GCCBuiltin<"__builtin_lasx_xvsrlrn_w_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsrarn_b_h : GCCBuiltin<"__builtin_lasx_xvsrarn_b_h">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrarn_h_w : GCCBuiltin<"__builtin_lasx_xvsrarn_h_w">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrarn_w_d : GCCBuiltin<"__builtin_lasx_xvsrarn_w_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvssrln_b_h : GCCBuiltin<"__builtin_lasx_xvssrln_b_h">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrln_h_w : GCCBuiltin<"__builtin_lasx_xvssrln_h_w">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrln_w_d : GCCBuiltin<"__builtin_lasx_xvssrln_w_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvssran_b_h : GCCBuiltin<"__builtin_lasx_xvssran_b_h">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssran_h_w : GCCBuiltin<"__builtin_lasx_xvssran_h_w">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssran_w_d : GCCBuiltin<"__builtin_lasx_xvssran_w_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvssrlrn_b_h : GCCBuiltin<"__builtin_lasx_xvssrlrn_b_h">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrlrn_h_w : GCCBuiltin<"__builtin_lasx_xvssrlrn_h_w">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrlrn_w_d : GCCBuiltin<"__builtin_lasx_xvssrlrn_w_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvssrarn_b_h : GCCBuiltin<"__builtin_lasx_xvssrarn_b_h">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrarn_h_w : GCCBuiltin<"__builtin_lasx_xvssrarn_h_w">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrarn_w_d : GCCBuiltin<"__builtin_lasx_xvssrarn_w_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvssrln_bu_h : GCCBuiltin<"__builtin_lasx_xvssrln_bu_h">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrln_hu_w : GCCBuiltin<"__builtin_lasx_xvssrln_hu_w">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrln_wu_d : GCCBuiltin<"__builtin_lasx_xvssrln_wu_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvssran_bu_h : GCCBuiltin<"__builtin_lasx_xvssran_bu_h">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssran_hu_w : GCCBuiltin<"__builtin_lasx_xvssran_hu_w">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssran_wu_d : GCCBuiltin<"__builtin_lasx_xvssran_wu_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvssrlrn_bu_h : GCCBuiltin<"__builtin_lasx_xvssrlrn_bu_h">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrlrn_hu_w : GCCBuiltin<"__builtin_lasx_xvssrlrn_hu_w">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrlrn_wu_d : GCCBuiltin<"__builtin_lasx_xvssrlrn_wu_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvssrarn_bu_h : GCCBuiltin<"__builtin_lasx_xvssrarn_bu_h">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrarn_hu_w : GCCBuiltin<"__builtin_lasx_xvssrarn_hu_w">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrarn_wu_d : GCCBuiltin<"__builtin_lasx_xvssrarn_wu_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvandn_v : GCCBuiltin<"__builtin_lasx_xvandn_v">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvorn_v : GCCBuiltin<"__builtin_lasx_xvorn_v">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfrstp_b : GCCBuiltin<"__builtin_lasx_xvfrstp_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty],
+  [IntrNoMem]>;
+def int_loongarch_lasx_xvfrstp_h : GCCBuiltin<"__builtin_lasx_xvfrstp_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty],
+  [IntrNoMem]>;
+
+def int_loongarch_lasx_xvadd_q : GCCBuiltin<"__builtin_lasx_xvadd_q">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsub_q : GCCBuiltin<"__builtin_lasx_xvsub_q">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsigncov_b : GCCBuiltin<"__builtin_lasx_xvsigncov_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty],
+  [IntrNoMem]>;
+def int_loongarch_lasx_xvsigncov_h : GCCBuiltin<"__builtin_lasx_xvsigncov_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty],
+  [IntrNoMem]>;
+def int_loongarch_lasx_xvsigncov_w : GCCBuiltin<"__builtin_lasx_xvsigncov_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
+  [IntrNoMem]>;
+def int_loongarch_lasx_xvsigncov_d : GCCBuiltin<"__builtin_lasx_xvsigncov_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
+  [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfcvt_h_s : GCCBuiltin<"__builtin_lasx_xvfcvt_h_s">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfcvt_s_d : GCCBuiltin<"__builtin_lasx_xvfcvt_s_d">,
+  Intrinsic<[llvm_v8f32_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvffint_s_l : GCCBuiltin<"__builtin_lasx_xvffint_s_l">,
+  Intrinsic<[llvm_v8f32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
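+
+// The suffixed float-to-int conversions below fix the rounding mode in the
+// instruction name: rz (toward zero), rp (toward +inf), rm (toward -inf),
+// rne (to nearest, ties to even).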
+def int_loongarch_lasx_xvftint_w_d : GCCBuiltin<"__builtin_lasx_xvftint_w_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvftintrz_w_d : GCCBuiltin<"__builtin_lasx_xvftintrz_w_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvftintrp_w_d : GCCBuiltin<"__builtin_lasx_xvftintrp_w_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvftintrm_w_d : GCCBuiltin<"__builtin_lasx_xvftintrm_w_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvftintrne_w_d : GCCBuiltin<"__builtin_lasx_xvftintrne_w_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvbsrl_v : GCCBuiltin<"__builtin_lasx_xvbsrl_v">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvbsll_v : GCCBuiltin<"__builtin_lasx_xvbsll_v">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfrstpi_b : GCCBuiltin<"__builtin_lasx_xvfrstpi_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfrstpi_h : GCCBuiltin<"__builtin_lasx_xvfrstpi_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvneg_b : GCCBuiltin<"__builtin_lasx_xvneg_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvneg_h : GCCBuiltin<"__builtin_lasx_xvneg_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvneg_w : GCCBuiltin<"__builtin_lasx_xvneg_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvneg_d : GCCBuiltin<"__builtin_lasx_xvneg_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvmskgez_b : GCCBuiltin<"__builtin_lasx_xvmskgez_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmsknz_b : GCCBuiltin<"__builtin_lasx_xvmsknz_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfrintrm_s : GCCBuiltin<"__builtin_lasx_xvfrintrm_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfrintrm_d : GCCBuiltin<"__builtin_lasx_xvfrintrm_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfrintrp_s : GCCBuiltin<"__builtin_lasx_xvfrintrp_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfrintrp_d : GCCBuiltin<"__builtin_lasx_xvfrintrp_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfrintrz_s : GCCBuiltin<"__builtin_lasx_xvfrintrz_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfrintrz_d : GCCBuiltin<"__builtin_lasx_xvfrintrz_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfrintrne_s : GCCBuiltin<"__builtin_lasx_xvfrintrne_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfrintrne_d : GCCBuiltin<"__builtin_lasx_xvfrintrne_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+
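+// Judging by the types, xvffinth/xvffintl below convert the high and low
+// halves of a v8i32 source to v4f64. Illustrative IR use:
+//   %d = call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> %w)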
+def int_loongarch_lasx_xvffinth_d_w : GCCBuiltin<"__builtin_lasx_xvffinth_d_w">,
+  Intrinsic<[llvm_v4f64_ty], [llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvffintl_d_w : GCCBuiltin<"__builtin_lasx_xvffintl_d_w">,
+  Intrinsic<[llvm_v4f64_ty], [llvm_v8i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvftintrm_w_s : GCCBuiltin<"__builtin_lasx_xvftintrm_w_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvftintrm_l_d : GCCBuiltin<"__builtin_lasx_xvftintrm_l_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvftintrp_w_s : GCCBuiltin<"__builtin_lasx_xvftintrp_w_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvftintrp_l_d : GCCBuiltin<"__builtin_lasx_xvftintrp_l_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvftintrz_w_s : GCCBuiltin<"__builtin_lasx_xvftintrz_w_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvftintrz_l_d : GCCBuiltin<"__builtin_lasx_xvftintrz_l_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvftintrne_w_s : GCCBuiltin<"__builtin_lasx_xvftintrne_w_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvftintrne_l_d : GCCBuiltin<"__builtin_lasx_xvftintrne_l_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvftinth_l_s : GCCBuiltin<"__builtin_lasx_xvftinth_l_s">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvftintl_l_s : GCCBuiltin<"__builtin_lasx_xvftintl_l_s">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvftintrmh_l_s : GCCBuiltin<"__builtin_lasx_xvftintrmh_l_s">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvftintrml_l_s : GCCBuiltin<"__builtin_lasx_xvftintrml_l_s">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvftintrph_l_s : GCCBuiltin<"__builtin_lasx_xvftintrph_l_s">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvftintrpl_l_s : GCCBuiltin<"__builtin_lasx_xvftintrpl_l_s">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvftintrzh_l_s : GCCBuiltin<"__builtin_lasx_xvftintrzh_l_s">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvftintrzl_l_s : GCCBuiltin<"__builtin_lasx_xvftintrzl_l_s">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvftintrneh_l_s : GCCBuiltin<"__builtin_lasx_xvftintrneh_l_s">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvftintrnel_l_s : GCCBuiltin<"__builtin_lasx_xvftintrnel_l_s">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvexth_d_w : GCCBuiltin<"__builtin_lasx_xvexth_d_w">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvexth_w_h : GCCBuiltin<"__builtin_lasx_xvexth_w_h">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvexth_h_b : GCCBuiltin<"__builtin_lasx_xvexth_h_b">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvexth_q_d : GCCBuiltin<"__builtin_lasx_xvexth_q_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsllwil_d_w : GCCBuiltin<"__builtin_lasx_xvsllwil_d_w">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsllwil_w_h : GCCBuiltin<"__builtin_lasx_xvsllwil_w_h">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsllwil_h_b : GCCBuiltin<"__builtin_lasx_xvsllwil_h_b">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsllwil_du_wu : GCCBuiltin<"__builtin_lasx_xvsllwil_du_wu">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsllwil_wu_hu : GCCBuiltin<"__builtin_lasx_xvsllwil_wu_hu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsllwil_hu_bu : GCCBuiltin<"__builtin_lasx_xvsllwil_hu_bu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvbitclri_b : GCCBuiltin<"__builtin_lasx_xvbitclri_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvbitclri_h : GCCBuiltin<"__builtin_lasx_xvbitclri_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvbitclri_w : GCCBuiltin<"__builtin_lasx_xvbitclri_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvbitclri_d : GCCBuiltin<"__builtin_lasx_xvbitclri_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvbitseti_b : GCCBuiltin<"__builtin_lasx_xvbitseti_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvbitseti_h : GCCBuiltin<"__builtin_lasx_xvbitseti_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvbitseti_w : GCCBuiltin<"__builtin_lasx_xvbitseti_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvbitseti_d : GCCBuiltin<"__builtin_lasx_xvbitseti_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvbitrevi_b : GCCBuiltin<"__builtin_lasx_xvbitrevi_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvbitrevi_h : GCCBuiltin<"__builtin_lasx_xvbitrevi_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvbitrevi_w : GCCBuiltin<"__builtin_lasx_xvbitrevi_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvbitrevi_d : GCCBuiltin<"__builtin_lasx_xvbitrevi_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvssrlrni_b_h : GCCBuiltin<"__builtin_lasx_xvssrlrni_b_h">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrlrni_h_w : GCCBuiltin<"__builtin_lasx_xvssrlrni_h_w">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrlrni_w_d : GCCBuiltin<"__builtin_lasx_xvssrlrni_w_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrlrni_d_q : GCCBuiltin<"__builtin_lasx_xvssrlrni_d_q">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsrani_b_h : GCCBuiltin<"__builtin_lasx_xvsrani_b_h">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrani_h_w : GCCBuiltin<"__builtin_lasx_xvsrani_h_w">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrani_w_d : GCCBuiltin<"__builtin_lasx_xvsrani_w_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrani_d_q : GCCBuiltin<"__builtin_lasx_xvsrani_d_q">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvextrins_b : GCCBuiltin<"__builtin_lasx_xvextrins_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvextrins_h : GCCBuiltin<"__builtin_lasx_xvextrins_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvextrins_w : GCCBuiltin<"__builtin_lasx_xvextrins_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvextrins_d : GCCBuiltin<"__builtin_lasx_xvextrins_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvbitseli_b : GCCBuiltin<"__builtin_lasx_xvbitseli_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvandi_b : GCCBuiltin<"__builtin_lasx_xvandi_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvori_b : GCCBuiltin<"__builtin_lasx_xvori_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvxori_b : GCCBuiltin<"__builtin_lasx_xvxori_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvnori_b : GCCBuiltin<"__builtin_lasx_xvnori_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvldi : GCCBuiltin<"__builtin_lasx_xvldi">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvpermi_w : GCCBuiltin<"__builtin_lasx_xvpermi_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsadd_b : GCCBuiltin<"__builtin_lasx_xvsadd_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvsadd_h : GCCBuiltin<"__builtin_lasx_xvsadd_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvsadd_w : GCCBuiltin<"__builtin_lasx_xvsadd_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvsadd_d : GCCBuiltin<"__builtin_lasx_xvsadd_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
+  [Commutative, IntrNoMem]>;
+
+def int_loongarch_lasx_xvssub_b : GCCBuiltin<"__builtin_lasx_xvssub_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssub_h : GCCBuiltin<"__builtin_lasx_xvssub_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssub_w : GCCBuiltin<"__builtin_lasx_xvssub_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssub_d : GCCBuiltin<"__builtin_lasx_xvssub_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
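+// The sadd/ssub families (signed above, unsigned below) saturate on overflow
+// rather than wrapping. Illustrative IR use:
+//   %s = call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> %a,
+//                                                     <32 x i8> %b)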
+
+def int_loongarch_lasx_xvsadd_bu : GCCBuiltin<"__builtin_lasx_xvsadd_bu">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvsadd_hu : GCCBuiltin<"__builtin_lasx_xvsadd_hu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvsadd_wu : GCCBuiltin<"__builtin_lasx_xvsadd_wu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvsadd_du : GCCBuiltin<"__builtin_lasx_xvsadd_du">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
+  [Commutative, IntrNoMem]>;
+
+def int_loongarch_lasx_xvssub_bu : GCCBuiltin<"__builtin_lasx_xvssub_bu">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssub_hu : GCCBuiltin<"__builtin_lasx_xvssub_hu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssub_wu : GCCBuiltin<"__builtin_lasx_xvssub_wu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssub_du : GCCBuiltin<"__builtin_lasx_xvssub_du">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvhaddw_h_b : GCCBuiltin<"__builtin_lasx_xvhaddw_h_b">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvhaddw_w_h : GCCBuiltin<"__builtin_lasx_xvhaddw_w_h">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvhaddw_d_w : GCCBuiltin<"__builtin_lasx_xvhaddw_d_w">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvhsubw_h_b : GCCBuiltin<"__builtin_lasx_xvhsubw_h_b">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvhsubw_w_h : GCCBuiltin<"__builtin_lasx_xvhsubw_w_h">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvhsubw_d_w : GCCBuiltin<"__builtin_lasx_xvhsubw_d_w">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvhaddw_hu_bu : GCCBuiltin<"__builtin_lasx_xvhaddw_hu_bu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvhaddw_wu_hu : GCCBuiltin<"__builtin_lasx_xvhaddw_wu_hu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvhaddw_du_wu : GCCBuiltin<"__builtin_lasx_xvhaddw_du_wu">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvhsubw_hu_bu : GCCBuiltin<"__builtin_lasx_xvhsubw_hu_bu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvhsubw_wu_hu : GCCBuiltin<"__builtin_lasx_xvhsubw_wu_hu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvhsubw_du_wu : GCCBuiltin<"__builtin_lasx_xvhsubw_du_wu">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvadda_b : GCCBuiltin<"__builtin_lasx_xvadda_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvadda_h : GCCBuiltin<"__builtin_lasx_xvadda_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvadda_w : GCCBuiltin<"__builtin_lasx_xvadda_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvadda_d : GCCBuiltin<"__builtin_lasx_xvadda_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
+  [Commutative, IntrNoMem]>;
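+// Note the widening in the horizontal forms above: each xvhaddw/xvhsubw
+// result element is twice the width of its source elements (e.g.
+// xvhaddw_h_b takes two v32i8 operands and produces v16i16), so a single
+// pairwise sum cannot overflow its result element.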
+
+def int_loongarch_lasx_xvabsd_b : GCCBuiltin<"__builtin_lasx_xvabsd_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvabsd_h : GCCBuiltin<"__builtin_lasx_xvabsd_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvabsd_w : GCCBuiltin<"__builtin_lasx_xvabsd_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvabsd_d : GCCBuiltin<"__builtin_lasx_xvabsd_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvabsd_bu : GCCBuiltin<"__builtin_lasx_xvabsd_bu">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvabsd_hu : GCCBuiltin<"__builtin_lasx_xvabsd_hu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvabsd_wu : GCCBuiltin<"__builtin_lasx_xvabsd_wu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvabsd_du : GCCBuiltin<"__builtin_lasx_xvabsd_du">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvavg_b : GCCBuiltin<"__builtin_lasx_xvavg_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvavg_h : GCCBuiltin<"__builtin_lasx_xvavg_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvavg_w : GCCBuiltin<"__builtin_lasx_xvavg_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvavg_d : GCCBuiltin<"__builtin_lasx_xvavg_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
+  [Commutative, IntrNoMem]>;
+
+def int_loongarch_lasx_xvavg_bu : GCCBuiltin<"__builtin_lasx_xvavg_bu">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvavg_hu : GCCBuiltin<"__builtin_lasx_xvavg_hu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvavg_wu : GCCBuiltin<"__builtin_lasx_xvavg_wu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvavg_du : GCCBuiltin<"__builtin_lasx_xvavg_du">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
+  [Commutative, IntrNoMem]>;
+
+def int_loongarch_lasx_xvavgr_b : GCCBuiltin<"__builtin_lasx_xvavgr_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvavgr_h : GCCBuiltin<"__builtin_lasx_xvavgr_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvavgr_w : GCCBuiltin<"__builtin_lasx_xvavgr_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvavgr_d : GCCBuiltin<"__builtin_lasx_xvavgr_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
+  [Commutative, IntrNoMem]>;
+
+def int_loongarch_lasx_xvavgr_bu : GCCBuiltin<"__builtin_lasx_xvavgr_bu">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvavgr_hu : GCCBuiltin<"__builtin_lasx_xvavgr_hu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvavgr_wu : GCCBuiltin<"__builtin_lasx_xvavgr_wu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvavgr_du : GCCBuiltin<"__builtin_lasx_xvavgr_du">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
+  [Commutative, IntrNoMem]>;
+
+def int_loongarch_lasx_xvsrlr_b : GCCBuiltin<"__builtin_lasx_xvsrlr_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrlr_h : GCCBuiltin<"__builtin_lasx_xvsrlr_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrlr_w : GCCBuiltin<"__builtin_lasx_xvsrlr_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrlr_d : GCCBuiltin<"__builtin_lasx_xvsrlr_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsrar_b : GCCBuiltin<"__builtin_lasx_xvsrar_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrar_h : GCCBuiltin<"__builtin_lasx_xvsrar_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrar_w : GCCBuiltin<"__builtin_lasx_xvsrar_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrar_d : GCCBuiltin<"__builtin_lasx_xvsrar_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfmax_s : GCCBuiltin<"__builtin_lasx_xvfmax_s">,
+  Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfmax_d : GCCBuiltin<"__builtin_lasx_xvfmax_d">,
+  Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfmin_s : GCCBuiltin<"__builtin_lasx_xvfmin_s">,
+  Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfmin_d : GCCBuiltin<"__builtin_lasx_xvfmin_d">,
+  Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfmaxa_s : GCCBuiltin<"__builtin_lasx_xvfmaxa_s">,
+  Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfmaxa_d : GCCBuiltin<"__builtin_lasx_xvfmaxa_d">,
+  Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfmina_s : GCCBuiltin<"__builtin_lasx_xvfmina_s">,
+  Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfmina_d : GCCBuiltin<"__builtin_lasx_xvfmina_d">,
+  Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfclass_s : GCCBuiltin<"__builtin_lasx_xvfclass_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfclass_d : GCCBuiltin<"__builtin_lasx_xvfclass_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfrecip_s : GCCBuiltin<"__builtin_lasx_xvfrecip_s">,
+  Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfrecip_d : GCCBuiltin<"__builtin_lasx_xvfrecip_d">,
+  Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfrsqrt_s : GCCBuiltin<"__builtin_lasx_xvfrsqrt_s">,
+  Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfrsqrt_d : GCCBuiltin<"__builtin_lasx_xvfrsqrt_d">,
+  Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
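+// xvfclass_s/xvfclass_d classify each floating-point lane and return a
+// per-lane classification bit-mask, which is why the result types are
+// vector-integer (v8i32/v4i64) while the inputs are vector-float.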
+
+def int_loongarch_lasx_xvfcvtl_s_h : GCCBuiltin<"__builtin_lasx_xvfcvtl_s_h">,
+  Intrinsic<[llvm_v8f32_ty], [llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfcvtl_d_s : GCCBuiltin<"__builtin_lasx_xvfcvtl_d_s">,
+  Intrinsic<[llvm_v4f64_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfcvth_s_h : GCCBuiltin<"__builtin_lasx_xvfcvth_s_h">,
+  Intrinsic<[llvm_v8f32_ty], [llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfcvth_d_s : GCCBuiltin<"__builtin_lasx_xvfcvth_d_s">,
+  Intrinsic<[llvm_v4f64_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvftint_w_s : GCCBuiltin<"__builtin_lasx_xvftint_w_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvftint_l_d : GCCBuiltin<"__builtin_lasx_xvftint_l_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvftint_wu_s : GCCBuiltin<"__builtin_lasx_xvftint_wu_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvftint_lu_d : GCCBuiltin<"__builtin_lasx_xvftint_lu_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsrlri_b : GCCBuiltin<"__builtin_lasx_xvsrlri_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrlri_h : GCCBuiltin<"__builtin_lasx_xvsrlri_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrlri_w : GCCBuiltin<"__builtin_lasx_xvsrlri_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrlri_d : GCCBuiltin<"__builtin_lasx_xvsrlri_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsrari_b : GCCBuiltin<"__builtin_lasx_xvsrari_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrari_h : GCCBuiltin<"__builtin_lasx_xvsrari_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrari_w : GCCBuiltin<"__builtin_lasx_xvsrari_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrari_d : GCCBuiltin<"__builtin_lasx_xvsrari_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsat_b : GCCBuiltin<"__builtin_lasx_xvsat_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsat_h : GCCBuiltin<"__builtin_lasx_xvsat_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsat_w : GCCBuiltin<"__builtin_lasx_xvsat_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsat_d : GCCBuiltin<"__builtin_lasx_xvsat_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsat_bu : GCCBuiltin<"__builtin_lasx_xvsat_bu">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsat_hu : GCCBuiltin<"__builtin_lasx_xvsat_hu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsat_wu : GCCBuiltin<"__builtin_lasx_xvsat_wu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsat_du : GCCBuiltin<"__builtin_lasx_xvsat_du">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
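+// The trailing i32 operand of these immediate-form intrinsics (xvsrlri,
+// xvsrari, xvsat, ...) corresponds to an immediate field in the encoded
+// instruction, so callers are expected to pass a compile-time constant.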
+
+def int_loongarch_lasx_xvsrlni_b_h : GCCBuiltin<"__builtin_lasx_xvsrlni_b_h">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrlni_h_w : GCCBuiltin<"__builtin_lasx_xvsrlni_h_w">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrlni_w_d : GCCBuiltin<"__builtin_lasx_xvsrlni_w_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrlni_d_q : GCCBuiltin<"__builtin_lasx_xvsrlni_d_q">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvssrlni_b_h : GCCBuiltin<"__builtin_lasx_xvssrlni_b_h">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrlni_h_w : GCCBuiltin<"__builtin_lasx_xvssrlni_h_w">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrlni_w_d : GCCBuiltin<"__builtin_lasx_xvssrlni_w_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrlni_d_q : GCCBuiltin<"__builtin_lasx_xvssrlni_d_q">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvssrlrni_bu_h : GCCBuiltin<"__builtin_lasx_xvssrlrni_bu_h">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrlrni_hu_w : GCCBuiltin<"__builtin_lasx_xvssrlrni_hu_w">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrlrni_wu_d : GCCBuiltin<"__builtin_lasx_xvssrlrni_wu_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrlrni_du_q : GCCBuiltin<"__builtin_lasx_xvssrlrni_du_q">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsrarni_b_h : GCCBuiltin<"__builtin_lasx_xvsrarni_b_h">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrarni_h_w : GCCBuiltin<"__builtin_lasx_xvsrarni_h_w">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrarni_w_d : GCCBuiltin<"__builtin_lasx_xvsrarni_w_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrarni_d_q : GCCBuiltin<"__builtin_lasx_xvsrarni_d_q">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvssrani_b_h : GCCBuiltin<"__builtin_lasx_xvssrani_b_h">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrani_h_w : GCCBuiltin<"__builtin_lasx_xvssrani_h_w">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrani_w_d : GCCBuiltin<"__builtin_lasx_xvssrani_w_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrani_d_q : GCCBuiltin<"__builtin_lasx_xvssrani_d_q">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvssrani_bu_h : GCCBuiltin<"__builtin_lasx_xvssrani_bu_h">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrani_hu_w : GCCBuiltin<"__builtin_lasx_xvssrani_hu_w">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrani_wu_d : GCCBuiltin<"__builtin_lasx_xvssrani_wu_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrani_du_q : GCCBuiltin<"__builtin_lasx_xvssrani_du_q">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvssrarni_b_h : GCCBuiltin<"__builtin_lasx_xvssrarni_b_h">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrarni_h_w : GCCBuiltin<"__builtin_lasx_xvssrarni_h_w">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrarni_w_d : GCCBuiltin<"__builtin_lasx_xvssrarni_w_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrarni_d_q : GCCBuiltin<"__builtin_lasx_xvssrarni_d_q">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvssrarni_bu_h : GCCBuiltin<"__builtin_lasx_xvssrarni_bu_h">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrarni_hu_w : GCCBuiltin<"__builtin_lasx_xvssrarni_hu_w">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrarni_wu_d : GCCBuiltin<"__builtin_lasx_xvssrarni_wu_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrarni_du_q : GCCBuiltin<"__builtin_lasx_xvssrarni_du_q">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvssrlni_bu_h : GCCBuiltin<"__builtin_lasx_xvssrlni_bu_h">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrlni_hu_w : GCCBuiltin<"__builtin_lasx_xvssrlni_hu_w">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrlni_wu_d : GCCBuiltin<"__builtin_lasx_xvssrlni_wu_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrlni_du_q : GCCBuiltin<"__builtin_lasx_xvssrlni_du_q">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvseq_b : GCCBuiltin<"__builtin_lasx_xvseq_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvseq_h : GCCBuiltin<"__builtin_lasx_xvseq_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvseq_w : GCCBuiltin<"__builtin_lasx_xvseq_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvseq_d : GCCBuiltin<"__builtin_lasx_xvseq_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsle_b : GCCBuiltin<"__builtin_lasx_xvsle_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsle_h : GCCBuiltin<"__builtin_lasx_xvsle_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsle_w : GCCBuiltin<"__builtin_lasx_xvsle_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsle_d : GCCBuiltin<"__builtin_lasx_xvsle_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsle_bu : GCCBuiltin<"__builtin_lasx_xvsle_bu">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsle_hu : GCCBuiltin<"__builtin_lasx_xvsle_hu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsle_wu : GCCBuiltin<"__builtin_lasx_xvsle_wu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsle_du : GCCBuiltin<"__builtin_lasx_xvsle_du">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvslt_b : GCCBuiltin<"__builtin_lasx_xvslt_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvslt_h : GCCBuiltin<"__builtin_lasx_xvslt_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvslt_w : GCCBuiltin<"__builtin_lasx_xvslt_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvslt_d : GCCBuiltin<"__builtin_lasx_xvslt_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvslt_bu : GCCBuiltin<"__builtin_lasx_xvslt_bu">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvslt_hu : GCCBuiltin<"__builtin_lasx_xvslt_hu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvslt_wu : GCCBuiltin<"__builtin_lasx_xvslt_wu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvslt_du : GCCBuiltin<"__builtin_lasx_xvslt_du">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvadd_b : GCCBuiltin<"__builtin_lasx_xvadd_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvadd_h : GCCBuiltin<"__builtin_lasx_xvadd_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvadd_w : GCCBuiltin<"__builtin_lasx_xvadd_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvadd_d : GCCBuiltin<"__builtin_lasx_xvadd_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
+  [Commutative, IntrNoMem]>;
+
+def int_loongarch_lasx_xvsub_b : GCCBuiltin<"__builtin_lasx_xvsub_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsub_h : GCCBuiltin<"__builtin_lasx_xvsub_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsub_w : GCCBuiltin<"__builtin_lasx_xvsub_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsub_d : GCCBuiltin<"__builtin_lasx_xvsub_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
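+// Only the order-insensitive operations (xvadd, xvsadd, xvavg, xvadda, ...)
+// are tagged Commutative, which lets the optimizer canonicalize operand
+// order; xvsub/xvssub are deliberately left without it.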
+
+def int_loongarch_lasx_xvmax_b : GCCBuiltin<"__builtin_lasx_xvmax_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmax_h : GCCBuiltin<"__builtin_lasx_xvmax_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmax_w : GCCBuiltin<"__builtin_lasx_xvmax_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmax_d : GCCBuiltin<"__builtin_lasx_xvmax_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvmin_b : GCCBuiltin<"__builtin_lasx_xvmin_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmin_h : GCCBuiltin<"__builtin_lasx_xvmin_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmin_w : GCCBuiltin<"__builtin_lasx_xvmin_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmin_d : GCCBuiltin<"__builtin_lasx_xvmin_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvmax_bu : GCCBuiltin<"__builtin_lasx_xvmax_bu">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmax_hu : GCCBuiltin<"__builtin_lasx_xvmax_hu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmax_wu : GCCBuiltin<"__builtin_lasx_xvmax_wu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmax_du : GCCBuiltin<"__builtin_lasx_xvmax_du">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvmin_bu : GCCBuiltin<"__builtin_lasx_xvmin_bu">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmin_hu : GCCBuiltin<"__builtin_lasx_xvmin_hu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmin_wu : GCCBuiltin<"__builtin_lasx_xvmin_wu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmin_du : GCCBuiltin<"__builtin_lasx_xvmin_du">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvmul_b : GCCBuiltin<"__builtin_lasx_xvmul_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmul_h : GCCBuiltin<"__builtin_lasx_xvmul_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmul_w : GCCBuiltin<"__builtin_lasx_xvmul_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmul_d : GCCBuiltin<"__builtin_lasx_xvmul_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvmadd_b : GCCBuiltin<"__builtin_lasx_xvmadd_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty],
+  [IntrNoMem]>;
+def int_loongarch_lasx_xvmadd_h : GCCBuiltin<"__builtin_lasx_xvmadd_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty],
+  [IntrNoMem]>;
+def int_loongarch_lasx_xvmadd_w : GCCBuiltin<"__builtin_lasx_xvmadd_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty],
+  [IntrNoMem]>;
+def int_loongarch_lasx_xvmadd_d : GCCBuiltin<"__builtin_lasx_xvmadd_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty],
+  [IntrNoMem]>;
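+// The xvmadd/xvmsub definitions take three vector operands; the first acts
+// as the accumulator that is updated by the product of the other two, which
+// is why the intrinsic both consumes and returns a value of that type.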
+
+def int_loongarch_lasx_xvmsub_b : GCCBuiltin<"__builtin_lasx_xvmsub_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty],
+  [IntrNoMem]>;
+def int_loongarch_lasx_xvmsub_h : GCCBuiltin<"__builtin_lasx_xvmsub_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty],
+  [IntrNoMem]>;
+def int_loongarch_lasx_xvmsub_w : GCCBuiltin<"__builtin_lasx_xvmsub_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty],
+  [IntrNoMem]>;
+def int_loongarch_lasx_xvmsub_d : GCCBuiltin<"__builtin_lasx_xvmsub_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty],
+  [IntrNoMem]>;
+
+def int_loongarch_lasx_xvdiv_b : GCCBuiltin<"__builtin_lasx_xvdiv_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvdiv_h : GCCBuiltin<"__builtin_lasx_xvdiv_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvdiv_w : GCCBuiltin<"__builtin_lasx_xvdiv_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvdiv_d : GCCBuiltin<"__builtin_lasx_xvdiv_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvmod_b : GCCBuiltin<"__builtin_lasx_xvmod_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmod_h : GCCBuiltin<"__builtin_lasx_xvmod_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmod_w : GCCBuiltin<"__builtin_lasx_xvmod_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmod_d : GCCBuiltin<"__builtin_lasx_xvmod_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvdiv_bu : GCCBuiltin<"__builtin_lasx_xvdiv_bu">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvdiv_hu : GCCBuiltin<"__builtin_lasx_xvdiv_hu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvdiv_wu : GCCBuiltin<"__builtin_lasx_xvdiv_wu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvdiv_du : GCCBuiltin<"__builtin_lasx_xvdiv_du">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsll_b : GCCBuiltin<"__builtin_lasx_xvsll_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsll_h : GCCBuiltin<"__builtin_lasx_xvsll_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsll_w : GCCBuiltin<"__builtin_lasx_xvsll_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsll_d : GCCBuiltin<"__builtin_lasx_xvsll_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsrl_b : GCCBuiltin<"__builtin_lasx_xvsrl_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrl_h : GCCBuiltin<"__builtin_lasx_xvsrl_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrl_w : GCCBuiltin<"__builtin_lasx_xvsrl_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrl_d : GCCBuiltin<"__builtin_lasx_xvsrl_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
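+// In the register-shift forms (xvsll/xvsrl/xvsra, and the rounding
+// xvsrlr/xvsrar above), each lane of the second operand supplies the shift
+// amount for the corresponding lane of the first, taken modulo the element
+// width.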
+
+def int_loongarch_lasx_xvbitclr_b : GCCBuiltin<"__builtin_lasx_xvbitclr_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvbitclr_h : GCCBuiltin<"__builtin_lasx_xvbitclr_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvbitclr_w : GCCBuiltin<"__builtin_lasx_xvbitclr_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvbitclr_d : GCCBuiltin<"__builtin_lasx_xvbitclr_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvbitset_b : GCCBuiltin<"__builtin_lasx_xvbitset_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvbitset_h : GCCBuiltin<"__builtin_lasx_xvbitset_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvbitset_w : GCCBuiltin<"__builtin_lasx_xvbitset_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvbitset_d : GCCBuiltin<"__builtin_lasx_xvbitset_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvpackev_b : GCCBuiltin<"__builtin_lasx_xvpackev_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvpackev_h : GCCBuiltin<"__builtin_lasx_xvpackev_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvpackev_w : GCCBuiltin<"__builtin_lasx_xvpackev_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvpackev_d : GCCBuiltin<"__builtin_lasx_xvpackev_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvpackod_b : GCCBuiltin<"__builtin_lasx_xvpackod_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvpackod_h : GCCBuiltin<"__builtin_lasx_xvpackod_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvpackod_w : GCCBuiltin<"__builtin_lasx_xvpackod_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvpackod_d : GCCBuiltin<"__builtin_lasx_xvpackod_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvilvl_b : GCCBuiltin<"__builtin_lasx_xvilvl_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvilvl_h : GCCBuiltin<"__builtin_lasx_xvilvl_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvilvl_w : GCCBuiltin<"__builtin_lasx_xvilvl_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvilvl_d : GCCBuiltin<"__builtin_lasx_xvilvl_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvilvh_b : GCCBuiltin<"__builtin_lasx_xvilvh_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvilvh_h : GCCBuiltin<"__builtin_lasx_xvilvh_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvilvh_w : GCCBuiltin<"__builtin_lasx_xvilvh_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvilvh_d : GCCBuiltin<"__builtin_lasx_xvilvh_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvpickev_b : GCCBuiltin<"__builtin_lasx_xvpickev_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvpickev_h : GCCBuiltin<"__builtin_lasx_xvpickev_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvpickev_w : GCCBuiltin<"__builtin_lasx_xvpickev_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvpickev_d : GCCBuiltin<"__builtin_lasx_xvpickev_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvand_v : GCCBuiltin<"__builtin_lasx_xvand_v">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvor_v : GCCBuiltin<"__builtin_lasx_xvor_v">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvbitrev_b : GCCBuiltin<"__builtin_lasx_xvbitrev_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvbitrev_h : GCCBuiltin<"__builtin_lasx_xvbitrev_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvbitrev_w : GCCBuiltin<"__builtin_lasx_xvbitrev_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvbitrev_d : GCCBuiltin<"__builtin_lasx_xvbitrev_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvmod_bu : GCCBuiltin<"__builtin_lasx_xvmod_bu">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmod_hu : GCCBuiltin<"__builtin_lasx_xvmod_hu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmod_wu : GCCBuiltin<"__builtin_lasx_xvmod_wu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmod_du : GCCBuiltin<"__builtin_lasx_xvmod_du">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvpickod_b : GCCBuiltin<"__builtin_lasx_xvpickod_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvpickod_h : GCCBuiltin<"__builtin_lasx_xvpickod_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvpickod_w : GCCBuiltin<"__builtin_lasx_xvpickod_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvpickod_d : GCCBuiltin<"__builtin_lasx_xvpickod_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvreplve_b : GCCBuiltin<"__builtin_lasx_xvreplve_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvreplve_h : GCCBuiltin<"__builtin_lasx_xvreplve_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvreplve_w : GCCBuiltin<"__builtin_lasx_xvreplve_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvreplve_d : GCCBuiltin<"__builtin_lasx_xvreplve_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsra_b : GCCBuiltin<"__builtin_lasx_xvsra_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsra_h : GCCBuiltin<"__builtin_lasx_xvsra_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsra_w : GCCBuiltin<"__builtin_lasx_xvsra_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsra_d : GCCBuiltin<"__builtin_lasx_xvsra_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvxor_v : GCCBuiltin<"__builtin_lasx_xvxor_v">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvnor_v : GCCBuiltin<"__builtin_lasx_xvnor_v">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfadd_s : GCCBuiltin<"__builtin_lasx_xvfadd_s">,
+  Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfadd_d : GCCBuiltin<"__builtin_lasx_xvfadd_d">,
+  Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfsub_s : GCCBuiltin<"__builtin_lasx_xvfsub_s">,
+  Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfsub_d : GCCBuiltin<"__builtin_lasx_xvfsub_d">,
+  Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfmul_s : GCCBuiltin<"__builtin_lasx_xvfmul_s">,
+  Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfmul_d : GCCBuiltin<"__builtin_lasx_xvfmul_d">,
+  Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvshuf_h : GCCBuiltin<"__builtin_lasx_xvshuf_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty],
+  [IntrNoMem]>;
+def int_loongarch_lasx_xvshuf_w : GCCBuiltin<"__builtin_lasx_xvshuf_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty],
+  [IntrNoMem]>;
+def int_loongarch_lasx_xvshuf_d : GCCBuiltin<"__builtin_lasx_xvshuf_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty],
+  [IntrNoMem]>;
+
+def int_loongarch_lasx_xvseqi_b : GCCBuiltin<"__builtin_lasx_xvseqi_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvseqi_h : GCCBuiltin<"__builtin_lasx_xvseqi_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvseqi_w : GCCBuiltin<"__builtin_lasx_xvseqi_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvseqi_d : GCCBuiltin<"__builtin_lasx_xvseqi_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvslei_b : GCCBuiltin<"__builtin_lasx_xvslei_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvslei_h : GCCBuiltin<"__builtin_lasx_xvslei_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvslei_w : GCCBuiltin<"__builtin_lasx_xvslei_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvslei_d : GCCBuiltin<"__builtin_lasx_xvslei_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
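+// The vector compares (xvseq/xvsle/xvslt and the immediate _i forms) yield
+// per-element masks: an element is set to all ones where the predicate
+// holds and to all zeros where it does not.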
+
+def int_loongarch_lasx_xvslei_bu : GCCBuiltin<"__builtin_lasx_xvslei_bu">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvslei_hu : GCCBuiltin<"__builtin_lasx_xvslei_hu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvslei_wu : GCCBuiltin<"__builtin_lasx_xvslei_wu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvslei_du : GCCBuiltin<"__builtin_lasx_xvslei_du">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvslti_b : GCCBuiltin<"__builtin_lasx_xvslti_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvslti_h : GCCBuiltin<"__builtin_lasx_xvslti_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvslti_w : GCCBuiltin<"__builtin_lasx_xvslti_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvslti_d : GCCBuiltin<"__builtin_lasx_xvslti_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvslti_bu : GCCBuiltin<"__builtin_lasx_xvslti_bu">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvslti_hu : GCCBuiltin<"__builtin_lasx_xvslti_hu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvslti_wu : GCCBuiltin<"__builtin_lasx_xvslti_wu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvslti_du : GCCBuiltin<"__builtin_lasx_xvslti_du">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvaddi_bu : GCCBuiltin<"__builtin_lasx_xvaddi_bu">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvaddi_hu : GCCBuiltin<"__builtin_lasx_xvaddi_hu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvaddi_wu : GCCBuiltin<"__builtin_lasx_xvaddi_wu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty],
+  [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvaddi_du : GCCBuiltin<"__builtin_lasx_xvaddi_du">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty],
+  [Commutative, IntrNoMem]>;
+
+def int_loongarch_lasx_xvsubi_bu : GCCBuiltin<"__builtin_lasx_xvsubi_bu">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsubi_hu : GCCBuiltin<"__builtin_lasx_xvsubi_hu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsubi_wu : GCCBuiltin<"__builtin_lasx_xvsubi_wu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsubi_du : GCCBuiltin<"__builtin_lasx_xvsubi_du">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvmaxi_b : GCCBuiltin<"__builtin_lasx_xvmaxi_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmaxi_h : GCCBuiltin<"__builtin_lasx_xvmaxi_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmaxi_w : GCCBuiltin<"__builtin_lasx_xvmaxi_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmaxi_d : GCCBuiltin<"__builtin_lasx_xvmaxi_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvmini_b : GCCBuiltin<"__builtin_lasx_xvmini_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmini_h : GCCBuiltin<"__builtin_lasx_xvmini_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmini_w : GCCBuiltin<"__builtin_lasx_xvmini_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmini_d : GCCBuiltin<"__builtin_lasx_xvmini_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvmaxi_bu : GCCBuiltin<"__builtin_lasx_xvmaxi_bu">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmaxi_hu : GCCBuiltin<"__builtin_lasx_xvmaxi_hu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmaxi_wu : GCCBuiltin<"__builtin_lasx_xvmaxi_wu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmaxi_du : GCCBuiltin<"__builtin_lasx_xvmaxi_du">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvmini_bu : GCCBuiltin<"__builtin_lasx_xvmini_bu">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmini_hu : GCCBuiltin<"__builtin_lasx_xvmini_hu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmini_wu : GCCBuiltin<"__builtin_lasx_xvmini_wu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmini_du : GCCBuiltin<"__builtin_lasx_xvmini_du">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvclz_b : GCCBuiltin<"__builtin_lasx_xvclz_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvclz_h : GCCBuiltin<"__builtin_lasx_xvclz_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvclz_w : GCCBuiltin<"__builtin_lasx_xvclz_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvclz_d : GCCBuiltin<"__builtin_lasx_xvclz_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvpcnt_b : GCCBuiltin<"__builtin_lasx_xvpcnt_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvpcnt_h : GCCBuiltin<"__builtin_lasx_xvpcnt_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvpcnt_w : GCCBuiltin<"__builtin_lasx_xvpcnt_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvpcnt_d : GCCBuiltin<"__builtin_lasx_xvpcnt_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfsqrt_s : GCCBuiltin<"__builtin_lasx_xvfsqrt_s">,
+  Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfsqrt_d : GCCBuiltin<"__builtin_lasx_xvfsqrt_d">,
+  Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfrint_s : GCCBuiltin<"__builtin_lasx_xvfrint_s">,
+  Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfrint_d : GCCBuiltin<"__builtin_lasx_xvfrint_d">,
+  Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvffint_s_w : GCCBuiltin<"__builtin_lasx_xvffint_s_w">,
+  Intrinsic<[llvm_v8f32_ty], [llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvffint_d_l : GCCBuiltin<"__builtin_lasx_xvffint_d_l">,
+  Intrinsic<[llvm_v4f64_ty], [llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvffint_s_wu : GCCBuiltin<"__builtin_lasx_xvffint_s_wu">,
+  Intrinsic<[llvm_v8f32_ty], [llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvffint_d_lu : GCCBuiltin<"__builtin_lasx_xvffint_d_lu">,
+  Intrinsic<[llvm_v4f64_ty], [llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvftintrz_wu_s : GCCBuiltin<"__builtin_lasx_xvftintrz_wu_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvftintrz_lu_d : GCCBuiltin<"__builtin_lasx_xvftintrz_lu_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvreplgr2vr_b : GCCBuiltin<"__builtin_lasx_xvreplgr2vr_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvreplgr2vr_h : GCCBuiltin<"__builtin_lasx_xvreplgr2vr_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvreplgr2vr_w : GCCBuiltin<"__builtin_lasx_xvreplgr2vr_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvreplgr2vr_d : GCCBuiltin<"__builtin_lasx_xvreplgr2vr_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvinsgr2vr_w : GCCBuiltin<"__builtin_lasx_xvinsgr2vr_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty, llvm_i32_ty],
+  [IntrNoMem]>;
+def int_loongarch_lasx_xvinsgr2vr_d : GCCBuiltin<"__builtin_lasx_xvinsgr2vr_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i64_ty, llvm_i32_ty],
+  [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfdiv_s : GCCBuiltin<"__builtin_lasx_xvfdiv_s">,
+  Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfdiv_d : GCCBuiltin<"__builtin_lasx_xvfdiv_d">,
+  Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvslli_b : GCCBuiltin<"__builtin_lasx_xvslli_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvslli_h : GCCBuiltin<"__builtin_lasx_xvslli_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvslli_w : GCCBuiltin<"__builtin_lasx_xvslli_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvslli_d : GCCBuiltin<"__builtin_lasx_xvslli_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsrli_b : GCCBuiltin<"__builtin_lasx_xvsrli_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrli_h : GCCBuiltin<"__builtin_lasx_xvsrli_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrli_w : GCCBuiltin<"__builtin_lasx_xvsrli_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrli_d : GCCBuiltin<"__builtin_lasx_xvsrli_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsrai_b : GCCBuiltin<"__builtin_lasx_xvsrai_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrai_h : GCCBuiltin<"__builtin_lasx_xvsrai_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrai_w : GCCBuiltin<"__builtin_lasx_xvsrai_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrai_d : GCCBuiltin<"__builtin_lasx_xvsrai_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
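+// The general-register transfers follow the LA64 register width: the _d
+// forms of xvreplgr2vr/xvinsgr2vr take an i64 scalar, while the b/h/w forms
+// take i32, matching the GPR view of the inserted or splatted value.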
+
+def int_loongarch_lasx_xvshuf4i_b : GCCBuiltin<"__builtin_lasx_xvshuf4i_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvshuf4i_h : GCCBuiltin<"__builtin_lasx_xvshuf4i_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvshuf4i_w : GCCBuiltin<"__builtin_lasx_xvshuf4i_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvshuf4i_d : GCCBuiltin<"__builtin_lasx_xvshuf4i_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvrotr_b : GCCBuiltin<"__builtin_lasx_xvrotr_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvrotr_h : GCCBuiltin<"__builtin_lasx_xvrotr_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvrotr_w : GCCBuiltin<"__builtin_lasx_xvrotr_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvrotr_d : GCCBuiltin<"__builtin_lasx_xvrotr_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvrotri_b : GCCBuiltin<"__builtin_lasx_xvrotri_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvrotri_h : GCCBuiltin<"__builtin_lasx_xvrotri_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvrotri_w : GCCBuiltin<"__builtin_lasx_xvrotri_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvrotri_d : GCCBuiltin<"__builtin_lasx_xvrotri_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvld : GCCBuiltin<"__builtin_lasx_xvld">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i32_ty],
+  [IntrReadMem, IntrArgMemOnly]>;
+
+def int_loongarch_lasx_xvst : GCCBuiltin<"__builtin_lasx_xvst">,
+  Intrinsic<[], [llvm_v32i8_ty, llvm_ptr_ty, llvm_i32_ty],
+  [IntrArgMemOnly]>;
+
+def int_loongarch_lasx_xvrepl128vei_b : GCCBuiltin<"__builtin_lasx_xvrepl128vei_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvrepl128vei_h : GCCBuiltin<"__builtin_lasx_xvrepl128vei_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvrepl128vei_w : GCCBuiltin<"__builtin_lasx_xvrepl128vei_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvrepl128vei_d : GCCBuiltin<"__builtin_lasx_xvrepl128vei_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvinsve0_w : GCCBuiltin<"__builtin_lasx_xvinsve0_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvinsve0_d : GCCBuiltin<"__builtin_lasx_xvinsve0_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvpickve_w : GCCBuiltin<"__builtin_lasx_xvpickve_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvpickve_d : GCCBuiltin<"__builtin_lasx_xvpickve_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
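+// Unlike the IntrNoMem definitions above, xvld/xvst really access memory and
+// are therefore marked IntrReadMem/IntrArgMemOnly, i.e. they only touch
+// memory reachable through their pointer argument; IntrNoMem operations may
+// be freely CSE'd or reordered by the optimizer.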
+
+def int_loongarch_lasx_xvreplve0_b : GCCBuiltin<"__builtin_lasx_xvreplve0_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvreplve0_h : GCCBuiltin<"__builtin_lasx_xvreplve0_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvreplve0_w : GCCBuiltin<"__builtin_lasx_xvreplve0_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvreplve0_d : GCCBuiltin<"__builtin_lasx_xvreplve0_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvreplve0_q : GCCBuiltin<"__builtin_lasx_xvreplve0_q">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_vext2xv_d_w : GCCBuiltin<"__builtin_lasx_vext2xv_d_w">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_vext2xv_w_h : GCCBuiltin<"__builtin_lasx_vext2xv_w_h">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_vext2xv_h_b : GCCBuiltin<"__builtin_lasx_vext2xv_h_b">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_vext2xv_d_h : GCCBuiltin<"__builtin_lasx_vext2xv_d_h">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_vext2xv_w_b : GCCBuiltin<"__builtin_lasx_vext2xv_w_b">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_vext2xv_d_b : GCCBuiltin<"__builtin_lasx_vext2xv_d_b">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v32i8_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_vext2xv_du_wu : GCCBuiltin<"__builtin_lasx_vext2xv_du_wu">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_vext2xv_wu_hu : GCCBuiltin<"__builtin_lasx_vext2xv_wu_hu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_vext2xv_hu_bu : GCCBuiltin<"__builtin_lasx_vext2xv_hu_bu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_vext2xv_du_hu : GCCBuiltin<"__builtin_lasx_vext2xv_du_hu">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_vext2xv_wu_bu : GCCBuiltin<"__builtin_lasx_vext2xv_wu_bu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_vext2xv_du_bu : GCCBuiltin<"__builtin_lasx_vext2xv_du_bu">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v32i8_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvpermi_q : GCCBuiltin<"__builtin_lasx_xvpermi_q">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvpermi_d : GCCBuiltin<"__builtin_lasx_xvpermi_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvperm_w : GCCBuiltin<"__builtin_lasx_xvperm_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsrlrni_b_h : GCCBuiltin<"__builtin_lasx_xvsrlrni_b_h">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrlrni_h_w : GCCBuiltin<"__builtin_lasx_xvsrlrni_h_w">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrlrni_w_d : GCCBuiltin<"__builtin_lasx_xvsrlrni_w_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrlrni_d_q : GCCBuiltin<"__builtin_lasx_xvsrlrni_d_q">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
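+// The xbz/xbnz intrinsics below are whole-register tests: they collapse an
+// LASX vector into a scalar i32 condition (hence the llvm_i32_ty results)
+// that the backend can fold into the corresponding branch instructions.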
int_loongarch_lasx_xvsrlrni_d_q : GCCBuiltin<"__builtin_lasx_xvsrlrni_d_q">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xbz_v : GCCBuiltin<"__builtin_lasx_xbz_v">, + Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xbnz_v : GCCBuiltin<"__builtin_lasx_xbnz_v">, + Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xbz_b : GCCBuiltin<"__builtin_lasx_xbz_b">, + Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xbz_h : GCCBuiltin<"__builtin_lasx_xbz_h">, + Intrinsic<[llvm_i32_ty], [llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xbz_w : GCCBuiltin<"__builtin_lasx_xbz_w">, + Intrinsic<[llvm_i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xbz_d : GCCBuiltin<"__builtin_lasx_xbz_d">, + Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xbnz_b : GCCBuiltin<"__builtin_lasx_xbnz_b">, + Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xbnz_h : GCCBuiltin<"__builtin_lasx_xbnz_h">, + Intrinsic<[llvm_i32_ty], [llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xbnz_w : GCCBuiltin<"__builtin_lasx_xbnz_w">, + Intrinsic<[llvm_i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xbnz_d : GCCBuiltin<"__builtin_lasx_xbnz_d">, + Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvextl_q_d : GCCBuiltin<"__builtin_lasx_xvextl_q_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvextl_qu_du : GCCBuiltin<"__builtin_lasx_xvextl_qu_du">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; + +//===----------------------------------------------------------------------===// +// LoongArch BASE + +def int_loongarch_cpucfg : GCCBuiltin<"__builtin_loongarch_cpucfg">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; + +def int_loongarch_csrrd : GCCBuiltin<"__builtin_loongarch_csrrd">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; + +def int_loongarch_dcsrrd : GCCBuiltin<"__builtin_loongarch_dcsrrd">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty], []>; + +def int_loongarch_csrwr : GCCBuiltin<"__builtin_loongarch_csrwr">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; + +def int_loongarch_dcsrwr : GCCBuiltin<"__builtin_loongarch_dcsrwr">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], []>; + +def int_loongarch_csrxchg : GCCBuiltin<"__builtin_loongarch_csrxchg">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; + +def int_loongarch_dcsrxchg : GCCBuiltin<"__builtin_loongarch_dcsrxchg">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], []>; + +def int_loongarch_iocsrrd_b : GCCBuiltin<"__builtin_loongarch_iocsrrd_b">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; + +def int_loongarch_iocsrrd_h : GCCBuiltin<"__builtin_loongarch_iocsrrd_h">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; + +def int_loongarch_iocsrrd_w : GCCBuiltin<"__builtin_loongarch_iocsrrd_w">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; + +def int_loongarch_iocsrrd_d : GCCBuiltin<"__builtin_loongarch_iocsrrd_d">, + Intrinsic<[llvm_i64_ty], [llvm_i32_ty], []>; + +def int_loongarch_iocsrwr_b : GCCBuiltin<"__builtin_loongarch_iocsrwr_b">, + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], []>; + +def int_loongarch_iocsrwr_h : GCCBuiltin<"__builtin_loongarch_iocsrwr_h">, + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], []>; + +def int_loongarch_iocsrwr_w : 
GCCBuiltin<"__builtin_loongarch_iocsrwr_w">, + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], []>; + +def int_loongarch_iocsrwr_d : GCCBuiltin<"__builtin_loongarch_iocsrwr_d">, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty], []>; + +def int_loongarch_cacop : GCCBuiltin<"__builtin_loongarch_cacop">, + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; + +def int_loongarch_dcacop : GCCBuiltin<"__builtin_loongarch_dcacop">, + Intrinsic<[], [llvm_i32_ty, llvm_i64_ty, llvm_i64_ty], []>; + +def int_loongarch_crc_w_b_w : GCCBuiltin<"__builtin_loongarch_crc_w_b_w">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; + +def int_loongarch_crc_w_h_w : GCCBuiltin<"__builtin_loongarch_crc_w_h_w">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; + +def int_loongarch_crc_w_w_w : GCCBuiltin<"__builtin_loongarch_crc_w_w_w">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; + +def int_loongarch_crc_w_d_w : GCCBuiltin<"__builtin_loongarch_crc_w_d_w">, + Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty], []>; + +def int_loongarch_crcc_w_b_w : GCCBuiltin<"__builtin_loongarch_crcc_w_b_w">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; + +def int_loongarch_crcc_w_h_w : GCCBuiltin<"__builtin_loongarch_crcc_w_h_w">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; + +def int_loongarch_crcc_w_w_w : GCCBuiltin<"__builtin_loongarch_crcc_w_w_w">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; + +def int_loongarch_crcc_w_d_w : GCCBuiltin<"__builtin_loongarch_crcc_w_d_w">, + Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty], []>; + +def int_loongarch_tlbclr : GCCBuiltin<"__builtin_loongarch_tlbclr">, + Intrinsic<[], [], []>; + +def int_loongarch_tlbflush : GCCBuiltin<"__builtin_loongarch_tlbflush">, + Intrinsic<[], [], []>; + +def int_loongarch_tlbfill : GCCBuiltin<"__builtin_loongarch_tlbfill">, + Intrinsic<[], [], []>; + +def int_loongarch_tlbrd : GCCBuiltin<"__builtin_loongarch_tlbrd">, + Intrinsic<[], [], []>; + +def int_loongarch_tlbwr : GCCBuiltin<"__builtin_loongarch_tlbwr">, + Intrinsic<[], [], []>; + +def int_loongarch_tlbsrch : GCCBuiltin<"__builtin_loongarch_tlbsrch">, + Intrinsic<[], [], []>; + +def int_loongarch_syscall : GCCBuiltin<"__builtin_loongarch_syscall">, + Intrinsic<[], [llvm_i64_ty], []>; + +def int_loongarch_break : GCCBuiltin<"__builtin_loongarch_break">, + Intrinsic<[], [llvm_i64_ty], []>; + +def int_loongarch_asrtle_d : GCCBuiltin<"__builtin_loongarch_asrtle_d">, + Intrinsic<[], [llvm_i64_ty, llvm_i64_ty], []>; + +def int_loongarch_asrtgt_d : GCCBuiltin<"__builtin_loongarch_asrtgt_d">, + Intrinsic<[], [llvm_i64_ty, llvm_i64_ty], []>; + +def int_loongarch_dbar : GCCBuiltin<"__builtin_loongarch_dbar">, + Intrinsic<[], [llvm_i64_ty], []>; + +def int_loongarch_ibar : GCCBuiltin<"__builtin_loongarch_ibar">, + Intrinsic<[], [llvm_i64_ty], []>; + +} diff --git a/lib/Target/LoongArch/AsmParser/CMakeLists.txt b/lib/Target/LoongArch/AsmParser/CMakeLists.txt new file mode 100644 index 00000000..cb8b768d --- /dev/null +++ b/lib/Target/LoongArch/AsmParser/CMakeLists.txt @@ -0,0 +1,13 @@ +add_llvm_component_library(LLVMLoongArchAsmParser + LoongArchAsmParser.cpp + + LINK_COMPONENTS + MC + MCParser + LoongArchDesc + LoongArchInfo + Support + + ADD_TO_COMPONENT + LoongArch + ) diff --git a/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp new file mode 100644 index 00000000..2cb6cba5 --- /dev/null +++ b/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp @@ -0,0 +1,2269 @@ 
+//===-- LoongArchAsmParser.cpp - Parse LoongArch assembly to MCInst ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchTargetStreamer.h"
+#include "MCTargetDesc/LoongArchABIInfo.h"
+#include "MCTargetDesc/LoongArchAnalyzeImmediate.h"
+#include "MCTargetDesc/LoongArchBaseInfo.h"
+#include "MCTargetDesc/LoongArchMCExpr.h"
+#include "MCTargetDesc/LoongArchMCTargetDesc.h"
+#include "TargetInfo/LoongArchTargetInfo.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCParser/MCTargetAsmParser.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "loongarch-asm-parser"
+
+namespace llvm {
+
+class MCInstrInfo;
+
+} // end namespace llvm
+
+namespace {
+
+class LoongArchAssemblerOptions {
+public:
+  LoongArchAssemblerOptions(const FeatureBitset &Features_)
+      : Features(Features_) {}
+
+  LoongArchAssemblerOptions(const LoongArchAssemblerOptions *Opts) {
+    Features = Opts->getFeatures();
+  }
+
+  const FeatureBitset &getFeatures() const { return Features; }
+  void setFeatures(const FeatureBitset &Features_) { Features = Features_; }
+
+private:
+  FeatureBitset Features;
+};
+
+} // end anonymous namespace
+
+namespace {
+
+class LoongArchAsmParser : public MCTargetAsmParser {
+  LoongArchTargetStreamer &getTargetStreamer() {
+    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
+    return static_cast<LoongArchTargetStreamer &>(TS);
+  }
+
+  LoongArchABIInfo ABI;
+  SmallVector<std::unique_ptr<LoongArchAssemblerOptions>, 2> AssemblerOptions;
+  MCSymbol *CurrentFn; // Pointer to the function being parsed. It may be a
+                       // nullptr, which indicates that no function is currently
+                       // selected. This usually happens after an '.end'
+                       // directive.
+  bool IsPicEnabled;
+
+  // Map of register aliases created via the .set directive.
+  StringMap<AsmToken> RegisterSets;
+
+#define GET_ASSEMBLER_HEADER
+#include "LoongArchGenAsmMatcher.inc"
+
+  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
+
+  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+                               OperandVector &Operands, MCStreamer &Out,
+                               uint64_t &ErrorInfo,
+                               bool MatchingInlineAsm) override;
+
+  /// Parse a register as used in CFI directives
+  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
+  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+                                        SMLoc &EndLoc) override;
+
+  bool mnemonicIsValid(StringRef Mnemonic);
+
+  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+                        SMLoc NameLoc, OperandVector &Operands) override;
+
+  bool ParseDirective(AsmToken DirectiveID) override;
+
+  OperandMatchResultTy parseMemOperand(OperandVector &Operands);
+  OperandMatchResultTy parseAMemOperand(OperandVector &Operands);
+  OperandMatchResultTy
+  matchAnyRegisterNameWithoutDollar(OperandVector &Operands,
+                                    StringRef Identifier, SMLoc S);
+  OperandMatchResultTy matchAnyRegisterWithoutDollar(OperandVector &Operands,
+                                                     const AsmToken &Token,
+                                                     SMLoc S);
+  OperandMatchResultTy matchAnyRegisterWithoutDollar(OperandVector &Operands,
+                                                     SMLoc S);
+  OperandMatchResultTy parseAnyRegister(OperandVector &Operands);
+  OperandMatchResultTy parseJumpTarget(OperandVector &Operands);
+
+  bool searchSymbolAlias(OperandVector &Operands);
+
+  bool parseOperand(OperandVector &, StringRef Mnemonic);
+
+  enum MacroExpanderResultTy {
+    MER_NotAMacro,
+    MER_Success,
+    MER_Fail,
+  };
+
+  // Expands assembly pseudo instructions.
+  MacroExpanderResultTy tryExpandInstruction(MCInst &Inst, SMLoc IDLoc,
+                                             MCStreamer &Out,
+                                             const MCSubtargetInfo *STI);
+
+  bool expandLoadImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+                     const MCSubtargetInfo *STI);
+
+  bool expandLoadAddress(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+                         const MCSubtargetInfo *STI);
+
+  bool reportParseError(Twine ErrorMsg);
+
+  bool parseMemOffset(const MCExpr *&Res);
+
+  bool isEvaluated(const MCExpr *Expr);
+  bool parseDirectiveSet();
+
+  bool parseSetAssignment();
+
+  bool parseInternalDirectiveReallowModule();
+
+  int matchCPURegisterName(StringRef Symbol);
+
+  int matchFPURegisterName(StringRef Name);
+
+  int matchFCFRRegisterName(StringRef Name);
+  int matchFCSRRegisterName(StringRef Name);
+
+  int matchLSX128RegisterName(StringRef Name);
+
+  int matchLASX256RegisterName(StringRef Name);
+
+  bool processInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+                          const MCSubtargetInfo *STI);
+
+  // Helper function that checks if the value of a vector index is within the
+  // boundaries of accepted values for each RegisterKind.
+  // Example: vinsgr2vr.b $vr0, $a0, n requires 0 <= n < 16.
+  bool validateLSXIndex(int Val, int RegKind);
+
+  void setFeatureBits(uint64_t Feature, StringRef FeatureString) {
+    if (!(getSTI().getFeatureBits()[Feature])) {
+      MCSubtargetInfo &STI = copySTI();
+      setAvailableFeatures(
+          ComputeAvailableFeatures(STI.ToggleFeature(FeatureString)));
+      AssemblerOptions.back()->setFeatures(STI.getFeatureBits());
+    }
+  }
+
+  void clearFeatureBits(uint64_t Feature, StringRef FeatureString) {
+    if (getSTI().getFeatureBits()[Feature]) {
+      MCSubtargetInfo &STI = copySTI();
+      setAvailableFeatures(
+          ComputeAvailableFeatures(STI.ToggleFeature(FeatureString)));
+      AssemblerOptions.back()->setFeatures(STI.getFeatureBits());
+    }
+  }
+
+  void setModuleFeatureBits(uint64_t Feature, StringRef FeatureString) {
+    setFeatureBits(Feature, FeatureString);
+    AssemblerOptions.front()->setFeatures(getSTI().getFeatureBits());
+  }
+
+  void clearModuleFeatureBits(uint64_t Feature, StringRef FeatureString) {
+    clearFeatureBits(Feature, FeatureString);
+    AssemblerOptions.front()->setFeatures(getSTI().getFeatureBits());
+  }
+
+public:
+  enum LoongArchMatchResultTy {
+    Match_RequiresNoZeroRegister = FIRST_TARGET_MATCH_RESULT_TY,
+    Match_RequiresNoRaRegister,
+    Match_RequiresRange0_31,
+    Match_RequiresRange0_63,
+    Match_MsbHigherThanLsb,
+    Match_RequiresPosSizeUImm6,
+#define GET_OPERAND_DIAGNOSTIC_TYPES
+#include "LoongArchGenAsmMatcher.inc"
+#undef GET_OPERAND_DIAGNOSTIC_TYPES
+  };
+
+  LoongArchAsmParser(const MCSubtargetInfo &sti, MCAsmParser &parser,
+                     const MCInstrInfo &MII, const MCTargetOptions &Options)
+      : MCTargetAsmParser(Options, sti, MII),
+        ABI(LoongArchABIInfo::computeTargetABI(Triple(sti.getTargetTriple()),
+                                               sti.getCPU(), Options)) {
+    MCAsmParserExtension::Initialize(parser);
+
+    parser.addAliasForDirective(".asciiz", ".asciz");
+    parser.addAliasForDirective(".hword", ".2byte");
+    parser.addAliasForDirective(".word", ".4byte");
+    parser.addAliasForDirective(".dword", ".8byte");
+
+    // Initialize the set of available features.
+    setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
+
+    // Remember the initial assembler options. The user can not modify these.
+    AssemblerOptions.push_back(
+        std::make_unique<LoongArchAssemblerOptions>(getSTI().getFeatureBits()));
+
+    // Create an assembler options environment for the user to modify.
+    AssemblerOptions.push_back(
+        std::make_unique<LoongArchAssemblerOptions>(getSTI().getFeatureBits()));
+
+    getTargetStreamer().updateABIInfo(*this);
+
+    CurrentFn = nullptr;
+
+    IsPicEnabled = getContext().getObjectFileInfo()->isPositionIndependent();
+  }
+
+  bool is64Bit() const {
+    return getSTI().getFeatureBits()[LoongArch::Feature64Bit];
+  }
+
+  const LoongArchABIInfo &getABI() const { return ABI; }
+  bool isABI_LP64D() const { return ABI.IsLP64D(); }
+  bool isABI_LP64S() const { return ABI.IsLP64S(); }
+  bool isABI_LP64F() const { return ABI.IsLP64F(); }
+  bool isABI_ILP32D() const { return ABI.IsILP32D(); }
+  bool isABI_ILP32F() const { return ABI.IsILP32F(); }
+  bool isABI_ILP32S() const { return ABI.IsILP32S(); }
+
+  bool hasLSX() const {
+    return getSTI().getFeatureBits()[LoongArch::FeatureLSX];
+  }
+
+  bool hasLASX() const {
+    return getSTI().getFeatureBits()[LoongArch::FeatureLASX];
+  }
+
+  bool inPicMode() {
+    return IsPicEnabled;
+  }
+
+  const MCExpr *createTargetUnaryExpr(const MCExpr *E,
+                                      AsmToken::TokenKind OperatorToken,
+                                      MCContext &Ctx) override {
+    switch (OperatorToken) {
+    default:
+      llvm_unreachable("Unknown token");
+      return nullptr;
+#if 0
+    case AsmToken::PercentPlt:
+      return LoongArchMCExpr::create(LoongArchMCExpr::MEK_PLT, E, Ctx);
+#endif
+    }
+  }
+};
+
+/// LoongArchOperand - Instances of this class represent a parsed LoongArch
+/// machine instruction.
+class LoongArchOperand : public MCParsedAsmOperand {
+public:
+  /// Broad categories of register classes
+  /// The exact class is finalized by the render method.
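+  /// For example, a parsed register operand carries the set of kind bits it
+  /// could still be (a numeric register like $1 starts with all bits set,
+  /// RegKind_Numeric below) and is only pinned to a concrete register class
+  /// by the render method of whichever instruction matches. A sketch of the
+  /// two outcomes, assuming the render methods defined further down:
+  ///
+  ///   add.w  $a0, $a1, $a2    // rendered via getGPR32Reg()  -> GPR32
+  ///   vadd.b $vr0, $vr1, $vr2 // rendered via getLSX128Reg() -> LSX128B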
+  enum RegKind {
+    RegKind_GPR = 1,      /// GPR32 and GPR64 (depending on is64Bit())
+    RegKind_FGR = 2,      /// FGR32, FGR64 (depending on hasBasicD())
+    RegKind_FCFR = 4,     /// FCFR
+    RegKind_FCSR = 8,     /// FCSR
+    RegKind_LSX128 = 16,  /// LSX128[BHWD] (makes no difference which)
+    RegKind_LASX256 = 32, /// LASX256[BHWD] (makes no difference which)
+    RegKind_Numeric = RegKind_GPR | RegKind_FGR | RegKind_FCFR | RegKind_FCSR |
+                      RegKind_LSX128 | RegKind_LASX256
+  };
+
+private:
+  enum KindTy {
+    k_Immediate,     /// An immediate (possibly involving symbol references)
+    k_Memory,        /// Base + Offset Memory Address
+    k_RegisterIndex, /// A register index in one or more RegKind.
+    k_Token,         /// A simple token
+    k_RegList,       /// A physical register list
+  } Kind;
+
+public:
+  LoongArchOperand(KindTy K, LoongArchAsmParser &Parser)
+      : MCParsedAsmOperand(), Kind(K), AsmParser(Parser) {}
+
+  ~LoongArchOperand() override {
+    switch (Kind) {
+    case k_Memory:
+      delete Mem.Base;
+      break;
+    case k_RegList:
+      delete RegList.List;
+      break;
+    case k_Immediate:
+    case k_RegisterIndex:
+    case k_Token:
+      break;
+    }
+  }
+
+private:
+  /// For diagnostics, and checking the assembler temporary
+  LoongArchAsmParser &AsmParser;
+
+  struct Token {
+    const char *Data;
+    unsigned Length;
+  };
+
+  struct RegIdxOp {
+    unsigned Index;   /// Index into the register class
+    RegKind Kind;     /// Bitfield of the kinds it could possibly be
+    struct Token Tok; /// The input token this operand originated from.
+    const MCRegisterInfo *RegInfo;
+  };
+
+  struct ImmOp {
+    const MCExpr *Val;
+  };
+
+  struct MemOp {
+    LoongArchOperand *Base;
+    const MCExpr *Off;
+  };
+
+  struct RegListOp {
+    SmallVector<unsigned, 10> *List;
+  };
+
+  union {
+    struct Token Tok;
+    struct RegIdxOp RegIdx;
+    struct ImmOp Imm;
+    struct MemOp Mem;
+    struct RegListOp RegList;
+  };
+
+  SMLoc StartLoc, EndLoc;
+
+  /// Internal constructor for register kinds
+  static std::unique_ptr<LoongArchOperand>
+  CreateReg(unsigned Index, StringRef Str, RegKind RegKind,
+            const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E,
+            LoongArchAsmParser &Parser) {
+    auto Op = std::make_unique<LoongArchOperand>(k_RegisterIndex, Parser);
+    Op->RegIdx.Index = Index;
+    Op->RegIdx.RegInfo = RegInfo;
+    Op->RegIdx.Kind = RegKind;
+    Op->RegIdx.Tok.Data = Str.data();
+    Op->RegIdx.Tok.Length = Str.size();
+    Op->StartLoc = S;
+    Op->EndLoc = E;
+    return Op;
+  }
+
+public:
+  /// Coerce the register to GPR32 and return the real register for the current
+  /// target.
+  unsigned getGPR32Reg() const {
+    assert(isRegIdx() && (RegIdx.Kind & RegKind_GPR) && "Invalid access!");
+    unsigned ClassID = LoongArch::GPR32RegClassID;
+    return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index);
+  }
+
+  /// Coerce the register to GPR32 and return the real register for the current
+  /// target.
+  unsigned getGPRMM16Reg() const {
+    assert(isRegIdx() && (RegIdx.Kind & RegKind_GPR) && "Invalid access!");
+    unsigned ClassID = LoongArch::GPR32RegClassID;
+    return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index);
+  }
+
+  /// Coerce the register to GPR64 and return the real register for the current
+  /// target.
+  unsigned getGPR64Reg() const {
+    assert(isRegIdx() && (RegIdx.Kind & RegKind_GPR) && "Invalid access!");
+    unsigned ClassID = LoongArch::GPR64RegClassID;
+    return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index);
+  }
+
+private:
+  /// Coerce the register to FGR64 and return the real register for the current
+  /// target.
+  unsigned getFGR64Reg() const {
+    assert(isRegIdx() && (RegIdx.Kind & RegKind_FGR) && "Invalid access!");
+    return RegIdx.RegInfo->getRegClass(LoongArch::FGR64RegClassID)
+        .getRegister(RegIdx.Index);
+  }
+
+  /// Coerce the register to FGR32 and return the real register for the current
+  /// target.
+  unsigned getFGR32Reg() const {
+    assert(isRegIdx() && (RegIdx.Kind & RegKind_FGR) && "Invalid access!");
+    return RegIdx.RegInfo->getRegClass(LoongArch::FGR32RegClassID)
+        .getRegister(RegIdx.Index);
+  }
+
+  /// Coerce the register to FCFR and return the real register for the current
+  /// target.
+  unsigned getFCFRReg() const {
+    assert(isRegIdx() && (RegIdx.Kind & RegKind_FCFR) && "Invalid access!");
+    return RegIdx.RegInfo->getRegClass(LoongArch::FCFRRegClassID)
+        .getRegister(RegIdx.Index);
+  }
+
+  /// Coerce the register to LSX128 and return the real register for the
+  /// current target.
+  unsigned getLSX128Reg() const {
+    assert(isRegIdx() && (RegIdx.Kind & RegKind_LSX128) && "Invalid access!");
+    // It doesn't matter which of the LSX128[BHWD] classes we use. They are
+    // all identical.
+    unsigned ClassID = LoongArch::LSX128BRegClassID;
+    return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index);
+  }
+
+  unsigned getLASX256Reg() const {
+    assert(isRegIdx() && (RegIdx.Kind & RegKind_LASX256) && "Invalid access!");
+    unsigned ClassID = LoongArch::LASX256BRegClassID;
+    return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index);
+  }
+
+  /// Coerce the register to FCSR and return the real register for the
+  /// current target.
+  unsigned getFCSRReg() const {
+    assert(isRegIdx() && (RegIdx.Kind & RegKind_FCSR) && "Invalid access!");
+    unsigned ClassID = LoongArch::FCSRRegClassID;
+    return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index);
+  }
+
+public:
+  void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+    // Add as immediate when possible. Null MCExpr = 0.
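+    // For example, the 16 in `addi.d $a0, $a0, 16` arrives here as an
+    // MCConstantExpr and is folded to createImm(16), while a relocatable
+    // expression (a symbol reference or %-operator) stays an MCExpr operand
+    // until fixups are applied.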
+    if (!Expr)
+      Inst.addOperand(MCOperand::createImm(0));
+    else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+      Inst.addOperand(MCOperand::createImm(CE->getValue()));
+    else
+      Inst.addOperand(MCOperand::createExpr(Expr));
+  }
+
+  void addRegOperands(MCInst &Inst, unsigned N) const {
+    llvm_unreachable("Use a custom parser instead");
+  }
+
+  /// Render the operand to an MCInst as a GPR32
+  /// Asserts if the wrong number of operands are requested, or the operand
+  /// is not a k_RegisterIndex compatible with RegKind_GPR
+  void addGPR32ZeroAsmRegOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::createReg(getGPR32Reg()));
+  }
+
+  void addGPR32NonZeroAsmRegOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::createReg(getGPR32Reg()));
+  }
+
+  void addGPR32AsmRegOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::createReg(getGPR32Reg()));
+  }
+
+  void addGPRMM16AsmRegOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::createReg(getGPRMM16Reg()));
+  }
+
+  void addGPRMM16AsmRegZeroOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::createReg(getGPRMM16Reg()));
+  }
+
+  void addGPRMM16AsmRegMovePOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::createReg(getGPRMM16Reg()));
+  }
+
+  void addGPRMM16AsmRegMovePPairFirstOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::createReg(getGPRMM16Reg()));
+  }
+
+  void addGPRMM16AsmRegMovePPairSecondOperands(MCInst &Inst,
+                                               unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::createReg(getGPRMM16Reg()));
+  }
+
+  /// Render the operand to an MCInst as a GPR64
+  /// Asserts if the wrong number of operands are requested, or the operand
+  /// is not a k_RegisterIndex compatible with RegKind_GPR
+  void addGPR64AsmRegOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::createReg(getGPR64Reg()));
+  }
+
+  void addStrictlyFGR64AsmRegOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::createReg(getFGR64Reg()));
+  }
+
+  void addFGR64AsmRegOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::createReg(getFGR64Reg()));
+  }
+
+  void addFGR32AsmRegOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::createReg(getFGR32Reg()));
+  }
+
+  void addStrictlyFGR32AsmRegOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::createReg(getFGR32Reg()));
+  }
+
+  void addFCFRAsmRegOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::createReg(getFCFRReg()));
+  }
+
+  void addLSX128AsmRegOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::createReg(getLSX128Reg()));
+  }
+
+  void addLASX256AsmRegOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::createReg(getLASX256Reg()));
+  }
+
+  void addFCSRAsmRegOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::createReg(getFCSRReg()));
+  }
+
+  template <unsigned Bits, int Offset = 0, int AdjustOffset = 0>
+  void addConstantUImmOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    uint64_t Imm = getConstantImm() - Offset;
+    Imm &= (1ULL << Bits) - 1;
+    Imm += Offset;
+    Imm += AdjustOffset;
+    Inst.addOperand(MCOperand::createImm(Imm));
+  }
+
+  template <unsigned Bits>
+  void addSImmOperands(MCInst &Inst, unsigned N) const {
+    if (isImm() && !isConstantImm()) {
+      addExpr(Inst, getImm());
+      return;
+    }
+    addConstantSImmOperands<Bits>(Inst, N);
+  }
+
+  template <unsigned Bits>
+  void addUImmOperands(MCInst &Inst, unsigned N) const {
+    if (isImm() && !isConstantImm()) {
+      addExpr(Inst, getImm());
+      return;
+    }
+    addConstantUImmOperands<Bits>(Inst, N);
+  }
+
+  template <unsigned Bits, int Offset = 0, int AdjustOffset = 0>
+  void addConstantSImmOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    int64_t Imm = getConstantImm() - Offset;
+    Imm = SignExtend64<Bits>(Imm);
+    Imm += Offset;
+    Imm += AdjustOffset;
+    Inst.addOperand(MCOperand::createImm(Imm));
+  }
+
+  void addImmOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    const MCExpr *Expr = getImm();
+    addExpr(Inst, Expr);
+  }
+
+  void addMemOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 2 && "Invalid number of operands!");
+
+    Inst.addOperand(MCOperand::createReg(AsmParser.getABI().ArePtrs64bit()
+                                             ? getMemBase()->getGPR64Reg()
+                                             : getMemBase()->getGPR32Reg()));
+
+    const MCExpr *Expr = getMemOff();
+    addExpr(Inst, Expr);
+  }
+
+  void addRegListOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+
+    for (auto RegNo : getRegList())
+      Inst.addOperand(MCOperand::createReg(RegNo));
+  }
+
+  bool isReg() const override {
+    // As a special case until we sort out the definition of div/divu, accept
+    // $0/$zero here so that MCK_ZERO works correctly.
+    return isGPRAsmReg() && RegIdx.Index == 0;
+  }
+
+  bool isRegIdx() const { return Kind == k_RegisterIndex; }
+  bool isImm() const override { return Kind == k_Immediate; }
+
+  bool isConstantImm() const {
+    int64_t Res;
+    return isImm() && getImm()->evaluateAsAbsolute(Res);
+  }
+
+  bool isConstantImmz() const {
+    return isConstantImm() && getConstantImm() == 0;
+  }
+
+  template <unsigned Bits, int Offset = 0> bool isConstantUImm() const {
+    return isConstantImm() && isUInt<Bits>(getConstantImm() - Offset);
+  }
+
+  template <unsigned Bits> bool isSImm() const {
+    return isConstantImm() ? isInt<Bits>(getConstantImm()) : isImm();
+  }
+
+  template <unsigned Bits> bool isUImm() const {
+    return isConstantImm() ? isUInt<Bits>(getConstantImm()) : isImm();
+  }
+
+  template <unsigned Bits> bool isAnyImm() const {
+    return isConstantImm() ? (isInt<Bits>(getConstantImm()) ||
+                              isUInt<Bits>(getConstantImm()))
+                           : isImm();
+  }
+
+  template <unsigned Bits, int Offset = 0> bool isConstantSImm() const {
+    return isConstantImm() && isInt<Bits>(getConstantImm() - Offset);
+  }
+
+  template <unsigned Bottom, unsigned Top> bool isConstantUImmRange() const {
+    return isConstantImm() && getConstantImm() >= Bottom &&
+           getConstantImm() <= Top;
+  }
+
+  bool isToken() const override {
+    // Note: It's not possible to pretend that other operand kinds are tokens.
+    // The matcher emitter checks tokens first.
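+    // For instance, literal pieces of syntax such as the mnemonic are
+    // k_Token operands (see CreateToken() below); register and immediate
+    // operands must keep their own kinds for matching to work.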
+    return Kind == k_Token;
+  }
+
+  bool isMem() const override { return Kind == k_Memory; }
+
+  bool isConstantMemOff() const {
+    return isMem() && isa<MCConstantExpr>(getMemOff());
+  }
+
+  bool isZeroMemOff() const {
+    return isMem() && isa<MCConstantExpr>(getMemOff()) &&
+           getConstantMemOff() == 0;
+  }
+
+  // Allow relocation operators.
+  // FIXME: This predicate and others need to look through binary expressions
+  //        and determine whether a Value is a constant or not.
+  template <unsigned Bits, unsigned ShiftAmount = 0>
+  bool isMemWithSimmOffset() const {
+    if (!isMem())
+      return false;
+    if (!getMemBase()->isGPRAsmReg())
+      return false;
+    if (isa<MCTargetExpr>(getMemOff()) ||
+        (isConstantMemOff() &&
+         isShiftedInt<Bits, ShiftAmount>(getConstantMemOff())))
+      return true;
+    MCValue Res;
+    bool IsReloc = getMemOff()->evaluateAsRelocatable(Res, nullptr, nullptr);
+    return IsReloc && isShiftedInt<Bits, ShiftAmount>(Res.getConstant());
+  }
+
+  bool isMemWithPtrSizeOffset() const {
+    if (!isMem())
+      return false;
+    if (!getMemBase()->isGPRAsmReg())
+      return false;
+    const unsigned PtrBits = AsmParser.getABI().ArePtrs64bit() ? 64 : 32;
+    if (isa<MCTargetExpr>(getMemOff()) ||
+        (isConstantMemOff() && isIntN(PtrBits, getConstantMemOff())))
+      return true;
+    MCValue Res;
+    bool IsReloc = getMemOff()->evaluateAsRelocatable(Res, nullptr, nullptr);
+    return IsReloc && isIntN(PtrBits, Res.getConstant());
+  }
+
+  bool isMemWithGRPMM16Base() const {
+    return isMem() && getMemBase()->isMM16AsmReg();
+  }
+
+  template <unsigned Bits> bool isMemWithUimmOffsetSP() const {
+    return isMem() && isConstantMemOff() &&
+           isUInt<Bits>(getConstantMemOff()) && getMemBase()->isRegIdx() &&
+           (getMemBase()->getGPR32Reg() == LoongArch::SP);
+  }
+
+  template <unsigned Bits> bool isMemWithUimmWordAlignedOffsetSP() const {
+    return isMem() && isConstantMemOff() &&
+           isUInt<Bits>(getConstantMemOff()) &&
+           (getConstantMemOff() % 4 == 0) && getMemBase()->isRegIdx() &&
+           (getMemBase()->getGPR32Reg() == LoongArch::SP);
+  }
+
+  template <unsigned Bits, unsigned ShiftLeftAmount>
+  bool isScaledUImm() const {
+    return isConstantImm() &&
+           isShiftedUInt<Bits, ShiftLeftAmount>(getConstantImm());
+  }
+
+  template <unsigned Bits, unsigned ShiftLeftAmount>
+  bool isScaledSImm() const {
+    if (isConstantImm() &&
+        isShiftedInt<Bits, ShiftLeftAmount>(getConstantImm()))
+      return true;
+    // Operand can also be a symbol or symbol plus
+    // offset in case of relocations.
+    if (Kind != k_Immediate)
+      return false;
+    MCValue Res;
+    bool Success = getImm()->evaluateAsRelocatable(Res, nullptr, nullptr);
+    return Success && isShiftedInt<Bits, ShiftLeftAmount>(Res.getConstant());
+  }
+
+  bool isRegList16() const {
+    if (!isRegList())
+      return false;
+
+    int Size = RegList.List->size();
+    if (Size < 2 || Size > 5)
+      return false;
+
+    unsigned R0 = RegList.List->front();
+    unsigned R1 = RegList.List->back();
+    if (!((R0 == LoongArch::S0 && R1 == LoongArch::RA) ||
+          (R0 == LoongArch::S0_64 && R1 == LoongArch::RA_64)))
+      return false;
+
+    int PrevReg = *RegList.List->begin();
+    for (int i = 1; i < Size - 1; i++) {
+      int Reg = (*(RegList.List))[i];
+      if (Reg != PrevReg + 1)
+        return false;
+      PrevReg = Reg;
+    }
+
+    return true;
+  }
+
+  bool isInvNum() const { return Kind == k_Immediate; }
+
+  bool isLSAImm() const {
+    if (!isConstantImm())
+      return false;
+    int64_t Val = getConstantImm();
+    return 1 <= Val && Val <= 4;
+  }
+
+  bool isRegList() const { return Kind == k_RegList; }
+
+  StringRef getToken() const {
+    assert(Kind == k_Token && "Invalid access!");
+    return StringRef(Tok.Data, Tok.Length);
+  }
+
+  unsigned getReg() const override {
+    // As a special case until we sort out the definition of div/divu, accept
+    // $0/$zero here so that MCK_ZERO works correctly.
+    if (Kind == k_RegisterIndex && RegIdx.Index == 0 &&
+        RegIdx.Kind & RegKind_GPR)
+      return getGPR32Reg(); // FIXME: GPR64 too
+
+    llvm_unreachable("Invalid access!");
+    return 0;
+  }
+
+  const MCExpr *getImm() const {
+    assert((Kind == k_Immediate) && "Invalid access!");
+    return Imm.Val;
+  }
+
+  int64_t getConstantImm() const {
+    const MCExpr *Val = getImm();
+    int64_t Value = 0;
+    (void)Val->evaluateAsAbsolute(Value);
+    return Value;
+  }
+
+  LoongArchOperand *getMemBase() const {
+    assert((Kind == k_Memory) && "Invalid access!");
+    return Mem.Base;
+  }
+
+  const MCExpr *getMemOff() const {
+    assert((Kind == k_Memory) && "Invalid access!");
+    return Mem.Off;
+  }
+
+  int64_t getConstantMemOff() const {
+    return static_cast<const MCConstantExpr *>(getMemOff())->getValue();
+  }
+
+  const SmallVectorImpl<unsigned> &getRegList() const {
+    assert((Kind == k_RegList) && "Invalid access!");
+    return *(RegList.List);
+  }
+
+  static std::unique_ptr<LoongArchOperand>
+  CreateToken(StringRef Str, SMLoc S, LoongArchAsmParser &Parser) {
+    auto Op = std::make_unique<LoongArchOperand>(k_Token, Parser);
+    Op->Tok.Data = Str.data();
+    Op->Tok.Length = Str.size();
+    Op->StartLoc = S;
+    Op->EndLoc = S;
+    return Op;
+  }
+
+  /// Create a numeric register (e.g. $1). The exact register remains
+  /// unresolved until an instruction successfully matches
+  static std::unique_ptr<LoongArchOperand>
+  createNumericReg(unsigned Index, StringRef Str,
+                   const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E,
+                   LoongArchAsmParser &Parser) {
+    LLVM_DEBUG(dbgs() << "createNumericReg(" << Index << ", ...)\n");
+    return CreateReg(Index, Str, RegKind_Numeric, RegInfo, S, E, Parser);
+  }
+
+  /// Create a register that is definitely a GPR.
+  /// This is typically only used for named registers such as $gp.
+  static std::unique_ptr<LoongArchOperand>
+  createGPRReg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo,
+               SMLoc S, SMLoc E, LoongArchAsmParser &Parser) {
+    return CreateReg(Index, Str, RegKind_GPR, RegInfo, S, E, Parser);
+  }
+
+  /// Create a register that is definitely a FGR.
+  /// This is typically only used for named registers such as $f0.
+  static std::unique_ptr<LoongArchOperand>
+  createFGRReg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo,
+               SMLoc S, SMLoc E, LoongArchAsmParser &Parser) {
+    return CreateReg(Index, Str, RegKind_FGR, RegInfo, S, E, Parser);
+  }
+
+  /// Create a register that is definitely an FCFR.
+  /// This is typically only used for named registers such as $fcc0.
+  static std::unique_ptr<LoongArchOperand>
+  createFCFRReg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo,
+                SMLoc S, SMLoc E, LoongArchAsmParser &Parser) {
+    return CreateReg(Index, Str, RegKind_FCFR, RegInfo, S, E, Parser);
+  }
+
+  /// Create a register that is definitely an FCSR.
+  /// This is typically only used for named registers such as $fcsr0.
+  static std::unique_ptr<LoongArchOperand>
+  createFCSRReg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo,
+                SMLoc S, SMLoc E, LoongArchAsmParser &Parser) {
+    return CreateReg(Index, Str, RegKind_FCSR, RegInfo, S, E, Parser);
+  }
+
+  /// Create a register that is definitely an LSX128.
+  /// This is typically only used for named registers such as $vr0.
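+  /// For example, parsing `vadd.b $vr3, $vr4, $vr5` produces three operands
+  /// created here with indices 3, 4 and 5; the concrete LSX128B register is
+  /// only resolved later by getLSX128Reg().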
+  static std::unique_ptr<LoongArchOperand>
+  createLSX128Reg(unsigned Index, StringRef Str,
+                  const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E,
+                  LoongArchAsmParser &Parser) {
+    return CreateReg(Index, Str, RegKind_LSX128, RegInfo, S, E, Parser);
+  }
+
+  static std::unique_ptr<LoongArchOperand>
+  createLASX256Reg(unsigned Index, StringRef Str,
+                   const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E,
+                   LoongArchAsmParser &Parser) {
+    return CreateReg(Index, Str, RegKind_LASX256, RegInfo, S, E, Parser);
+  }
+
+  static std::unique_ptr<LoongArchOperand>
+  CreateImm(const MCExpr *Val, SMLoc S, SMLoc E, LoongArchAsmParser &Parser) {
+    auto Op = std::make_unique<LoongArchOperand>(k_Immediate, Parser);
+    Op->Imm.Val = Val;
+    Op->StartLoc = S;
+    Op->EndLoc = E;
+    return Op;
+  }
+
+  static std::unique_ptr<LoongArchOperand>
+  CreateMem(std::unique_ptr<LoongArchOperand> Base, const MCExpr *Off,
+            SMLoc S, SMLoc E, LoongArchAsmParser &Parser) {
+    auto Op = std::make_unique<LoongArchOperand>(k_Memory, Parser);
+    Op->Mem.Base = Base.release();
+    Op->Mem.Off = Off;
+    Op->StartLoc = S;
+    Op->EndLoc = E;
+    return Op;
+  }
+
+  static std::unique_ptr<LoongArchOperand>
+  CreateRegList(SmallVectorImpl<unsigned> &Regs, SMLoc StartLoc, SMLoc EndLoc,
+                LoongArchAsmParser &Parser) {
+    assert(Regs.size() > 0 && "Empty list not allowed");
+
+    auto Op = std::make_unique<LoongArchOperand>(k_RegList, Parser);
+    Op->RegList.List =
+        new SmallVector<unsigned, 10>(Regs.begin(), Regs.end());
+    Op->StartLoc = StartLoc;
+    Op->EndLoc = EndLoc;
+    return Op;
+  }
+
+  bool isGPRZeroAsmReg() const {
+    return isRegIdx() && RegIdx.Kind & RegKind_GPR && RegIdx.Index == 0;
+  }
+
+  bool isGPRNonZeroAsmReg() const {
+    return isRegIdx() && RegIdx.Kind & RegKind_GPR && RegIdx.Index > 0 &&
+           RegIdx.Index <= 31;
+  }
+
+  bool isGPRAsmReg() const {
+    return isRegIdx() && RegIdx.Kind & RegKind_GPR && RegIdx.Index <= 31;
+  }
+
+  bool isMM16AsmReg() const {
+    if (!(isRegIdx() && RegIdx.Kind))
+      return false;
+    return ((RegIdx.Index >= 2 && RegIdx.Index <= 7) ||
+            RegIdx.Index == 16 || RegIdx.Index == 17);
+  }
+
+  bool isMM16AsmRegZero() const {
+    if (!(isRegIdx() && RegIdx.Kind))
+      return false;
+    return (RegIdx.Index == 0 ||
+            (RegIdx.Index >= 2 && RegIdx.Index <= 7) ||
+            RegIdx.Index == 17);
+  }
+
+  bool isMM16AsmRegMoveP() const {
+    if (!(isRegIdx() && RegIdx.Kind))
+      return false;
+    return (RegIdx.Index == 0 || (RegIdx.Index >= 2 && RegIdx.Index <= 3) ||
+            (RegIdx.Index >= 16 && RegIdx.Index <= 20));
+  }
+
+  bool isMM16AsmRegMovePPairFirst() const {
+    if (!(isRegIdx() && RegIdx.Kind))
+      return false;
+    return RegIdx.Index >= 4 && RegIdx.Index <= 6;
+  }
+
+  bool isMM16AsmRegMovePPairSecond() const {
+    if (!(isRegIdx() && RegIdx.Kind))
+      return false;
+    return (RegIdx.Index == 21 || RegIdx.Index == 22 ||
+            (RegIdx.Index >= 5 && RegIdx.Index <= 7));
+  }
+
+  bool isFGRAsmReg() const {
+    return isRegIdx() && RegIdx.Kind & RegKind_FGR && RegIdx.Index <= 31;
+  }
+
+  bool isStrictlyFGRAsmReg() const {
+    return isRegIdx() && RegIdx.Kind == RegKind_FGR && RegIdx.Index <= 31;
+  }
+
+  bool isFCSRAsmReg() const {
+    return isRegIdx() && RegIdx.Kind & RegKind_FCSR && RegIdx.Index <= 3;
+  }
+
+  bool isFCFRAsmReg() const {
+    if (!(isRegIdx() && RegIdx.Kind & RegKind_FCFR))
+      return false;
+    return RegIdx.Index <= 7;
+  }
+
+  bool isLSX128AsmReg() const {
+    return isRegIdx() && RegIdx.Kind & RegKind_LSX128 && RegIdx.Index <= 31;
+  }
+
+  bool isLASX256AsmReg() const {
+    return isRegIdx() && RegIdx.Kind & RegKind_LASX256 && RegIdx.Index <= 31;
+  }
+
+  /// getStartLoc - Get the location of the first token of this operand.
+  SMLoc getStartLoc() const override { return StartLoc; }
+  /// getEndLoc - Get the location of the last token of this operand.
+  SMLoc getEndLoc() const override { return EndLoc; }
+
+  void print(raw_ostream &OS) const override {
+    switch (Kind) {
+    case k_Immediate:
+      OS << "Imm<";
+      OS << *Imm.Val;
+      OS << ">";
+      break;
+    case k_Memory:
+      OS << "Mem<";
+      Mem.Base->print(OS);
+      OS << ", ";
+      OS << *Mem.Off;
+      OS << ">";
+      break;
+    case k_RegisterIndex:
+      OS << "RegIdx<" << RegIdx.Index << ":" << RegIdx.Kind << ", "
+         << StringRef(RegIdx.Tok.Data, RegIdx.Tok.Length) << ">";
+      break;
+    case k_Token:
+      OS << getToken();
+      break;
+    case k_RegList:
+      OS << "RegList< ";
+      for (auto Reg : (*RegList.List))
+        OS << Reg << " ";
+      OS << ">";
+      break;
+    }
+  }
+
+  bool isValidForTie(const LoongArchOperand &Other) const {
+    if (Kind != Other.Kind)
+      return false;
+
+    switch (Kind) {
+    default:
+      llvm_unreachable("Unexpected kind");
+      return false;
+    case k_RegisterIndex: {
+      StringRef Token(RegIdx.Tok.Data, RegIdx.Tok.Length);
+      StringRef OtherToken(Other.RegIdx.Tok.Data, Other.RegIdx.Tok.Length);
+      return Token == OtherToken;
+    }
+    }
+  }
+}; // class LoongArchOperand
+
+} // end anonymous namespace
+
+namespace llvm {
+
+extern const MCInstrDesc LoongArchInsts[];
+
+} // end namespace llvm
+
+static const MCInstrDesc &getInstDesc(unsigned Opcode) {
+  return LoongArchInsts[Opcode];
+}
+
+static const MCSymbol *getSingleMCSymbol(const MCExpr *Expr) {
+  if (const MCSymbolRefExpr *SRExpr = dyn_cast<MCSymbolRefExpr>(Expr)) {
+    return &SRExpr->getSymbol();
+  }
+
+  if (const MCBinaryExpr *BExpr = dyn_cast<MCBinaryExpr>(Expr)) {
+    const MCSymbol *LHSSym = getSingleMCSymbol(BExpr->getLHS());
+    const MCSymbol *RHSSym = getSingleMCSymbol(BExpr->getRHS());
+
+    if (LHSSym)
+      return LHSSym;
+
+    if (RHSSym)
+      return RHSSym;
+
+    return nullptr;
+  }
+
+  if (const MCUnaryExpr *UExpr = dyn_cast<MCUnaryExpr>(Expr))
+    return getSingleMCSymbol(UExpr->getSubExpr());
+
+  return nullptr;
+}
+
+static unsigned countMCSymbolRefExpr(const MCExpr *Expr) {
+  if (isa<MCSymbolRefExpr>(Expr))
+    return 1;
+
+  if (const MCBinaryExpr *BExpr = dyn_cast<MCBinaryExpr>(Expr))
+    return countMCSymbolRefExpr(BExpr->getLHS()) +
+           countMCSymbolRefExpr(BExpr->getRHS());
+
+  if (const MCUnaryExpr *UExpr = dyn_cast<MCUnaryExpr>(Expr))
+    return countMCSymbolRefExpr(UExpr->getSubExpr());
+
+  return 0;
+}
+
+bool LoongArchAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
+                                            MCStreamer &Out,
+                                            const MCSubtargetInfo *STI) {
+  const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode());
+
+  Inst.setLoc(IDLoc);
+
+  // Check branch instructions.
+  if (MCID.isBranch() || MCID.isCall()) {
+    const unsigned Opcode = Inst.getOpcode();
+    MCOperand Offset;
+    bool check = true;
+    unsigned OffsetOpndIdx, OffsetOpndWidth;
+    switch (Opcode) {
+    default:
+      check = false;
+      break;
+    case LoongArch::BEQ:
+    case LoongArch::BNE:
+    case LoongArch::BLT:
+    case LoongArch::BGE:
+    case LoongArch::BLTU:
+    case LoongArch::BGEU:
+      OffsetOpndIdx = 2;
+      OffsetOpndWidth = 16;
+      break;
+    case LoongArch::BEQZ:
+    case LoongArch::BNEZ:
+    case LoongArch::BCEQZ:
+    case LoongArch::BCNEZ:
+      OffsetOpndIdx = 1;
+      OffsetOpndWidth = 21;
+      break;
+    case LoongArch::B:
+    case LoongArch::BL:
+      OffsetOpndIdx = 0;
+      OffsetOpndWidth = 26;
+      break;
+    }
+    if (check) {
+      assert(MCID.getNumOperands() == OffsetOpndIdx + 1 &&
+             "unexpected number of operands");
+      Offset = Inst.getOperand(OffsetOpndIdx);
+      // Non-Imm situations will be dealt with later on when applying fixups.
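+      // For example, with OffsetOpndWidth == 16 a `beq` target must be a
+      // multiple of 4 that fits in 18 signed bits, so an immediate offset of
+      // 0x40000 is rejected as out of range and 0x1002 as misaligned.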
+      if (Offset.isImm()) {
+        if (!isIntN(OffsetOpndWidth + 2, Offset.getImm()))
+          return Error(IDLoc, "branch target out of range");
+        if (offsetToAlignment(Offset.getImm(), Align(1LL << 2)))
+          return Error(IDLoc, "branch to misaligned address");
+      }
+    }
+  }
+
+  bool IsPCRelativeLoad = (MCID.TSFlags & LoongArchII::IsPCRelativeLoad) != 0;
+  if ((MCID.mayLoad() || MCID.mayStore()) && !IsPCRelativeLoad) {
+    // Check the offset of memory operand, if it is a symbol
+    // reference or immediate we may have to expand instructions.
+    for (unsigned i = 0; i < MCID.getNumOperands(); i++) {
+      const MCOperandInfo &OpInfo = MCID.OpInfo[i];
+      if ((OpInfo.OperandType == MCOI::OPERAND_MEMORY) ||
+          (OpInfo.OperandType == MCOI::OPERAND_UNKNOWN)) {
+        MCOperand &Op = Inst.getOperand(i);
+        if (Op.isImm()) {
+          int64_t MemOffset = Op.getImm();
+          if (MemOffset < -32768 || MemOffset > 32767) {
+            return getParser().hasPendingError();
+          }
+        } else if (Op.isExpr()) {
+          const MCExpr *Expr = Op.getExpr();
+          if (Expr->getKind() == MCExpr::SymbolRef) {
+            const MCSymbolRefExpr *SR =
+                static_cast<const MCSymbolRefExpr *>(Expr);
+            if (SR->getKind() == MCSymbolRefExpr::VK_None) {
+              return getParser().hasPendingError();
+            }
+          } else if (!isEvaluated(Expr)) {
+            return getParser().hasPendingError();
+          }
+        }
+      }
+    } // for
+  } // if load/store
+
+  MacroExpanderResultTy ExpandResult =
+      tryExpandInstruction(Inst, IDLoc, Out, STI);
+  switch (ExpandResult) {
+  case MER_NotAMacro:
+    Out.emitInstruction(Inst, *STI);
+    break;
+  case MER_Success:
+    break;
+  case MER_Fail:
+    return true;
+  }
+
+  return false;
+}
+
+LoongArchAsmParser::MacroExpanderResultTy
+LoongArchAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc,
+                                         MCStreamer &Out,
+                                         const MCSubtargetInfo *STI) {
+  switch (Inst.getOpcode()) {
+  default:
+    return MER_NotAMacro;
+  case LoongArch::LoadImm32:            // li.w $rd, $imm32
+  case LoongArch::LoadImm64:            // li.d $rd, $imm64
+    return expandLoadImm(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
+  case LoongArch::LoadAddrLocal:        // la.local $rd, symbol
+  case LoongArch::LoadAddrGlobal:       // la.global $rd, symbol
+  case LoongArch::LoadAddrGlobal_Alias: // la $rd, symbol
+  case LoongArch::LoadAddrTLS_LE:       // la.tls.le $rd, symbol
+  case LoongArch::LoadAddrTLS_IE:       // la.tls.ie $rd, symbol
+  case LoongArch::LoadAddrTLS_LD:       // la.tls.ld $rd, symbol
+  case LoongArch::LoadAddrTLS_GD:       // la.tls.gd $rd, symbol
+    return expandLoadAddress(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
+  }
+}
+
+/// Can the value be represented by an unsigned N-bit value and a shift left?
+template <unsigned N> static bool isShiftedUIntAtAnyPosition(uint64_t x) {
+  unsigned BitNum = findFirstSet(x);
+
+  return (x == x >> BitNum << BitNum) && isUInt<N>(x >> BitNum);
+}
+
+bool LoongArchAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
+                                       MCStreamer &Out,
+                                       const MCSubtargetInfo *STI) {
+  const int64_t Imm = Inst.getOperand(1).getImm();
+  const unsigned DstReg = Inst.getOperand(0).getReg();
+  LoongArchTargetStreamer &TOut = getTargetStreamer();
+  bool Is64Bit = Inst.getOpcode() == LoongArch::LoadImm64;
+  unsigned SrcReg = Is64Bit ? LoongArch::ZERO_64 : LoongArch::ZERO;
+  LoongArchAnalyzeImmediate::InstSeq Seq =
+      LoongArchAnalyzeImmediate::generateInstSeq(
+          Is64Bit ?
Imm : SignExtend64<32>(Imm), Is64Bit); + + for (auto &Inst : Seq) { + if (Inst.Opc == LoongArch::LU12I_W || Inst.Opc == LoongArch::LU12I_W32) + TOut.emitRI(Inst.Opc, DstReg, Inst.Imm, IDLoc, STI); + else + TOut.emitRRI(Inst.Opc, DstReg, SrcReg, Inst.Imm, IDLoc, STI); + SrcReg = DstReg; + } + + return false; +} + +bool LoongArchAsmParser::expandLoadAddress(MCInst &Inst, SMLoc IDLoc, + MCStreamer &Out, + const MCSubtargetInfo *STI) { + LoongArchTargetStreamer &TOut = getTargetStreamer(); + const MCExpr *SymExpr = Inst.getOperand(1).getExpr(); + const LoongArchMCExpr *HiExpr = nullptr; + const LoongArchMCExpr *LoExpr = nullptr; + const LoongArchMCExpr *HigherExpr = nullptr; + const LoongArchMCExpr *HighestExpr = nullptr; + unsigned DstReg = Inst.getOperand(0).getReg(); + + MCValue Res; + if (!SymExpr->evaluateAsRelocatable(Res, nullptr, nullptr)) { + Error(IDLoc, "expected relocatable expression"); + return true; + } + if (Res.getSymB() != nullptr) { + Error(IDLoc, "expected relocatable expression with only one symbol"); + return true; + } + + switch (Inst.getOpcode()) { + case LoongArch::LoadAddrLocal: + HiExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_PCREL_HI, SymExpr, + getContext()); + LoExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_PCREL_LO, SymExpr, + getContext()); + + TOut.emitRX(LoongArch::PCALAU12I_ri, DstReg, MCOperand::createExpr(HiExpr), + IDLoc, STI); + TOut.emitRRX(LoongArch::ADDI_D_rri, DstReg, DstReg, + MCOperand::createExpr(LoExpr), IDLoc, STI); + return false; + case LoongArch::LoadAddrGlobal: + case LoongArch::LoadAddrGlobal_Alias: + HiExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_GOT_HI, SymExpr, + getContext()); + LoExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_GOT_LO, SymExpr, + getContext()); + TOut.emitRX(LoongArch::PCALAU12I_ri, DstReg, MCOperand::createExpr(HiExpr), + IDLoc, STI); + TOut.emitRRX(LoongArch::LD_D_rri, DstReg, DstReg, + MCOperand::createExpr(LoExpr), IDLoc, STI); + return false; + case LoongArch::LoadAddrTLS_LE: + HiExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_TLSLE_HI, SymExpr, + getContext()); + LoExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_TLSLE_LO, SymExpr, + getContext()); + HigherExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_TLSLE_HIGHER, + SymExpr, getContext()); + HighestExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_TLSLE_HIGHEST, + SymExpr, getContext()); + TOut.emitRX(LoongArch::LU12I_W_ri, DstReg, MCOperand::createExpr(HiExpr), + IDLoc, STI); + TOut.emitRRX(LoongArch::ORI_rri, DstReg, DstReg, + MCOperand::createExpr(LoExpr), IDLoc, STI); + TOut.emitRX(LoongArch::LU32I_D_ri, DstReg, + MCOperand::createExpr(HigherExpr), IDLoc, STI); + TOut.emitRRX(LoongArch::LU52I_D_rri, DstReg, DstReg, + MCOperand::createExpr(HighestExpr), IDLoc, STI); + return false; + case LoongArch::LoadAddrTLS_IE: + HiExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_TLSIE_HI, SymExpr, + getContext()); + LoExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_TLSIE_LO, SymExpr, + getContext()); + TOut.emitRX(LoongArch::PCALAU12I_ri, DstReg, MCOperand::createExpr(HiExpr), + IDLoc, STI); + TOut.emitRRX(LoongArch::LD_D_rri, DstReg, DstReg, + MCOperand::createExpr(LoExpr), IDLoc, STI); + return false; + case LoongArch::LoadAddrTLS_LD: + case LoongArch::LoadAddrTLS_GD: + HiExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_TLSGD_HI, SymExpr, + getContext()); + LoExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_TLSGD_LO, SymExpr, + getContext()); + TOut.emitRX(LoongArch::PCALAU12I_ri, DstReg, MCOperand::createExpr(HiExpr), 
+ IDLoc, STI); + TOut.emitRRX(LoongArch::ADDI_D_rri, DstReg, DstReg, + MCOperand::createExpr(LoExpr), IDLoc, STI); + return false; + default: + llvm_unreachable(""); + } +} + +unsigned LoongArchAsmParser::checkTargetMatchPredicate(MCInst &Inst) { + switch (Inst.getOpcode()) { + case LoongArch::BSTRINS_W: + case LoongArch::BSTRPICK_W: { + assert(Inst.getOperand(2).isImm() && Inst.getOperand(3).isImm() && + "Operands must be immediates for bstrins.w/bstrpick.w!"); + const signed Msbw = Inst.getOperand(2).getImm(); + const signed Lsbw = Inst.getOperand(3).getImm(); + if (Msbw < Lsbw) + return Match_MsbHigherThanLsb; + if ((Lsbw < 0) || (Msbw > 31)) + return Match_RequiresRange0_31; + return Match_Success; + } + case LoongArch::BSTRINS_D: + case LoongArch::BSTRPICK_D: { + assert(Inst.getOperand(2).isImm() && Inst.getOperand(3).isImm() && + "Operands must be immediates for bstrins.d/bstrpick.d!"); + const signed Msbd = Inst.getOperand(2).getImm(); + const signed Lsbd = Inst.getOperand(3).getImm(); + if (Msbd < Lsbd) + return Match_MsbHigherThanLsb; + if ((Lsbd < 0) || (Msbd > 63)) + return Match_RequiresRange0_63; + return Match_Success; + } + case LoongArch::CSRXCHG32: + case LoongArch::CSRXCHG: + if (Inst.getOperand(2).getReg() == LoongArch::ZERO || + Inst.getOperand(2).getReg() == LoongArch::ZERO_64) + return Match_RequiresNoZeroRegister; + if (Inst.getOperand(2).getReg() == LoongArch::RA || + Inst.getOperand(2).getReg() == LoongArch::RA_64) + return Match_RequiresNoRaRegister; + return Match_Success; + } + + return Match_Success; +} + +static SMLoc RefineErrorLoc(const SMLoc Loc, const OperandVector &Operands, + uint64_t ErrorInfo) { + if (ErrorInfo != ~0ULL && ErrorInfo < Operands.size()) { + SMLoc ErrorLoc = Operands[ErrorInfo]->getStartLoc(); + if (ErrorLoc == SMLoc()) + return Loc; + return ErrorLoc; + } + return Loc; +} + +bool LoongArchAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, + MCStreamer &Out, + uint64_t &ErrorInfo, + bool MatchingInlineAsm) { + MCInst Inst; + unsigned MatchResult = + MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm); + switch (MatchResult) { + case Match_Success: + if (processInstruction(Inst, IDLoc, Out, STI)) + return true; + return false; + case Match_MissingFeature: + Error(IDLoc, "instruction requires a CPU feature not currently enabled"); + return true; + case Match_InvalidOperand: { + SMLoc ErrorLoc = IDLoc; + if (ErrorInfo != ~0ULL) { + if (ErrorInfo >= Operands.size()) + return Error(IDLoc, "too few operands for instruction"); + + ErrorLoc = Operands[ErrorInfo]->getStartLoc(); + if (ErrorLoc == SMLoc()) + ErrorLoc = IDLoc; + } + + return Error(ErrorLoc, "invalid operand for instruction"); + } + case Match_MnemonicFail: + return Error(IDLoc, "invalid instruction"); + case Match_RequiresNoZeroRegister: + return Error(IDLoc, "invalid operand ($zero) for instruction"); + case Match_RequiresNoRaRegister: + return Error(IDLoc, "invalid operand ($r1) for instruction"); + case Match_InvalidImm0_3: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "immediate must be an integer in range [0, 3]."); + case Match_InvalidImm0_7: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "immediate must be an integer in range [0, 7]."); + case Match_InvalidImm0_31: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "immediate must be an integer in range [0, 31]."); + case Match_InvalidImm0_63: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "immediate must be an 
integer in range [0, 63]."); + case Match_InvalidImm0_4095: + case Match_UImm12_Relaxed: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "immediate must be an integer in range [0, 4095]."); + case Match_InvalidImm0_32767: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "immediate must be an integer in range [0, 32767]."); + case Match_UImm16_Relaxed: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 16-bit unsigned immediate"); + case Match_UImm20_0: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 20-bit unsigned immediate"); + case Match_UImm26_0: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 26-bit unsigned immediate"); + case Match_UImm32_Coerced: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 32-bit immediate"); + case Match_InvalidSImm2: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 2-bit signed immediate"); + case Match_InvalidSImm3: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 3-bit signed immediate"); + case Match_InvalidSImm5: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 5-bit signed immediate"); + case Match_InvalidSImm8: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 8-bit signed immediate"); + case Match_InvalidSImm12: + case Match_SImm12_Relaxed: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 12-bit signed immediate"); + case Match_InvalidSImm14: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 14-bit signed immediate"); + case Match_InvalidSImm15: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 15-bit signed immediate"); + case Match_InvalidSImm16: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 16-bit signed immediate"); + case Match_InvalidSImm20: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 20-bit signed immediate"); + case Match_InvalidSImm21: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 21-bit signed immediate"); + case Match_InvalidSImm26: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 26-bit signed immediate"); + case Match_SImm32: + case Match_SImm32_Relaxed: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 32-bit signed immediate"); + case Match_MemSImm14: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected memory with 14-bit signed offset"); + case Match_MemSImmPtr: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected memory with 32-bit signed offset"); + case Match_UImm2_1: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected immediate in range 1 .. 4"); + case Match_MemSImm14Lsl2: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected memory with 16-bit signed offset and multiple of 4"); + case Match_RequiresRange0_31: { + SMLoc ErrorStart = Operands[3]->getStartLoc(); + SMLoc ErrorEnd = Operands[4]->getEndLoc(); + return Error(ErrorStart, "from lsbw to msbw are not in the range 0 .. 31", + SMRange(ErrorStart, ErrorEnd)); + } + case Match_RequiresPosSizeUImm6: { + SMLoc ErrorStart = Operands[3]->getStartLoc(); + SMLoc ErrorEnd = Operands[4]->getEndLoc(); + return Error(ErrorStart, "size plus position are not in the range 1 .. 
63",
+                 SMRange(ErrorStart, ErrorEnd));
+  }
+  case Match_RequiresRange0_63: {
+    SMLoc ErrorStart = Operands[3]->getStartLoc();
+    SMLoc ErrorEnd = Operands[4]->getEndLoc();
+    return Error(ErrorStart, "from lsbd to msbd are not in the range 0 .. 63",
+                 SMRange(ErrorStart, ErrorEnd));
+  }
+  case Match_MsbHigherThanLsb: {
+    SMLoc ErrorStart = Operands[3]->getStartLoc();
+    SMLoc ErrorEnd = Operands[4]->getEndLoc();
+    return Error(ErrorStart, "msb are not higher than lsb",
+                 SMRange(ErrorStart, ErrorEnd));
+  }
+  case Match_MemZeroOff:
+    return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
+                 "expected memory with constant 0 offset");
+  }
+
+  llvm_unreachable("Implement any new match types added!");
+}
+
+/*
+ * Note: The implementation of this function must be in sync with the
+ * definition of the GPR32/GPR64 RegisterClass in LoongArchRegisterInfo.td
+ */
+int LoongArchAsmParser::matchCPURegisterName(StringRef Name) {
+  int CC;
+
+  CC = StringSwitch<int>(Name)
+           .Cases("zero", "r0", 0)
+           .Cases("a0", "v0", "r4", 1)
+           .Cases("a1", "v1", "r5", 2)
+           .Cases("a2", "r6", 3)
+           .Cases("a3", "r7", 4)
+           .Cases("a4", "r8", 5)
+           .Cases("a5", "r9", 6)
+           .Cases("a6", "r10", 7)
+           .Cases("a7", "r11", 8)
+           .Cases("t0", "r12", 9)
+           .Cases("t1", "r13", 10)
+           .Cases("t2", "r14", 11)
+           .Cases("t3", "r15", 12)
+           .Cases("t4", "r16", 13)
+           .Cases("t5", "r17", 14)
+           .Cases("t6", "r18", 15)
+           .Cases("t7", "r19", 16)
+           .Cases("t8", "r20", 17)
+           .Cases("s0", "r23", 18)
+           .Cases("s1", "r24", 19)
+           .Cases("s2", "r25", 20)
+           .Cases("s3", "r26", 21)
+           .Cases("s4", "r27", 22)
+           .Cases("s5", "r28", 23)
+           .Cases("s6", "r29", 24)
+           .Cases("s7", "r30", 25)
+           .Cases("s8", "r31", 26)
+           .Cases("ra", "r1", 27)
+           .Cases("tp", "r2", 28)
+           .Cases("sp", "r3", 29)
+           .Case("r21", 30)
+           .Cases("fp", "r22", 31)
+           .Default(-1);
+
+  return CC;
+}
+
+int LoongArchAsmParser::matchFPURegisterName(StringRef Name) {
+  if (Name[0] == 'f') {
+    int CC;
+
+    CC = StringSwitch<int>(Name)
+             .Cases("f0", "fa0", "fv0", 0)
+             .Cases("f1", "fa1", "fv1", 1)
+             .Cases("f2", "fa2", 2)
+             .Cases("f3", "fa3", 3)
+             .Cases("f4", "fa4", 4)
+             .Cases("f5", "fa5", 5)
+             .Cases("f6", "fa6", 6)
+             .Cases("f7", "fa7", 7)
+             .Cases("f8", "ft0", 8)
+             .Cases("f9", "ft1", 9)
+             .Cases("f10", "ft2", 10)
+             .Cases("f11", "ft3", 11)
+             .Cases("f12", "ft4", 12)
+             .Cases("f13", "ft5", 13)
+             .Cases("f14", "ft6", 14)
+             .Cases("f15", "ft7", 15)
+             .Cases("f16", "ft8", 16)
+             .Cases("f17", "ft9", 17)
+             .Cases("f18", "ft10", 18)
+             .Cases("f19", "ft11", 19)
+             .Cases("f20", "ft12", 20)
+             .Cases("f21", "ft13", 21)
+             .Cases("f22", "ft14", 22)
+             .Cases("f23", "ft15", 23)
+             .Cases("f24", "fs0", 24)
+             .Cases("f25", "fs1", 25)
+             .Cases("f26", "fs2", 26)
+             .Cases("f27", "fs3", 27)
+             .Cases("f28", "fs4", 28)
+             .Cases("f29", "fs5", 29)
+             .Cases("f30", "fs6", 30)
+             .Cases("f31", "fs7", 31)
+             .Default(-1);
+
+    return CC;
+  }
+  return -1;
+}
+
+int LoongArchAsmParser::matchFCFRRegisterName(StringRef Name) {
+  if (Name.startswith("fcc")) {
+    StringRef NumString = Name.substr(3);
+    unsigned IntVal;
+    if (NumString.getAsInteger(10, IntVal))
+      return -1; // This is not an integer.
+    if (IntVal > 7) // There are only 8 fcc registers.
+      return -1;
+    return IntVal;
+  }
+  return -1;
+}
+
+int LoongArchAsmParser::matchFCSRRegisterName(StringRef Name) {
+  if (Name.startswith("fcsr")) {
+    StringRef NumString = Name.substr(4);
+    unsigned IntVal;
+    if (NumString.getAsInteger(10, IntVal))
+      return -1; // This is not an integer.
+    if (IntVal > 3) // There are only 4 fcsr registers.
+ return -1; + return IntVal; + } + return -1; +} + +int LoongArchAsmParser::matchLSX128RegisterName(StringRef Name) { + unsigned IntVal; + + if (Name.front() != 'v' || Name.drop_front(2).getAsInteger(10, IntVal)) + return -1; + + if (IntVal > 31) + return -1; + + return IntVal; +} + +int LoongArchAsmParser::matchLASX256RegisterName(StringRef Name) { + unsigned IntVal; + + if (Name.front() != 'x' || Name.drop_front(2).getAsInteger(10, IntVal)) + return -1; + + if (IntVal > 31) + return -1; + + return IntVal; +} + +bool LoongArchAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { + MCAsmParser &Parser = getParser(); + LLVM_DEBUG(dbgs() << "parseOperand\n"); + + // Check if the current operand has a custom associated parser, if so, try to + // custom parse the operand, or fallback to the general approach. + OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); + if (ResTy == MatchOperand_Success) + return false; + // If there wasn't a custom match, try the generic matcher below. Otherwise, + // there was a match, but an error occurred, in which case, just return that + // the operand parsing failed. + if (ResTy == MatchOperand_ParseFail) + return true; + + LLVM_DEBUG(dbgs() << ".. Generic Parser\n"); + + switch (getLexer().getKind()) { + case AsmToken::Dollar: { + // Parse the register. + SMLoc S = Parser.getTok().getLoc(); + + // Almost all registers have been parsed by custom parsers. There is only + // one exception to this. $zero (and it's alias $0) will reach this point + // for div, divu, and similar instructions because it is not an operand + // to the instruction definition but an explicit register. Special case + // this situation for now. + if (parseAnyRegister(Operands) != MatchOperand_NoMatch) + return false; + + // Maybe it is a symbol reference. + StringRef Identifier; + if (Parser.parseIdentifier(Identifier)) + return true; + + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + MCSymbol *Sym = getContext().getOrCreateSymbol("$" + Identifier); + // Otherwise create a symbol reference. + const MCExpr *Res = + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); + + Operands.push_back(LoongArchOperand::CreateImm(Res, S, E, *this)); + return false; + } + default: { + LLVM_DEBUG(dbgs() << ".. generic integer expression\n"); + + const MCExpr *Expr; + SMLoc S = Parser.getTok().getLoc(); // Start location of the operand. 
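+    // Anything that is not a '$' register is parsed as a generic expression
+    // and becomes an immediate operand for the matcher.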
+    if (getParser().parseExpression(Expr))
+      return true;
+
+    SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+
+    Operands.push_back(LoongArchOperand::CreateImm(Expr, S, E, *this));
+    return false;
+  }
+  } // switch(getLexer().getKind())
+  return true;
+}
+
+bool LoongArchAsmParser::isEvaluated(const MCExpr *Expr) {
+  switch (Expr->getKind()) {
+  case MCExpr::Constant:
+    return true;
+  case MCExpr::SymbolRef:
+    return (cast<MCSymbolRefExpr>(Expr)->getKind() != MCSymbolRefExpr::VK_None);
+  case MCExpr::Binary: {
+    const MCBinaryExpr *BE = cast<MCBinaryExpr>(Expr);
+    if (!isEvaluated(BE->getLHS()))
+      return false;
+    return isEvaluated(BE->getRHS());
+  }
+  case MCExpr::Unary:
+    return isEvaluated(cast<MCUnaryExpr>(Expr)->getSubExpr());
+  case MCExpr::Target:
+    return true;
+  }
+  return false;
+}
+
+bool LoongArchAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+                                       SMLoc &EndLoc) {
+  return tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success;
+}
+
+OperandMatchResultTy LoongArchAsmParser::tryParseRegister(unsigned &RegNo,
+                                                          SMLoc &StartLoc,
+                                                          SMLoc &EndLoc) {
+  // Start from the sentinel so a non-GPR match below reports NoMatch instead
+  // of reading an uninitialized value.
+  RegNo = (unsigned)-1;
+  SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Operands;
+  OperandMatchResultTy ResTy = parseAnyRegister(Operands);
+  if (ResTy == MatchOperand_Success) {
+    assert(Operands.size() == 1);
+    LoongArchOperand &Operand =
+        static_cast<LoongArchOperand &>(*Operands.front());
+    StartLoc = Operand.getStartLoc();
+    EndLoc = Operand.getEndLoc();
+
+    // AFAIK, we only support numeric registers and named GPRs in CFI
+    // directives.
+    // Don't worry about eating tokens before failing. Using an unrecognised
+    // register is a parse error.
+    if (Operand.isGPRAsmReg()) {
+      // Resolve to GPR32 or GPR64 appropriately.
+      RegNo = is64Bit() ? Operand.getGPR64Reg() : Operand.getGPR32Reg();
+    }
+
+    return (RegNo == (unsigned)-1) ? MatchOperand_NoMatch
+                                   : MatchOperand_Success;
+  }
+
+  assert(Operands.size() == 0);
+  return (RegNo == (unsigned)-1) ? MatchOperand_NoMatch : MatchOperand_Success;
+}
+
+bool LoongArchAsmParser::parseMemOffset(const MCExpr *&Res) {
+  return getParser().parseExpression(Res);
+}
+
+OperandMatchResultTy
+LoongArchAsmParser::parseMemOperand(OperandVector &Operands) {
+  MCAsmParser &Parser = getParser();
+  LLVM_DEBUG(dbgs() << "parseMemOperand\n");
+  const MCExpr *IdVal = nullptr;
+  SMLoc S;
+  OperandMatchResultTy Res = MatchOperand_NoMatch;
+  // First operand is the base.
+  S = Parser.getTok().getLoc();
+
+  Res = parseAnyRegister(Operands);
+  if (Res != MatchOperand_Success)
+    return Res;
+
+  if (Parser.getTok().isNot(AsmToken::Comma)) {
+    Error(Parser.getTok().getLoc(), "',' expected");
+    return MatchOperand_ParseFail;
+  }
+
+  Parser.Lex(); // Eat the ',' token.
+
+  if (parseMemOffset(IdVal))
+    return MatchOperand_ParseFail;
+
+  SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+
+  // Replace the register operand with the memory operand.
+  std::unique_ptr<LoongArchOperand> op(
+      static_cast<LoongArchOperand *>(Operands.back().release()));
+  // Remove the register from the operands.
+  // "op" will be managed by k_Memory.
+  Operands.pop_back();
+
+  // A bare symbol reference is not a valid memory offset; fail the parse.
+  if (isa<MCSymbolRefExpr>(IdVal)) {
+    return MatchOperand_ParseFail;
+  }
+
+  // Add the memory operand.
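+  // If the offset folds to a compile-time constant, canonicalize it to a
+  // plain MCConstantExpr so the matcher sees a bare immediate.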
+  if (isa<MCBinaryExpr>(IdVal)) {
+    int64_t Imm;
+    if (IdVal->evaluateAsAbsolute(Imm))
+      IdVal = MCConstantExpr::create(Imm, getContext());
+    else
+      return MatchOperand_ParseFail;
+  }
+
+  Operands.push_back(
+      LoongArchOperand::CreateMem(std::move(op), IdVal, S, E, *this));
+  return MatchOperand_Success;
+}
+
+OperandMatchResultTy
+LoongArchAsmParser::parseAMemOperand(OperandVector &Operands) {
+  MCAsmParser &Parser = getParser();
+  LLVM_DEBUG(dbgs() << "parseAMemOperand\n");
+  const MCExpr *IdVal = nullptr;
+  SMLoc S;
+  OperandMatchResultTy Res = MatchOperand_NoMatch;
+  // First operand is the base.
+  S = Parser.getTok().getLoc();
+
+  Res = parseAnyRegister(Operands);
+  if (Res != MatchOperand_Success)
+    return Res;
+
+  SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+
+  // AM* instructions allow an optional '0' memory offset.
+  if (Parser.getTok().is(AsmToken::Comma)) {
+    Parser.Lex(); // Eat the ',' token.
+
+    if (parseMemOffset(IdVal))
+      return MatchOperand_ParseFail;
+
+    // A bare symbol reference is not a valid offset; fail the parse.
+    if (isa<MCSymbolRefExpr>(IdVal))
+      return MatchOperand_ParseFail;
+
+    if (isa<MCBinaryExpr>(IdVal)) {
+      int64_t Imm;
+      if (IdVal->evaluateAsAbsolute(Imm)) {
+        assert(Imm == 0 && "imm must be 0");
+        IdVal = MCConstantExpr::create(Imm, getContext());
+      } else {
+        return MatchOperand_ParseFail;
+      }
+    }
+  } else {
+    // Offset defaults to 0.
+    IdVal = MCConstantExpr::create(0, getContext());
+  }
+
+  // Replace the register operand with the memory operand.
+  std::unique_ptr<LoongArchOperand> op(
+      static_cast<LoongArchOperand *>(Operands.back().release()));
+  // Remove the register from the operands.
+  // "op" will be managed by k_Memory.
+  Operands.pop_back();
+  // Add the memory operand.
+  Operands.push_back(
+      LoongArchOperand::CreateMem(std::move(op), IdVal, S, E, *this));
+  return MatchOperand_Success;
+}
+
+bool LoongArchAsmParser::searchSymbolAlias(OperandVector &Operands) {
+  MCAsmParser &Parser = getParser();
+  MCSymbol *Sym = getContext().lookupSymbol(Parser.getTok().getIdentifier());
+  if (!Sym)
+    return false;
+
+  SMLoc S = Parser.getTok().getLoc();
+  if (Sym->isVariable()) {
+    const MCExpr *Expr = Sym->getVariableValue();
+    if (Expr->getKind() == MCExpr::SymbolRef) {
+      const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr *>(Expr);
+      StringRef DefSymbol = Ref->getSymbol().getName();
+      if (DefSymbol.startswith("$")) {
+        OperandMatchResultTy ResTy =
+            matchAnyRegisterNameWithoutDollar(Operands, DefSymbol.substr(1), S);
+        if (ResTy == MatchOperand_Success) {
+          Parser.Lex();
+          return true;
+        }
+        if (ResTy == MatchOperand_ParseFail)
+          llvm_unreachable("Should never ParseFail");
+      }
+    }
+  } else if (Sym->isUnset()) {
+    // If the symbol is unset, it might have been created in the
+    // `parseSetAssignment` routine as an alias for a numeric register name.
+    // Look it up in the aliases list.
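+    // RegisterSets maps the alias name to the AsmToken of the register it
+    // was assigned, so the ordinary register matcher can run on that token.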
+ auto Entry = RegisterSets.find(Sym->getName()); + if (Entry != RegisterSets.end()) { + OperandMatchResultTy ResTy = + matchAnyRegisterWithoutDollar(Operands, Entry->getValue(), S); + if (ResTy == MatchOperand_Success) { + Parser.Lex(); + return true; + } + } + } + + return false; +} + +OperandMatchResultTy +LoongArchAsmParser::matchAnyRegisterNameWithoutDollar(OperandVector &Operands, + StringRef Identifier, + SMLoc S) { + int Index = matchCPURegisterName(Identifier); + if (Index != -1) { + Operands.push_back(LoongArchOperand::createGPRReg( + Index, Identifier, getContext().getRegisterInfo(), S, + getLexer().getLoc(), *this)); + return MatchOperand_Success; + } + + Index = matchFPURegisterName(Identifier); + if (Index != -1) { + Operands.push_back(LoongArchOperand::createFGRReg( + Index, Identifier, getContext().getRegisterInfo(), S, + getLexer().getLoc(), *this)); + return MatchOperand_Success; + } + + Index = matchFCFRRegisterName(Identifier); + if (Index != -1) { + Operands.push_back(LoongArchOperand::createFCFRReg( + Index, Identifier, getContext().getRegisterInfo(), S, + getLexer().getLoc(), *this)); + return MatchOperand_Success; + } + + Index = matchFCSRRegisterName(Identifier); + if (Index != -1) { + Operands.push_back(LoongArchOperand::createFCSRReg( + Index, Identifier, getContext().getRegisterInfo(), S, + getLexer().getLoc(), *this)); + return MatchOperand_Success; + } + + Index = matchLSX128RegisterName(Identifier); + if (Index != -1) { + Operands.push_back(LoongArchOperand::createLSX128Reg( + Index, Identifier, getContext().getRegisterInfo(), S, + getLexer().getLoc(), *this)); + return MatchOperand_Success; + } + + Index = matchLASX256RegisterName(Identifier); + if (Index != -1) { + Operands.push_back(LoongArchOperand::createLASX256Reg( + Index, Identifier, getContext().getRegisterInfo(), S, + getLexer().getLoc(), *this)); + return MatchOperand_Success; + } + + return MatchOperand_NoMatch; +} + +OperandMatchResultTy +LoongArchAsmParser::matchAnyRegisterWithoutDollar(OperandVector &Operands, + const AsmToken &Token, SMLoc S) { + if (Token.is(AsmToken::Identifier)) { + LLVM_DEBUG(dbgs() << ".. identifier\n"); + StringRef Identifier = Token.getIdentifier(); + OperandMatchResultTy ResTy = + matchAnyRegisterNameWithoutDollar(Operands, Identifier, S); + return ResTy; + } else if (Token.is(AsmToken::Integer)) { + LLVM_DEBUG(dbgs() << ".. integer\n"); + int64_t RegNum = Token.getIntVal(); + if (RegNum < 0 || RegNum > 31) { + // Show the error, but treat invalid register + // number as a normal one to continue parsing + // and catch other possible errors. + Error(getLexer().getLoc(), "invalid register number"); + } + Operands.push_back(LoongArchOperand::createNumericReg( + RegNum, Token.getString(), getContext().getRegisterInfo(), S, + Token.getLoc(), *this)); + return MatchOperand_Success; + } + + LLVM_DEBUG(dbgs() << Token.getKind() << "\n"); + + return MatchOperand_NoMatch; +} + +OperandMatchResultTy +LoongArchAsmParser::matchAnyRegisterWithoutDollar(OperandVector &Operands, SMLoc S) { + auto Token = getLexer().peekTok(false); + return matchAnyRegisterWithoutDollar(Operands, Token, S); +} + +OperandMatchResultTy +LoongArchAsmParser::parseAnyRegister(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); + LLVM_DEBUG(dbgs() << "parseAnyRegister\n"); + + auto Token = Parser.getTok(); + + SMLoc S = Token.getLoc(); + + if (Token.isNot(AsmToken::Dollar)) { + LLVM_DEBUG(dbgs() << ".. 
!$ -> try sym aliasing\n"); + if (Token.is(AsmToken::Identifier)) { + if (searchSymbolAlias(Operands)) + return MatchOperand_Success; + } + LLVM_DEBUG(dbgs() << ".. !symalias -> NoMatch\n"); + return MatchOperand_NoMatch; + } + LLVM_DEBUG(dbgs() << ".. $\n"); + + OperandMatchResultTy ResTy = matchAnyRegisterWithoutDollar(Operands, S); + if (ResTy == MatchOperand_Success) { + Parser.Lex(); // $ + Parser.Lex(); // identifier + } + return ResTy; +} + +OperandMatchResultTy +LoongArchAsmParser::parseJumpTarget(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); + LLVM_DEBUG(dbgs() << "parseJumpTarget\n"); + + SMLoc S = getLexer().getLoc(); + + // Registers are a valid target and have priority over symbols. + OperandMatchResultTy ResTy = parseAnyRegister(Operands); + if (ResTy != MatchOperand_NoMatch) + return ResTy; + + // Integers and expressions are acceptable + const MCExpr *Expr = nullptr; + if (Parser.parseExpression(Expr)) { + // We have no way of knowing if a symbol was consumed so we must ParseFail + return MatchOperand_ParseFail; + } + Operands.push_back( + LoongArchOperand::CreateImm(Expr, S, getLexer().getLoc(), *this)); + return MatchOperand_Success; +} + +static std::string LoongArchMnemonicSpellCheck(StringRef S, + const FeatureBitset &FBS, + unsigned VariantID = 0); + +bool LoongArchAsmParser::ParseInstruction(ParseInstructionInfo &Info, + StringRef Name, SMLoc NameLoc, + OperandVector &Operands) { + MCAsmParser &Parser = getParser(); + LLVM_DEBUG(dbgs() << "ParseInstruction\n"); + + // We have reached first instruction, module directive are now forbidden. + getTargetStreamer().forbidModuleDirective(); + + // Check if we have valid mnemonic + if (!mnemonicIsValid(Name)) { + FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); + std::string Suggestion = LoongArchMnemonicSpellCheck(Name, FBS); + return Error(NameLoc, "unknown instruction" + Suggestion); + } + + // First operand in MCInst is instruction mnemonic. + Operands.push_back(LoongArchOperand::CreateToken(Name, NameLoc, *this)); + + // Read the remaining operands. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + // Read the first operand. + if (parseOperand(Operands, Name)) { + SMLoc Loc = getLexer().getLoc(); + return Error(Loc, "unexpected token in argument list"); + } + + while (getLexer().is(AsmToken::Comma)) { + Parser.Lex(); // Eat the comma. + // Parse and remember the operand. + if (parseOperand(Operands, Name)) { + SMLoc Loc = getLexer().getLoc(); + return Error(Loc, "unexpected token in argument list"); + } + } + } + if (getLexer().isNot(AsmToken::EndOfStatement)) { + SMLoc Loc = getLexer().getLoc(); + return Error(Loc, "unexpected token in argument list"); + } + Parser.Lex(); // Consume the EndOfStatement. + return false; +} + +// FIXME: Given that these have the same name, these should both be +// consistent on affecting the Parser. 
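+// Note that Error() always returns true, so parse routines can simply
+// "return reportParseError(...)" to signal failure.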
+bool LoongArchAsmParser::reportParseError(Twine ErrorMsg) { + SMLoc Loc = getLexer().getLoc(); + return Error(Loc, ErrorMsg); +} + +bool LoongArchAsmParser::parseSetAssignment() { + StringRef Name; + const MCExpr *Value; + MCAsmParser &Parser = getParser(); + + if (Parser.parseIdentifier(Name)) + return reportParseError("expected identifier after .set"); + + if (getLexer().isNot(AsmToken::Comma)) + return reportParseError("unexpected token, expected comma"); + Lex(); // Eat comma + + if (!Parser.parseExpression(Value)) { + // Parse assignment of an expression including + // symbolic registers: + // .set $tmp, $BB0-$BB1 + // .set r2, $f2 + MCSymbol *Sym = getContext().getOrCreateSymbol(Name); + Sym->setVariableValue(Value); + } else { + return reportParseError("expected valid expression after comma"); + } + + return false; +} + +bool LoongArchAsmParser::parseDirectiveSet() { + const AsmToken &Tok = getParser().getTok(); + StringRef IdVal = Tok.getString(); + SMLoc Loc = Tok.getLoc(); + + if (IdVal == "bopt") { + Warning(Loc, "'bopt' feature is unsupported"); + getParser().Lex(); + return false; + } + if (IdVal == "nobopt") { + // We're already running in nobopt mode, so nothing to do. + getParser().Lex(); + return false; + } + + // It is just an identifier, look for an assignment. + return parseSetAssignment(); +} + +bool LoongArchAsmParser::ParseDirective(AsmToken DirectiveID) { + // This returns false if this function recognizes the directive + // regardless of whether it is successfully handles or reports an + // error. Otherwise it returns true to give the generic parser a + // chance at recognizing it. + + MCAsmParser &Parser = getParser(); + StringRef IDVal = DirectiveID.getString(); + + if (IDVal == ".end") { + while (getLexer().isNot(AsmToken::Eof)) + Parser.Lex(); + return false; + } + + if (IDVal == ".set") { + parseDirectiveSet(); + return false; + } + + if (IDVal == ".llvm_internal_loongarch_reallow_module_directive") { + parseInternalDirectiveReallowModule(); + return false; + } + + return true; +} + +bool LoongArchAsmParser::parseInternalDirectiveReallowModule() { + // If this is not the end of the statement, report an error. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token, expected end of statement"); + return false; + } + + getTargetStreamer().reallowModuleDirective(); + + getParser().Lex(); // Eat EndOfStatement token. + return false; +} + +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchAsmParser() { + RegisterMCAsmParser X(getTheLoongArch32Target()); + RegisterMCAsmParser A(getTheLoongArch64Target()); +} + +#define GET_REGISTER_MATCHER +#define GET_MATCHER_IMPLEMENTATION +#define GET_MNEMONIC_SPELL_CHECKER +#include "LoongArchGenAsmMatcher.inc" + +bool LoongArchAsmParser::mnemonicIsValid(StringRef Mnemonic) { + // Find the appropriate table for this asm variant. + const MatchEntry *Start, *End; + Start = std::begin(MatchTable0); + End = std::end(MatchTable0); + + // Search the table. 
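+  // The generated MatchTable0 is sorted by mnemonic, so a binary search is
+  // sufficient to test for membership.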
+ auto MnemonicRange = std::equal_range(Start, End, Mnemonic, LessOpcode()); + return MnemonicRange.first != MnemonicRange.second; +} diff --git a/lib/Target/LoongArch/CMakeLists.txt b/lib/Target/LoongArch/CMakeLists.txt new file mode 100644 index 00000000..8540b97f --- /dev/null +++ b/lib/Target/LoongArch/CMakeLists.txt @@ -0,0 +1,55 @@ +add_llvm_component_group(LoongArch HAS_JIT) + +set(LLVM_TARGET_DEFINITIONS LoongArch.td) + +tablegen(LLVM LoongArchGenAsmMatcher.inc -gen-asm-matcher) +tablegen(LLVM LoongArchGenAsmWriter.inc -gen-asm-writer) +tablegen(LLVM LoongArchGenCallingConv.inc -gen-callingconv) +tablegen(LLVM LoongArchGenDAGISel.inc -gen-dag-isel) +tablegen(LLVM LoongArchGenDisassemblerTables.inc -gen-disassembler) +tablegen(LLVM LoongArchGenInstrInfo.inc -gen-instr-info) +tablegen(LLVM LoongArchGenMCCodeEmitter.inc -gen-emitter) +tablegen(LLVM LoongArchGenMCPseudoLowering.inc -gen-pseudo-lowering) +tablegen(LLVM LoongArchGenRegisterInfo.inc -gen-register-info) +tablegen(LLVM LoongArchGenSubtargetInfo.inc -gen-subtarget) + +add_public_tablegen_target(LoongArchCommonTableGen) + +add_llvm_target(LoongArchCodeGen + LoongArchAsmPrinter.cpp + LoongArchCCState.cpp + LoongArchExpandPseudo.cpp + LoongArchInstrInfo.cpp + LoongArchISelDAGToDAG.cpp + LoongArchISelLowering.cpp + LoongArchFrameLowering.cpp + LoongArchMCInstLower.cpp + LoongArchMachineFunction.cpp + LoongArchModuleISelDAGToDAG.cpp + LoongArchRegisterInfo.cpp + LoongArchSubtarget.cpp + LoongArchTargetMachine.cpp + LoongArchTargetObjectFile.cpp + LoongArchTargetTransformInfo.cpp + + LINK_COMPONENTS + Analysis + AsmPrinter + CodeGen + Core + MC + LoongArchDesc + LoongArchInfo + SelectionDAG + Support + Target + GlobalISel + + ADD_TO_COMPONENT + LoongArch + ) + +add_subdirectory(AsmParser) +add_subdirectory(Disassembler) +add_subdirectory(MCTargetDesc) +add_subdirectory(TargetInfo) diff --git a/lib/Target/LoongArch/Disassembler/CMakeLists.txt b/lib/Target/LoongArch/Disassembler/CMakeLists.txt new file mode 100644 index 00000000..864be631 --- /dev/null +++ b/lib/Target/LoongArch/Disassembler/CMakeLists.txt @@ -0,0 +1,11 @@ +add_llvm_component_library(LLVMLoongArchDisassembler + LoongArchDisassembler.cpp + + LINK_COMPONENTS + MCDisassembler + LoongArchInfo + Support + + ADD_TO_COMPONENT + LoongArch + ) diff --git a/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp b/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp new file mode 100644 index 00000000..85075115 --- /dev/null +++ b/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp @@ -0,0 +1,938 @@ +//===- LoongArchDisassembler.cpp - Disassembler for LoongArch -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is part of the LoongArch Disassembler. 
+// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/LoongArchMCTargetDesc.h" +#include "LoongArch.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/MC/MCFixedLenDisassembler.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "loongarch-disassembler" + +using DecodeStatus = MCDisassembler::DecodeStatus; + +namespace { + +class LoongArchDisassembler : public MCDisassembler { + +public: + LoongArchDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) + : MCDisassembler(STI, Ctx) {} + + bool is64Bit() const { return STI.getFeatureBits()[LoongArch::Feature64Bit]; } + + DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, + ArrayRef Bytes, uint64_t Address, + raw_ostream &CStream) const override; +}; + +} // end anonymous namespace + +// Forward declare these because the autogenerated code will reference them. +// Definitions are further down. +static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodePtrRegisterClass(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeFGR32RegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeFCSRRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeFCFRRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeLSX128BRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeLSX128HRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeLSX128WRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeLSX128DRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeLASX256BRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeLASX256HRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeLASX256WRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeLASX256DRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeBranchTarget(MCInst &Inst, + unsigned Offset, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeJumpTarget(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeMem(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); 
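Illustrative sketch, separate from the patch itself: each Decode* callback declared here follows the same recipe, slicing a bit-field out of the 32-bit instruction word and then sign- or zero-extending it. The standalone program below shows that recipe on the si12 load/store layout used by DecodeMem further down; the helper names are invented for the example.

#include <cassert>
#include <cstdint>

// Extract Width bits of Insn starting at bit Lo (cf. fieldFromInstruction).
static uint32_t field(uint32_t Insn, unsigned Lo, unsigned Width) {
  return (Insn >> Lo) & ((1u << Width) - 1u);
}

// Sign-extend a B-bit value to 32 bits (cf. llvm::SignExtend32<B>).
template <unsigned B> static int32_t signExtend(uint32_t X) {
  return (int32_t)(X << (32 - B)) >> (32 - B);
}

int main() {
  // si12 load/store layout: rd in [4:0], base rj in [9:5], si12 in [21:10].
  uint32_t Insn = (0xfffu << 10) | (5u << 5) | 4u; // rd=$r4, rj=$r5, si12=-1
  assert(field(Insn, 0, 5) == 4);
  assert(field(Insn, 5, 5) == 5);
  assert(signExtend<12>(field(Insn, 10, 12)) == -1);
  return 0;
}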
+ +static DecodeStatus DecodeAMem(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeMemSimm14(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeLSX128Mem(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); + +static DecodeStatus DecodeLSX128Mem13(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); + +static DecodeStatus DecodeLSX128Mem10(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); + +static DecodeStatus DecodeLASX256Mem13(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); + +static DecodeStatus DecodeLASX256Mem10(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); + +static DecodeStatus DecodeLSX128memlsl(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); + +static DecodeStatus DecodeLSX128memstl(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); + +static DecodeStatus DecodeLASX256memlsl(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); + +static DecodeStatus DecodeLASX256memstl(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); + +static DecodeStatus DecodeLASX256Mem(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); + +static DecodeStatus DecodeFMem(MCInst &Inst, unsigned Insn, + uint64_t Address, + const void *Decoder); + +template +static DecodeStatus DecodeUImmWithOffsetAndScale(MCInst &Inst, unsigned Value, + uint64_t Address, + const void *Decoder); + +template +static DecodeStatus DecodeUImmWithOffset(MCInst &Inst, unsigned Value, + uint64_t Address, + const void *Decoder) { + return DecodeUImmWithOffsetAndScale(Inst, Value, Address, + Decoder); +} + +template +static DecodeStatus DecodeSImmWithOffsetAndScale(MCInst &Inst, unsigned Value, + uint64_t Address, + const void *Decoder); + +/// INSVE_[BHWD] have an implicit operand that the generated decoder doesn't +/// handle. +template +static DecodeStatus DecodeINSVE_DF(MCInst &MI, InsnType insn, uint64_t Address, + const void *Decoder); + +namespace llvm { + +Target &getTheLoongArch32Target(); +Target &getTheLoongArch64Target(); + +} // end namespace llvm + +static MCDisassembler *createLoongArchDisassembler( + const Target &T, + const MCSubtargetInfo &STI, + MCContext &Ctx) { + return new LoongArchDisassembler(STI, Ctx); +} + +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchDisassembler() { + // Register the disassembler. 
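+  // Both the la32 and la64 targets share one disassembler; getInstruction()
+  // picks the decoder table from the subtarget features at run time.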
+ TargetRegistry::RegisterMCDisassembler(getTheLoongArch32Target(), + createLoongArchDisassembler); + TargetRegistry::RegisterMCDisassembler(getTheLoongArch64Target(), + createLoongArchDisassembler); +} + +#include "LoongArchGenDisassemblerTables.inc" + +static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) { + const LoongArchDisassembler *Dis = static_cast(D); + const MCRegisterInfo *RegInfo = Dis->getContext().getRegisterInfo(); + if (RC == LoongArch::GPR64RegClassID || RC == LoongArch::GPR32RegClassID) { + // sync with the GPR32/GPR64 RegisterClass in LoongArchRegisterInfo.td + // that just like LoongArchAsmParser.cpp and LoongArchISelLowering.cpp + unsigned char indexes[] = { 0, 27, 28, 29, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 11, 12, + 13, 14, 15, 16, 17, 30, 31, 18, + 19, 20, 21, 22, 23, 24, 25, 26 + }; + assert(RegNo < sizeof(indexes)); + return *(RegInfo->getRegClass(RC).begin() + indexes[RegNo]); + } + return *(RegInfo->getRegClass(RC).begin() + RegNo); +} + +template +static DecodeStatus DecodeINSVE_DF(MCInst &MI, InsnType insn, uint64_t Address, + const void *Decoder) { + using DecodeFN = DecodeStatus (*)(MCInst &, unsigned, uint64_t, const void *); + + // The size of the n field depends on the element size + // The register class also depends on this. + InsnType tmp = fieldFromInstruction(insn, 17, 5); + unsigned NSize = 0; + DecodeFN RegDecoder = nullptr; + if ((tmp & 0x18) == 0x00) { + NSize = 4; + RegDecoder = DecodeLSX128BRegisterClass; + } else if ((tmp & 0x1c) == 0x10) { + NSize = 3; + RegDecoder = DecodeLSX128HRegisterClass; + } else if ((tmp & 0x1e) == 0x18) { + NSize = 2; + RegDecoder = DecodeLSX128WRegisterClass; + } else if ((tmp & 0x1f) == 0x1c) { + NSize = 1; + RegDecoder = DecodeLSX128DRegisterClass; + } else + llvm_unreachable("Invalid encoding"); + + assert(NSize != 0 && RegDecoder != nullptr); + + // $vd + tmp = fieldFromInstruction(insn, 6, 5); + if (RegDecoder(MI, tmp, Address, Decoder) == MCDisassembler::Fail) + return MCDisassembler::Fail; + // $vd_in + if (RegDecoder(MI, tmp, Address, Decoder) == MCDisassembler::Fail) + return MCDisassembler::Fail; + // $n + tmp = fieldFromInstruction(insn, 16, NSize); + MI.addOperand(MCOperand::createImm(tmp)); + // $vs + tmp = fieldFromInstruction(insn, 11, 5); + if (RegDecoder(MI, tmp, Address, Decoder) == MCDisassembler::Fail) + return MCDisassembler::Fail; + // $n2 + MI.addOperand(MCOperand::createImm(0)); + + return MCDisassembler::Success; +} + +/// Read four bytes from the ArrayRef and return 32 bit word. +static DecodeStatus readInstruction32(ArrayRef Bytes, uint64_t Address, + uint64_t &Size, uint32_t &Insn) { + // We want to read exactly 4 Bytes of data. + if (Bytes.size() < 4) { + Size = 0; + return MCDisassembler::Fail; + } + + Insn = (Bytes[0] << 0) | (Bytes[1] << 8) | (Bytes[2] << 16) | + (Bytes[3] << 24); + + return MCDisassembler::Success; +} + +DecodeStatus LoongArchDisassembler::getInstruction(MCInst &Instr, uint64_t &Size, + ArrayRef Bytes, + uint64_t Address, + raw_ostream &CStream) const { + uint32_t Insn; + DecodeStatus Result; + Size = 0; + + // Attempt to read the instruction so that we can attempt to decode it. If + // the buffer is not 4 bytes long, let the higher level logic figure out + // what to do with a size of zero and MCDisassembler::Fail. + Result = readInstruction32(Bytes, Address, Size, Insn); + if (Result == MCDisassembler::Fail) + return MCDisassembler::Fail; + + // The only instruction size for standard encoded LoongArch. 
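+  // Setting Size before consulting the tables lets the caller skip a full
+  // word even when no encoding matches.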
+ Size = 4; + + if (is64Bit()) { + LLVM_DEBUG(dbgs() << "Trying LoongArch (GPR64) table (32-bit opcodes):\n"); + Result = decodeInstruction(DecoderTableLoongArch32, Instr, Insn, + Address, this, STI); + if (Result != MCDisassembler::Fail) + return Result; + } + + LLVM_DEBUG(dbgs() << "Trying LoongArch32 (GPR32) table (32-bit opcodes):\n"); + Result = decodeInstruction(DecoderTableLoongArch3232, Instr, Insn, + Address, this, STI); + if (Result != MCDisassembler::Fail) + return Result; + + return MCDisassembler::Fail; +} + +static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + unsigned Reg = getReg(Decoder, LoongArch::GPR64RegClassID, RegNo); + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + unsigned Reg = getReg(Decoder, LoongArch::GPR32RegClassID, RegNo); + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodePtrRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (static_cast(Decoder)->is64Bit()) + return DecodeGPR64RegisterClass(Inst, RegNo, Address, Decoder); + + return DecodeGPR32RegisterClass(Inst, RegNo, Address, Decoder); +} + +static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + unsigned Reg = getReg(Decoder, LoongArch::FGR64RegClassID, RegNo); + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeFGR32RegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + unsigned Reg = getReg(Decoder, LoongArch::FGR32RegClassID, RegNo); + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeFCSRRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + unsigned Reg = getReg(Decoder, LoongArch::FCSRRegClassID, RegNo); + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeFCFRRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 7) + return MCDisassembler::Fail; + + unsigned Reg = getReg(Decoder, LoongArch::FCFRRegClassID, RegNo); + Inst.addOperand(MCOperand::createReg(Reg)); + + return MCDisassembler::Success; +} + +static DecodeStatus DecodeMem(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + int Offset = SignExtend32<12>((Insn >> 10) & 0xfff); + unsigned Reg = fieldFromInstruction(Insn, 0, 5); + unsigned Base = fieldFromInstruction(Insn, 5, 5); + + Reg = getReg(Decoder, LoongArch::GPR32RegClassID, Reg); + Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); + + if (Inst.getOpcode() == LoongArch::SC_W || + Inst.getOpcode() == LoongArch::SC_D) + Inst.addOperand(MCOperand::createReg(Reg)); + + Inst.addOperand(MCOperand::createReg(Reg)); + Inst.addOperand(MCOperand::createReg(Base)); + Inst.addOperand(MCOperand::createImm(Offset)); + + return MCDisassembler::Success; +} + +static DecodeStatus DecodeAMem(MCInst &Inst, unsigned Insn, 
uint64_t Address,
+                               const void *Decoder) {
+  unsigned Rd = fieldFromInstruction(Insn, 0, 5);
+  unsigned Rj = fieldFromInstruction(Insn, 5, 5);
+  unsigned Rk = fieldFromInstruction(Insn, 10, 5);
+
+  Rd = getReg(Decoder, LoongArch::GPR32RegClassID, Rd);
+  Rj = getReg(Decoder, LoongArch::GPR32RegClassID, Rj);
+  Rk = getReg(Decoder, LoongArch::GPR32RegClassID, Rk);
+
+  // Note: the operand order is "rd, rk, rj".
+  Inst.addOperand(MCOperand::createReg(Rd));
+  Inst.addOperand(MCOperand::createReg(Rk));
+  Inst.addOperand(MCOperand::createReg(Rj));
+
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeMemSimm14(MCInst &Inst,
+                                    unsigned Insn,
+                                    uint64_t Address,
+                                    const void *Decoder) {
+  // The offset field is 14 bits wide, so sign-extend all 14 collected bits.
+  int Offset = SignExtend32<14>((Insn >> 10) & 0x3fff);
+  unsigned Reg = fieldFromInstruction(Insn, 0, 5);
+  unsigned Base = fieldFromInstruction(Insn, 5, 5);
+
+  Reg = getReg(Decoder, LoongArch::GPR32RegClassID, Reg);
+  Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base);
+
+  if (Inst.getOpcode() == LoongArch::SC_W ||
+      Inst.getOpcode() == LoongArch::SC_D)
+    Inst.addOperand(MCOperand::createReg(Reg));
+
+  Inst.addOperand(MCOperand::createReg(Reg));
+  Inst.addOperand(MCOperand::createReg(Base));
+  Inst.addOperand(MCOperand::createImm(Offset));
+
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeLSX128Mem(MCInst &Inst, unsigned Insn,
+                                    uint64_t Address, const void *Decoder) {
+  int Offset = SignExtend32<12>(fieldFromInstruction(Insn, 10, 12));
+  unsigned Reg = fieldFromInstruction(Insn, 0, 5);
+  unsigned Base = fieldFromInstruction(Insn, 5, 5);
+  Reg = getReg(Decoder, LoongArch::LSX128BRegClassID, Reg);
+  Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base);
+
+  Inst.addOperand(MCOperand::createReg(Reg));
+  Inst.addOperand(MCOperand::createReg(Base));
+
+  Inst.addOperand(MCOperand::createImm(Offset));
+
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeLSX128Mem13(MCInst &Inst, unsigned Insn,
+                                      uint64_t Address, const void *Decoder) {
+  int Offset = SignExtend32<13>(fieldFromInstruction(Insn, 5, 13));
+  unsigned Reg = fieldFromInstruction(Insn, 0, 5);
+  Reg = getReg(Decoder, LoongArch::LSX128BRegClassID, Reg);
+
+  Inst.addOperand(MCOperand::createReg(Reg));
+
+  Inst.addOperand(MCOperand::createImm(Offset));
+
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeLSX128Mem10(MCInst &Inst, unsigned Insn,
+                                      uint64_t Address, const void *Decoder) {
+  int Offset = SignExtend32<10>(fieldFromInstruction(Insn, 5, 10));
+  unsigned Reg = fieldFromInstruction(Insn, 0, 5);
+  Reg = getReg(Decoder, LoongArch::LSX128BRegClassID, Reg);
+
+  Inst.addOperand(MCOperand::createReg(Reg));
+
+  Inst.addOperand(MCOperand::createImm(Offset));
+
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeLASX256Mem13(MCInst &Inst, unsigned Insn,
+                                       uint64_t Address, const void *Decoder) {
+  int Offset = SignExtend32<13>(fieldFromInstruction(Insn, 5, 13));
+  unsigned Reg = fieldFromInstruction(Insn, 0, 5);
+  Reg = getReg(Decoder, LoongArch::LASX256BRegClassID, Reg);
+
+  Inst.addOperand(MCOperand::createReg(Reg));
+
+  Inst.addOperand(MCOperand::createImm(Offset));
+
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeLASX256Mem10(MCInst &Inst, unsigned Insn,
+                                       uint64_t Address, const void *Decoder) {
+  int Offset = SignExtend32<10>(fieldFromInstruction(Insn, 5, 10));
+  unsigned Reg = fieldFromInstruction(Insn, 0, 5);
+  Reg = getReg(Decoder, LoongArch::LASX256BRegClassID, Reg);
+
+  Inst.addOperand(MCOperand::createReg(Reg));
+
+ 
Inst.addOperand(MCOperand::createImm(Offset)); + + return MCDisassembler::Success; +} + +static DecodeStatus DecodeLSX128memstl(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + int Offset = SignExtend32<8>(fieldFromInstruction(Insn, 10, 8)); + unsigned Reg = fieldFromInstruction(Insn, 0, 5); + unsigned Base = fieldFromInstruction(Insn, 5, 5); + Reg = getReg(Decoder, LoongArch::LSX128BRegClassID, Reg); + Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); + unsigned idx; + + Inst.addOperand(MCOperand::createReg(Reg)); + Inst.addOperand(MCOperand::createReg(Base)); + + switch (Inst.getOpcode()) { + default: + assert(false && "Unexpected instruction"); + return MCDisassembler::Fail; + break; + case LoongArch::VSTELM_B: + Inst.addOperand(MCOperand::createImm(Offset)); + idx = fieldFromInstruction(Insn, 18, 4); + Inst.addOperand(MCOperand::createImm(idx)); + break; + case LoongArch::VSTELM_H: + Inst.addOperand(MCOperand::createImm(Offset * 2)); + idx = fieldFromInstruction(Insn, 18, 3); + Inst.addOperand(MCOperand::createImm(idx)); + break; + case LoongArch::VSTELM_W: + Inst.addOperand(MCOperand::createImm(Offset * 4)); + idx = fieldFromInstruction(Insn, 18, 2); + Inst.addOperand(MCOperand::createImm(idx)); + break; + case LoongArch::VSTELM_D: + Inst.addOperand(MCOperand::createImm(Offset * 8)); + idx = fieldFromInstruction(Insn, 18, 1); + Inst.addOperand(MCOperand::createImm(idx)); + break; + } + + return MCDisassembler::Success; +} + +static DecodeStatus DecodeLSX128memlsl(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + + int Offset; + unsigned Reg, Base; + switch (Inst.getOpcode()) { + default: + assert(false && "Unexpected instruction"); + return MCDisassembler::Fail; + break; + case LoongArch::VLDREPL_B: + + Offset = SignExtend32<12>(fieldFromInstruction(Insn, 10, 12)); + Reg = fieldFromInstruction(Insn, 0, 5); + Base = fieldFromInstruction(Insn, 5, 5); + Reg = getReg(Decoder, LoongArch::LSX128BRegClassID, Reg); + Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); + + Inst.addOperand(MCOperand::createReg(Reg)); + Inst.addOperand(MCOperand::createReg(Base)); + + Inst.addOperand(MCOperand::createImm(Offset)); + break; + case LoongArch::VLDREPL_H: + + Offset = SignExtend32<11>(fieldFromInstruction(Insn, 10, 11)); + Reg = fieldFromInstruction(Insn, 0, 5); + Base = fieldFromInstruction(Insn, 5, 5); + Reg = getReg(Decoder, LoongArch::LSX128HRegClassID, Reg); + Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); + + Inst.addOperand(MCOperand::createReg(Reg)); + Inst.addOperand(MCOperand::createReg(Base)); + Inst.addOperand(MCOperand::createImm(Offset * 2)); + break; + case LoongArch::VLDREPL_W: + + Offset = SignExtend32<10>(fieldFromInstruction(Insn, 10, 10)); + Reg = fieldFromInstruction(Insn, 0, 5); + Base = fieldFromInstruction(Insn, 5, 5); + Reg = getReg(Decoder, LoongArch::LSX128WRegClassID, Reg); + Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); + + Inst.addOperand(MCOperand::createReg(Reg)); + Inst.addOperand(MCOperand::createReg(Base)); + Inst.addOperand(MCOperand::createImm(Offset * 4)); + break; + case LoongArch::VLDREPL_D: + + Offset = SignExtend32<9>(fieldFromInstruction(Insn, 10, 9)); + Reg = fieldFromInstruction(Insn, 0, 5); + Base = fieldFromInstruction(Insn, 5, 5); + Reg = getReg(Decoder, LoongArch::LSX128WRegClassID, Reg); + Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); + + Inst.addOperand(MCOperand::createReg(Reg)); + Inst.addOperand(MCOperand::createReg(Base)); + 
Inst.addOperand(MCOperand::createImm(Offset * 8)); + break; + } + + return MCDisassembler::Success; +} +static DecodeStatus DecodeLASX256Mem(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + int Offset = SignExtend32<12>(fieldFromInstruction(Insn, 10, 12)); + unsigned Reg = fieldFromInstruction(Insn, 0, 5); + unsigned Base = fieldFromInstruction(Insn, 5, 5); + Reg = getReg(Decoder, LoongArch::LASX256BRegClassID, Reg); + Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); + + Inst.addOperand(MCOperand::createReg(Reg)); + Inst.addOperand(MCOperand::createReg(Base)); + + Inst.addOperand(MCOperand::createImm(Offset)); + + return MCDisassembler::Success; +} + +static DecodeStatus DecodeLASX256memstl(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + int Offset = SignExtend32<8>(fieldFromInstruction(Insn, 10, 8)); + unsigned Reg = fieldFromInstruction(Insn, 0, 5); + unsigned Base = fieldFromInstruction(Insn, 5, 5); + Reg = getReg(Decoder, LoongArch::LASX256BRegClassID, Reg); + Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); + unsigned idx; + + Inst.addOperand(MCOperand::createReg(Reg)); + Inst.addOperand(MCOperand::createReg(Base)); + + switch (Inst.getOpcode()) { + default: + assert(false && "Unexpected instruction"); + return MCDisassembler::Fail; + break; + case LoongArch::XVSTELM_B: + Inst.addOperand(MCOperand::createImm(Offset)); + idx = fieldFromInstruction(Insn, 18, 5); + Inst.addOperand(MCOperand::createImm(idx)); + break; + case LoongArch::XVSTELM_H: + Inst.addOperand(MCOperand::createImm(Offset * 2)); + idx = fieldFromInstruction(Insn, 18, 4); + Inst.addOperand(MCOperand::createImm(idx)); + break; + case LoongArch::XVSTELM_W: + Inst.addOperand(MCOperand::createImm(Offset * 4)); + idx = fieldFromInstruction(Insn, 18, 3); + Inst.addOperand(MCOperand::createImm(idx)); + break; + case LoongArch::XVSTELM_D: + Inst.addOperand(MCOperand::createImm(Offset * 8)); + idx = fieldFromInstruction(Insn, 18, 2); + Inst.addOperand(MCOperand::createImm(idx)); + break; + } + + return MCDisassembler::Success; +} + +static DecodeStatus DecodeLASX256memlsl(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + + int Offset; + unsigned Reg, Base; + switch (Inst.getOpcode()) { + default: + assert(false && "Unexpected instruction"); + return MCDisassembler::Fail; + break; + case LoongArch::XVLDREPL_B: + + Offset = SignExtend32<12>(fieldFromInstruction(Insn, 10, 12)); + Reg = fieldFromInstruction(Insn, 0, 5); + Base = fieldFromInstruction(Insn, 5, 5); + Reg = getReg(Decoder, LoongArch::LASX256BRegClassID, Reg); + Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); + + Inst.addOperand(MCOperand::createReg(Reg)); + Inst.addOperand(MCOperand::createReg(Base)); + + Inst.addOperand(MCOperand::createImm(Offset)); + break; + case LoongArch::XVLDREPL_H: + + Offset = SignExtend32<11>(fieldFromInstruction(Insn, 10, 11)); + Reg = fieldFromInstruction(Insn, 0, 5); + Base = fieldFromInstruction(Insn, 5, 5); + Reg = getReg(Decoder, LoongArch::LASX256HRegClassID, Reg); + Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); + + Inst.addOperand(MCOperand::createReg(Reg)); + Inst.addOperand(MCOperand::createReg(Base)); + Inst.addOperand(MCOperand::createImm(Offset * 2)); + break; + case LoongArch::XVLDREPL_W: + + Offset = SignExtend32<10>(fieldFromInstruction(Insn, 10, 10)); + Reg = fieldFromInstruction(Insn, 0, 5); + Base = fieldFromInstruction(Insn, 5, 5); + Reg = getReg(Decoder, LoongArch::LASX256WRegClassID, Reg); + Base = 
getReg(Decoder, LoongArch::GPR32RegClassID, Base);
+
+    Inst.addOperand(MCOperand::createReg(Reg));
+    Inst.addOperand(MCOperand::createReg(Base));
+    Inst.addOperand(MCOperand::createImm(Offset * 4));
+    break;
+  case LoongArch::XVLDREPL_D:
+
+    Offset = SignExtend32<9>(fieldFromInstruction(Insn, 10, 9));
+    Reg = fieldFromInstruction(Insn, 0, 5);
+    Base = fieldFromInstruction(Insn, 5, 5);
+    Reg = getReg(Decoder, LoongArch::LASX256WRegClassID, Reg);
+    Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base);
+
+    Inst.addOperand(MCOperand::createReg(Reg));
+    Inst.addOperand(MCOperand::createReg(Base));
+    Inst.addOperand(MCOperand::createImm(Offset * 8));
+    break;
+  }
+
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeFMem(MCInst &Inst,
+                               unsigned Insn,
+                               uint64_t Address,
+                               const void *Decoder) {
+  // Mask exactly the 12-bit offset field to match the sign extension.
+  int Offset = SignExtend32<12>((Insn >> 10) & 0xfff);
+  unsigned Reg = fieldFromInstruction(Insn, 0, 5);
+  unsigned Base = fieldFromInstruction(Insn, 5, 5);
+  Reg = getReg(Decoder, LoongArch::FGR64RegClassID, Reg);
+  Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base);
+
+  Inst.addOperand(MCOperand::createReg(Reg));
+  Inst.addOperand(MCOperand::createReg(Base));
+  Inst.addOperand(MCOperand::createImm(Offset));
+
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeLSX128BRegisterClass(MCInst &Inst, unsigned RegNo,
+                                               uint64_t Address,
+                                               const void *Decoder) {
+  if (RegNo > 31)
+    return MCDisassembler::Fail;
+  unsigned Reg = getReg(Decoder, LoongArch::LSX128BRegClassID, RegNo);
+  Inst.addOperand(MCOperand::createReg(Reg));
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeLSX128HRegisterClass(MCInst &Inst, unsigned RegNo,
+                                               uint64_t Address,
+                                               const void *Decoder) {
+  if (RegNo > 31)
+    return MCDisassembler::Fail;
+  unsigned Reg = getReg(Decoder, LoongArch::LSX128HRegClassID, RegNo);
+  Inst.addOperand(MCOperand::createReg(Reg));
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeLSX128WRegisterClass(MCInst &Inst, unsigned RegNo,
+                                               uint64_t Address,
+                                               const void *Decoder) {
+  if (RegNo > 31)
+    return MCDisassembler::Fail;
+  unsigned Reg = getReg(Decoder, LoongArch::LSX128WRegClassID, RegNo);
+  Inst.addOperand(MCOperand::createReg(Reg));
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeLSX128DRegisterClass(MCInst &Inst, unsigned RegNo,
+                                               uint64_t Address,
+                                               const void *Decoder) {
+  if (RegNo > 31)
+    return MCDisassembler::Fail;
+  unsigned Reg = getReg(Decoder, LoongArch::LSX128DRegClassID, RegNo);
+  Inst.addOperand(MCOperand::createReg(Reg));
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeLASX256BRegisterClass(MCInst &Inst, unsigned RegNo,
+                                                uint64_t Address,
+                                                const void *Decoder) {
+  if (RegNo > 31)
+    return MCDisassembler::Fail;
+  unsigned Reg = getReg(Decoder, LoongArch::LASX256BRegClassID, RegNo);
+  Inst.addOperand(MCOperand::createReg(Reg));
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeLASX256HRegisterClass(MCInst &Inst, unsigned RegNo,
+                                                uint64_t Address,
+                                                const void *Decoder) {
+  if (RegNo > 31)
+    return MCDisassembler::Fail;
+  unsigned Reg = getReg(Decoder, LoongArch::LASX256HRegClassID, RegNo);
+  Inst.addOperand(MCOperand::createReg(Reg));
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeLASX256WRegisterClass(MCInst &Inst, unsigned RegNo,
+                                                uint64_t Address,
+                                                const void *Decoder) {
+  if (RegNo > 31)
+    return MCDisassembler::Fail;
+  unsigned Reg = getReg(Decoder, LoongArch::LASX256WRegClassID, RegNo);
+  Inst.addOperand(MCOperand::createReg(Reg));
+  return 
MCDisassembler::Success; +} + +static DecodeStatus DecodeLASX256DRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + unsigned Reg = getReg(Decoder, LoongArch::LASX256DRegClassID, RegNo); + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeBranchTarget(MCInst &Inst, + unsigned Offset, + uint64_t Address, + const void *Decoder) { + int32_t BranchOffset; + // Similar to LoongArchAsmParser::processInstruction, decode the branch target + // for different instructions. + switch (Inst.getOpcode()) { + default: + llvm_unreachable(""); + case LoongArch::BEQ: + case LoongArch::BNE: + case LoongArch::BLT: + case LoongArch::BGE: + case LoongArch::BLTU: + case LoongArch::BGEU: + BranchOffset = (SignExtend32<16>(Offset) * 4); + break; + case LoongArch::BEQZ: + case LoongArch::BNEZ: + case LoongArch::BCEQZ: + case LoongArch::BCNEZ: + BranchOffset = (SignExtend32<21>(Offset) * 4); + break; + case LoongArch::B: + case LoongArch::BL: + BranchOffset = (SignExtend32<26>(Offset) * 4); + break; + } + Inst.addOperand(MCOperand::createImm(BranchOffset)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeJumpTarget(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + unsigned hi10 = fieldFromInstruction(Insn, 0, 10); + unsigned lo16 = fieldFromInstruction(Insn, 10, 16); + int32_t JumpOffset = SignExtend32<28>((hi10 << 16 | lo16) << 2); + Inst.addOperand(MCOperand::createImm(JumpOffset)); + return MCDisassembler::Success; +} + +template +static DecodeStatus DecodeUImmWithOffsetAndScale(MCInst &Inst, unsigned Value, + uint64_t Address, + const void *Decoder) { + Value &= ((1 << Bits) - 1); + Value *= Scale; + Inst.addOperand(MCOperand::createImm(Value + Offset)); + return MCDisassembler::Success; +} + +template +static DecodeStatus DecodeSImmWithOffsetAndScale(MCInst &Inst, unsigned Value, + uint64_t Address, + const void *Decoder) { + int32_t Imm = SignExtend32(Value) * ScaleBy; + Inst.addOperand(MCOperand::createImm(Imm + Offset)); + return MCDisassembler::Success; +} diff --git a/lib/Target/LoongArch/LoongArch.h b/lib/Target/LoongArch/LoongArch.h new file mode 100644 index 00000000..73fd4a62 --- /dev/null +++ b/lib/Target/LoongArch/LoongArch.h @@ -0,0 +1,37 @@ +//===-- LoongArch.h - Top-level interface for LoongArch representation ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in +// the LLVM LoongArch back-end. 
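Illustrative sketch, separate from the patch itself: DecodeBranchTarget and DecodeJumpTarget above scale every encoded offset by the 4-byte instruction size, and B/BL additionally keep the high 10 bits of the 26-bit offset in the low bits of the word. The standalone program below reproduces that reassembly; the helper names are invented for the example.

#include <cassert>
#include <cstdint>

template <unsigned B> static int32_t signExtend(uint32_t X) {
  return (int32_t)(X << (32 - B)) >> (32 - B);
}

// 16-bit conditional-branch offset (BEQ and friends), in units of 4 bytes.
static int32_t beqOffset(uint32_t Offs16) { return signExtend<16>(Offs16) * 4; }

// 26-bit B/BL offset: offs[15:0] sits in instruction bits [25:10] and
// offs[25:16] in bits [9:0], so reassemble hi10:lo16 before scaling.
static int32_t blOffset(uint32_t Insn) {
  uint32_t Hi10 = Insn & 0x3ff;          // fieldFromInstruction(Insn, 0, 10)
  uint32_t Lo16 = (Insn >> 10) & 0xffff; // fieldFromInstruction(Insn, 10, 16)
  return signExtend<28>(((Hi10 << 16) | Lo16) << 2);
}

int main() {
  assert(beqOffset(0xffff) == -4);                 // all-ones si16 -> -1 * 4
  assert(blOffset(0x3ff | (0xffffu << 10)) == -4); // all-ones si26 -> -4
  return 0;
}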
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCH_H +#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCH_H + +#include "MCTargetDesc/LoongArchMCTargetDesc.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + class LoongArchTargetMachine; + class ModulePass; + class FunctionPass; + class LoongArchSubtarget; + class LoongArchTargetMachine; + class InstructionSelector; + class PassRegistry; + + FunctionPass *createLoongArchModuleISelDagPass(); + FunctionPass *createLoongArchOptimizePICCallPass(); + FunctionPass *createLoongArchBranchExpansion(); + FunctionPass *createLoongArchExpandPseudoPass(); + + void initializeLoongArchBranchExpansionPass(PassRegistry &); +} // end namespace llvm; + +#endif diff --git a/lib/Target/LoongArch/LoongArch.td b/lib/Target/LoongArch/LoongArch.td new file mode 100644 index 00000000..49e320ec --- /dev/null +++ b/lib/Target/LoongArch/LoongArch.td @@ -0,0 +1,104 @@ +//===-- LoongArch.td - Describe the LoongArch Target Machine ---------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// This is the top level entry point for the LoongArch target. +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Target-independent interfaces +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +// The overall idea of the PredicateControl class is to chop the Predicates list +// into subsets that are usually overridden independently. This allows +// subclasses to partially override the predicates of their superclasses without +// having to re-add all the existing predicates. 
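+// A subclass typically overrides a single sub-list (e.g. InsnPredicates);
+// the !listconcat at the bottom of the class rebuilds the flat Predicates
+// list from all of the sub-lists.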
+class PredicateControl {
+  // Predicates for the encoding scheme in use such as HasStdEnc
+  list<Predicate> EncodingPredicates = [];
+  // Predicates for the GPR size such as is64Bit
+  list<Predicate> GPRPredicates = [];
+  // Predicates for the FGR size and layout such as HasBasicD
+  list<Predicate> FGRPredicates = [];
+  // Predicates for the instruction group membership such as ISAs
+  list<Predicate> InsnPredicates = [];
+  // Predicate for the ISA extension that an instruction belongs to
+  list<Predicate> ExtPredicate = [];
+  // Predicate for marking the instruction as usable in hard-float mode only
+  list<Predicate> HardFloatPredicate = [];
+  // Predicates for anything else
+  list<Predicate> AdditionalPredicates = [];
+  list<Predicate> Predicates = !listconcat(EncodingPredicates,
+                                           GPRPredicates,
+                                           FGRPredicates,
+                                           InsnPredicates,
+                                           HardFloatPredicate,
+                                           ExtPredicate,
+                                           AdditionalPredicates);
+}
+
+// Like Requires<> but for the AdditionalPredicates list
+class AdditionalRequires<list<Predicate> preds> {
+  list<Predicate> AdditionalPredicates = preds;
+}
+
+//===----------------------------------------------------------------------===//
+// LoongArch Subtarget features                                              //
+//===----------------------------------------------------------------------===//
+
+def Feature64Bit
+    : SubtargetFeature<"64bit", "HasLA64", "true",
+                       "LA64 Basic Integer and Privilege Instruction Set">;
+def FeatureBasicF : SubtargetFeature<"f", "HasBasicF", "true",
+                                     "'F' (Single-Precision Floating-Point)">;
+def FeatureBasicD : SubtargetFeature<"d", "HasBasicD", "true",
+                                     "'D' (Double-Precision Floating-Point)",
+                                     [FeatureBasicF]>;
+
+def FeatureLSX : SubtargetFeature<"lsx", "HasLSX", "true", "Support LSX",
+                                  [FeatureBasicD]>;
+def FeatureLASX : SubtargetFeature<"lasx", "HasLASX", "true", "Support LASX",
+                                   [FeatureLSX]>;
+
+def FeatureUnalignedAccess
+    : SubtargetFeature<"unaligned-access", "UnalignedAccess", "true",
+                       "Allow all unaligned memory access">;
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "LoongArchRegisterInfo.td"
+include "LoongArchInstrInfo.td"
+include "LoongArchCallingConv.td"
+
+def LoongArchInstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// LoongArch processors supported.
+//===----------------------------------------------------------------------===//
+
+def : ProcessorModel<"generic-la32", NoSchedModel, []>;
+def : ProcessorModel<"generic-la64", NoSchedModel, [Feature64Bit]>;
+def : ProcessorModel<"la464", NoSchedModel,
+                     [Feature64Bit, FeatureUnalignedAccess]>;
+
+def LoongArchAsmParser : AsmParser {
+  let ShouldEmitMatchRegisterName = 0;
+}
+
+def LoongArchAsmParserVariant : AsmParserVariant {
+  int Variant = 0;
+
+  // Recognize hard-coded registers.
+  string RegisterPrefix = "$";
+}
+
+def LoongArch : Target {
+  let InstructionSet = LoongArchInstrInfo;
+  let AssemblyParsers = [LoongArchAsmParser];
+  let AssemblyParserVariants = [LoongArchAsmParserVariant];
+  let AllowRegisterRenaming = 1;
+}
diff --git a/lib/Target/LoongArch/LoongArch32InstrInfo.td b/lib/Target/LoongArch/LoongArch32InstrInfo.td
new file mode 100644
index 00000000..e5c97860
--- /dev/null
+++ b/lib/Target/LoongArch/LoongArch32InstrInfo.td
@@ -0,0 +1,737 @@
+//===- LoongArch32InstrInfo.td - Target Description for LoongArch Target -*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file describes LoongArch32 instructions. +// +//===----------------------------------------------------------------------===// + +//===---------------------------------------------------------------------===/ +// Instruction Definitions. +//===---------------------------------------------------------------------===/ + +let DecoderNamespace = "LoongArch32" in { + /// + /// R2 + /// + def CLO_W : Count1<"clo.w", GPR32Opnd, ctlz>, R2I<0b00100>; + def CLZ_W : Int_Reg2<"clz.w", GPR32Opnd, ctlz>, R2I<0b00101>; + def CTO_W : Count1<"cto.w", GPR32Opnd, cttz>, R2I<0b00110>; + def CTZ_W : Int_Reg2<"ctz.w", GPR32Opnd, cttz>, R2I<0b00111>; + + def REVB_2H : Int_Reg2<"revb.2h", GPR32Opnd>, R2I<0b01100>;//see below bswap pattern + + def BITREV_4B : Int_Reg2<"bitrev.4b", GPR32Opnd>, R2I<0b10010>; + def BITREV_W : Int_Reg2<"bitrev.w", GPR32Opnd, bitreverse>, R2I<0b10100>; + + let isCodeGenOnly = 1 in { + def EXT_W_H32 : SignExtInReg<"ext.w.h", GPR32Opnd, i16>, R2I<0b10110>; + def EXT_W_B32 : SignExtInReg<"ext.w.b", GPR32Opnd, i8>, R2I<0b10111>; + + } + + def CPUCFG : Int_Reg2<"cpucfg", GPR32Opnd, int_loongarch_cpucfg>, R2I<0b11011>; + def RDTIMEL_W32 : Int_Reg2_Rdtime<"rdtimel.w", GPR32Opnd>, R2I<0b11000>; + def RDTIMEH_W32 : Int_Reg2_Rdtime<"rdtimeh.w", GPR32Opnd>, R2I<0b11001>; + + /// + /// R3 + /// + def ADD_W : Int_Reg3<"add.w", GPR32Opnd, add>, R3I<0b0100000>; + def SUB_W : Int_Reg3<"sub.w", GPR32Opnd, sub>, R3I<0b0100010>; + + let isCodeGenOnly = 1 in { + def SLT32 : SetCC_R<"slt", GPR32Opnd, setlt>, R3I<0b0100100>; + def SLTU32 : SetCC_R<"sltu", GPR32Opnd, setult>, R3I<0b0100101>; + def MASKEQZ32 : Int_Reg3<"maskeqz", GPR32Opnd>, R3I<0b0100110>;//see below patterns + def MASKNEZ32 : Int_Reg3<"masknez", GPR32Opnd>, R3I<0b0100111>;//see below patterns + + def NOR32 : Nor<"nor", GPR32Opnd>, R3I<0b0101000>; + def AND32 : Int_Reg3<"and", GPR32Opnd, and>, R3I<0b0101001>; + def OR32 : Int_Reg3<"or", GPR32Opnd, or>, R3I<0b0101010>; + def XOR32 : Int_Reg3<"xor", GPR32Opnd, xor>, R3I<0b0101011>; + def ANDN32 : Int_Reg3<"andn", GPR32Opnd>, R3I<0b0101101>; + def ORN32 : Int_Reg3<"orn", GPR32Opnd>, R3I<0b0101100>; + } + + def SLL_W : Shift_Var<"sll.w", GPR32Opnd, shl>, R3I<0b0101110>; + def SRL_W : Shift_Var<"srl.w", GPR32Opnd, srl>, R3I<0b0101111>; + def SRA_W : Shift_Var<"sra.w", GPR32Opnd, sra>, R3I<0b0110000>; + def ROTR_W: Shift_Var<"rotr.w", GPR32Opnd, rotr>, R3I<0b0110110>; + + def MUL_W : Int_Reg3<"mul.w", GPR32Opnd, mul>, R3I<0b0111000>; + def MULH_W : Int_Reg3<"mulh.w", GPR32Opnd, mulhs>, R3I<0b0111001>; + def MULH_WU : Int_Reg3<"mulh.wu", GPR32Opnd, mulhu>, R3I<0b0111010>; + +let usesCustomInserter = 1 in { + def DIV_W : Int_Reg3<"div.w", GPR32Opnd, sdiv>, R3I<0b1000000>; + def MOD_W : Int_Reg3<"mod.w", GPR32Opnd, srem>, R3I<0b1000001>; + def DIV_WU : Int_Reg3<"div.wu", GPR32Opnd, udiv>, R3I<0b1000010>; + def MOD_WU : Int_Reg3<"mod.wu", GPR32Opnd, urem>, R3I<0b1000011>; +} + + def CRC_W_B_W : Int_Reg3<"crc.w.b.w", GPR32Opnd, int_loongarch_crc_w_b_w>, R3I<0b1001000>; + def CRC_W_H_W : Int_Reg3<"crc.w.h.w", GPR32Opnd, int_loongarch_crc_w_h_w>, R3I<0b1001001>; + def CRC_W_W_W : Int_Reg3<"crc.w.w.w", GPR32Opnd, int_loongarch_crc_w_w_w>, R3I<0b1001010>; + def CRCC_W_B_W : Int_Reg3<"crcc.w.b.w", GPR32Opnd, int_loongarch_crcc_w_b_w>, R3I<0b1001100>; + def CRCC_W_H_W : Int_Reg3<"crcc.w.h.w", GPR32Opnd, int_loongarch_crcc_w_h_w>, R3I<0b1001101>; + def 
CRCC_W_W_W : Int_Reg3<"crcc.w.w.w", GPR32Opnd, int_loongarch_crcc_w_w_w>, R3I<0b1001110>; + /// + /// SLLI + /// + def SLLI_W : Shift_Imm32<"slli.w", GPR32Opnd, shl>, R2_IMM5<0b00>; + def SRLI_W : Shift_Imm32<"srli.w", GPR32Opnd, srl>, R2_IMM5<0b01>; + def SRAI_W : Shift_Imm32<"srai.w", GPR32Opnd, sra>, R2_IMM5<0b10>; + def ROTRI_W : Shift_Imm32<"rotri.w", GPR32Opnd, rotr>, R2_IMM5<0b11>; + /// + /// Misc + /// + def ALSL_W : Reg3_Sa<"alsl.w", GPR32Opnd, uimm2_plus1>, R3_SA2<0b00010> { + let Pattern = [(set GPR32Opnd:$rd, + (add GPR32Opnd:$rk, (shl GPR32Opnd:$rj, immZExt2Alsl:$sa)))]; + } + def BYTEPICK_W : Reg3_Sa<"bytepick.w", GPR32Opnd, uimm2>, R3_SA2<0b00100>;//pattern:[] + + def BREAK : Code15<"break", int_loongarch_break>, CODE15<0b1010100>; + def SYSCALL : Code15<"syscall", int_loongarch_syscall>, CODE15<0b1010110>; + def TRAP : TrapBase; + + def BSTRINS_W : InsBase_32<"bstrins.w", GPR32Opnd, uimm5, LoongArchBstrins>, + INSERT_BIT32<0>; + def BSTRPICK_W : PickBase_32<"bstrpick.w", GPR32Opnd, uimm5, LoongArchBstrpick>, + INSERT_BIT32<1>; + + /// + /// R2_IMM12 + /// + let isCodeGenOnly = 1 in { + def SLTI32 : SetCC_I<"slti", GPR32Opnd, simm12_32>, R2_IMM12<0b000>; //PatFrag + def SLTUI32 : SetCC_I<"sltui", GPR32Opnd, simm12_32>, R2_IMM12<0b001>; //PatFrag + } + def ADDI_W : Int_Reg2_Imm12<"addi.w", GPR32Opnd, simm12_32, add>, R2_IMM12<0b010>; + + let isCodeGenOnly = 1 in { + def ANDI32 : Int_Reg2_Imm12<"andi", GPR32Opnd, uimm12_32, and>, R2_IMM12<0b101>; + def ORI32 : Int_Reg2_Imm12<"ori", GPR32Opnd, uimm12_32, or>, R2_IMM12<0b110>; + def XORI32 : Int_Reg2_Imm12<"xori", GPR32Opnd, uimm12_32, xor>, R2_IMM12<0b111>; + } + + /// + /// Privilege Instructions + /// + def CSRRD32 : CSR<"csrrd", GPR32Opnd, uimm14_32, int_loongarch_csrrd>, R1_CSR<0b0000000000100>; + def CSRWR32 : CSRW<"csrwr", GPR32Opnd, uimm14_32, int_loongarch_csrwr>, R1_CSR<0b0000100000100>; + def CSRXCHG32 : CSRX<"csrxchg", GPR32Opnd, uimm14_32, int_loongarch_csrxchg>, R2_CSR<0b00000100>; + def IOCSRRD_B32 : Int_Reg2<"iocsrrd.b", GPR32Opnd, int_loongarch_iocsrrd_b>, R2P<0b000>; + def IOCSRRD_H32 : Int_Reg2<"iocsrrd.h", GPR32Opnd, int_loongarch_iocsrrd_h>, R2P<0b001>; + def IOCSRRD_W32 : Int_Reg2<"iocsrrd.w", GPR32Opnd, int_loongarch_iocsrrd_w>, R2P<0b010>; + def IOCSRWR_B32 : Int_Reg2_Iocsrwr<"iocsrwr.b", GPR32Opnd, GPR32Opnd, int_loongarch_iocsrwr_b>, R2P<0b100>; + def IOCSRWR_H32 : Int_Reg2_Iocsrwr<"iocsrwr.h", GPR32Opnd, GPR32Opnd, int_loongarch_iocsrwr_h>, R2P<0b101>; + def IOCSRWR_W32 : Int_Reg2_Iocsrwr<"iocsrwr.w", GPR32Opnd, GPR32Opnd, int_loongarch_iocsrwr_w>, R2P<0b110>; + def CACOP32 : CAC<"cacop", GPR32Opnd, simm12_32, int_loongarch_cacop>, R1_CACHE; + def LDDIR32 : LEVEL<"lddir", GPR32Opnd>, R2_LEVEL<0b00000110010000>; + def LDPTE32 : SEQ<"ldpte", GPR32Opnd>, R1_SEQ<0b00000110010001>; + + //def WAIT : Wait<"wait">; + // + //def IOCSRRD_D : R2P<0b011>, Int_Reg2<"iocsrrd.d", GPR32Opnd>; + //def IOCSRWR_D : R2P<0b111>, Int_Reg2<"iocsrwr.d", GPR32Opnd>; + // + //def TLBINV : IMM32<0b001000>, OP32<"tlbinv">; + //def TLBFLUSH : IMM32<0b001001>, OP32<"tlbflush">; + //def TLBP : IMM32<0b001010>, OP32<"tlbp">; + //def TLBR : IMM32<0b001011>, OP32<"tlbr">; + //def TLBWI : IMM32<0b001100>, OP32<"tlbwi">; + //def TLBWR : IMM32<0b001101>, OP32<"tlbwr">; + + /// + /// R1_IMM20 + /// + let isCodeGenOnly = 1 in { + def LU12I_W32 : SI20<"lu12i.w", GPR32Opnd, simm20_32>, R1_SI20<0b0001010>; + def PCADDI32 : SI20<"pcaddi", GPR32Opnd, simm20_32>, R1_SI20<0b0001100>; + def PCALAU12I32 : SI20<"pcalau12i", GPR32Opnd, simm20_32>, 
R1_SI20<0b0001101>;
+ def PCADDU12I32 : SI20<"pcaddu12i", GPR32Opnd, simm20_32>, R1_SI20<0b0001110>;
+ }
+
+ let isCodeGenOnly = 1 in {
+ def BEQZ32 : Beqz<"beqz", brtarget, seteq, GPR32Opnd>, R1_IMM21BEQZ<0b010000>;
+ def BNEZ32 : Beqz<"bnez", brtarget, setne, GPR32Opnd>, R1_IMM21BEQZ<0b010001>;
+
+ def JIRL32 : FJirl<"jirl", calltarget, GPR32Opnd>, R2_IMM16JIRL;
+
+ def B32 : JumpFB<brtarget, br>, IMM26B<0b010100>;
+
+ def BEQ32 : Beq<"beq", brtarget, seteq, GPR32Opnd>, R2_IMM16BEQ<0b010110>;
+ def BNE32 : Beq<"bne", brtarget, setne, GPR32Opnd>, R2_IMM16BEQ<0b010111>;
+ def BLT32 : Beq<"blt", brtarget, setlt, GPR32Opnd>, R2_IMM16BEQ<0b011000>;
+ def BGE32 : Beq<"bge", brtarget, setge, GPR32Opnd>, R2_IMM16BEQ<0b011001>;
+ def BLTU32 : Beq<"bltu", brtarget, setult, GPR32Opnd>, R2_IMM16BEQ<0b011010>;
+ def BGEU32 : Beq<"bgeu", brtarget, setuge, GPR32Opnd>, R2_IMM16BEQ<0b011011>;
+ }
+
+ ///
+ /// Mem access
+ ///
+ def LL_W : LLBase<"ll.w", GPR32Opnd, mem_simm14_lsl2>, LL_SC<0b000>;
+ def SC_W : SCBase<"sc.w", GPR32Opnd, mem_simm14_lsl2>, LL_SC<0b001>;
+
+ def PRELD_Raw32 : Preld_Raw<"preld", GPR32Opnd>, PRELD_FM;
+
+ let isCodeGenOnly = 1 in {
+ def LD_B32 : Ld<"ld.b", GPR32Opnd, mem_simmptr, sextloadi8>, LOAD_STORE<0b0000>;
+ def LD_H32 : Ld<"ld.h", GPR32Opnd, mem_simmptr, sextloadi16, addrDefault>, LOAD_STORE<0b0001>;
+ def LD_W32 : Ld<"ld.w", GPR32Opnd, mem, load, addrDefault>, LOAD_STORE<0b0010>;
+ def ST_B32 : St<"st.b", GPR32Opnd, mem, truncstorei8>, LOAD_STORE<0b0100>;
+ def ST_H32 : St<"st.h", GPR32Opnd, mem, truncstorei16>, LOAD_STORE<0b0101>;
+ def ST_W32 : St<"st.w", GPR32Opnd, mem, store>, LOAD_STORE<0b0110>;
+ def LD_BU32 : Ld<"ld.bu", GPR32Opnd, mem_simmptr, zextloadi8, addrDefault>, LOAD_STORE<0b1000>;
+ def LD_HU32 : Ld<"ld.hu", GPR32Opnd, mem_simmptr, zextloadi16>, LOAD_STORE<0b1001>;
+
+ def PRELD32 : Preld<"preld", mem, GPR32Opnd>, PRELD_FM;
+
+ def LDPTR_W32 : LdPtr<"ldptr.w", GPR32Opnd>, LL_SC<0b100>;
+ def STPTR_W32 : StPtr<"stptr.w", GPR32Opnd>, LL_SC<0b101>;
+ }
+
+ def IBAR : Bar<"ibar", int_loongarch_ibar>, BAR_FM<1>;
+ def DBAR : Bar<"dbar", int_loongarch_dbar>, BAR_FM<0>;
+
+ def LONG_BRANCH_ADDIW : LoongArchPseudo<(outs GPR32Opnd:$dst),
+     (ins GPR32Opnd:$src, brtarget:$tgt, brtarget:$baltgt), []>;
+
+ def LONG_BRANCH_ADDIW2Op : LoongArchPseudo<(outs GPR32Opnd:$dst),
+     (ins GPR32Opnd:$src, brtarget:$tgt), []>;
+
+ def PseudoReturn : PseudoReturnBase<GPR32Opnd>;
+
+ let isCodeGenOnly = 1 in {
+ def LDX_W32 : LDX_FT_LA<"ldx.w", GPR32Opnd, load>, R3MI<0b00010000>;
+ def LDX_HU32 : LDX_FT_LA<"ldx.hu", GPR32Opnd, extloadi16>, R3MI<0b01001000>;
+ def LDX_BU32 : LDX_FT_LA<"ldx.bu", GPR32Opnd, extloadi8>, R3MI<0b01000000>;
+ def STX_W32 : STX_FT_LA<"stx.w", GPR32Opnd, store>, R3MI<0b00110000>;
+ def LDX_H32 : LDX_FT_LA<"ldx.h", GPR32Opnd, sextloadi16>, R3MI<0b00001000>;
+ def LDX_B32 : LDX_FT_LA<"ldx.b", GPR32Opnd, sextloadi8>, R3MI<0b00000000>;
+ def STX_B32 : STX_FT_LA<"stx.b", GPR32Opnd, truncstorei8>, R3MI<0b00100000>;
+ def STX_H32 : STX_FT_LA<"stx.h", GPR32Opnd, truncstorei16>, R3MI<0b00101000>;
+ }
+}
+
+def LEA_ADDI_W : EffectiveAddress<"addi.w", GPR32Opnd>, LEA_ADDI_FM<0b010>;
+
+def : LoongArchPat<(LoongArchAddress (i32 tglobaladdr:$in)),
+                   (ADDI_W (PCADDU12I32 tglobaladdr:$in), 0)>, GPR_32;
+def : LoongArchPat<(LoongArchAddress (i32 tblockaddress:$in)),
+                   (ADDI_W (PCADDU12I32 tblockaddress:$in), 0)>, GPR_32;
+def : LoongArchPat<(LoongArchAddress (i32 tjumptable:$in)),
+                   (ADDI_W (PCADDU12I32 tjumptable:$in), 0)>, GPR_32;
+def : LoongArchPat<(LoongArchAddress (i32 texternalsym:$in)),
+                   (ADDI_W (PCADDU12I32 texternalsym:$in), 0)>, GPR_32;
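+// Note (editorial, illustrative): the patterns above materialize a 32-bit
+// symbolic address PC-relatively as a pcaddu12i + addi.w pair, roughly:
+//   pcaddu12i $r4, hi20(sym - pc)      # upper 20 bits of the PC offset
+//   addi.w    $r4, $r4, lo12(sym - pc) # remaining low 12 bits
+// The actual relocation operator spellings are defined by the MC layer
+// (LoongArchMCExpr) and are not shown here.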
+//===----------------------------------------------------------------------===//
+// Arbitrary patterns that map to one or more instructions
+//===----------------------------------------------------------------------===//
+
+let isCodeGenOnly = 1 in {
+ def REVB_2W_32 : Int_Reg2<"revb.2w", GPR32Opnd>, R2I<0b01110>;
+ def REVH_2W_32 : Int_Reg2<"revh.2w", GPR32Opnd>, R2I<0b10000>;
+}
+
+// bswap pattern
+def : LoongArchPat<(bswap GPR32:$rj), (ROTRI_W (REVB_2H GPR32:$rj), 16)>;
+//def : LoongArchPat<(bswap GPR32:$rj), (REVB_2W_32 GPR32:$rj)>;
+//def : LoongArchPat<(bswap GPR32:$rj), (REVH_2W_32 (REVB_2H GPR32:$rj))>;
+
+// i32 selects
+multiclass SelectInt_Pats<ValueType RC, Instruction OROp, Instruction XORiOp,
+                          Instruction SLTiOp, Instruction SLTiuOp,
+                          Instruction MASKEQZOp, Instruction MASKNEZOp,
+                          ComplexPattern imm_type, ValueType Opg> {
+
+// reg, immz
+def : LoongArchPat<(select (Opg (seteq RC:$cond, immz)), RC:$t, RC:$f),
+                   (OROp (MASKNEZOp RC:$t, RC:$cond), (MASKEQZOp RC:$f, RC:$cond))>;
+def : LoongArchPat<(select (Opg (setne RC:$cond, immz)), RC:$t, RC:$f),
+                   (OROp (MASKEQZOp RC:$t, RC:$cond), (MASKNEZOp RC:$f, RC:$cond))>;
+
+//def : LoongArchPat<(select (Opg (seteq RC:$cond, imm_type:$imm)), RC:$t, RC:$f),
+//                   (OROp (MASKNEZOp RC:$t, (XORiOp RC:$cond, imm_type:$imm)),
+//                         (MASKEQZOp RC:$f, (XORiOp RC:$cond, imm_type:$imm)))>;
+//def : LoongArchPat<(select (Opg (setne RC:$cond, imm_type:$imm)), RC:$t, RC:$f),
+//                   (OROp (MASKEQZOp RC:$t, (XORiOp RC:$cond, imm_type:$imm)),
+//                         (MASKNEZOp RC:$f, (XORiOp RC:$cond, imm_type:$imm)))>;
+
+// reg, immSExt12Plus1
+//def : LoongArchPat<(select (Opg (setgt RC:$cond, immSExt12Plus1:$imm)), RC:$t, RC:$f),
+//                   (OROp (MASKNEZOp RC:$t, (SLTiOp RC:$cond, (Plus1 imm:$imm))),
+//                         (MASKEQZOp RC:$f, (SLTiOp RC:$cond, (Plus1 imm:$imm))))>;
+//def : LoongArchPat<(select (Opg (setugt RC:$cond, immSExt16Plus1:$imm)), RC:$t, RC:$f),
+//                   (OROp (MASKNEZOp RC:$t, (SLTiuOp RC:$cond, (Plus1 imm:$imm))),
+//                         (MASKEQZOp RC:$f, (SLTiuOp RC:$cond, (Plus1 imm:$imm))))>;
+
+def : LoongArchPat<(select (Opg (seteq RC:$cond, immz)), RC:$t, immz),
+                   (MASKNEZOp RC:$t, RC:$cond)>;
+def : LoongArchPat<(select (Opg (setne RC:$cond, immz)), RC:$t, immz),
+                   (MASKEQZOp RC:$t, RC:$cond)>;
+def : LoongArchPat<(select (Opg (seteq RC:$cond, immz)), immz, RC:$f),
+                   (MASKEQZOp RC:$f, RC:$cond)>;
+def : LoongArchPat<(select (Opg (setne RC:$cond, immz)), immz, RC:$f),
+                   (MASKNEZOp RC:$f, RC:$cond)>;
+}
+
+defm : SelectInt_Pats<i32, OR32, XORI32, SLTI32, SLTUI32,
+                      MASKEQZ32, MASKNEZ32, immZExt12, i32>;
+
+def : LoongArchPat<(select i32:$cond, i32:$t, i32:$f),
+                   (OR32 (MASKEQZ32 i32:$t, i32:$cond),
+                         (MASKNEZ32 i32:$f, i32:$cond))>;
+def : LoongArchPat<(select i32:$cond, i32:$t, immz),
+                   (MASKEQZ32 i32:$t, i32:$cond)>;
+def : LoongArchPat<(select i32:$cond, immz, i32:$f),
+                   (MASKNEZ32 i32:$f, i32:$cond)>;
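+// Note (editorial, illustrative): maskeqz/masknez give a branchless select.
+// With the condition already materialized in a register, the patterns above
+// compute, in C terms:
+//   uint32_t eq = (cond != 0) ? t : 0;   // MASKEQZ32 $t, $cond
+//   uint32_t ne = (cond == 0) ? f : 0;   // MASKNEZ32 $f, $cond
+//   uint32_t result = eq | ne;           // OR32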
+// truncate
+def : LoongArchPat<(i32 (trunc (assertzext_lt_i32 GPR64:$src))),
+                   (EXTRACT_SUBREG GPR64:$src, sub_32)>, GPR_64;
+def : LoongArchPat<(i32 (trunc GPR64:$src)),
+                   (SLLI_W (EXTRACT_SUBREG GPR64:$src, sub_32), 0)>, GPR_64;
+
+// Patterns used for matching away redundant sign extensions.
+// LA32 arithmetic instructions sign extend their result implicitly.
+def : LoongArchPat<(i64 (sext (i32 (add GPR32:$src, GPR32:$src2)))),
+                   (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                                  (ADD_W GPR32:$src, GPR32:$src2), sub_32)>;
+def : LoongArchPat<(i64 (sext (i32 (sub GPR32:$src, GPR32:$src2)))),
+                   (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                                  (SUB_W GPR32:$src, GPR32:$src2), sub_32)>;
+def : LoongArchPat<(i64 (sext (i32 (mul GPR32:$src, GPR32:$src2)))),
+                   (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                                  (MUL_W GPR32:$src, GPR32:$src2), sub_32)>;
+
+def : LoongArchPat<(store (i32 0), addr:$dst), (ST_W32 ZERO, addr:$dst)>;
+
+def : InstAlias<"break", (BREAK 0), 1>;
+def : InstAlias<"break $imm", (BREAK uimm15:$imm), 1>;
+def : LoongArchInstAlias<"move $dst, $src",
+                         (OR32 GPR32Opnd:$dst, GPR32Opnd:$src, ZERO), 1>, GPR_32;
+
+def immSExt12Plus1 : PatLeaf<(imm), [{
+  return isInt<13>(N->getSExtValue()) && isInt<12>(N->getSExtValue() + 1);
+}]>;
+
+def Plus1 : SDNodeXForm<imm, [{ return getImm(N, N->getSExtValue() + 1); }]>;
+
+multiclass BrcondPats<RegisterClass RC, Instruction BEQOp, Instruction BEQOp1,
+                      Instruction BNEOp, Instruction SLTOp, Instruction SLTUOp,
+                      Instruction SLTIOp, Instruction SLTUIOp,
+                      Register ZEROReg> {
+
+def : LoongArchPat<(brcond (i32 (setne RC:$lhs, 0)), bb:$dst),
+                   (BNEOp RC:$lhs, ZEROReg, bb:$dst)>;
+def : LoongArchPat<(brcond (i32 (seteq RC:$lhs, 0)), bb:$dst),
+                   (BEQOp RC:$lhs, ZEROReg, bb:$dst)>;
+def : LoongArchPat<(brcond (i32 (setge RC:$lhs, RC:$rhs)), bb:$dst),
+                   (BEQOp1 (SLTOp RC:$lhs, RC:$rhs), ZEROReg, bb:$dst)>;
+def : LoongArchPat<(brcond (i32 (setuge RC:$lhs, RC:$rhs)), bb:$dst),
+                   (BEQOp1 (SLTUOp RC:$lhs, RC:$rhs), ZEROReg, bb:$dst)>;
+def : LoongArchPat<(brcond (i32 (setge RC:$lhs, immSExt12:$rhs)), bb:$dst),
+                   (BEQOp1 (SLTIOp RC:$lhs, immSExt12:$rhs), ZEROReg, bb:$dst)>;
+def : LoongArchPat<(brcond (i32 (setuge RC:$lhs, immSExt12:$rhs)), bb:$dst),
+                   (BEQOp1 (SLTUIOp RC:$lhs, immSExt12:$rhs), ZEROReg, bb:$dst)>;
+def : LoongArchPat<(brcond (i32 (setgt RC:$lhs, immSExt12Plus1:$rhs)), bb:$dst),
+                   (BEQOp1 (SLTIOp RC:$lhs, (Plus1 imm:$rhs)), ZEROReg, bb:$dst)>;
+def : LoongArchPat<(brcond (i32 (setugt RC:$lhs, immSExt12Plus1:$rhs)), bb:$dst),
+                   (BEQOp1 (SLTUIOp RC:$lhs, (Plus1 imm:$rhs)), ZEROReg, bb:$dst)>;
+def : LoongArchPat<(brcond (i32 (setle RC:$lhs, RC:$rhs)), bb:$dst),
+                   (BEQOp1 (SLTOp RC:$rhs, RC:$lhs), ZEROReg, bb:$dst)>;
+def : LoongArchPat<(brcond (i32 (setule RC:$lhs, RC:$rhs)), bb:$dst),
+                   (BEQOp1 (SLTUOp RC:$rhs, RC:$lhs), ZEROReg, bb:$dst)>;
+def : LoongArchPat<(brcond RC:$cond, bb:$dst),
+                   (BNEOp RC:$cond, ZEROReg, bb:$dst)>;
+}
+
+defm : BrcondPats<GPR32, BEQ32, BEQ32, BNE32, SLT32, SLTU32,
+                  SLTI32, SLTUI32, ZERO>, GPR_64;
+
+let usesCustomInserter = 1 in {
+  def ATOMIC_LOAD_ADD_I8 : Atomic2Ops<atomic_load_add_8, GPR32>;
+  def ATOMIC_LOAD_ADD_I16 : Atomic2Ops<atomic_load_add_16, GPR32>;
+  def ATOMIC_LOAD_ADD_I32 : Atomic2Ops<atomic_load_add_32, GPR32>;
+  def ATOMIC_LOAD_SUB_I8 : Atomic2Ops<atomic_load_sub_8, GPR32>;
+  def ATOMIC_LOAD_SUB_I16 : Atomic2Ops<atomic_load_sub_16, GPR32>;
+  def ATOMIC_LOAD_SUB_I32 : Atomic2Ops<atomic_load_sub_32, GPR32>;
+  def ATOMIC_LOAD_AND_I8 : Atomic2Ops<atomic_load_and_8, GPR32>;
+  def ATOMIC_LOAD_AND_I16 : Atomic2Ops<atomic_load_and_16, GPR32>;
+  def ATOMIC_LOAD_AND_I32 : Atomic2Ops<atomic_load_and_32, GPR32>;
+  def ATOMIC_LOAD_OR_I8 : Atomic2Ops<atomic_load_or_8, GPR32>;
+  def ATOMIC_LOAD_OR_I16 : Atomic2Ops<atomic_load_or_16, GPR32>;
+  def ATOMIC_LOAD_OR_I32 : Atomic2Ops<atomic_load_or_32, GPR32>;
+  def ATOMIC_LOAD_XOR_I8 : Atomic2Ops<atomic_load_xor_8, GPR32>;
+  def ATOMIC_LOAD_XOR_I16 : Atomic2Ops<atomic_load_xor_16, GPR32>;
+  def ATOMIC_LOAD_XOR_I32 : Atomic2Ops<atomic_load_xor_32, GPR32>;
+  def ATOMIC_LOAD_NAND_I8 : Atomic2Ops<atomic_load_nand_8, GPR32>;
+  def ATOMIC_LOAD_NAND_I16 : Atomic2Ops<atomic_load_nand_16, GPR32>;
+  def ATOMIC_LOAD_NAND_I32 : Atomic2Ops<atomic_load_nand_32, GPR32>;
+
+  def ATOMIC_SWAP_I8 : Atomic2Ops<atomic_swap_8, GPR32>;
+  def ATOMIC_SWAP_I16 : Atomic2Ops<atomic_swap_16, GPR32>;
+  def ATOMIC_SWAP_I32 : Atomic2Ops<atomic_swap_32, GPR32>;
+
+  def ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap<atomic_cmp_swap_8, GPR32>;
+  def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap<atomic_cmp_swap_16, GPR32>;
+  def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<atomic_cmp_swap_32, GPR32>;
+
+  def ATOMIC_LOAD_MAX_I8 : Atomic2Ops<atomic_load_max_8, GPR32>;
+  def ATOMIC_LOAD_MAX_I16 : Atomic2Ops<atomic_load_max_16, GPR32>;
+  def ATOMIC_LOAD_MAX_I32 : Atomic2Ops<atomic_load_max_32, GPR32>;
+
+  def ATOMIC_LOAD_MIN_I8 : Atomic2Ops<atomic_load_min_8, GPR32>;
+  def ATOMIC_LOAD_MIN_I16 : Atomic2Ops<atomic_load_min_16, GPR32>;
+  def ATOMIC_LOAD_MIN_I32 : Atomic2Ops<atomic_load_min_32, GPR32>;
+
+  def ATOMIC_LOAD_UMAX_I8 : Atomic2Ops<atomic_load_umax_8, GPR32>;
+  def ATOMIC_LOAD_UMAX_I16 : Atomic2Ops<atomic_load_umax_16, GPR32>;
+  def ATOMIC_LOAD_UMAX_I32 : Atomic2Ops<atomic_load_umax_32, GPR32>;
+
+  def ATOMIC_LOAD_UMIN_I8 : Atomic2Ops<atomic_load_umin_8, GPR32>;
+  def ATOMIC_LOAD_UMIN_I16 : Atomic2Ops<atomic_load_umin_16, GPR32>;
+  def ATOMIC_LOAD_UMIN_I32 : Atomic2Ops<atomic_load_umin_32, GPR32>;
+}
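+// Note (editorial, illustrative sketch): the *_POSTRA pseudos below are
+// expanded after register allocation (see LoongArchExpandPseudo.cpp) into an
+// LL/SC retry loop. For ATOMIC_LOAD_ADD_I32 the expansion is roughly:
+//   .retry:
+//     ll.w   $dst, $ptr, 0     # load-linked old value
+//     add.w  $tmp, $dst, $inc  # apply the operation
+//     sc.w   $tmp, $ptr, 0     # store-conditional; $tmp becomes success flag
+//     beqz   $tmp, .retry      # retry if the store-conditional failed
+// Exact register usage and barrier placement follow the expand pass.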
+def ATOMIC_LOAD_ADD_I8_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_ADD_I16_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_ADD_I32_POSTRA : Atomic2OpsPostRA<GPR32>;
+def ATOMIC_LOAD_SUB_I8_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_SUB_I16_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_SUB_I32_POSTRA : Atomic2OpsPostRA<GPR32>;
+def ATOMIC_LOAD_AND_I8_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_AND_I16_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_AND_I32_POSTRA : Atomic2OpsPostRA<GPR32>;
+def ATOMIC_LOAD_OR_I8_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_OR_I16_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_OR_I32_POSTRA : Atomic2OpsPostRA<GPR32>;
+def ATOMIC_LOAD_XOR_I8_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_XOR_I16_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_XOR_I32_POSTRA : Atomic2OpsPostRA<GPR32>;
+def ATOMIC_LOAD_NAND_I8_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_NAND_I16_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_NAND_I32_POSTRA : Atomic2OpsPostRA<GPR32>;
+
+def ATOMIC_SWAP_I8_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_SWAP_I16_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_SWAP_I32_POSTRA : Atomic2OpsPostRA<GPR32>;
+
+def ATOMIC_CMP_SWAP_I8_POSTRA : AtomicCmpSwapSubwordPostRA<GPR32>;
+def ATOMIC_CMP_SWAP_I16_POSTRA : AtomicCmpSwapSubwordPostRA<GPR32>;
+def ATOMIC_CMP_SWAP_I32_POSTRA : AtomicCmpSwapPostRA<GPR32>;
+
+def ATOMIC_LOAD_MAX_I8_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_MAX_I16_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_MAX_I32_POSTRA : Atomic2OpsPostRA<GPR32>;
+
+def ATOMIC_LOAD_MIN_I8_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_MIN_I16_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_MIN_I32_POSTRA : Atomic2OpsPostRA<GPR32>;
+
+def ATOMIC_LOAD_UMAX_I8_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_UMAX_I16_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_UMAX_I32_POSTRA : Atomic2OpsPostRA<GPR32>;
+
+def ATOMIC_LOAD_UMIN_I8_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_UMIN_I16_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_UMIN_I32_POSTRA : Atomic2OpsPostRA<GPR32>;
+
+def : LoongArchPat<(atomic_load_8 addr:$a), (LD_B32 addr:$a)>;
+def : LoongArchPat<(atomic_load_16 addr:$a), (LD_H32 addr:$a)>;
+def : LoongArchPat<(atomic_load_32 addrimm14lsl2:$a), (LDPTR_W32 addrimm14lsl2:$a)>;
+def : LoongArchPat<(atomic_load_32 addr:$a), (LD_W32 addr:$a)>;
+
+def : LoongArchPat<(atomic_store_8 addr:$a, GPR32:$v),
+                   (ST_B32 GPR32:$v, addr:$a)>;
+def : LoongArchPat<(atomic_store_16 addr:$a, GPR32:$v),
+                   (ST_H32 GPR32:$v, addr:$a)>;
+def : LoongArchPat<(atomic_store_32 addrimm14lsl2:$a, GPR32:$v),
+                   (STPTR_W32 GPR32:$v, addrimm14lsl2:$a)>;
+def : LoongArchPat<(atomic_store_32 addr:$a, GPR32:$v),
+                   (ST_W32 GPR32:$v, addr:$a)>;
+
+def : LoongArchPat<(LoongArchDBAR (i32 immz)), (DBAR 0)>;
+
+def : LoongArchPat<(i32 (extloadi1 addr:$src)), (LD_BU32 addr:$src)>;
+def : LoongArchPat<(i32 (extloadi8 addr:$src)), (LD_BU32 addr:$src)>;
+def : LoongArchPat<(i32 (extloadi16 addr:$src)), (LD_HU32 addr:$src)>;
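+// Note (editorial, illustrative): addrimm14lsl2 matches a base register plus
+// a word-aligned offset that fits a signed 14-bit field scaled by 4, which
+// lets ldptr.w/stptr.w reach roughly +-32KB, e.g.:
+//   ldptr.w $r5, $r4, 2048    # 2048 = 512 << 2, too big for ld.w's simm12
+// Offsets that fit the plain simm12 forms keep using ld.w/st.w.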
+// Patterns for loads/stores with a reg+imm operand.
+let AddedComplexity = 40 in {
+  def : LoadRegImmPat<LD_B32, i32, sextloadi8>;
+  def : LoadRegImmPat<LD_H32, i32, sextloadi16>;
+  def : LoadRegImmPat<LD_W32, i32, load>;
+  def : LoadRegImmPat<LD_BU32, i32, zextloadi8>;
+  def : LoadRegImmPat<LD_HU32, i32, zextloadi16>;
+  def : StoreRegImmPat<ST_B32, i32, truncstorei8>;
+  def : StoreRegImmPat<ST_H32, i32, truncstorei16>;
+  def : StoreRegImmPat<ST_W32, i32, store>;
+
+  def : LoadRegImm14Lsl2Pat<LDPTR_W32, i32, load>;
+  def : StoreRegImm14Lsl2Pat<STPTR_W32, i32, store>;
+}
+
+let isCall=1, isCTI=1, Defs = [RA] in {
+
+  class JumpLinkRegPseudo<RegisterOperand RO, Instruction JIRLRInst,
+                          Register RetReg, RegisterOperand ResRO = RO> :
+    LoongArchPseudo<(outs), (ins RO:$rj), [(LoongArchJmpLink RO:$rj)]>,
+    PseudoInstExpansion<(JIRLRInst RetReg, ResRO:$rj)> {
+    let hasPostISelHook = 1;
+  }
+
+  class JumpLinkReg<string opstr, RegisterOperand RO> :
+    InstForm<(outs RO:$rd), (ins RO:$rj), !strconcat(opstr, "\t$rd, $rj, 0"),
+             [], FrmR, opstr> {
+    let hasPostISelHook = 1;
+  }
+
+}
+
+def JIRLR : JumpLinkReg<"jirl", GPR32Opnd>, R2_IMM16JIRL {
+  let offs16 = 0;
+}
+def JIRLRPseudo : JumpLinkRegPseudo<GPR32Opnd, JIRLR, RA>;
+
+class BrindRegPseudo<RegisterOperand RO, Instruction JIRLRInst,
+                     Register RetReg, RegisterOperand ResRO = RO> :
+  LoongArchPseudo<(outs), (ins RO:$rj), [(brind RO:$rj)]>,
+  PseudoInstExpansion<(JIRLRInst RetReg, ResRO:$rj)> {
+  let isTerminator=1;
+  let isBarrier=1;
+  let isBranch = 1;
+  let isIndirectBranch = 1;
+  bit isCTI = 1;
+}
+
+def JIRLRBRIND : BrindRegPseudo<GPR32Opnd, JIRLR, ZERO>;
+
+def : LoongArchPat<(addc GPR32:$src, immSExt12:$imm),
+                   (ADDI_W GPR32:$src, imm:$imm)>;
+
+defm : SeteqPats<GPR32, SLTUI32, XOR32, SLTU32, ZERO>;
+defm : SetlePats<GPR32, XORI32, SLT32, SLTU32>;
+defm : SetgtPats<GPR32, SLT32, SLTU32>;
+defm : SetgePats<GPR32, XORI32, SLT32, SLTU32>;
+defm : SetgeImmPats<GPR32, XORI32, SLTI32, SLTUI32>;
+
+def : LoongArchPat<(i64 (sext (i32 (xor (i32 (trunc (i64 (assertsext GPR64:$rj)))), (immZExt12:$imm12))))),
+                   (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                                  (XORI32 (EXTRACT_SUBREG GPR64:$rj, sub_32), (immZExt12:$imm12)), sub_32)>, GPR_64;
+
+def : LoongArchPat<(i64 (sext (i32 (add (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))),
+                   (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                                  (ADD_W (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64;
+
+def : LoongArchPat<(i64 (sext (i32 (add (i32 (trunc (i64 (assertsext GPR64:$rj)))), (immSExt12:$imm12))))),
+                   (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                                  (ADDI_W (EXTRACT_SUBREG GPR64:$rj, sub_32), (immSExt12:$imm12)), sub_32)>, GPR_64;
+
+def : LoongArchPat<(i64 (sext (i32 (sra (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))),
+                   (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                                  (SRA_W (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64;
+
+def : LoongArchPat<(i64 (sext (i32 (srl (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))),
+                   (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                                  (SRL_W (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64;
+
+def : LoongArchPat<(i64 (sext (i32 (mul (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))),
+                   (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                                  (MUL_W (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64;
+
+def : LoongArchPat<(i64 (sext (i32 (xor (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))),
+                   (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                                  (XOR32 (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64;
+
+def : LoongArchPat<(i64 (sext (i32 (xor (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 GPR32:$rk))))),
+                   (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                                  (XOR32 (EXTRACT_SUBREG GPR64:$rj, sub_32), GPR32:$rk), sub_32)>, GPR_64;
+
+def : LoongArchPat<(i64 (sext (i32 (or (i32 (trunc (i64 (assertsext GPR64:$rj)))), (uimm12_32:$imm12))))),
+                   (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                                  (ORI32 (EXTRACT_SUBREG GPR64:$rj, sub_32), (uimm12_32:$imm12)), sub_32)>,
GPR_64; + +def : LoongArchPat<(i64 (sext (i32 (or (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 GPR32:$rk))))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (OR32 (EXTRACT_SUBREG GPR64:$rj, sub_32), GPR32:$rk), sub_32)>, GPR_64; + +def : LoongArchPat<(i64 (sext (select i32:$cond, (i32 (trunc (i64 (assertsext GPR64:$t)))), (i32 (trunc (i64 (assertsext GPR64:$f))))))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (OR32 (MASKEQZ32 (EXTRACT_SUBREG GPR64:$t, sub_32), i32:$cond), + (MASKNEZ32 (EXTRACT_SUBREG GPR64:$f, sub_32), i32:$cond)), sub_32)>; + +def : LoongArchPat<(i64 (sext (i32 (shl (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (SLL_W (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; + +def : LoongArchPat<(i64 (sext (i32 (srem (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (MOD_W (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; + +def : LoongArchPat<(atomic_store_32 addr:$a, (i32 (trunc (i64 (assertsext GPR64:$rj))))), + (ST_W32 (EXTRACT_SUBREG GPR64:$rj, sub_32), addr:$a)>, GPR_64; + +def : LoongArchPat<(i64 (sext (i32 (sub (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (SUB_W (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; + +def : LoongArchPat<(i64 (sext (i32 (udiv (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (DIV_WU (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; + +def : LoongArchPat<(i64 (sext (i32 (urem (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (MOD_WU (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; + +def : LoongArchPat<(brcond (i32 (seteq (i32 (trunc (i64 (assertsext GPR64:$rj)))), 0)), bb:$offs21), + (BEQZ32 (EXTRACT_SUBREG GPR64:$rj, sub_32), brtarget:$offs21)>; + +def : LoongArchPat<(setne (i32 (trunc (i64 (assertsext GPR64:$rj)))), 0), + (SLTU32 ZERO, (EXTRACT_SUBREG GPR64:$rj, sub_32))>; + +def : LoongArchPat<(select i32:$cond, (i32 (trunc (i64 (assertsext GPR64:$t)))), (i32 (trunc (i64 (assertsext GPR64:$f))))), + (OR32 (MASKEQZ32 (EXTRACT_SUBREG GPR64:$t, sub_32), i32:$cond), + (MASKNEZ32 (EXTRACT_SUBREG GPR64:$f, sub_32), i32:$cond))>; + +def : LoongArchPat<(select (i32 (setne (i32 (trunc (i64 (assertsext GPR64:$cond)))), immz)), immz, i32:$f), + (MASKNEZ32 i32:$f, (EXTRACT_SUBREG GPR64:$cond, sub_32))>; + +def : LoongArchPat<(select (i32 (seteq (i32 (trunc (i64 (assertsext GPR64:$cond)))), immz)), immz, i32:$f), + (MASKEQZ32 i32:$f, (EXTRACT_SUBREG GPR64:$cond, sub_32))>; + + def : LoongArchPat<(store (i32 (trunc (i64 (assertsext GPR64:$v)))), addr:$a), + (ST_W32 (EXTRACT_SUBREG GPR64:$v, sub_32), addr:$a)>; + + +def : LoongArchPat<(i32 (xor GPR32:$rj, (i32 -1))), + (NOR32 ZERO, GPR32:$rj)>; + +def : LoongArchPat<(and GPR32:$rj, (i32 (xor GPR32:$rk, (i32 -1)))), + (ANDN32 GPR32:$rj, GPR32:$rk)>; + +def : LoongArchPat<(or GPR32:$rj, (i32 (xor GPR32:$rk, (i32 -1)))), + (ORN32 GPR32:$rj, GPR32:$rk)>; + +def : LoongArchPat<(or (i32 (trunc GPR64:$rj)), + (i32 (xor (i32 (trunc GPR64:$rk)), (i32 -1)))), + (ORN32 (EXTRACT_SUBREG GPR64:$rj, 
sub_32), + (EXTRACT_SUBREG GPR64:$rk, sub_32))>; + +def : LoongArchPat<(and (i32 (trunc GPR64:$rj)), + (i32 (xor (i32 (trunc GPR64:$rk)), (i32 -1)))), + (ANDN32 (EXTRACT_SUBREG GPR64:$rj, sub_32), + (EXTRACT_SUBREG GPR64:$rk, sub_32))>; + +def : LoongArchPat<(xor (i32 (trunc GPR64:$rj)), -1), + (NOR32 ZERO, (EXTRACT_SUBREG GPR64:$rj, sub_32))>; + +def : LoongArchPat<(not (i32 (trunc (or GPR64:$rj, GPR64:$rk)))), + (NOR32 (EXTRACT_SUBREG GPR64:$rj, sub_32), + (EXTRACT_SUBREG GPR64:$rk, sub_32))>; + +def : LoongArchPat< + (i64 + (sext + (i32 (and (i32 (trunc (i64 (assertsext GPR64:$rj)))), + (i32 (xor (i32 (trunc (i64 (assertsext GPR64:$rk)))), + (i32 -1)))) + ) + ) + ), + (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), + (ANDN32 (EXTRACT_SUBREG GPR64:$rj, sub_32), + (EXTRACT_SUBREG GPR64:$rk, sub_32)), + sub_32 + )>; + +def : LoongArchPat< + (i64 + (sext + (i32 (or (i32 (trunc (i64 (assertsext GPR64:$rj)))), + (i32 (xor (i32 (trunc (i64 (assertsext GPR64:$rk)))), + (i32 -1)))) + ) + ) + ), + (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), + (ORN32 (EXTRACT_SUBREG GPR64:$rj, sub_32), + (EXTRACT_SUBREG GPR64:$rk, sub_32)), + sub_32 + )>; + +def : LoongArchPat<(i64 + (sext + (i32 (xor (i32 (or (i32 (trunc (i64 (assertsext GPR64:$rj)))), + (i32 (trunc (i64 (assertsext GPR64:$rk)))))), + (i32 -1)) + ) + ) + ), + (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), + (NOR32 (EXTRACT_SUBREG GPR64:$rj, sub_32), + (EXTRACT_SUBREG GPR64:$rk, sub_32)), + sub_32 + )>; + +def : LoongArchPat<(i64 + (sext + (i32 (xor (i32 (trunc (i64 (or (i64 (assertsext GPR64:$rj)), + (i64 (assertsext GPR64:$rk)))))), + (i32 -1)) + ) + ) + ), + (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), + (NOR32 (EXTRACT_SUBREG GPR64:$rk, sub_32), + (EXTRACT_SUBREG GPR64:$rj, sub_32)), + sub_32 + )>; + +def : LoongArchPat<(i64 + (sext + (i32 (xor (i32 (trunc (i64 (assertsext GPR64:$rj)))), + (i32 -1)) + ) + ) + ), + (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), + (NOR32 ZERO, (EXTRACT_SUBREG GPR64:$rj, sub_32)), + sub_32 + )>; + +def : LoongArchPat<(i64 + (zext + (i32 (seteq (i32 (trunc (i64 (assertsext GPR64:$rj)))), + (i32 0)) + ) + ) + ), + (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), + (SLTUI32 (EXTRACT_SUBREG GPR64:$rj, sub_32), (i32 1)), + sub_32 + )>; diff --git a/lib/Target/LoongArch/LoongArchAsmPrinter.cpp b/lib/Target/LoongArch/LoongArchAsmPrinter.cpp new file mode 100644 index 00000000..73a74131 --- /dev/null +++ b/lib/Target/LoongArch/LoongArchAsmPrinter.cpp @@ -0,0 +1,647 @@ +//===- LoongArchAsmPrinter.cpp - LoongArch LLVM Assembly Printer --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to GAS-format LoongArch assembly language. 
+// +//===----------------------------------------------------------------------===// + +#include "LoongArchAsmPrinter.h" +#include "MCTargetDesc/LoongArchInstPrinter.h" +#include "MCTargetDesc/LoongArchABIInfo.h" +#include "MCTargetDesc/LoongArchBaseInfo.h" +#include "MCTargetDesc/LoongArchMCTargetDesc.h" +#include "LoongArch.h" +#include "LoongArchMCInstLower.h" +#include "LoongArchMachineFunction.h" +#include "LoongArchSubtarget.h" +#include "LoongArchTargetMachine.h" +#include "LoongArchTargetStreamer.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstBuilder.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolELF.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include +#include +#include +#include +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "loongarch-asm-printer" + +LoongArchTargetStreamer &LoongArchAsmPrinter::getTargetStreamer() const { + return static_cast(*OutStreamer->getTargetStreamer()); +} + +bool LoongArchAsmPrinter::runOnMachineFunction(MachineFunction &MF) { + Subtarget = &MF.getSubtarget(); + + LoongArchFI = MF.getInfo(); + MCP = MF.getConstantPool(); + + AsmPrinter::runOnMachineFunction(MF); + + emitXRayTable(); + + return true; +} + +bool LoongArchAsmPrinter::lowerOperand(const MachineOperand &MO, MCOperand &MCOp) { + MCOp = MCInstLowering.LowerOperand(MO); + return MCOp.isValid(); +} + +#include "LoongArchGenMCPseudoLowering.inc" + +// Lower PseudoReturn/PseudoIndirectBranch/PseudoIndirectBranch64 to +// JIRL as appropriate for the target. +void LoongArchAsmPrinter::emitPseudoIndirectBranch(MCStreamer &OutStreamer, + const MachineInstr *MI) { + bool HasLinkReg = false; + MCInst TmpInst0; + TmpInst0.setOpcode(LoongArch::JIRL); + HasLinkReg = true; + + MCOperand MCOp; + + if (HasLinkReg) { + unsigned ZeroReg = Subtarget->is64Bit() ? 
LoongArch::ZERO_64 : LoongArch::ZERO; + TmpInst0.addOperand(MCOperand::createReg(ZeroReg)); + } + + lowerOperand(MI->getOperand(0), MCOp); + TmpInst0.addOperand(MCOp); + + TmpInst0.addOperand(MCOperand::createImm(0)); + + EmitToStreamer(OutStreamer, TmpInst0); +} + +void LoongArchAsmPrinter::emitPseudoTailBranch(MCStreamer &OutStreamer, + const MachineInstr *MI) { + MCInst TmpInst; + TmpInst.setOpcode(LoongArch::B); + + MCOperand MCOp; + + lowerOperand(MI->getOperand(0), MCOp); + TmpInst.addOperand(MCOp); + + EmitToStreamer(OutStreamer, TmpInst); +} + +void LoongArchAsmPrinter::emitInstruction(const MachineInstr *MI) { + LoongArchTargetStreamer &TS = getTargetStreamer(); + unsigned Opc = MI->getOpcode(); + TS.forbidModuleDirective(); + + if (MI->isDebugValue()) { + SmallString<128> Str; + raw_svector_ostream OS(Str); + + PrintDebugValueComment(MI, OS); + return; + } + if (MI->isDebugLabel()) + return; + // If we just ended a constant pool, mark it as such. + OutStreamer->emitDataRegion(MCDR_DataRegionEnd); + InConstantPool = false; + + switch (Opc) { + case LoongArch::PATCHABLE_FUNCTION_ENTER: + LowerPATCHABLE_FUNCTION_ENTER(*MI); + return; + case LoongArch::PATCHABLE_FUNCTION_EXIT: + LowerPATCHABLE_FUNCTION_EXIT(*MI); + return; + case LoongArch::PATCHABLE_TAIL_CALL: + LowerPATCHABLE_TAIL_CALL(*MI); + return; + } + MachineBasicBlock::const_instr_iterator I = MI->getIterator(); + MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); + + do { + // Do any auto-generated pseudo lowerings. + if (emitPseudoExpansionLowering(*OutStreamer, &*I)) + continue; + if (I->getOpcode() == LoongArch::PseudoReturn || + I->getOpcode() == LoongArch::PseudoReturn64){ + emitPseudoIndirectBranch(*OutStreamer, &*I); + continue; + } + if (I->getOpcode() == LoongArch::PseudoTailReturn){ + emitPseudoTailBranch(*OutStreamer, &*I); + continue; + } + + // Some instructions are marked as pseudo right now which + // would make the test fail for the wrong reason but + // that will be fixed soon. We need this here because we are + // removing another test for this situation downstream in the + // callchain. + // + if (I->isPseudo() + && !isLongBranchPseudo(I->getOpcode())) + llvm_unreachable("Pseudo opcode found in EmitInstruction()"); + + MCInst TmpInst0; + MCInstLowering.Lower(&*I, TmpInst0); + EmitToStreamer(*OutStreamer, TmpInst0); + } while ((++I != E) && I->isInsideBundle()); +} + +//===----------------------------------------------------------------------===// +// +// LoongArch Asm Directives +// +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Set directives +//===----------------------------------------------------------------------===// + +/// Emit Set directives. 
+const char *LoongArchAsmPrinter::getCurrentABIString() const { + switch (static_cast(TM).getABI().GetEnumValue()) { + case LoongArchABIInfo::ABI::ILP32D: + return "abiilp32d"; + case LoongArchABIInfo::ABI::ILP32F: + return "abiilp32f"; + case LoongArchABIInfo::ABI::ILP32S: + return "abiilp32s"; + case LoongArchABIInfo::ABI::LP64D: + return "abilp64d"; + case LoongArchABIInfo::ABI::LP64S: + return "abilp64s"; + case LoongArchABIInfo::ABI::LP64F: + return "abilp64f"; + default: llvm_unreachable("Unknown LoongArch ABI"); + } +} + +void LoongArchAsmPrinter::emitFunctionEntryLabel() { + + OutStreamer->emitLabel(CurrentFnSym); + +} + +/// EmitFunctionBodyStart - Targets can override this to emit stuff before +/// the first basic block in the function. +void LoongArchAsmPrinter::emitFunctionBodyStart() { + + MCInstLowering.Initialize(&MF->getContext()); +} + +/// EmitFunctionBodyEnd - Targets can override this to emit stuff after +/// the last basic block in the function. +void LoongArchAsmPrinter::emitFunctionBodyEnd() { + + // Make sure to terminate any constant pools that were at the end + // of the function. + if (!InConstantPool) + return; + InConstantPool = false; + OutStreamer->emitDataRegion(MCDR_DataRegionEnd); +} + +void LoongArchAsmPrinter::emitBasicBlockEnd(const MachineBasicBlock &MBB) { + AsmPrinter::emitBasicBlockEnd(MBB); +} + +/// isBlockOnlyReachableByFallthough - Return true if the basic block has +/// exactly one predecessor and the control transfer mechanism between +/// the predecessor and this block is a fall-through. +bool LoongArchAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock* + MBB) const { + // The predecessor has to be immediately before this block. + const MachineBasicBlock *Pred = *MBB->pred_begin(); + + // If the predecessor is a switch statement, assume a jump table + // implementation, so it is not a fall through. + if (const BasicBlock *bb = Pred->getBasicBlock()) + if (isa(bb->getTerminator())) + return false; + + // Check default implementation + return AsmPrinter::isBlockOnlyReachableByFallthrough(MBB); +} + +// Print out an operand for an inline asm expression. +bool LoongArchAsmPrinter::PrintAsmOperand(const MachineInstr *MI, + unsigned OpNum, const char *ExtraCode, raw_ostream &O) { + // Does this asm operand have a single letter operand modifier? + if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) return true; // Unknown modifier. 
+ + const MachineOperand &MO = MI->getOperand(OpNum); + switch (ExtraCode[0]) { + default: + // See if this is a generic print operand + return AsmPrinter::PrintAsmOperand(MI,OpNum,ExtraCode,O); + case 'X': // hex const int + if ((MO.getType()) != MachineOperand::MO_Immediate) + return true; + O << "0x" << Twine::utohexstr(MO.getImm()); + return false; + case 'x': // hex const int (low 16 bits) + if ((MO.getType()) != MachineOperand::MO_Immediate) + return true; + O << "0x" << Twine::utohexstr(MO.getImm() & 0xffff); + return false; + case 'd': // decimal const int + if ((MO.getType()) != MachineOperand::MO_Immediate) + return true; + O << MO.getImm(); + return false; + case 'm': // decimal const int minus 1 + if ((MO.getType()) != MachineOperand::MO_Immediate) + return true; + O << MO.getImm() - 1; + return false; + case 'y': // exact log2 + if ((MO.getType()) != MachineOperand::MO_Immediate) + return true; + if (!isPowerOf2_64(MO.getImm())) + return true; + O << Log2_64(MO.getImm()); + return false; + case 'z': + // $r0 if zero, regular printing otherwise + if (MO.getType() == MachineOperand::MO_Immediate && MO.getImm() == 0) { + O << "$r0"; + return false; + } + // If not, call printOperand as normal. + break; + case 'D': // Second part of a double word register operand + case 'L': // Low order register of a double word register operand + case 'M': // High order register of a double word register operand + { + if (OpNum == 0) + return true; + const MachineOperand &FlagsOP = MI->getOperand(OpNum - 1); + if (!FlagsOP.isImm()) + return true; + unsigned Flags = FlagsOP.getImm(); + unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + // Number of registers represented by this operand. We are looking + // for 2 for 32 bit mode and 1 for 64 bit mode. + if (NumVals != 2) { + if (Subtarget->is64Bit() && NumVals == 1 && MO.isReg()) { + unsigned Reg = MO.getReg(); + O << '$' << LoongArchInstPrinter::getRegisterName(Reg); + return false; + } + return true; + } + + unsigned RegOp = OpNum; + if (!Subtarget->is64Bit()){ + // Endianness reverses which register holds the high or low value + // between M and L. + switch(ExtraCode[0]) { + case 'M': + RegOp = OpNum + 1; + break; + case 'L': + RegOp = OpNum; + break; + case 'D': // Always the second part + RegOp = OpNum + 1; + } + if (RegOp >= MI->getNumOperands()) + return true; + const MachineOperand &MO = MI->getOperand(RegOp); + if (!MO.isReg()) + return true; + unsigned Reg = MO.getReg(); + O << '$' << LoongArchInstPrinter::getRegisterName(Reg); + return false; + } + break; + } + case 'w': + // Print LSX registers for the 'f' constraint + // In LLVM, the 'w' modifier doesn't need to do anything. + // We can just call printOperand as normal. + break; + case 'u': + // Print LASX registers for the 'f' constraint + // In LLVM, the 'u' modifier doesn't need to do anything. + // We can just call printOperand as normal. 
+ break; + } + } + + printOperand(MI, OpNum, O); + return false; +} + +bool LoongArchAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNum, + const char *ExtraCode, + raw_ostream &O) { + assert(OpNum + 1 < MI->getNumOperands() && "Insufficient operands"); + const MachineOperand &BaseMO = MI->getOperand(OpNum); + const MachineOperand &OffsetMO = MI->getOperand(OpNum + 1); + assert(BaseMO.isReg() && "Unexpected base pointer for inline asm memory operand."); + assert(OffsetMO.isImm() && "Unexpected offset for inline asm memory operand."); + int Offset = OffsetMO.getImm(); + + // Currently we are expecting either no ExtraCode or 'D','M','L'. + if (ExtraCode) { + switch (ExtraCode[0]) { + case 'D': + case 'M': + Offset += 4; + break; + case 'L': + break; + default: + return true; // Unknown modifier. + } + } + + O << "$" << LoongArchInstPrinter::getRegisterName(BaseMO.getReg()) << ", " << Offset; + + return false; +} + +void LoongArchAsmPrinter::printOperand(const MachineInstr *MI, int opNum, + raw_ostream &O) { + const MachineOperand &MO = MI->getOperand(opNum); + + switch (MO.getType()) { + case MachineOperand::MO_Register: + O << '$' + << StringRef(LoongArchInstPrinter::getRegisterName(MO.getReg())).lower(); + break; + + case MachineOperand::MO_Immediate: + O << MO.getImm(); + break; + + case MachineOperand::MO_MachineBasicBlock: + MO.getMBB()->getSymbol()->print(O, MAI); + return; + + case MachineOperand::MO_GlobalAddress: + getSymbol(MO.getGlobal())->print(O, MAI); + break; + + case MachineOperand::MO_BlockAddress: { + MCSymbol *BA = GetBlockAddressSymbol(MO.getBlockAddress()); + O << BA->getName(); + break; + } + + case MachineOperand::MO_ConstantPoolIndex: + O << getDataLayout().getPrivateGlobalPrefix() << "CPI" + << getFunctionNumber() << "_" << MO.getIndex(); + if (MO.getOffset()) + O << "+" << MO.getOffset(); + break; + + default: + llvm_unreachable(""); + } +} + +void LoongArchAsmPrinter:: +printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O) { + // Load/Store memory operands -- imm($reg) + // If PIC target the target is loaded as the + // pattern lw $25,%call16($28) + + printOperand(MI, opNum+1, O); + O << "("; + printOperand(MI, opNum, O); + O << ")"; +} + +void LoongArchAsmPrinter:: +printMemOperandEA(const MachineInstr *MI, int opNum, raw_ostream &O) { + // when using stack locations for not load/store instructions + // print the same way as all normal 3 operand instructions. + printOperand(MI, opNum, O); + O << ", "; + printOperand(MI, opNum+1, O); +} + +void LoongArchAsmPrinter:: +printRegisterList(const MachineInstr *MI, int opNum, raw_ostream &O) { + for (int i = opNum, e = MI->getNumOperands(); i != e; ++i) { + if (i != opNum) O << ", "; + printOperand(MI, i, O); + } +} + +void LoongArchAsmPrinter::emitStartOfAsmFile(Module &M) { + LoongArchTargetStreamer &TS = getTargetStreamer(); + + // LoongArchTargetStreamer has an initialization order problem when emitting an + // object file directly (see LoongArchTargetELFStreamer for full details). Work + // around it by re-initializing the PIC state here. + TS.setPic(OutContext.getObjectFileInfo()->isPositionIndependent()); + + // Compute LoongArch architecture attributes based on the default subtarget + // that we'd have constructed. Module level directives aren't LTO + // clean anyhow. + // FIXME: For ifunc related functions we could iterate over and look + // for a feature string that doesn't match the default one. 
+ const Triple &TT = TM.getTargetTriple(); + StringRef CPU = LoongArch_MC::selectLoongArchCPU(TT, TM.getTargetCPU()); + StringRef FS = TM.getTargetFeatureString(); + const LoongArchTargetMachine &MTM = static_cast(TM); + const LoongArchSubtarget STI(TT, CPU, FS, MTM, None); + + TS.updateABIInfo(STI); +} + +void LoongArchAsmPrinter::emitInlineAsmStart() const { + + OutStreamer->AddBlankLine(); +} + +void LoongArchAsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo, + const MCSubtargetInfo *EndInfo) const { + OutStreamer->AddBlankLine(); +} + +void LoongArchAsmPrinter::EmitInstrReg(const MCSubtargetInfo &STI, unsigned Opcode, + unsigned Reg) { + MCInst I; + I.setOpcode(Opcode); + I.addOperand(MCOperand::createReg(Reg)); + OutStreamer->emitInstruction(I, STI); +} + +void LoongArchAsmPrinter::EmitInstrRegReg(const MCSubtargetInfo &STI, + unsigned Opcode, unsigned Reg1, + unsigned Reg2) { + MCInst I; + // + // Because of the current td files for LoongArch32, the operands for MTC1 + // appear backwards from their normal assembly order. It's not a trivial + // change to fix this in the td file so we adjust for it here. + // + if (Opcode == LoongArch::MOVGR2FR_W) { + unsigned Temp = Reg1; + Reg1 = Reg2; + Reg2 = Temp; + } + I.setOpcode(Opcode); + I.addOperand(MCOperand::createReg(Reg1)); + I.addOperand(MCOperand::createReg(Reg2)); + OutStreamer->emitInstruction(I, STI); +} + +void LoongArchAsmPrinter::EmitInstrRegRegReg(const MCSubtargetInfo &STI, + unsigned Opcode, unsigned Reg1, + unsigned Reg2, unsigned Reg3) { + MCInst I; + I.setOpcode(Opcode); + I.addOperand(MCOperand::createReg(Reg1)); + I.addOperand(MCOperand::createReg(Reg2)); + I.addOperand(MCOperand::createReg(Reg3)); + OutStreamer->emitInstruction(I, STI); +} + +void LoongArchAsmPrinter::EmitMovFPIntPair(const MCSubtargetInfo &STI, + unsigned MovOpc, unsigned Reg1, + unsigned Reg2, unsigned FPReg1, + unsigned FPReg2, bool LE) { + if (!LE) { + unsigned temp = Reg1; + Reg1 = Reg2; + Reg2 = temp; + } + EmitInstrRegReg(STI, MovOpc, Reg1, FPReg1); + EmitInstrRegReg(STI, MovOpc, Reg2, FPReg2); +} + +void LoongArchAsmPrinter::emitEndOfAsmFile(Module &M) { + // return to the text section + OutStreamer->SwitchSection(OutContext.getObjectFileInfo()->getTextSection()); +} + +void LoongArchAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind) { + const uint8_t NoopsInSledCount = 11; + // For LoongArch we want to emit the following pattern: + // + // .Lxray_sled_N: + // ALIGN + // B .tmpN + // 11 NOP instructions (44 bytes) + // .tmpN + // + // We need the 44 bytes (11 instructions) because at runtime, we'd + // be patching over the full 48 bytes (12 instructions) with the following + // pattern: + // + // addi.d sp,sp, -16 ;create stack frame + // st.d ra, sp, 8 ;save return address + // lu12i.w t0,%%abs_hi20(__xray_FunctionEntry/Exit) + // ori %1,t0,%%abs_lo12(__xray_FunctionEntry/Exit) + // lu32i.d t0,%%abs64_lo20(__xray_FunctionEntry/Exit) + // lu52i.d t0,t0,%%abs64_hi12(__xray_FunctionEntry/Exit) + // lu12i.w t1,%%abs_hi20(function_id) + // ori %1,t1,%%abs_lo12(function_id) ;pass function id + // jirl ra, t0, 0 ;call Tracing hook + // ld.d ra, sp, 8 ;restore return address + // addi.d sp, sp, 16 ;delete stack frame + + OutStreamer->emitCodeAlignment(4); + auto CurSled = OutContext.createTempSymbol("xray_sled_", true); + OutStreamer->emitLabel(CurSled); + auto Target = OutContext.createTempSymbol(); + + // Emit "B .tmpN" instruction, which jumps over the nop sled to the actual + // start of function + const MCExpr *TargetExpr = 
MCSymbolRefExpr::create( + Target, MCSymbolRefExpr::VariantKind::VK_None, OutContext); + EmitToStreamer(*OutStreamer, MCInstBuilder(LoongArch::BEQ) + .addReg(LoongArch::ZERO) + .addReg(LoongArch::ZERO) + .addExpr(TargetExpr)); + + for (int8_t I = 0; I < NoopsInSledCount; I++) + EmitToStreamer(*OutStreamer, MCInstBuilder(LoongArch::ORI) + .addReg(LoongArch::ZERO) + .addReg(LoongArch::ZERO) + .addImm(0)); + + OutStreamer->emitLabel(Target); + recordSled(CurSled, MI, Kind, 2); +} + +void LoongArchAsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI) { + EmitSled(MI, SledKind::FUNCTION_ENTER); +} + +void LoongArchAsmPrinter::LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI) { + EmitSled(MI, SledKind::FUNCTION_EXIT); +} + +void LoongArchAsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI) { + EmitSled(MI, SledKind::TAIL_CALL); +} + +void LoongArchAsmPrinter::PrintDebugValueComment(const MachineInstr *MI, + raw_ostream &OS) { + // TODO: implement +} + +bool LoongArchAsmPrinter::isLongBranchPseudo(int Opcode) const { + return (Opcode == LoongArch::LONG_BRANCH_ADDIW + || Opcode == LoongArch::LONG_BRANCH_ADDIW2Op + || Opcode == LoongArch::LONG_BRANCH_ADDID + || Opcode == LoongArch::LONG_BRANCH_ADDID2Op + || Opcode == LoongArch::LONG_BRANCH_PCADDU12I); +} + +// Force static initialization. +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchAsmPrinter() { + RegisterAsmPrinter X(getTheLoongArch32Target()); + RegisterAsmPrinter A(getTheLoongArch64Target()); +} diff --git a/lib/Target/LoongArch/LoongArchAsmPrinter.h b/lib/Target/LoongArch/LoongArchAsmPrinter.h new file mode 100644 index 00000000..0facaa29 --- /dev/null +++ b/lib/Target/LoongArch/LoongArchAsmPrinter.h @@ -0,0 +1,138 @@ +//===- LoongArchAsmPrinter.h - LoongArch LLVM Assembly Printer -----------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// LoongArch Assembly printer class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHASMPRINTER_H +#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHASMPRINTER_H + +#include "LoongArchMCInstLower.h" +#include "LoongArchSubtarget.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/Compiler.h" +#include +#include +#include + +namespace llvm { + +class MCOperand; +class MCSubtargetInfo; +class MCSymbol; +class MachineBasicBlock; +class MachineConstantPool; +class MachineFunction; +class MachineInstr; +class MachineOperand; +class LoongArchFunctionInfo; +class LoongArchTargetStreamer; +class Module; +class raw_ostream; +class TargetMachine; + +class LLVM_LIBRARY_VISIBILITY LoongArchAsmPrinter : public AsmPrinter { + LoongArchTargetStreamer &getTargetStreamer() const; + + void EmitInstrWithMacroNoAT(const MachineInstr *MI); + + //===------------------------------------------------------------------===// + // XRay implementation + //===------------------------------------------------------------------===// + +public: + // XRay-specific lowering for LoongArch. 
+ void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI); + void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI); + void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI); + +private: + /// MCP - Keep a pointer to constantpool entries of the current + /// MachineFunction. + const MachineConstantPool *MCP = nullptr; + + /// InConstantPool - Maintain state when emitting a sequence of constant + /// pool entries so we can properly mark them as data regions. + bool InConstantPool = false; + + void EmitSled(const MachineInstr &MI, SledKind Kind); + + // tblgen'erated function. + bool emitPseudoExpansionLowering(MCStreamer &OutStreamer, + const MachineInstr *MI); + + // Emit PseudoReturn, PseudoReturn64, PseudoIndirectBranch, + // and PseudoIndirectBranch64 as a JIRL as appropriate + // for the target. + void emitPseudoIndirectBranch(MCStreamer &OutStreamer, + const MachineInstr *MI); + + void emitPseudoTailBranch(MCStreamer &OutStreamer, + const MachineInstr *MI); + + // lowerOperand - Convert a MachineOperand into the equivalent MCOperand. + bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp); + + void emitInlineAsmStart() const override; + + void emitInlineAsmEnd(const MCSubtargetInfo &StartInfo, + const MCSubtargetInfo *EndInfo) const override; + + void EmitInstrReg(const MCSubtargetInfo &STI, unsigned Opcode, unsigned Reg); + + void EmitInstrRegReg(const MCSubtargetInfo &STI, unsigned Opcode, + unsigned Reg1, unsigned Reg2); + + void EmitInstrRegRegReg(const MCSubtargetInfo &STI, unsigned Opcode, + unsigned Reg1, unsigned Reg2, unsigned Reg3); + + void EmitMovFPIntPair(const MCSubtargetInfo &STI, unsigned MovOpc, + unsigned Reg1, unsigned Reg2, unsigned FPReg1, + unsigned FPReg2, bool LE); + + bool isLongBranchPseudo(int Opcode) const; + +public: + const LoongArchSubtarget *Subtarget; + const LoongArchFunctionInfo *LoongArchFI; + LoongArchMCInstLower MCInstLowering; + + explicit LoongArchAsmPrinter(TargetMachine &TM, + std::unique_ptr Streamer) + : AsmPrinter(TM, std::move(Streamer)), MCInstLowering(*this) {} + + StringRef getPassName() const override { return "LoongArch Assembly Printer"; } + + bool runOnMachineFunction(MachineFunction &MF) override; + + void emitInstruction(const MachineInstr *MI) override; + const char *getCurrentABIString() const; + void emitFunctionEntryLabel() override; + void emitFunctionBodyStart() override; + void emitFunctionBodyEnd() override; + void emitBasicBlockEnd(const MachineBasicBlock &MBB) override; + bool isBlockOnlyReachableByFallthrough( + const MachineBasicBlock* MBB) const override; + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + const char *ExtraCode, raw_ostream &O) override; + bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum, + const char *ExtraCode, raw_ostream &O) override; + void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O); + void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O); + void printMemOperandEA(const MachineInstr *MI, int opNum, raw_ostream &O); + void printRegisterList(const MachineInstr *MI, int opNum, raw_ostream &O); + void emitStartOfAsmFile(Module &M) override; + void emitEndOfAsmFile(Module &M) override; + void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHASMPRINTER_H diff --git a/lib/Target/LoongArch/LoongArchCCState.cpp b/lib/Target/LoongArch/LoongArchCCState.cpp new file mode 100644 index 00000000..18996f1e --- /dev/null +++ 
b/lib/Target/LoongArch/LoongArchCCState.cpp @@ -0,0 +1,165 @@ +//===---- LoongArchCCState.cpp - CCState with LoongArch specific extensions ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "LoongArchCCState.h" +#include "LoongArchSubtarget.h" +#include "llvm/IR/Module.h" + +using namespace llvm; + +/// This function returns true if CallSym is a long double emulation routine. +static bool isF128SoftLibCall(const char *CallSym) { + const char *const LibCalls[] = { + "__addtf3", "__divtf3", "__eqtf2", "__extenddftf2", + "__extendsftf2", "__fixtfdi", "__fixtfsi", "__fixtfti", + "__fixunstfdi", "__fixunstfsi", "__fixunstfti", "__floatditf", + "__floatsitf", "__floattitf", "__floatunditf", "__floatunsitf", + "__floatuntitf", "__getf2", "__gttf2", "__letf2", + "__lttf2", "__multf3", "__netf2", "__powitf2", + "__subtf3", "__trunctfdf2", "__trunctfsf2", "__unordtf2", + "ceill", "copysignl", "cosl", "exp2l", + "expl", "floorl", "fmal", "fmaxl", + "fmodl", "log10l", "log2l", "logl", + "nearbyintl", "powl", "rintl", "roundl", + "sinl", "sqrtl", "truncl"}; + + // Check that LibCalls is sorted alphabetically. + auto Comp = [](const char *S1, const char *S2) { return strcmp(S1, S2) < 0; }; + assert(std::is_sorted(std::begin(LibCalls), std::end(LibCalls), Comp)); + return std::binary_search(std::begin(LibCalls), std::end(LibCalls), + CallSym, Comp); +} + +/// This function returns true if Ty is fp128, {f128} or i128 which was +/// originally a fp128. +static bool originalTypeIsF128(const Type *Ty, const char *Func) { + if (Ty->isFP128Ty()) + return true; + + if (Ty->isStructTy() && Ty->getStructNumElements() == 1 && + Ty->getStructElementType(0)->isFP128Ty()) + return true; + + // If the Ty is i128 and the function being called is a long double emulation + // routine, then the original type is f128. + return (Func && Ty->isIntegerTy(128) && isF128SoftLibCall(Func)); +} + +/// Return true if the original type was vXfXX. +static bool originalEVTTypeIsVectorFloat(EVT Ty) { + if (Ty.isVector() && Ty.getVectorElementType().isFloatingPoint()) + return true; + + return false; +} + +/// Return true if the original type was vXfXX / vXfXX. +static bool originalTypeIsVectorFloat(const Type * Ty) { + if (Ty->isVectorTy() && Ty->isFPOrFPVectorTy()) + return true; + + return false; +} + +LoongArchCCState::SpecialCallingConvType +LoongArchCCState::getSpecialCallingConvForCallee(const SDNode *Callee, + const LoongArchSubtarget &Subtarget) { + LoongArchCCState::SpecialCallingConvType SpecialCallingConv = NoSpecialCallingConv; + return SpecialCallingConv; +} + +void LoongArchCCState::PreAnalyzeCallResultForF128( + const SmallVectorImpl &Ins, + const Type *RetTy, const char *Call) { + for (unsigned i = 0; i < Ins.size(); ++i) { + OriginalArgWasF128.push_back( + originalTypeIsF128(RetTy, Call)); + OriginalArgWasFloat.push_back(RetTy->isFloatingPointTy()); + } +} + +/// Identify lowered values that originated from f128 or float arguments and +/// record this for use by RetCC_LoongArchLP64. 
+void LoongArchCCState::PreAnalyzeReturnForF128( + const SmallVectorImpl &Outs) { + const MachineFunction &MF = getMachineFunction(); + for (unsigned i = 0; i < Outs.size(); ++i) { + OriginalArgWasF128.push_back( + originalTypeIsF128(MF.getFunction().getReturnType(), nullptr)); + OriginalArgWasFloat.push_back( + MF.getFunction().getReturnType()->isFloatingPointTy()); + } +} + +/// Identify lower values that originated from vXfXX and record +/// this. +void LoongArchCCState::PreAnalyzeCallResultForVectorFloat( + const SmallVectorImpl &Ins, const Type *RetTy) { + for (unsigned i = 0; i < Ins.size(); ++i) { + OriginalRetWasFloatVector.push_back(originalTypeIsVectorFloat(RetTy)); + } +} + +/// Identify lowered values that originated from vXfXX arguments and record +/// this. +void LoongArchCCState::PreAnalyzeReturnForVectorFloat( + const SmallVectorImpl &Outs) { + for (unsigned i = 0; i < Outs.size(); ++i) { + ISD::OutputArg Out = Outs[i]; + OriginalRetWasFloatVector.push_back( + originalEVTTypeIsVectorFloat(Out.ArgVT)); + } +} + +/// Identify lowered values that originated from f128, float and sret to vXfXX +/// arguments and record this. +void LoongArchCCState::PreAnalyzeCallOperands( + const SmallVectorImpl &Outs, + std::vector &FuncArgs, + const char *Func) { + for (unsigned i = 0; i < Outs.size(); ++i) { + TargetLowering::ArgListEntry FuncArg = FuncArgs[Outs[i].OrigArgIndex]; + + OriginalArgWasF128.push_back(originalTypeIsF128(FuncArg.Ty, Func)); + OriginalArgWasFloat.push_back(FuncArg.Ty->isFloatingPointTy()); + OriginalArgWasFloatVector.push_back(FuncArg.Ty->isVectorTy()); + CallOperandIsFixed.push_back(Outs[i].IsFixed); + } +} + +/// Identify lowered values that originated from f128, float and vXfXX arguments +/// and record this. +void LoongArchCCState::PreAnalyzeFormalArgumentsForF128( + const SmallVectorImpl &Ins) { + const MachineFunction &MF = getMachineFunction(); + for (unsigned i = 0; i < Ins.size(); ++i) { + Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin(); + + // SRet arguments cannot originate from f128 or {f128} returns so we just + // push false. We have to handle this specially since SRet arguments + // aren't mapped to an original argument. + if (Ins[i].Flags.isSRet()) { + OriginalArgWasF128.push_back(false); + OriginalArgWasFloat.push_back(false); + OriginalArgWasFloatVector.push_back(false); + continue; + } + + assert(Ins[i].getOrigArgIndex() < MF.getFunction().arg_size()); + std::advance(FuncArg, Ins[i].getOrigArgIndex()); + + OriginalArgWasF128.push_back( + originalTypeIsF128(FuncArg->getType(), nullptr)); + OriginalArgWasFloat.push_back(FuncArg->getType()->isFloatingPointTy()); + + // The LoongArch vector ABI exhibits a corner case of sorts or quirk; if the + // first argument is actually an SRet pointer to a vector, then the next + // argument slot is $a2. + OriginalArgWasFloatVector.push_back(FuncArg->getType()->isVectorTy()); + } +} diff --git a/lib/Target/LoongArch/LoongArchCCState.h b/lib/Target/LoongArch/LoongArchCCState.h new file mode 100644 index 00000000..56d5b89b --- /dev/null +++ b/lib/Target/LoongArch/LoongArchCCState.h @@ -0,0 +1,165 @@ +//===---- LoongArchCCState.h - CCState with LoongArch specific extensions -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
diff --git a/lib/Target/LoongArch/LoongArchCCState.h b/lib/Target/LoongArch/LoongArchCCState.h
new file mode 100644
index 00000000..56d5b89b
--- /dev/null
+++ b/lib/Target/LoongArch/LoongArchCCState.h
@@ -0,0 +1,165 @@
+//===---- LoongArchCCState.h - CCState with LoongArch specific extensions -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LoongArchCCSTATE_H
+#define LoongArchCCSTATE_H
+
+#include "LoongArchISelLowering.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+
+namespace llvm {
+class SDNode;
+class LoongArchSubtarget;
+
+class LoongArchCCState : public CCState {
+public:
+  enum SpecialCallingConvType { NoSpecialCallingConv };
+
+  /// Determine the SpecialCallingConvType for the given callee.
+  static SpecialCallingConvType
+  getSpecialCallingConvForCallee(const SDNode *Callee,
+                                 const LoongArchSubtarget &Subtarget);
+
+private:
+  /// Identify lowered values that originated from f128 arguments and record
+  /// this for use by RetCC_LoongArchLP64.
+  void PreAnalyzeCallResultForF128(const SmallVectorImpl<ISD::InputArg> &Ins,
+                                   const Type *RetTy, const char *Func);
+
+  /// Identify lowered values that originated from f128 arguments and record
+  /// this for use by RetCC_LoongArchLP64.
+  void PreAnalyzeReturnForF128(const SmallVectorImpl<ISD::OutputArg> &Outs);
+
+  /// Identify lowered values that originated from f128 arguments and record
+  /// this.
+  void
+  PreAnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
+                         std::vector<TargetLowering::ArgListEntry> &FuncArgs,
+                         const char *Func);
+
+  /// Identify lowered values that originated from f128 arguments and record
+  /// this for use by RetCC_LoongArchLP64.
+  void
+  PreAnalyzeFormalArgumentsForF128(const SmallVectorImpl<ISD::InputArg> &Ins);
+
+  void
+  PreAnalyzeCallResultForVectorFloat(const SmallVectorImpl<ISD::InputArg> &Ins,
+                                     const Type *RetTy);
+
+  void PreAnalyzeFormalArgumentsForVectorFloat(
+      const SmallVectorImpl<ISD::InputArg> &Ins);
+
+  void
+  PreAnalyzeReturnForVectorFloat(const SmallVectorImpl<ISD::OutputArg> &Outs);
+
+  /// Records whether the value has been lowered from an f128.
+  SmallVector<bool, 4> OriginalArgWasF128;
+
+  /// Records whether the value has been lowered from float.
+  SmallVector<bool, 4> OriginalArgWasFloat;
+
+  /// Records whether the value has been lowered from a floating point vector.
+  SmallVector<bool, 4> OriginalArgWasFloatVector;
+
+  /// Records whether the return value has been lowered from a floating point
+  /// vector.
+  SmallVector<bool, 4> OriginalRetWasFloatVector;
+
+  /// Records whether the value was a fixed argument.
+  /// See ISD::OutputArg::IsFixed.
+  SmallVector<bool, 4> CallOperandIsFixed;
+
+  // FIXME: This should probably be a fully fledged calling convention.
+  SpecialCallingConvType SpecialCallingConv;
+
+public:
+  LoongArchCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
+                   SmallVectorImpl<CCValAssign> &locs, LLVMContext &C,
+                   SpecialCallingConvType SpecialCC = NoSpecialCallingConv)
+      : CCState(CC, isVarArg, MF, locs, C), SpecialCallingConv(SpecialCC) {}
+
+  void
+  AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
+                      CCAssignFn Fn,
+                      std::vector<TargetLowering::ArgListEntry> &FuncArgs,
+                      const char *Func) {
+    PreAnalyzeCallOperands(Outs, FuncArgs, Func);
+    CCState::AnalyzeCallOperands(Outs, Fn);
+    OriginalArgWasF128.clear();
+    OriginalArgWasFloat.clear();
+    OriginalArgWasFloatVector.clear();
+    CallOperandIsFixed.clear();
+  }
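The comment and the two deleted declarations just below rely on a standard C++ idiom: redeclaring an inherited overload as deleted hides the base version when the call goes through the derived type. A stand-alone sketch of how it behaves (Base/Derived are hypothetical):

#include <cstdio>

struct Base {
  void analyze(int X) { std::printf("base %d\n", X); }
};
struct Derived : Base {
  void analyze(int X, const char *Func) { std::printf("%s %d\n", Func, X); }
  void analyze(int X) = delete; // hide the inherited overload
};

int main() {
  Derived D;
  D.analyze(1, "derived"); // OK: the richer replacement
  // D.analyze(1);         // error: use of deleted function
  static_cast<Base &>(D).analyze(1); // still reachable via the base class
}

The last line is exactly the loophole the comment below warns about: deletion guards direct users of LoongArchCCState, but code holding a plain CCState reference can still call the un-analysed base overloads.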
+  // The AnalyzeCallOperands in the base class is not usable since we must
+  // provide a means of accessing ArgListEntry::IsFixed. Delete them from this
+  // class. This doesn't stop them being used via the base class though.
+  void AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
+                           CCAssignFn Fn) = delete;
+  void AnalyzeCallOperands(const SmallVectorImpl<MVT> &Outs,
+                           SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
+                           CCAssignFn Fn) = delete;
+
+  void AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
+                              CCAssignFn Fn) {
+    PreAnalyzeFormalArgumentsForF128(Ins);
+    CCState::AnalyzeFormalArguments(Ins, Fn);
+    OriginalArgWasFloat.clear();
+    OriginalArgWasF128.clear();
+    OriginalArgWasFloatVector.clear();
+  }
+
+  void AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
+                         CCAssignFn Fn, const Type *RetTy,
+                         const char *Func) {
+    PreAnalyzeCallResultForF128(Ins, RetTy, Func);
+    PreAnalyzeCallResultForVectorFloat(Ins, RetTy);
+    CCState::AnalyzeCallResult(Ins, Fn);
+    OriginalArgWasFloat.clear();
+    OriginalArgWasF128.clear();
+    OriginalArgWasFloatVector.clear();
+  }
+
+  void AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
+                     CCAssignFn Fn) {
+    PreAnalyzeReturnForF128(Outs);
+    PreAnalyzeReturnForVectorFloat(Outs);
+    CCState::AnalyzeReturn(Outs, Fn);
+    OriginalArgWasFloat.clear();
+    OriginalArgWasF128.clear();
+    OriginalArgWasFloatVector.clear();
+  }
+
+  bool CheckReturn(const SmallVectorImpl<ISD::OutputArg> &ArgsFlags,
+                   CCAssignFn Fn) {
+    PreAnalyzeReturnForF128(ArgsFlags);
+    PreAnalyzeReturnForVectorFloat(ArgsFlags);
+    bool Return = CCState::CheckReturn(ArgsFlags, Fn);
+    OriginalArgWasFloat.clear();
+    OriginalArgWasF128.clear();
+    OriginalArgWasFloatVector.clear();
+    return Return;
+  }
+
+  bool WasOriginalArgF128(unsigned ValNo) { return OriginalArgWasF128[ValNo]; }
+  bool WasOriginalArgFloat(unsigned ValNo) {
+    return OriginalArgWasFloat[ValNo];
+  }
+  bool WasOriginalArgVectorFloat(unsigned ValNo) const {
+    return OriginalArgWasFloatVector[ValNo];
+  }
+  bool WasOriginalRetVectorFloat(unsigned ValNo) const {
+    return OriginalRetWasFloatVector[ValNo];
+  }
+  bool IsCallOperandFixed(unsigned ValNo) { return CallOperandIsFixed[ValNo]; }
+  SpecialCallingConvType getSpecialCallingConv() { return SpecialCallingConv; }
+};
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/LoongArch/LoongArchCallingConv.td b/lib/Target/LoongArch/LoongArchCallingConv.td
new file mode 100644
index 00000000..02bdb323
--- /dev/null
+++ b/lib/Target/LoongArch/LoongArchCallingConv.td
@@ -0,0 +1,292 @@
+//===-- LoongArchCallingConv.td - Calling Conventions for LoongArch --*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for the LoongArch architecture.
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A, string Invert = "">
+    : CCIf<!strconcat(Invert,
+                      "static_cast<const LoongArchSubtarget&>"
+                      "(State.getMachineFunction().getSubtarget()).",
+                      F),
+           A>;
+
+// The inverse of CCIfSubtarget.
+class CCIfSubtargetNot<string F, CCAction A> : CCIfSubtarget<F, A, "!">;
+
+/// Match if the original argument (before lowering) was a float.
+/// For example, this is true for i32's that were lowered from soft-float.
+class CCIfOrigArgWasNotFloat<CCAction A>
+    : CCIf<"!static_cast<LoongArchCCState *>(&State)->WasOriginalArgFloat(ValNo)",
+           A>;
+
+/// Match if the original argument (before lowering) was a 128-bit float (i.e.
+/// long double).
+class CCIfOrigArgWasF128<CCAction A>
+    : CCIf<"static_cast<LoongArchCCState *>(&State)->WasOriginalArgF128(ValNo)",
+           A>;
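These CCIf predicate strings are pasted verbatim into the C++ that TableGen generates, which is why they cast State down to LoongArchCCState. Roughly, a hedged sketch of the shape of a tablegen-erated function guarded by such a predicate (not the verbatim generated code; the register choice is illustrative):

// A CCAssignFn returns false once the value has a location and true if no
// rule matched, so the caller can fall through to the next convention.
static bool RetCC_Sketch(unsigned ValNo, MVT ValVT, MVT LocVT,
                         CCValAssign::LocInfo LocInfo,
                         ISD::ArgFlagsTy ArgFlags, CCState &State) {
  if (static_cast<LoongArchCCState *>(&State)->WasOriginalArgF128(ValNo)) {
    if (unsigned Reg = State.AllocateReg(LoongArch::A0_64)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false; // assigned
    }
  }
  return true; // CC didn't match
}

This is also why the wrapper methods in LoongArchCCState.h clear the flag vectors only after the CCAssignFn has run: the generated code reads them mid-analysis.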
+/// Match if this specific argument is a vararg.
+/// This is slightly different from CCIfIsVarArg, which matches if any
+/// argument is a vararg.
+class CCIfArgIsVarArg<CCAction A>
+    : CCIf<"!static_cast<LoongArchCCState *>(&State)->IsCallOperandFixed(ValNo)",
+           A>;
+
+/// Match if the return was not a floating point vector.
+class CCIfOrigArgWasNotVectorFloat<CCAction A>
+    : CCIf<"!static_cast<LoongArchCCState *>(&State)"
+           "->WasOriginalRetVectorFloat(ValNo)",
+           A>;
+
+/// Match if the special calling conv is the specified value.
+class CCIfSpecialCallingConv<string CC, CCAction A>
+    : CCIf<"static_cast<LoongArchCCState *>(&State)->getSpecialCallingConv() == "
+           "LoongArchCCState::" # CC,
+           A>;
+
+// For soft-float, f128 values are returned in the GPR pair A0_64, A1_64.
+def RetCC_F128SoftFloat : CallingConv<[
+  CCAssignToReg<[A0_64, A1_64]>
+]>;
+
+// For hard-float, f128 values are returned as a pair of f64's rather than a
+// pair of i64's.
+def RetCC_F128HardFloat : CallingConv<[
+  //CCBitConvertToType<f64>,
+
+  // Contrary to the ABI documentation, a struct containing a long double is
+  // returned in $f0 and $f1 instead of the usual $f0 and $f2. This is to
+  // match the de facto ABI as implemented by GCC.
+  CCIfInReg<CCAssignToReg<[F0_64, F1_64]>>,
+
+  CCAssignToReg<[A0_64, A1_64]>
+]>;
+
+// Handle f128 specially since we can't identify the original type during the
+// tablegen-erated code.
+def RetCC_F128 : CallingConv<[
+  CCIfSubtarget<"useSoftFloat()",
+      CCIfType<[i64], CCDelegateTo<RetCC_F128SoftFloat>>>,
+  CCIfSubtargetNot<"useSoftFloat()",
+      CCIfType<[i64], CCDelegateTo<RetCC_F128HardFloat>>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// LoongArch ILP32 Calling Convention
+//===----------------------------------------------------------------------===//
+
+def CC_LoongArchILP32 : CallingConv<[
+  // Promote i1/i8/i16 arguments to i32.
+  CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+
+  // i32 and f32 values get stored in stack slots that are 4 bytes in
+  // size and 4-byte aligned.
+  CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+
+  // f64 values get stored in stack slots that are 8 bytes in
+  // size and 8-byte aligned.
+  CCIfType<[f64], CCAssignToStack<8, 8>>
+]>;
+
+// Only the return rules are defined here for the 32-bit ABI. The rules for
+// argument passing are defined in LoongArchISelLowering.cpp.
+def RetCC_LoongArchILP32 : CallingConv<[
+  // Promote i1/i8/i16 return values to i32.
+  CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+
+  // i32 values are returned in registers A0, A1, unless the original return
+  // type was a vector of floats.
+  CCIfOrigArgWasNotVectorFloat<CCIfType<[i32], CCAssignToReg<[A0, A1]>>>,
+
+  // f32 values are returned in registers F0, F1.
+  CCIfType<[f32], CCAssignToReg<[F0, F1]>>,
+
+  // f64 return values are returned in F0_64 and F1_64 in hasBasicD mode.
+  CCIfType<[f64], CCIfSubtarget<"hasBasicD()", CCAssignToReg<[F0_64, F1_64]>>>
+]>;
+
+def CC_LoongArchILP32_FP32 : CustomCallingConv;
+def CC_LoongArchILP32_FP64 : CustomCallingConv;
+def CC_LoongArch_F128 : CustomCallingConv;
+
+def CC_LoongArchILP32_FP : CallingConv<[
+  CCIfSubtargetNot<"hasBasicD()", CCDelegateTo<CC_LoongArchILP32_FP32>>,
+  CCIfSubtarget<"hasBasicD()", CCDelegateTo<CC_LoongArchILP32_FP64>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// LoongArch LP64 Calling Convention
+//===----------------------------------------------------------------------===//
+
+def CC_LoongArchLP64_SoftFloat : CallingConv<[
+  CCAssignToReg<[A0, A1, A2, A3,
+                 A4, A5, A6, A7]>,
+  CCAssignToStack<4, 8>
+]>;
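For orientation, a hedged sketch of how the C++ lowering side would drive these conventions together with LoongArchCCState (the parameter names are assumed context from LoongArchISelLowering.cpp, and CC_LoongArch is the dispatch convention defined at the end of this file):

// Sketch only, not the in-tree lowering code.
static unsigned analyzeOutgoingArgs(
    CallingConv::ID CallConv, bool IsVarArg, MachineFunction &MF,
    LLVMContext &Ctx, const SmallVectorImpl<ISD::OutputArg> &Outs,
    std::vector<TargetLowering::ArgListEntry> &FuncArgs,
    const char *CalleeSym) {
  SmallVector<CCValAssign, 16> ArgLocs;
  LoongArchCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
  // The pre-analysed flags are live only while CC_LoongArch runs; the
  // wrapper clears them before returning.
  CCInfo.AnalyzeCallOperands(Outs, CC_LoongArch, FuncArgs, CalleeSym);
  return CCInfo.getNextStackOffset(); // bytes of outgoing stack arguments
}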
+ CCIfSubtarget<"useSoftFloat()", CCIfType<[i32], CCDelegateTo>>, + + // Integer arguments are passed in integer registers. + //CCIfType<[i64], CCAssignToRegWithShadow<[A0_64, A1_64, A2_64, A3_64, + // A4_64, A5_64, A6_64, A7_64], + // [F0_64, F1_64, F2_64, F3_64, + // F4_64, F5_64, F6_64, F7_64]>>, + CCIfType<[i64], CCAssignToReg<[A0_64, A1_64, A2_64, A3_64, + A4_64, A5_64, A6_64, A7_64]>>, + + // f32 arguments are passed in single precision FP registers. + CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3, + F4, F5, F6, F7]>>, + + // f64 arguments are passed in double precision FP registers. + CCIfType<[f64], CCAssignToReg<[F0_64, F1_64, F2_64, F3_64, + F4_64, F5_64, F6_64, F7_64]>>, + + // others f32 arguments are passed in single precision FP registers. + CCIfType<[f32], CCAssignToReg<[A0, A1, A2, A3, A4, A5, A6, A7]>>, + + // others f64 arguments are passed in double precision FP registers. + CCIfType<[f64], CCAssignToReg<[A0_64, A1_64, A2_64, A3_64, + A4_64, A5_64, A6_64, A7_64]>>, + + CCIfSubtarget<"hasLSX()", + CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + CCAssignToRegWithShadow<[VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7], + [A0_64, A1_64, A2_64, A3_64, + A4_64, A5_64, A6_64, A7_64]>>>, + CCIfSubtarget<"hasLASX()", + CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], + CCAssignToRegWithShadow<[XR0, XR1, XR2, XR3, XR4, XR5, XR6, XR7], + [A0_64, A1_64, A2_64, A3_64, + A4_64, A5_64, A6_64, A7_64]>>>, + + // All stack parameter slots become 64-bit doublewords and are 8-byte aligned. + CCIfType<[f32], CCAssignToStack<4, 8>>, + CCIfType<[i64, f64], CCAssignToStack<8, 8>>, + CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + CCAssignToStack<16, 16>>, + CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], + CCAssignToStack<32, 32>> +]>; + +// LP64 variable arguments. +// All arguments are passed in integer registers. +def CC_LoongArchLP64_VarArg : CallingConv<[ + // All integers are promoted to 64-bit. + CCIfType<[i8, i16, i32], CCPromoteToType>, + + CCIfType<[f32], CCAssignToReg<[A0, A1, A2, A3, A4, A5, A6, A7]>>, + + CCIfType<[i64], CCIfOrigArgWasF128>>, + + CCIfType<[i64, f64], CCAssignToReg<[A0_64, A1_64, A2_64, A3_64, + A4_64, A5_64, A6_64, A7_64]>>, + + // All stack parameter slots become 64-bit doublewords and are 8-byte aligned. + CCIfType<[f32], CCAssignToStack<4, 8>>, + CCIfType<[i64, f64], CCAssignToStack<8, 8>> +]>; + +def RetCC_LoongArchLP64 : CallingConv<[ + // f128 needs to be handled similarly to f32 and f64. However, f128 is not + // legal and is lowered to i128 which is further lowered to a pair of i64's. + // This presents us with a problem for the calling convention since hard-float + // still needs to pass them in FPU registers, and soft-float needs to use $v0, + // and $a0 instead of the usual $v0, and $v1. We therefore resort to a + // pre-analyze (see PreAnalyzeReturnForF128()) step to pass information on + // whether the result was originally an f128 into the tablegen-erated code. + // + // f128 should only occur for the 64-bit ABI where long double is 128-bit. 
+def RetCC_LoongArchLP64 : CallingConv<[
+  // f128 needs to be handled similarly to f32 and f64. However, f128 is not
+  // legal and is lowered to i128, which is further lowered to a pair of
+  // i64's. This presents us with a problem for the calling convention since
+  // hard-float still needs to return them in FPU registers, while soft-float
+  // returns them in the GPR pair A0_64 and A1_64. We therefore resort to a
+  // pre-analyze (see PreAnalyzeReturnForF128()) step to pass information on
+  // whether the result was originally an f128 into the tablegen-erated code.
+  //
+  // f128 should only occur for the 64-bit ABI where long double is 128-bit.
+  CCIfType<[i64], CCIfOrigArgWasF128<CCDelegateTo<RetCC_F128>>>,
+
+  CCIfType<[i8, i16, i32, i64], CCIfInReg<CCPromoteToType<i64>>>,
+
+  // i64 values are returned in registers A0_64, A1_64.
+  CCIfType<[i64], CCAssignToReg<[A0_64, A1_64]>>,
+
+  CCIfSubtarget<"hasLSX()",
+      CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+          CCAssignToReg<[VR0]>>>,
+
+  CCIfSubtarget<"hasLASX()",
+      CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+          CCAssignToReg<[XR0]>>>,
+
+  CCIfSubtarget<"hasLASX()",
+      CCIfType<[i64], CCAssignToReg<[A0_64, A1_64]>>>,
+
+  // f32 values are returned in registers F0, F1.
+  CCIfType<[f32], CCAssignToReg<[F0, F1]>>,
+
+  // f64 values are returned in registers F0_64, F1_64.
+  CCIfType<[f64], CCAssignToReg<[F0_64, F1_64]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// LoongArch Calling Convention Dispatch
+//===----------------------------------------------------------------------===//
+
+def RetCC_LoongArch : CallingConv<[
+  CCIfSubtarget<"isABI_LP64()", CCDelegateTo<RetCC_LoongArchLP64>>,
+  CCDelegateTo<RetCC_LoongArchILP32>
+]>;
+
+def CC_LoongArch_ByVal : CallingConv<[
+  CCIfSubtarget<"isABI_ILP32()", CCIfByVal<CCPassByVal<4, 4>>>,
+  CCIfByVal<CCPassByVal<8, 8>>
+]>;
+
+def CC_LoongArch_FixedArg : CallingConv<[
+  CCIfByVal<CCDelegateTo<CC_LoongArch_ByVal>>,
+
+  // f128 needs to be handled similarly to f32 and f64 on hard-float. However,
+  // f128 is not legal and is lowered to i128, which is further lowered to a
+  // pair of i64's.
+  // This presents us with a problem for the calling convention since
+  // hard-float still needs to pass them in FPU registers. We therefore resort
+  // to a pre-analyze (see PreAnalyzeFormalArgumentsForF128()) step to pass
+  // information on whether the argument was originally an f128 into the
+  // tablegen-erated code.
+  //
+  // f128 should only occur for the 64-bit ABI where long double is 128-bit.
+  CCIfType<[i64],
+      CCIfSubtargetNot<"useSoftFloat()",
+          CCIfOrigArgWasF128<CCDelegateTo<CC_LoongArch_F128>>>>,
+
+  CCIfSubtarget<"isABI_ILP32()", CCDelegateTo<CC_LoongArchILP32_FP>>,
+  CCDelegateTo<CC_LoongArchLP64>
+]>;
+
+def CC_LoongArch_VarArg : CallingConv<[
+  CCIfByVal<CCDelegateTo<CC_LoongArch_ByVal>>,
+
+  CCIfSubtarget<"isABI_ILP32()", CCDelegateTo<CC_LoongArchILP32_FP>>,
+  CCDelegateTo<CC_LoongArchLP64_VarArg>
+]>;
+
+def CC_LoongArch : CallingConv<[
+  CCIfVarArg<CCIfArgIsVarArg<CCDelegateTo<CC_LoongArch_VarArg>>>,
+  CCDelegateTo<CC_LoongArch_FixedArg>
+]>;
+
+//===----------------------------------------------------------------------===//
+// Callee-saved register lists.
+//===----------------------------------------------------------------------===//
+
+def CSR_SingleFloatOnly : CalleeSavedRegs<(add (sequence "F%u", 31, 24), RA, FP,
+                                               (sequence "S%u", 8, 0))>;
+
+def CSR_ILP32 : CalleeSavedRegs<(add (sequence "F%u_64", 31, 24), RA, FP,
+                                     (sequence "S%u", 8, 0))>;
+
+def CSR_LP64 : CalleeSavedRegs<(add (sequence "F%u_64", 31, 24), RA_64, FP_64,
+                                    (sequence "S%u_64", 8, 0))>;
diff --git a/lib/Target/LoongArch/LoongArchExpandPseudo.cpp b/lib/Target/LoongArch/LoongArchExpandPseudo.cpp
new file mode 100644
index 00000000..c192f7fc
--- /dev/null
+++ b/lib/Target/LoongArch/LoongArchExpandPseudo.cpp
@@ -0,0 +1,2438 @@
+//===-- LoongArchExpandPseudo.cpp - Expand pseudo instructions -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that expands pseudo instructions into target
+// instructions to allow proper scheduling, if-conversion, and other late
+// optimizations. This pass should be run after register allocation but before
+// the post-regalloc scheduling pass.
+// +// This is currently only used for expanding atomic pseudos after register +// allocation. We do this to avoid the fast register allocator introducing +// spills between ll and sc. These stores cause some LoongArch implementations to +// abort the atomic RMW sequence. +// +//===----------------------------------------------------------------------===// + +#include "LoongArch.h" +#include "LoongArchInstrInfo.h" +#include "LoongArchSubtarget.h" +#include "MCTargetDesc/LoongArchMCTargetDesc.h" +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" + +using namespace llvm; + +#define DEBUG_TYPE "loongarch-pseudo" + +namespace { + class LoongArchExpandPseudo : public MachineFunctionPass { + public: + static char ID; + LoongArchExpandPseudo() : MachineFunctionPass(ID) {} + + const LoongArchInstrInfo *TII; + const LoongArchSubtarget *STI; + + bool runOnMachineFunction(MachineFunction &Fn) override; + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + + StringRef getPassName() const override { + return "LoongArch pseudo instruction expansion pass"; + } + + private: + bool expandAtomicCmpSwap(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool expandAtomicCmpSwapSubword(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + + bool expandAtomicBinOp(MachineBasicBlock &BB, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI, unsigned Size); + bool expandXINSERT_BOp(MachineBasicBlock &BB, MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI); + bool expandINSERT_HOp(MachineBasicBlock &BB, MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI); + bool expandXINSERT_FWOp(MachineBasicBlock &BB, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI); + bool expandAtomicBinOpSubword(MachineBasicBlock &BB, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI); + + bool expandPseudoCall(MachineBasicBlock &BB, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI); + bool expandPseudoTailCall(MachineBasicBlock &BB, + MachineBasicBlock::iterator I); + + bool expandPseudoTEQ(MachineBasicBlock &BB, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI); + + bool expandLoadAddr(MachineBasicBlock &BB, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI); + + bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NMBB); + bool expandMBB(MachineBasicBlock &MBB); + }; + char LoongArchExpandPseudo::ID = 0; +} + +static bool hasDbar(MachineBasicBlock *MBB) { + + for (MachineBasicBlock::iterator MBBb = MBB->begin(), MBBe = MBB->end(); + MBBb != MBBe; ++MBBb) { + if (MBBb->getOpcode() == LoongArch::DBAR) + return true; + if (MBBb->mayLoad() || MBBb->mayStore()) + break; + } + return false; +} + +bool LoongArchExpandPseudo::expandAtomicCmpSwapSubword( + MachineBasicBlock &BB, MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI) { + + MachineFunction *MF = BB.getParent(); + + DebugLoc DL = I->getDebugLoc(); + unsigned LL, SC; + unsigned ZERO = LoongArch::ZERO; + unsigned BNE = LoongArch::BNE32; + unsigned BEQ = LoongArch::BEQ32; + unsigned SEOp = + I->getOpcode() == LoongArch::ATOMIC_CMP_SWAP_I8_POSTRA ? 
LoongArch::EXT_W_B32 : LoongArch::EXT_W_H32; + + LL = LoongArch::LL_W; + SC = LoongArch::SC_W; + + unsigned Dest = I->getOperand(0).getReg(); + unsigned Ptr = I->getOperand(1).getReg(); + unsigned Mask = I->getOperand(2).getReg(); + unsigned ShiftCmpVal = I->getOperand(3).getReg(); + unsigned Mask2 = I->getOperand(4).getReg(); + unsigned ShiftNewVal = I->getOperand(5).getReg(); + unsigned ShiftAmnt = I->getOperand(6).getReg(); + unsigned Scratch = I->getOperand(7).getReg(); + unsigned Scratch2 = I->getOperand(8).getReg(); + + // insert new blocks after the current block + const BasicBlock *LLVM_BB = BB.getBasicBlock(); + MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineFunction::iterator It = ++BB.getIterator(); + MF->insert(It, loop1MBB); + MF->insert(It, loop2MBB); + MF->insert(It, sinkMBB); + MF->insert(It, exitMBB); + + // Transfer the remainder of BB and its successor edges to exitMBB. + exitMBB->splice(exitMBB->begin(), &BB, + std::next(MachineBasicBlock::iterator(I)), BB.end()); + exitMBB->transferSuccessorsAndUpdatePHIs(&BB); + + // thisMBB: + // ... + // fallthrough --> loop1MBB + BB.addSuccessor(loop1MBB, BranchProbability::getOne()); + loop1MBB->addSuccessor(sinkMBB); + loop1MBB->addSuccessor(loop2MBB); + loop1MBB->normalizeSuccProbs(); + loop2MBB->addSuccessor(loop1MBB); + loop2MBB->addSuccessor(sinkMBB); + loop2MBB->normalizeSuccProbs(); + sinkMBB->addSuccessor(exitMBB, BranchProbability::getOne()); + + // loop1MBB: + // ll dest, 0(ptr) + // and Mask', dest, Mask + // bne Mask', ShiftCmpVal, exitMBB + BuildMI(loop1MBB, DL, TII->get(LL), Scratch).addReg(Ptr).addImm(0); + BuildMI(loop1MBB, DL, TII->get(LoongArch::AND32), Scratch2) + .addReg(Scratch) + .addReg(Mask); + BuildMI(loop1MBB, DL, TII->get(BNE)) + .addReg(Scratch2).addReg(ShiftCmpVal).addMBB(sinkMBB); + + // loop2MBB: + // and dest, dest, mask2 + // or dest, dest, ShiftNewVal + // sc dest, dest, 0(ptr) + // beq dest, $0, loop1MBB + BuildMI(loop2MBB, DL, TII->get(LoongArch::AND32), Scratch) + .addReg(Scratch, RegState::Kill) + .addReg(Mask2); + BuildMI(loop2MBB, DL, TII->get(LoongArch::OR32), Scratch) + .addReg(Scratch, RegState::Kill) + .addReg(ShiftNewVal); + BuildMI(loop2MBB, DL, TII->get(SC), Scratch) + .addReg(Scratch, RegState::Kill) + .addReg(Ptr) + .addImm(0); + BuildMI(loop2MBB, DL, TII->get(BEQ)) + .addReg(Scratch, RegState::Kill) + .addReg(ZERO) + .addMBB(loop1MBB); + + // sinkMBB: + // srl srlres, Mask', shiftamt + // sign_extend dest,srlres + BuildMI(sinkMBB, DL, TII->get(LoongArch::SRL_W), Dest) + .addReg(Scratch2) + .addReg(ShiftAmnt); + + BuildMI(sinkMBB, DL, TII->get(SEOp), Dest).addReg(Dest); + + if (!hasDbar(sinkMBB)) { + MachineBasicBlock::iterator Pos = sinkMBB->begin(); + BuildMI(*sinkMBB, Pos, DL, TII->get(LoongArch::DBAR)).addImm(DBAR_HINT); + } + + LivePhysRegs LiveRegs; + computeAndAddLiveIns(LiveRegs, *loop1MBB); + computeAndAddLiveIns(LiveRegs, *loop2MBB); + computeAndAddLiveIns(LiveRegs, *sinkMBB); + computeAndAddLiveIns(LiveRegs, *exitMBB); + + NMBBI = BB.end(); + I->eraseFromParent(); + return true; +} + +bool LoongArchExpandPseudo::expandAtomicCmpSwap(MachineBasicBlock &BB, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI) { + + const unsigned Size = + I->getOpcode() == LoongArch::ATOMIC_CMP_SWAP_I32_POSTRA ? 
4 : 8; + MachineFunction *MF = BB.getParent(); + + DebugLoc DL = I->getDebugLoc(); + + unsigned LL, SC, ZERO, BNE, BEQ, MOVE; + + if (Size == 4) { + LL = LoongArch::LL_W; + SC = LoongArch::SC_W; + BNE = LoongArch::BNE32; + BEQ = LoongArch::BEQ32; + + ZERO = LoongArch::ZERO; + MOVE = LoongArch::OR32; + } else { + LL = LoongArch::LL_D; + SC = LoongArch::SC_D; + ZERO = LoongArch::ZERO_64; + BNE = LoongArch::BNE; + BEQ = LoongArch::BEQ; + MOVE = LoongArch::OR; + } + + unsigned Dest = I->getOperand(0).getReg(); + unsigned Ptr = I->getOperand(1).getReg(); + unsigned OldVal = I->getOperand(2).getReg(); + unsigned NewVal = I->getOperand(3).getReg(); + unsigned Scratch = I->getOperand(4).getReg(); + + // insert new blocks after the current block + const BasicBlock *LLVM_BB = BB.getBasicBlock(); + MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineFunction::iterator It = ++BB.getIterator(); + MF->insert(It, loop1MBB); + MF->insert(It, loop2MBB); + MF->insert(It, exitMBB); + + // Transfer the remainder of BB and its successor edges to exitMBB. + exitMBB->splice(exitMBB->begin(), &BB, + std::next(MachineBasicBlock::iterator(I)), BB.end()); + exitMBB->transferSuccessorsAndUpdatePHIs(&BB); + + // thisMBB: + // ... + // fallthrough --> loop1MBB + BB.addSuccessor(loop1MBB, BranchProbability::getOne()); + loop1MBB->addSuccessor(exitMBB); + loop1MBB->addSuccessor(loop2MBB); + loop1MBB->normalizeSuccProbs(); + loop2MBB->addSuccessor(loop1MBB); + loop2MBB->addSuccessor(exitMBB); + loop2MBB->normalizeSuccProbs(); + + // loop1MBB: + // ll dest, 0(ptr) + // bne dest, oldval, exitMBB + BuildMI(loop1MBB, DL, TII->get(LL), Dest).addReg(Ptr).addImm(0); + BuildMI(loop1MBB, DL, TII->get(BNE)) + .addReg(Dest, RegState::Kill).addReg(OldVal).addMBB(exitMBB); + + // loop2MBB: + // move scratch, NewVal + // sc Scratch, Scratch, 0(ptr) + // beq Scratch, $0, loop1MBB + BuildMI(loop2MBB, DL, TII->get(MOVE), Scratch).addReg(NewVal).addReg(ZERO); + BuildMI(loop2MBB, DL, TII->get(SC), Scratch) + .addReg(Scratch).addReg(Ptr).addImm(0); + BuildMI(loop2MBB, DL, TII->get(BEQ)) + .addReg(Scratch, RegState::Kill).addReg(ZERO).addMBB(loop1MBB); + + if (!hasDbar(exitMBB)) { + MachineBasicBlock::iterator Pos = exitMBB->begin(); + BuildMI(*exitMBB, Pos, DL, TII->get(LoongArch::DBAR)).addImm(DBAR_HINT); + } + + LivePhysRegs LiveRegs; + computeAndAddLiveIns(LiveRegs, *loop1MBB); + computeAndAddLiveIns(LiveRegs, *loop2MBB); + computeAndAddLiveIns(LiveRegs, *exitMBB); + + NMBBI = BB.end(); + I->eraseFromParent(); + return true; +} + +bool LoongArchExpandPseudo::expandXINSERT_FWOp( + MachineBasicBlock &BB, MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI) { + + MachineFunction *MF = BB.getParent(); + + DebugLoc DL = I->getDebugLoc(); + + unsigned isGP64 = 0; + switch (I->getOpcode()) { + case LoongArch::XINSERT_FW_VIDX64_PSEUDO_POSTRA: + isGP64 = 1; + break; + case LoongArch::XINSERT_FW_VIDX_PSEUDO_POSTRA: + break; + default: + llvm_unreachable("Unknown subword vector pseudo for expansion!"); + } + + unsigned Dest = I->getOperand(0).getReg(); + unsigned SrcVecReg = I->getOperand(1).getReg(); + unsigned LaneReg = I->getOperand(2).getReg(); + unsigned SrcValReg = I->getOperand(3).getReg(); + + unsigned Dsttmp = I->getOperand(4).getReg(); + unsigned RI = I->getOperand(5).getReg(); + unsigned RJ = I->getOperand(6).getReg(); + Dsttmp = SrcVecReg; + + const BasicBlock 
*LLVM_BB = BB.getBasicBlock(); + MachineBasicBlock *blocks[11]; + MachineFunction::iterator It = ++BB.getIterator(); + for (int i = 0; i < 11; i++) { + blocks[i] = MF->CreateMachineBasicBlock(LLVM_BB); + MF->insert(It, blocks[i]); + } + + MachineBasicBlock *mainMBB = blocks[0]; + MachineBasicBlock *FirstMBB = blocks[1]; + MachineBasicBlock *sinkMBB = blocks[9]; + MachineBasicBlock *exitMBB = blocks[10]; + + exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end()); + exitMBB->transferSuccessorsAndUpdatePHIs(&BB); + + BB.addSuccessor(mainMBB, BranchProbability::getOne()); + for (int i = 1; i < 9; i++) { + mainMBB->addSuccessor(blocks[i]); + blocks[i]->addSuccessor(sinkMBB); + } + + unsigned ADDI, BLT, ZERO; + ADDI = isGP64 ? LoongArch::ADDI_D : LoongArch::ADDI_W; + BLT = isGP64 ? LoongArch::BLT : LoongArch::BLT32; + ZERO = isGP64 ? LoongArch::ZERO_64 : LoongArch::ZERO; + + for (int i = 1; i < 8; i++) { + BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(i); + BuildMI(mainMBB, DL, TII->get(BLT)) + .addReg(LaneReg) + .addReg(RI) + .addMBB(blocks[i + 1]); + } + + BuildMI(mainMBB, DL, TII->get(LoongArch::B32)).addMBB(FirstMBB); + + BuildMI(FirstMBB, DL, TII->get(LoongArch::XVINSGR2VR_W), Dsttmp) + .addReg(SrcVecReg) + .addReg(RJ) + .addImm(7); + BuildMI(FirstMBB, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + for (int i = 0; i < 7; i++) { + BuildMI(blocks[i + 2], DL, TII->get(LoongArch::XVINSGR2VR_W), Dsttmp) + .addReg(SrcVecReg) + .addReg(RJ) + .addImm(i); + BuildMI(blocks[i + 2], DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + } + + sinkMBB->addSuccessor(exitMBB, BranchProbability::getOne()); + BuildMI(sinkMBB, DL, TII->get(LoongArch::XVORI_B), Dest) + .addReg(Dsttmp) + .addImm(0); + + LivePhysRegs LiveRegs; + for (int i = 0; i < 11; i++) { + computeAndAddLiveIns(LiveRegs, *blocks[i]); + } + + NMBBI = BB.end(); + I->eraseFromParent(); + + return true; +} + +bool LoongArchExpandPseudo::expandINSERT_HOp( + MachineBasicBlock &BB, MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI) { + + MachineFunction *MF = BB.getParent(); + + DebugLoc DL = I->getDebugLoc(); + + unsigned isGP64 = 0; + switch (I->getOpcode()) { + case LoongArch::INSERT_H_VIDX64_PSEUDO_POSTRA: + isGP64 = 1; + break; + default: + llvm_unreachable("Unknown subword vector pseudo for expansion!"); + } + + unsigned Dest = I->getOperand(0).getReg(); + unsigned SrcVecReg = I->getOperand(1).getReg(); + unsigned LaneReg = I->getOperand(2).getReg(); + unsigned SrcValReg = I->getOperand(3).getReg(); + + unsigned Dsttmp = I->getOperand(4).getReg(); + unsigned RI = I->getOperand(5).getReg(); + Dsttmp = SrcVecReg; + + const BasicBlock *LLVM_BB = BB.getBasicBlock(); + MachineBasicBlock *blocks[11]; + MachineFunction::iterator It = ++BB.getIterator(); + for (int i = 0; i < 11; i++) { + blocks[i] = MF->CreateMachineBasicBlock(LLVM_BB); + MF->insert(It, blocks[i]); + } + + MachineBasicBlock *mainMBB = blocks[0]; + MachineBasicBlock *FirstMBB = blocks[1]; + MachineBasicBlock *sinkMBB = blocks[9]; + MachineBasicBlock *exitMBB = blocks[10]; + + exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end()); + exitMBB->transferSuccessorsAndUpdatePHIs(&BB); + + BB.addSuccessor(mainMBB, BranchProbability::getOne()); + for (int i = 1; i < 9; i++) { + mainMBB->addSuccessor(blocks[i]); + blocks[i]->addSuccessor(sinkMBB); + } + + unsigned ADDI, BLT, ZERO; + ADDI = isGP64 ? LoongArch::ADDI_D : LoongArch::ADDI_W; + BLT = isGP64 ? LoongArch::BLT : LoongArch::BLT32; + ZERO = isGP64 ? 
LoongArch::ZERO_64 : LoongArch::ZERO; + + for (int i = 1; i < 8; i++) { + BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(i); + BuildMI(mainMBB, DL, TII->get(BLT)) + .addReg(LaneReg) + .addReg(RI) + .addMBB(blocks[i + 1]); + } + + BuildMI(mainMBB, DL, TII->get(LoongArch::B32)).addMBB(FirstMBB); + + BuildMI(FirstMBB, DL, TII->get(LoongArch::VINSGR2VR_H), Dsttmp) + .addReg(SrcVecReg) + .addReg(SrcValReg) + .addImm(7); + BuildMI(FirstMBB, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + for (int i = 0; i < 7; i++) { + BuildMI(blocks[i + 2], DL, TII->get(LoongArch::VINSGR2VR_H), Dsttmp) + .addReg(SrcVecReg) + .addReg(SrcValReg) + .addImm(i); + BuildMI(blocks[i + 2], DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + } + + sinkMBB->addSuccessor(exitMBB, BranchProbability::getOne()); + BuildMI(sinkMBB, DL, TII->get(LoongArch::VORI_B), Dest) + .addReg(Dsttmp) + .addImm(0); + + LivePhysRegs LiveRegs; + for (int i = 0; i < 11; i++) { + computeAndAddLiveIns(LiveRegs, *blocks[i]); + } + + NMBBI = BB.end(); + I->eraseFromParent(); + + return true; +} + +bool LoongArchExpandPseudo::expandXINSERT_BOp( + MachineBasicBlock &BB, MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI) { + + MachineFunction *MF = BB.getParent(); + + DebugLoc DL = I->getDebugLoc(); + + unsigned isGP64 = 0; + switch (I->getOpcode()) { + case LoongArch::XINSERT_B_VIDX64_PSEUDO_POSTRA: + isGP64 = 1; + break; + case LoongArch::XINSERT_B_VIDX_PSEUDO_POSTRA: + break; + default: + llvm_unreachable("Unknown subword vector pseudo for expansion!"); + } + + unsigned Dest = I->getOperand(0).getReg(); + unsigned SrcVecReg = I->getOperand(1).getReg(); + unsigned LaneReg = I->getOperand(2).getReg(); + unsigned SrcValReg = I->getOperand(3).getReg(); + + unsigned R4r = I->getOperand(5).getReg(); + unsigned Rib = I->getOperand(6).getReg(); + unsigned Ris = I->getOperand(7).getReg(); + unsigned R7b1 = I->getOperand(8).getReg(); + unsigned R7b2 = I->getOperand(9).getReg(); + unsigned R7b3 = I->getOperand(10).getReg(); + unsigned R7r80_3 = I->getOperand(11).getReg(); + unsigned R7r80l_3 = I->getOperand(12).getReg(); + unsigned R7r81_3 = I->getOperand(13).getReg(); + unsigned R7r81l_3 = I->getOperand(14).getReg(); + unsigned R7r82_3 = I->getOperand(15).getReg(); + unsigned R7r82l_3 = I->getOperand(16).getReg(); + unsigned RI = I->getOperand(17).getReg(); + unsigned tmp_Dst73 = I->getOperand(18).getReg(); + unsigned Rimm = I->getOperand(19).getReg(); + unsigned R70 = I->getOperand(20).getReg(); + tmp_Dst73 = SrcVecReg; + + const BasicBlock *LLVM_BB = BB.getBasicBlock(); + MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *SevenMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *SevenMBB0 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *SevenMBB1 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *SevenMBB2 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *SevenMBB3 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *ZeroMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *ZeroMBB0 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *ZeroMBB1 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *ZeroMBB2 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *ZeroMBB3 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *OneMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *OneMBB0 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *OneMBB1 = 
MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *OneMBB2 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *OneMBB3 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *TwoMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *TwoMBB0 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *TwoMBB1 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *TwoMBB2 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *TwoMBB3 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *ThreeMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *ThreeMBB0 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *ThreeMBB1 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *ThreeMBB2 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *ThreeMBB3 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *FourMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *FourMBB0 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *FourMBB1 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *FourMBB2 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *FourMBB3 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *FiveMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *FiveMBB0 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *FiveMBB1 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *FiveMBB2 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *FiveMBB3 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *SixMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *SixMBB0 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *SixMBB1 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *SixMBB2 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *SixMBB3 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineFunction::iterator It = ++BB.getIterator(); + MF->insert(It, mainMBB); + MF->insert(It, SevenMBB); + MF->insert(It, SevenMBB3); + MF->insert(It, SevenMBB0); + MF->insert(It, SevenMBB1); + MF->insert(It, SevenMBB2); + MF->insert(It, ZeroMBB); + MF->insert(It, ZeroMBB3); + MF->insert(It, ZeroMBB0); + MF->insert(It, ZeroMBB1); + MF->insert(It, ZeroMBB2); + MF->insert(It, OneMBB); + MF->insert(It, OneMBB3); + MF->insert(It, OneMBB0); + MF->insert(It, OneMBB1); + MF->insert(It, OneMBB2); + MF->insert(It, TwoMBB); + MF->insert(It, TwoMBB3); + MF->insert(It, TwoMBB0); + MF->insert(It, TwoMBB1); + MF->insert(It, TwoMBB2); + MF->insert(It, ThreeMBB); + MF->insert(It, ThreeMBB3); + MF->insert(It, ThreeMBB0); + MF->insert(It, ThreeMBB1); + MF->insert(It, ThreeMBB2); + MF->insert(It, FourMBB); + MF->insert(It, FourMBB3); + MF->insert(It, FourMBB0); + MF->insert(It, FourMBB1); + MF->insert(It, FourMBB2); + MF->insert(It, FiveMBB); + MF->insert(It, FiveMBB3); + MF->insert(It, FiveMBB0); + MF->insert(It, FiveMBB1); + MF->insert(It, FiveMBB2); + MF->insert(It, SixMBB); + MF->insert(It, SixMBB3); + MF->insert(It, SixMBB0); + MF->insert(It, SixMBB1); + MF->insert(It, SixMBB2); + MF->insert(It, sinkMBB); + MF->insert(It, exitMBB); + + exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end()); + exitMBB->transferSuccessorsAndUpdatePHIs(&BB); + + BB.addSuccessor(mainMBB, BranchProbability::getOne()); + mainMBB->addSuccessor(SevenMBB); + mainMBB->addSuccessor(ZeroMBB); + 
mainMBB->addSuccessor(OneMBB); + mainMBB->addSuccessor(TwoMBB); + mainMBB->addSuccessor(ThreeMBB); + mainMBB->addSuccessor(FourMBB); + mainMBB->addSuccessor(FiveMBB); + mainMBB->addSuccessor(SixMBB); + SevenMBB->addSuccessor(SevenMBB0); + SevenMBB->addSuccessor(SevenMBB1); + SevenMBB->addSuccessor(SevenMBB2); + SevenMBB->addSuccessor(SevenMBB3); + SevenMBB0->addSuccessor(sinkMBB); + SevenMBB1->addSuccessor(sinkMBB); + SevenMBB2->addSuccessor(sinkMBB); + SevenMBB3->addSuccessor(sinkMBB); + ZeroMBB->addSuccessor(ZeroMBB0); + ZeroMBB->addSuccessor(ZeroMBB1); + ZeroMBB->addSuccessor(ZeroMBB2); + ZeroMBB->addSuccessor(ZeroMBB3); + ZeroMBB0->addSuccessor(sinkMBB); + ZeroMBB1->addSuccessor(sinkMBB); + ZeroMBB2->addSuccessor(sinkMBB); + ZeroMBB3->addSuccessor(sinkMBB); + OneMBB->addSuccessor(OneMBB0); + OneMBB->addSuccessor(OneMBB1); + OneMBB->addSuccessor(OneMBB2); + OneMBB->addSuccessor(OneMBB3); + OneMBB0->addSuccessor(sinkMBB); + OneMBB1->addSuccessor(sinkMBB); + OneMBB2->addSuccessor(sinkMBB); + OneMBB3->addSuccessor(sinkMBB); + TwoMBB->addSuccessor(TwoMBB0); + TwoMBB->addSuccessor(TwoMBB1); + TwoMBB->addSuccessor(TwoMBB2); + TwoMBB->addSuccessor(TwoMBB3); + TwoMBB0->addSuccessor(sinkMBB); + TwoMBB1->addSuccessor(sinkMBB); + TwoMBB2->addSuccessor(sinkMBB); + TwoMBB3->addSuccessor(sinkMBB); + ThreeMBB->addSuccessor(ThreeMBB0); + ThreeMBB->addSuccessor(ThreeMBB1); + ThreeMBB->addSuccessor(ThreeMBB2); + ThreeMBB->addSuccessor(ThreeMBB3); + ThreeMBB0->addSuccessor(sinkMBB); + ThreeMBB1->addSuccessor(sinkMBB); + ThreeMBB2->addSuccessor(sinkMBB); + ThreeMBB3->addSuccessor(sinkMBB); + FourMBB->addSuccessor(FourMBB0); + FourMBB->addSuccessor(FourMBB1); + FourMBB->addSuccessor(FourMBB2); + FourMBB->addSuccessor(FourMBB3); + FourMBB0->addSuccessor(sinkMBB); + FourMBB1->addSuccessor(sinkMBB); + FourMBB2->addSuccessor(sinkMBB); + FourMBB3->addSuccessor(sinkMBB); + FiveMBB->addSuccessor(FiveMBB0); + FiveMBB->addSuccessor(FiveMBB1); + FiveMBB->addSuccessor(FiveMBB2); + FiveMBB->addSuccessor(FiveMBB3); + FiveMBB0->addSuccessor(sinkMBB); + FiveMBB1->addSuccessor(sinkMBB); + FiveMBB2->addSuccessor(sinkMBB); + FiveMBB3->addSuccessor(sinkMBB); + SixMBB->addSuccessor(SixMBB0); + SixMBB->addSuccessor(SixMBB1); + SixMBB->addSuccessor(SixMBB2); + SixMBB->addSuccessor(SixMBB3); + SixMBB0->addSuccessor(sinkMBB); + SixMBB1->addSuccessor(sinkMBB); + SixMBB2->addSuccessor(sinkMBB); + SixMBB3->addSuccessor(sinkMBB); + + unsigned SRLI, ADDI, OR, MOD, BLT, ZERO; + SRLI = isGP64 ? LoongArch::SRLI_D : LoongArch::SRLI_W; + ADDI = isGP64 ? LoongArch::ADDI_D : LoongArch::ADDI_W; + OR = isGP64 ? LoongArch::OR : LoongArch::OR32; + MOD = isGP64 ? LoongArch::MOD_DU : LoongArch::MOD_WU; + BLT = isGP64 ? LoongArch::BLT : LoongArch::BLT32; + ZERO = isGP64 ? 
LoongArch::ZERO_64 : LoongArch::ZERO; + + BuildMI(mainMBB, DL, TII->get(SRLI), Rimm).addReg(LaneReg).addImm(2); + BuildMI(mainMBB, DL, TII->get(ADDI), R4r).addReg(ZERO).addImm(4); + BuildMI(mainMBB, DL, TII->get(OR), Rib).addReg(Rimm).addReg(ZERO); + BuildMI(mainMBB, DL, TII->get(MOD), Ris).addReg(Rib).addReg(R4r); + BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(1); + BuildMI(mainMBB, DL, TII->get(BLT)).addReg(Rib).addReg(RI).addMBB(ZeroMBB); + BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(2); + BuildMI(mainMBB, DL, TII->get(BLT)).addReg(Rib).addReg(RI).addMBB(OneMBB); + BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(3); + BuildMI(mainMBB, DL, TII->get(BLT)).addReg(Rib).addReg(RI).addMBB(TwoMBB); + BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(4); + BuildMI(mainMBB, DL, TII->get(BLT)).addReg(Rib).addReg(RI).addMBB(ThreeMBB); + BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(5); + BuildMI(mainMBB, DL, TII->get(BLT)).addReg(Rib).addReg(RI).addMBB(FourMBB); + BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(6); + BuildMI(mainMBB, DL, TII->get(BLT)).addReg(Rib).addReg(RI).addMBB(FiveMBB); + BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(7); + BuildMI(mainMBB, DL, TII->get(BLT)).addReg(Rib).addReg(RI).addMBB(SixMBB); + BuildMI(mainMBB, DL, TII->get(LoongArch::B32)).addMBB(SevenMBB); + + BuildMI(SevenMBB, DL, TII->get(LoongArch::XVPICKVE2GR_W), R70) + .addReg(SrcVecReg) + .addImm(7); + BuildMI(SevenMBB, DL, TII->get(ADDI), R7b1).addReg(ZERO).addImm(1); + BuildMI(SevenMBB, DL, TII->get(BLT)) + .addReg(Ris) + .addReg(R7b1) + .addMBB(SevenMBB0); + BuildMI(SevenMBB, DL, TII->get(ADDI), R7b2).addReg(ZERO).addImm(2); + BuildMI(SevenMBB, DL, TII->get(BLT)) + .addReg(Ris) + .addReg(R7b2) + .addMBB(SevenMBB1); + BuildMI(SevenMBB, DL, TII->get(ADDI), R7b3).addReg(ZERO).addImm(3); + BuildMI(SevenMBB, DL, TII->get(BLT)) + .addReg(Ris) + .addReg(R7b3) + .addMBB(SevenMBB2); + BuildMI(SevenMBB, DL, TII->get(LoongArch::B32)).addMBB(SevenMBB3); + + BuildMI(SevenMBB3, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(SevenMBB3, DL, TII->get(LoongArch::LU12I_W), R7r81l_3) + .addImm(0x00fff); + BuildMI(SevenMBB3, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xfff); + BuildMI(SevenMBB3, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(SevenMBB3, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80_3); + BuildMI(SevenMBB3, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(7); + BuildMI(SevenMBB3, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(SevenMBB0, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(SevenMBB0, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(8); + BuildMI(SevenMBB0, DL, TII->get(LoongArch::LU12I_W), R7r81l_3) + .addImm(0xff00f); + BuildMI(SevenMBB0, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xfff); + BuildMI(SevenMBB0, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(SevenMBB0, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(SevenMBB0, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(7); + BuildMI(SevenMBB0, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(SevenMBB1, DL, 
TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(SevenMBB1, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(16); + BuildMI(SevenMBB1, DL, TII->get(LoongArch::LU12I_W), R7r81l_3) + .addImm(0xffff0); + BuildMI(SevenMBB1, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0x0ff); + BuildMI(SevenMBB1, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(SevenMBB1, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(SevenMBB1, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(7); + BuildMI(SevenMBB1, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(SevenMBB2, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(SevenMBB2, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(24); + BuildMI(SevenMBB2, DL, TII->get(LoongArch::LU12I_W), R7r81l_3) + .addImm(0xfffff); + BuildMI(SevenMBB2, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xf00); + BuildMI(SevenMBB2, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(SevenMBB2, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(SevenMBB2, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(7); + BuildMI(SevenMBB2, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(ZeroMBB, DL, TII->get(LoongArch::XVPICKVE2GR_W), R70) + .addReg(SrcVecReg) + .addImm(0); + BuildMI(ZeroMBB, DL, TII->get(ADDI), R7b1).addReg(ZERO).addImm(1); + BuildMI(ZeroMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b1).addMBB(ZeroMBB0); + BuildMI(ZeroMBB, DL, TII->get(ADDI), R7b2).addReg(ZERO).addImm(2); + BuildMI(ZeroMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b2).addMBB(ZeroMBB1); + BuildMI(ZeroMBB, DL, TII->get(ADDI), R7b3).addReg(ZERO).addImm(3); + BuildMI(ZeroMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b3).addMBB(ZeroMBB2); + BuildMI(ZeroMBB, DL, TII->get(LoongArch::B32)).addMBB(ZeroMBB3); + + BuildMI(ZeroMBB3, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(ZeroMBB3, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0x00fff); + BuildMI(ZeroMBB3, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xfff); + BuildMI(ZeroMBB3, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(ZeroMBB3, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80_3); + BuildMI(ZeroMBB3, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(0); + BuildMI(ZeroMBB3, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(ZeroMBB0, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(ZeroMBB0, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(8); + BuildMI(ZeroMBB0, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xff00f); + BuildMI(ZeroMBB0, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xfff); + BuildMI(ZeroMBB0, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(ZeroMBB0, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(ZeroMBB0, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(0); + BuildMI(ZeroMBB0, DL, 
TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(ZeroMBB1, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(ZeroMBB1, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(16); + BuildMI(ZeroMBB1, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xffff0); + BuildMI(ZeroMBB1, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0x0ff); + BuildMI(ZeroMBB1, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(ZeroMBB1, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(ZeroMBB1, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(0); + BuildMI(ZeroMBB1, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(ZeroMBB2, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(ZeroMBB2, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(24); + BuildMI(ZeroMBB2, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xfffff); + BuildMI(ZeroMBB2, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xf00); + BuildMI(ZeroMBB2, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(ZeroMBB2, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(ZeroMBB2, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(0); + BuildMI(ZeroMBB2, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(OneMBB, DL, TII->get(LoongArch::XVPICKVE2GR_W), R70) + .addReg(SrcVecReg) + .addImm(1); + BuildMI(OneMBB, DL, TII->get(ADDI), R7b1).addReg(ZERO).addImm(1); + BuildMI(OneMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b1).addMBB(OneMBB0); + BuildMI(OneMBB, DL, TII->get(ADDI), R7b2).addReg(ZERO).addImm(2); + BuildMI(OneMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b2).addMBB(OneMBB1); + BuildMI(OneMBB, DL, TII->get(ADDI), R7b3).addReg(ZERO).addImm(3); + BuildMI(OneMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b3).addMBB(OneMBB2); + BuildMI(OneMBB, DL, TII->get(LoongArch::B32)).addMBB(OneMBB3); + + BuildMI(OneMBB3, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(OneMBB3, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0x00fff); + BuildMI(OneMBB3, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xfff); + BuildMI(OneMBB3, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(OneMBB3, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80_3); + BuildMI(OneMBB3, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(1); + BuildMI(OneMBB3, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(OneMBB0, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(OneMBB0, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(8); + BuildMI(OneMBB0, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xff00f); + BuildMI(OneMBB0, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xfff); + BuildMI(OneMBB0, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(OneMBB0, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(OneMBB0, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(1); + BuildMI(OneMBB0, 
DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(OneMBB1, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(OneMBB1, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(16); + BuildMI(OneMBB1, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xffff0); + BuildMI(OneMBB1, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0x0ff); + BuildMI(OneMBB1, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(OneMBB1, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(OneMBB1, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(1); + BuildMI(OneMBB1, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(OneMBB2, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(OneMBB2, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(24); + BuildMI(OneMBB2, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xfffff); + BuildMI(OneMBB2, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xf00); + BuildMI(OneMBB2, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(OneMBB2, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(OneMBB2, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(1); + BuildMI(OneMBB2, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(TwoMBB, DL, TII->get(LoongArch::XVPICKVE2GR_W), R70) + .addReg(SrcVecReg) + .addImm(2); + BuildMI(TwoMBB, DL, TII->get(ADDI), R7b1).addReg(ZERO).addImm(1); + BuildMI(TwoMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b1).addMBB(TwoMBB0); + BuildMI(TwoMBB, DL, TII->get(ADDI), R7b2).addReg(ZERO).addImm(2); + BuildMI(TwoMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b2).addMBB(TwoMBB1); + BuildMI(TwoMBB, DL, TII->get(ADDI), R7b3).addReg(ZERO).addImm(3); + BuildMI(TwoMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b3).addMBB(TwoMBB2); + BuildMI(TwoMBB, DL, TII->get(LoongArch::B32)).addMBB(TwoMBB3); + + BuildMI(TwoMBB3, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(TwoMBB3, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0x00fff); + BuildMI(TwoMBB3, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xfff); + BuildMI(TwoMBB3, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(TwoMBB3, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80_3); + BuildMI(TwoMBB3, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(2); + BuildMI(TwoMBB3, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(TwoMBB0, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(TwoMBB0, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(8); + BuildMI(TwoMBB0, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xff00f); + BuildMI(TwoMBB0, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xfff); + BuildMI(TwoMBB0, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(TwoMBB0, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(TwoMBB0, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(2); + BuildMI(TwoMBB0, DL, 
TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(TwoMBB1, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(TwoMBB1, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(16); + BuildMI(TwoMBB1, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xffff0); + BuildMI(TwoMBB1, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0x0ff); + BuildMI(TwoMBB1, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(TwoMBB1, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(TwoMBB1, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(2); + BuildMI(TwoMBB1, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(TwoMBB2, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(TwoMBB2, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(24); + BuildMI(TwoMBB2, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xfffff); + BuildMI(TwoMBB2, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xf00); + BuildMI(TwoMBB2, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(TwoMBB2, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(TwoMBB2, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(2); + BuildMI(TwoMBB2, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(ThreeMBB, DL, TII->get(LoongArch::XVPICKVE2GR_W), R70) + .addReg(SrcVecReg) + .addImm(3); + BuildMI(ThreeMBB, DL, TII->get(ADDI), R7b1).addReg(ZERO).addImm(1); + BuildMI(ThreeMBB, DL, TII->get(BLT)) + .addReg(Ris) + .addReg(R7b1) + .addMBB(ThreeMBB0); + BuildMI(ThreeMBB, DL, TII->get(ADDI), R7b2).addReg(ZERO).addImm(2); + BuildMI(ThreeMBB, DL, TII->get(BLT)) + .addReg(Ris) + .addReg(R7b2) + .addMBB(ThreeMBB1); + BuildMI(ThreeMBB, DL, TII->get(ADDI), R7b3).addReg(ZERO).addImm(3); + BuildMI(ThreeMBB, DL, TII->get(BLT)) + .addReg(Ris) + .addReg(R7b3) + .addMBB(ThreeMBB2); + BuildMI(ThreeMBB, DL, TII->get(LoongArch::B32)).addMBB(ThreeMBB3); + + BuildMI(ThreeMBB3, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(ThreeMBB3, DL, TII->get(LoongArch::LU12I_W), R7r81l_3) + .addImm(0x00fff); + BuildMI(ThreeMBB3, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xfff); + BuildMI(ThreeMBB3, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(ThreeMBB3, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80_3); + BuildMI(ThreeMBB3, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(3); + BuildMI(ThreeMBB3, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(ThreeMBB0, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(ThreeMBB0, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(8); + BuildMI(ThreeMBB0, DL, TII->get(LoongArch::LU12I_W), R7r81l_3) + .addImm(0xff00f); + BuildMI(ThreeMBB0, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xfff); + BuildMI(ThreeMBB0, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(ThreeMBB0, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(ThreeMBB0, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + 
.addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(3); + BuildMI(ThreeMBB0, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(ThreeMBB1, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(ThreeMBB1, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(16); + BuildMI(ThreeMBB1, DL, TII->get(LoongArch::LU12I_W), R7r81l_3) + .addImm(0xffff0); + BuildMI(ThreeMBB1, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0x0ff); + BuildMI(ThreeMBB1, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(ThreeMBB1, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(ThreeMBB1, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(3); + BuildMI(ThreeMBB1, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(ThreeMBB2, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(ThreeMBB2, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(24); + BuildMI(ThreeMBB2, DL, TII->get(LoongArch::LU12I_W), R7r81l_3) + .addImm(0xfffff); + BuildMI(ThreeMBB2, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xf00); + BuildMI(ThreeMBB2, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(ThreeMBB2, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(ThreeMBB2, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(3); + BuildMI(ThreeMBB2, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(FourMBB, DL, TII->get(LoongArch::XVPICKVE2GR_W), R70) + .addReg(SrcVecReg) + .addImm(4); + BuildMI(FourMBB, DL, TII->get(ADDI), R7b1).addReg(ZERO).addImm(1); + BuildMI(FourMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b1).addMBB(FourMBB0); + BuildMI(FourMBB, DL, TII->get(ADDI), R7b2).addReg(ZERO).addImm(2); + BuildMI(FourMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b2).addMBB(FourMBB1); + BuildMI(FourMBB, DL, TII->get(ADDI), R7b3).addReg(ZERO).addImm(3); + BuildMI(FourMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b3).addMBB(FourMBB2); + BuildMI(FourMBB, DL, TII->get(LoongArch::B32)).addMBB(FourMBB3); + + BuildMI(FourMBB3, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(FourMBB3, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0x00fff); + BuildMI(FourMBB3, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xfff); + BuildMI(FourMBB3, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(FourMBB3, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80_3); + BuildMI(FourMBB3, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(4); + BuildMI(FourMBB3, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(FourMBB0, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(FourMBB0, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(8); + BuildMI(FourMBB0, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xff00f); + BuildMI(FourMBB0, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xfff); + BuildMI(FourMBB0, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(FourMBB0, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(FourMBB0, 
DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(4); + BuildMI(FourMBB0, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(FourMBB1, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(FourMBB1, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(16); + BuildMI(FourMBB1, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xffff0); + BuildMI(FourMBB1, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0x0ff); + BuildMI(FourMBB1, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(FourMBB1, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(FourMBB1, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(4); + BuildMI(FourMBB1, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(FourMBB2, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(FourMBB2, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(24); + BuildMI(FourMBB2, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xfffff); + BuildMI(FourMBB2, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xf00); + BuildMI(FourMBB2, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(FourMBB2, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(FourMBB2, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(4); + BuildMI(FourMBB2, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(FiveMBB, DL, TII->get(LoongArch::XVPICKVE2GR_W), R70) + .addReg(SrcVecReg) + .addImm(5); + BuildMI(FiveMBB, DL, TII->get(ADDI), R7b1).addReg(ZERO).addImm(1); + BuildMI(FiveMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b1).addMBB(FiveMBB0); + BuildMI(FiveMBB, DL, TII->get(ADDI), R7b2).addReg(ZERO).addImm(2); + BuildMI(FiveMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b2).addMBB(FiveMBB1); + BuildMI(FiveMBB, DL, TII->get(ADDI), R7b3).addReg(ZERO).addImm(3); + BuildMI(FiveMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b3).addMBB(FiveMBB2); + BuildMI(FiveMBB, DL, TII->get(LoongArch::B32)).addMBB(FiveMBB3); + + BuildMI(FiveMBB3, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(FiveMBB3, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0x00fff); + BuildMI(FiveMBB3, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xfff); + BuildMI(FiveMBB3, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(FiveMBB3, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80_3); + BuildMI(FiveMBB3, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(5); + BuildMI(FiveMBB3, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(FiveMBB0, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(FiveMBB0, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(8); + BuildMI(FiveMBB0, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xff00f); + BuildMI(FiveMBB0, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xfff); + BuildMI(FiveMBB0, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(FiveMBB0, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + 
.addReg(R7r80l_3); + BuildMI(FiveMBB0, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(5); + BuildMI(FiveMBB0, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(FiveMBB1, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(FiveMBB1, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(16); + BuildMI(FiveMBB1, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xffff0); + BuildMI(FiveMBB1, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0x0ff); + BuildMI(FiveMBB1, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(FiveMBB1, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(FiveMBB1, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(5); + BuildMI(FiveMBB1, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(FiveMBB2, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(FiveMBB2, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(24); + BuildMI(FiveMBB2, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xfffff); + BuildMI(FiveMBB2, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xf00); + BuildMI(FiveMBB2, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(FiveMBB2, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(FiveMBB2, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(5); + BuildMI(FiveMBB2, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(SixMBB, DL, TII->get(LoongArch::XVPICKVE2GR_W), R70) + .addReg(SrcVecReg) + .addImm(6); + BuildMI(SixMBB, DL, TII->get(ADDI), R7b1).addReg(ZERO).addImm(1); + BuildMI(SixMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b1).addMBB(SixMBB0); + BuildMI(SixMBB, DL, TII->get(ADDI), R7b2).addReg(ZERO).addImm(2); + BuildMI(SixMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b2).addMBB(SixMBB1); + BuildMI(SixMBB, DL, TII->get(ADDI), R7b3).addReg(ZERO).addImm(3); + BuildMI(SixMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b3).addMBB(SixMBB2); + BuildMI(SixMBB, DL, TII->get(LoongArch::B32)).addMBB(SixMBB3); + + BuildMI(SixMBB3, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(SixMBB3, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0x00fff); + BuildMI(SixMBB3, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xfff); + BuildMI(SixMBB3, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(SixMBB3, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80_3); + BuildMI(SixMBB3, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(6); + BuildMI(SixMBB3, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(SixMBB0, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(SixMBB0, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(8); + BuildMI(SixMBB0, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xff00f); + BuildMI(SixMBB0, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xfff); + BuildMI(SixMBB0, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(SixMBB0, DL, TII->get(LoongArch::OR32), R7r82_3) + 
.addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(SixMBB0, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(6); + BuildMI(SixMBB0, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(SixMBB1, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(SixMBB1, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(16); + BuildMI(SixMBB1, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xffff0); + BuildMI(SixMBB1, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0x0ff); + BuildMI(SixMBB1, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(SixMBB1, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(SixMBB1, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(6); + BuildMI(SixMBB1, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(SixMBB2, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(SixMBB2, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(24); + BuildMI(SixMBB2, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xfffff); + BuildMI(SixMBB2, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xf00); + BuildMI(SixMBB2, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(SixMBB2, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(SixMBB2, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(6); + BuildMI(SixMBB2, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + sinkMBB->addSuccessor(exitMBB, BranchProbability::getOne()); + + BuildMI(sinkMBB, DL, TII->get(LoongArch::XVORI_B), Dest) + .addReg(tmp_Dst73) + .addImm(0); + + LivePhysRegs LiveRegs; + computeAndAddLiveIns(LiveRegs, *mainMBB); + computeAndAddLiveIns(LiveRegs, *SevenMBB); + computeAndAddLiveIns(LiveRegs, *SevenMBB0); + computeAndAddLiveIns(LiveRegs, *SevenMBB1); + computeAndAddLiveIns(LiveRegs, *SevenMBB2); + computeAndAddLiveIns(LiveRegs, *SevenMBB3); + computeAndAddLiveIns(LiveRegs, *ZeroMBB); + computeAndAddLiveIns(LiveRegs, *ZeroMBB0); + computeAndAddLiveIns(LiveRegs, *ZeroMBB1); + computeAndAddLiveIns(LiveRegs, *ZeroMBB2); + computeAndAddLiveIns(LiveRegs, *ZeroMBB3); + computeAndAddLiveIns(LiveRegs, *OneMBB); + computeAndAddLiveIns(LiveRegs, *OneMBB0); + computeAndAddLiveIns(LiveRegs, *OneMBB1); + computeAndAddLiveIns(LiveRegs, *OneMBB2); + computeAndAddLiveIns(LiveRegs, *OneMBB3); + computeAndAddLiveIns(LiveRegs, *TwoMBB); + computeAndAddLiveIns(LiveRegs, *TwoMBB0); + computeAndAddLiveIns(LiveRegs, *TwoMBB1); + computeAndAddLiveIns(LiveRegs, *TwoMBB2); + computeAndAddLiveIns(LiveRegs, *TwoMBB3); + computeAndAddLiveIns(LiveRegs, *ThreeMBB); + computeAndAddLiveIns(LiveRegs, *ThreeMBB0); + computeAndAddLiveIns(LiveRegs, *ThreeMBB1); + computeAndAddLiveIns(LiveRegs, *ThreeMBB2); + computeAndAddLiveIns(LiveRegs, *ThreeMBB3); + computeAndAddLiveIns(LiveRegs, *FourMBB); + computeAndAddLiveIns(LiveRegs, *FourMBB0); + computeAndAddLiveIns(LiveRegs, *FourMBB1); + computeAndAddLiveIns(LiveRegs, *FourMBB2); + computeAndAddLiveIns(LiveRegs, *FourMBB3); + computeAndAddLiveIns(LiveRegs, *FiveMBB); + computeAndAddLiveIns(LiveRegs, *FiveMBB0); + computeAndAddLiveIns(LiveRegs, *FiveMBB1); + computeAndAddLiveIns(LiveRegs, *FiveMBB2); + computeAndAddLiveIns(LiveRegs, *FiveMBB3); + 
computeAndAddLiveIns(LiveRegs, *SixMBB); + computeAndAddLiveIns(LiveRegs, *SixMBB0); + computeAndAddLiveIns(LiveRegs, *SixMBB1); + computeAndAddLiveIns(LiveRegs, *SixMBB2); + computeAndAddLiveIns(LiveRegs, *SixMBB3); + computeAndAddLiveIns(LiveRegs, *sinkMBB); + computeAndAddLiveIns(LiveRegs, *exitMBB); + + NMBBI = BB.end(); + I->eraseFromParent(); + + return true; +} + +bool LoongArchExpandPseudo::expandAtomicBinOpSubword( + MachineBasicBlock &BB, MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI) { + + MachineFunction *MF = BB.getParent(); + + DebugLoc DL = I->getDebugLoc(); + unsigned LL, SC; + unsigned BEQ = LoongArch::BEQ32; + unsigned SEOp = LoongArch::EXT_W_H32; + + LL = LoongArch::LL_W; + SC = LoongArch::SC_W; + + bool IsSwap = false; + bool IsNand = false; + bool IsMAX = false; + bool IsMIN = false; + bool IsUnsigned = false; + + unsigned Opcode = 0; + switch (I->getOpcode()) { + case LoongArch::ATOMIC_LOAD_NAND_I8_POSTRA: + SEOp = LoongArch::EXT_W_B32; + LLVM_FALLTHROUGH; + case LoongArch::ATOMIC_LOAD_NAND_I16_POSTRA: + IsNand = true; + break; + case LoongArch::ATOMIC_SWAP_I8_POSTRA: + SEOp = LoongArch::EXT_W_B32; + LLVM_FALLTHROUGH; + case LoongArch::ATOMIC_SWAP_I16_POSTRA: + IsSwap = true; + break; + case LoongArch::ATOMIC_LOAD_ADD_I8_POSTRA: + SEOp = LoongArch::EXT_W_B32; + LLVM_FALLTHROUGH; + case LoongArch::ATOMIC_LOAD_ADD_I16_POSTRA: + Opcode = LoongArch::ADD_W; + break; + case LoongArch::ATOMIC_LOAD_MAX_I8_POSTRA: + SEOp = LoongArch::EXT_W_B32; + LLVM_FALLTHROUGH; + case LoongArch::ATOMIC_LOAD_MAX_I16_POSTRA: + Opcode = LoongArch::AMMAX_DB_W; + IsMAX = true; + break; + case LoongArch::ATOMIC_LOAD_MIN_I8_POSTRA: + SEOp = LoongArch::EXT_W_B32; + LLVM_FALLTHROUGH; + case LoongArch::ATOMIC_LOAD_MIN_I16_POSTRA: + Opcode = LoongArch::AMMIN_DB_W; + IsMIN = true; + break; + case LoongArch::ATOMIC_LOAD_UMAX_I8_POSTRA: + SEOp = LoongArch::EXT_W_B32; + LLVM_FALLTHROUGH; + case LoongArch::ATOMIC_LOAD_UMAX_I16_POSTRA: + Opcode = LoongArch::AMMAX_DB_WU; + IsMAX = true; + IsUnsigned = true; + break; + case LoongArch::ATOMIC_LOAD_UMIN_I8_POSTRA: + SEOp = LoongArch::EXT_W_B32; + LLVM_FALLTHROUGH; + case LoongArch::ATOMIC_LOAD_UMIN_I16_POSTRA: + Opcode = LoongArch::AMMIN_DB_WU; + IsMIN = true; + IsUnsigned = true; + break; + case LoongArch::ATOMIC_LOAD_SUB_I8_POSTRA: + SEOp = LoongArch::EXT_W_B32; + LLVM_FALLTHROUGH; + case LoongArch::ATOMIC_LOAD_SUB_I16_POSTRA: + Opcode = LoongArch::SUB_W; + break; + case LoongArch::ATOMIC_LOAD_AND_I8_POSTRA: + SEOp = LoongArch::EXT_W_B32; + LLVM_FALLTHROUGH; + case LoongArch::ATOMIC_LOAD_AND_I16_POSTRA: + Opcode = LoongArch::AND32; + break; + case LoongArch::ATOMIC_LOAD_OR_I8_POSTRA: + SEOp = LoongArch::EXT_W_B32; + LLVM_FALLTHROUGH; + case LoongArch::ATOMIC_LOAD_OR_I16_POSTRA: + Opcode = LoongArch::OR32; + break; + case LoongArch::ATOMIC_LOAD_XOR_I8_POSTRA: + SEOp = LoongArch::EXT_W_B32; + LLVM_FALLTHROUGH; + case LoongArch::ATOMIC_LOAD_XOR_I16_POSTRA: + Opcode = LoongArch::XOR32; + break; + default: + llvm_unreachable("Unknown subword atomic pseudo for expansion!"); + } + + unsigned Dest = I->getOperand(0).getReg(); + unsigned Ptr = I->getOperand(1).getReg(); + unsigned Incr = I->getOperand(2).getReg(); + unsigned Mask = I->getOperand(3).getReg(); + unsigned Mask2 = I->getOperand(4).getReg(); + unsigned ShiftAmnt = I->getOperand(5).getReg(); + unsigned OldVal = I->getOperand(6).getReg(); + unsigned BinOpRes = I->getOperand(7).getReg(); + unsigned StoreVal = I->getOperand(8).getReg(); + + const BasicBlock *LLVM_BB = BB.getBasicBlock(); 
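+  // The expansion below builds a single ll.w/sc.w retry loop operating on the
+  // naturally aligned word that contains the byte/halfword. For example, an
+  // i16 atomic add is emitted roughly as:
+  //
+  //   loop:
+  //     ll.w   oldval, 0(ptr)            # load the containing word
+  //     add.w  binopres, oldval, incr    # full-width op on the shifted value
+  //     and    binopres, binopres, mask  # keep only the updated lane
+  //     and    storeval, oldval, mask2   # keep the untouched lanes
+  //     or     storeval, storeval, binopres
+  //     sc.w   storeval, 0(ptr)
+  //     beq    storeval, $zero, loop     # retry if the conditional store failed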
+  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineFunction::iterator It = ++BB.getIterator();
+  MF->insert(It, loopMBB);
+  MF->insert(It, sinkMBB);
+  MF->insert(It, exitMBB);
+
+  exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end());
+  exitMBB->transferSuccessorsAndUpdatePHIs(&BB);
+
+  BB.addSuccessor(loopMBB, BranchProbability::getOne());
+  loopMBB->addSuccessor(sinkMBB);
+  loopMBB->addSuccessor(loopMBB);
+  loopMBB->normalizeSuccProbs();
+
+  BuildMI(loopMBB, DL, TII->get(LL), OldVal).addReg(Ptr).addImm(0);
+  if (IsNand) {
+    // and  andres, oldval, incr2
+    // nor  binopres, $0, andres
+    // and  newval, binopres, mask
+    BuildMI(loopMBB, DL, TII->get(LoongArch::AND32), BinOpRes)
+        .addReg(OldVal)
+        .addReg(Incr);
+    BuildMI(loopMBB, DL, TII->get(LoongArch::NOR32), BinOpRes)
+        .addReg(LoongArch::ZERO)
+        .addReg(BinOpRes);
+    BuildMI(loopMBB, DL, TII->get(LoongArch::AND32), BinOpRes)
+        .addReg(BinOpRes)
+        .addReg(Mask);
+  } else if (IsMAX || IsMIN) {
+
+    unsigned SLTScratch4 = IsUnsigned ? LoongArch::SLTU32 : LoongArch::SLT32;
+    unsigned CMPIncr = IsMAX ? LoongArch::MASKEQZ32 : LoongArch::MASKNEZ32;
+    unsigned CMPOldVal = IsMAX ? LoongArch::MASKNEZ32 : LoongArch::MASKEQZ32;
+
+    unsigned Scratch4 = I->getOperand(9).getReg();
+    unsigned Scratch5 = I->getOperand(10).getReg();
+
+    BuildMI(loopMBB, DL, TII->get(LoongArch::AND32), Scratch5)
+        .addReg(OldVal)
+        .addReg(Mask);
+    BuildMI(loopMBB, DL, TII->get(LoongArch::AND32), Incr)
+        .addReg(Incr)
+        .addReg(Mask);
+    BuildMI(loopMBB, DL, TII->get(SLTScratch4), Scratch4)
+        .addReg(Scratch5)
+        .addReg(Incr);
+    BuildMI(loopMBB, DL, TII->get(CMPOldVal), BinOpRes)
+        .addReg(Scratch5)
+        .addReg(Scratch4);
+    BuildMI(loopMBB, DL, TII->get(CMPIncr), Scratch4)
+        .addReg(Incr)
+        .addReg(Scratch4);
+    BuildMI(loopMBB, DL, TII->get(LoongArch::OR32), BinOpRes)
+        .addReg(BinOpRes)
+        .addReg(Scratch4);
+
+  } else if (!IsSwap) {
+    // <binop>  binopres, oldval, incr2
+    // and      newval, binopres, mask
+    BuildMI(loopMBB, DL, TII->get(Opcode), BinOpRes)
+        .addReg(OldVal)
+        .addReg(Incr);
+    BuildMI(loopMBB, DL, TII->get(LoongArch::AND32), BinOpRes)
+        .addReg(BinOpRes)
+        .addReg(Mask);
+  } else { // atomic.swap
+    // and  newval, incr2, mask
+    BuildMI(loopMBB, DL, TII->get(LoongArch::AND32), BinOpRes)
+        .addReg(Incr)
+        .addReg(Mask);
+  }
+
+  // and StoreVal, OldVal, Mask2
+  // or StoreVal, StoreVal, BinOpRes
+  // StoreVal = sc StoreVal, 0(Ptr)
+  // beq StoreVal, zero, loopMBB
+  BuildMI(loopMBB, DL, TII->get(LoongArch::AND32), StoreVal)
+      .addReg(OldVal)
+      .addReg(Mask2);
+  BuildMI(loopMBB, DL, TII->get(LoongArch::OR32), StoreVal)
+      .addReg(StoreVal)
+      .addReg(BinOpRes);
+  BuildMI(loopMBB, DL, TII->get(SC), StoreVal)
+      .addReg(StoreVal)
+      .addReg(Ptr)
+      .addImm(0);
+  BuildMI(loopMBB, DL, TII->get(BEQ))
+      .addReg(StoreVal)
+      .addReg(LoongArch::ZERO)
+      .addMBB(loopMBB);
+
+  // sinkMBB:
+  //   and  maskedoldval1, oldval, mask
+  //   srl  srlres, maskedoldval1, shiftamt
+  //   sign_extend dest, srlres
+
+  sinkMBB->addSuccessor(exitMBB, BranchProbability::getOne());
+
+  BuildMI(sinkMBB, DL, TII->get(LoongArch::AND32), Dest)
+      .addReg(OldVal)
+      .addReg(Mask);
+  BuildMI(sinkMBB, DL, TII->get(LoongArch::SRL_W), Dest)
+      .addReg(Dest)
+      .addReg(ShiftAmnt);
+
+  BuildMI(sinkMBB, DL, TII->get(SEOp), Dest).addReg(Dest);
+
+  LivePhysRegs LiveRegs;
+  computeAndAddLiveIns(LiveRegs, *loopMBB);
+  computeAndAddLiveIns(LiveRegs, *sinkMBB);
computeAndAddLiveIns(LiveRegs, *exitMBB); + + NMBBI = BB.end(); + I->eraseFromParent(); + + return true; +} + +bool LoongArchExpandPseudo::expandAtomicBinOp(MachineBasicBlock &BB, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI, + unsigned Size) { + MachineFunction *MF = BB.getParent(); + + DebugLoc DL = I->getDebugLoc(); + + unsigned LL, SC, ZERO, BEQ, SUB; + if (Size == 4) { + LL = LoongArch::LL_W; + SC = LoongArch::SC_W; + BEQ = LoongArch::BEQ32; + ZERO = LoongArch::ZERO; + SUB = LoongArch::SUB_W; + } else { + LL = LoongArch::LL_D; + SC = LoongArch::SC_D; + ZERO = LoongArch::ZERO_64; + BEQ = LoongArch::BEQ; + SUB = LoongArch::SUB_D; + } + + unsigned OldVal = I->getOperand(0).getReg(); + unsigned Ptr = I->getOperand(1).getReg(); + unsigned Incr = I->getOperand(2).getReg(); + unsigned Scratch = I->getOperand(3).getReg(); + + unsigned Opcode = 0; + unsigned OR = 0; + unsigned AND = 0; + unsigned NOR = 0; + bool IsNand = false; + bool IsSub = false; + switch (I->getOpcode()) { + case LoongArch::ATOMIC_LOAD_ADD_I32_POSTRA: + Opcode = LoongArch::AMADD_DB_W; + break; + case LoongArch::ATOMIC_LOAD_SUB_I32_POSTRA: + IsSub = true; + Opcode = LoongArch::AMADD_DB_W; + break; + case LoongArch::ATOMIC_LOAD_AND_I32_POSTRA: + Opcode = LoongArch::AMAND_DB_W; + break; + case LoongArch::ATOMIC_LOAD_OR_I32_POSTRA: + Opcode = LoongArch::AMOR_DB_W; + break; + case LoongArch::ATOMIC_LOAD_XOR_I32_POSTRA: + Opcode = LoongArch::AMXOR_DB_W; + break; + case LoongArch::ATOMIC_LOAD_NAND_I32_POSTRA: + IsNand = true; + AND = LoongArch::AND32; + NOR = LoongArch::NOR32; + break; + case LoongArch::ATOMIC_SWAP_I32_POSTRA: + OR = LoongArch::AMSWAP_DB_W; + break; + case LoongArch::ATOMIC_LOAD_MAX_I32_POSTRA: + Opcode = LoongArch::AMMAX_DB_W; + break; + case LoongArch::ATOMIC_LOAD_MIN_I32_POSTRA: + Opcode = LoongArch::AMMIN_DB_W; + break; + case LoongArch::ATOMIC_LOAD_UMAX_I32_POSTRA: + Opcode = LoongArch::AMMAX_DB_WU; + break; + case LoongArch::ATOMIC_LOAD_UMIN_I32_POSTRA: + Opcode = LoongArch::AMMIN_DB_WU; + break; + case LoongArch::ATOMIC_LOAD_ADD_I64_POSTRA: + Opcode = LoongArch::AMADD_DB_D; + break; + case LoongArch::ATOMIC_LOAD_SUB_I64_POSTRA: + IsSub = true; + Opcode = LoongArch::AMADD_DB_D; + break; + case LoongArch::ATOMIC_LOAD_AND_I64_POSTRA: + Opcode = LoongArch::AMAND_DB_D; + break; + case LoongArch::ATOMIC_LOAD_OR_I64_POSTRA: + Opcode = LoongArch::AMOR_DB_D; + break; + case LoongArch::ATOMIC_LOAD_XOR_I64_POSTRA: + Opcode = LoongArch::AMXOR_DB_D; + break; + case LoongArch::ATOMIC_LOAD_NAND_I64_POSTRA: + IsNand = true; + AND = LoongArch::AND; + NOR = LoongArch::NOR; + break; + case LoongArch::ATOMIC_SWAP_I64_POSTRA: + OR = LoongArch::AMSWAP_DB_D; + break; + case LoongArch::ATOMIC_LOAD_MAX_I64_POSTRA: + Opcode = LoongArch::AMMAX_DB_D; + break; + case LoongArch::ATOMIC_LOAD_MIN_I64_POSTRA: + Opcode = LoongArch::AMMIN_DB_D; + break; + case LoongArch::ATOMIC_LOAD_UMAX_I64_POSTRA: + Opcode = LoongArch::AMMAX_DB_DU; + break; + case LoongArch::ATOMIC_LOAD_UMIN_I64_POSTRA: + Opcode = LoongArch::AMMIN_DB_DU; + break; + default: + llvm_unreachable("Unknown pseudo atomic!"); + } + + const BasicBlock *LLVM_BB = BB.getBasicBlock(); + MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineFunction::iterator It = ++BB.getIterator(); + MF->insert(It, loopMBB); + MF->insert(It, exitMBB); + + exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end()); + exitMBB->transferSuccessorsAndUpdatePHIs(&BB); + + 
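+  // Unlike the subword case, naturally aligned word/doubleword operations can
+  // use the native AM* atomic memory instructions directly, e.g. an i32
+  // atomic add becomes a single "amadd_db.w oldval, incr, ptr". Subtraction
+  // reuses amadd_db.w/d by negating the increment first, and only nand still
+  // needs an ll/sc retry loop.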
BB.addSuccessor(loopMBB, BranchProbability::getOne());
+  loopMBB->addSuccessor(exitMBB);
+  loopMBB->addSuccessor(loopMBB);
+  loopMBB->normalizeSuccProbs();
+
+  assert((OldVal != Ptr) && "Clobbered the wrong ptr reg!");
+  assert((OldVal != Incr) && "Clobbered the wrong reg!");
+  if (Opcode) {
+    if (IsSub) {
+      BuildMI(loopMBB, DL, TII->get(SUB), Scratch).addReg(ZERO).addReg(Incr);
+      BuildMI(loopMBB, DL, TII->get(Opcode), OldVal)
+          .addReg(Scratch)
+          .addReg(Ptr)
+          .addImm(0);
+    } else {
+      BuildMI(loopMBB, DL, TII->get(Opcode), OldVal)
+          .addReg(Incr)
+          .addReg(Ptr)
+          .addImm(0);
+    }
+  } else if (IsNand) {
+    assert(AND && NOR &&
+           "Unknown nand instruction for atomic pseudo expansion");
+    BuildMI(loopMBB, DL, TII->get(LL), OldVal).addReg(Ptr).addImm(0);
+    BuildMI(loopMBB, DL, TII->get(AND), Scratch).addReg(OldVal).addReg(Incr);
+    BuildMI(loopMBB, DL, TII->get(NOR), Scratch).addReg(ZERO).addReg(Scratch);
+    BuildMI(loopMBB, DL, TII->get(SC), Scratch)
+        .addReg(Scratch)
+        .addReg(Ptr)
+        .addImm(0);
+    BuildMI(loopMBB, DL, TII->get(BEQ)).addReg(Scratch).addReg(ZERO).addMBB(loopMBB);
+  } else {
+    assert(OR && "Unknown instruction for atomic pseudo expansion!");
+    BuildMI(loopMBB, DL, TII->get(OR), OldVal).addReg(Incr).addReg(Ptr).addImm(0);
+  }
+
+  NMBBI = BB.end();
+  I->eraseFromParent();
+
+  LivePhysRegs LiveRegs;
+  computeAndAddLiveIns(LiveRegs, *loopMBB);
+  computeAndAddLiveIns(LiveRegs, *exitMBB);
+
+  return true;
+}
+
+bool LoongArchExpandPseudo::expandLoadAddr(MachineBasicBlock &BB,
+                                           MachineBasicBlock::iterator I,
+                                           MachineBasicBlock::iterator &NMBBI) {
+  MachineFunction *MF = BB.getParent();
+  MachineInstr &MI = *I;
+  DebugLoc DL = MI.getDebugLoc();
+
+  unsigned Op = MI.getOpcode();
+  unsigned DestReg = MI.getOperand(0).getReg();
+  unsigned TmpReg;
+  const MachineOperand &MO = MI.getOperand(1);
+  Reloc::Model RM = MF->getTarget().getRelocationModel();
+
+  MachineInstrBuilder MIB1, MIB2, MIB3, MIB4, MIB5;
+  unsigned HiFlag, LoFlag, HigherFlag, HighestFlag;
+  unsigned HiOp, LoOp, HigherOp, HighestOp, LastOp;
+
+  HiOp = LoongArch::PCALAU12I_ri;
+  LoOp = LoongArch::ORI_rri;
+  HigherOp = LoongArch::LU32I_D_ri;
+  HighestOp = LoongArch::LU52I_D_rri;
+
+  switch (Op) {
+  case LoongArch::LoadAddrLocal:
+    if (RM == Reloc::Static) { // for jit
+      HiFlag = LoongArchII::MO_ABS_HI;
+      LoFlag = LoongArchII::MO_ABS_LO;
+      HigherFlag = LoongArchII::MO_ABS_HIGHER;
+      HighestFlag = LoongArchII::MO_ABS_HIGHEST;
+      // lu12i.w + ori + lu32i.d + lu52i.d
+      HiOp = LoongArch::LU12I_W;
+      LoOp = LoongArch::ORI;
+      HigherOp = LoongArch::LU32I_D;
+      HighestOp = LoongArch::LU52I_D;
+    } else {
+      // pcalau12i + addi.d
+      LoFlag = LoongArchII::MO_PCREL_LO;
+      HiFlag = LoongArchII::MO_PCREL_HI;
+      LoOp = LoongArch::ADDI_D_rri;
+    }
+    break;
+  case LoongArch::LoadAddrLocalRR:
+    // pcalau12i + ori + lu32i.d + lu52i.d + add.d
+    LoFlag = LoongArchII::MO_PCREL_RRLO;
+    HiFlag = LoongArchII::MO_PCREL_RRHI;
+    HigherFlag = LoongArchII::MO_PCREL_RRHIGHER;
+    HighestFlag = LoongArchII::MO_PCREL_RRHIGHEST;
+    LastOp = LoongArch::ADD_D_rrr;
+    break;
+  case LoongArch::LoadAddrGlobal:
+  case LoongArch::LoadAddrGlobal_Alias:
+    // pcalau12i + ld.d
+    LoFlag = LoongArchII::MO_GOT_LO;
+    HiFlag = LoongArchII::MO_GOT_HI;
+    HiOp = LoongArch::PCALAU12I_ri;
+    LoOp = LoongArch::LD_D_rri;
+    break;
+  case LoongArch::LoadAddrGlobalRR:
+    // pcalau12i + ori + lu32i.d + lu52i.d + ldx.d
+    LoFlag = LoongArchII::MO_GOT_RRLO;
+    HiFlag = LoongArchII::MO_GOT_RRHI;
+    HigherFlag = LoongArchII::MO_GOT_RRHIGHER;
+    HighestFlag = LoongArchII::MO_GOT_RRHIGHEST;
+    HiOp = LoongArch::PCALAU12I_ri;
+    LoOp =
LoongArch::ORI_rri; + HigherOp = LoongArch::LU32I_D_ri; + HighestOp = LoongArch::LU52I_D_rri; + LastOp = LoongArch::LDX_D_rrr; + break; + case LoongArch::LoadAddrTLS_LE: + // lu12i.w + ori + lu32i.d + lu52i.d + LoFlag = LoongArchII::MO_TLSLE_LO; + HiFlag = LoongArchII::MO_TLSLE_HI; + HigherFlag = LoongArchII::MO_TLSLE_HIGHER; + HighestFlag = LoongArchII::MO_TLSLE_HIGHEST; + HiOp = LoongArch::LU12I_W_ri; + break; + case LoongArch::LoadAddrTLS_IE: + // pcalau12i + ld.d + LoFlag = LoongArchII::MO_TLSIE_LO; + HiFlag = LoongArchII::MO_TLSIE_HI; + HiOp = LoongArch::PCALAU12I_ri; + LoOp = LoongArch::LD_D_rri; + break; + case LoongArch::LoadAddrTLS_IE_RR: + // pcalau12i + ori + lu32i.d + lu52i.d +ldx.d + LoFlag = LoongArchII::MO_TLSIE_RRLO; + HiFlag = LoongArchII::MO_TLSIE_RRHI; + HigherFlag = LoongArchII::MO_TLSIE_RRHIGHER; + HighestFlag = LoongArchII::MO_TLSIE_RRHIGHEST; + HiOp = LoongArch::PCALAU12I_ri; + LoOp = LoongArch::ORI_rri; + HigherOp = LoongArch::LU32I_D_ri; + HighestOp = LoongArch::LU52I_D_rri; + LastOp = LoongArch::LDX_D_rrr; + break; + case LoongArch::LoadAddrTLS_LD: + case LoongArch::LoadAddrTLS_GD: + // pcalau12i + addi.d + LoFlag = LoongArchII::MO_TLSGD_LO; + HiFlag = LoongArchII::MO_TLSGD_HI; + HiOp = LoongArch::PCALAU12I_ri; + LoOp = LoongArch::ADDI_D_rri; + break; + case LoongArch::LoadAddrTLS_LD_RR: + case LoongArch::LoadAddrTLS_GD_RR: + // pcalau12i + ori + lu32i.d + lu52i.d + add.d + LoFlag = LoongArchII::MO_TLSGD_RRLO; + HiFlag = LoongArchII::MO_TLSGD_RRHI; + HigherFlag = LoongArchII::MO_TLSGD_RRHIGHER; + HighestFlag = LoongArchII::MO_TLSGD_RRHIGHEST; + HiOp = LoongArch::PCALAU12I_ri; + LoOp = LoongArch::ORI_rri; + HigherOp = LoongArch::LU32I_D_ri; + HighestOp = LoongArch::LU52I_D_rri; + LastOp = LoongArch::ADD_D_rrr; + break; + default: + break; + } + + MIB1 = BuildMI(BB, I, DL, TII->get(HiOp), DestReg); + + switch (Op) { + case LoongArch::LoadAddrLocal: + if (RM == Reloc::Static) { // for jit + // la.abs rd, symbol + MIB2 = BuildMI(BB, I, DL, TII->get(LoOp), DestReg).addReg(DestReg); + MIB3 = BuildMI(BB, I, DL, TII->get(HigherOp), DestReg); + MIB4 = BuildMI(BB, I, DL, TII->get(HighestOp), DestReg).addReg(DestReg); + if (MO.isJTI()) { + MIB1.addJumpTableIndex(MO.getIndex(), HiFlag); + MIB2.addJumpTableIndex(MO.getIndex(), LoFlag); + MIB3.addJumpTableIndex(MO.getIndex(), HigherFlag); + MIB4.addJumpTableIndex(MO.getIndex(), HighestFlag); + } else if (MO.isBlockAddress()) { + MIB1.addBlockAddress(MO.getBlockAddress(), 0, HiFlag); + MIB2.addBlockAddress(MO.getBlockAddress(), 0, LoFlag); + MIB3.addBlockAddress(MO.getBlockAddress(), 0, HigherFlag); + MIB4.addBlockAddress(MO.getBlockAddress(), 0, HighestFlag); + } else { + MIB1.addDisp(MO, 0, HiFlag); + MIB2.addDisp(MO, 0, LoFlag); + MIB3.addDisp(MO, 0, HigherFlag); + MIB4.addDisp(MO, 0, HighestFlag); + } + break; + } + LLVM_FALLTHROUGH; + case LoongArch::LoadAddrGlobal: // la.global rd, symbol + case LoongArch::LoadAddrGlobal_Alias: // la rd, symbol + case LoongArch::LoadAddrTLS_IE: // la.tls.ie rd, symbol + case LoongArch::LoadAddrTLS_LD: // la.tls.ld rd, symbol + case LoongArch::LoadAddrTLS_GD: // la.tls.gd rd, symbol + MIB2 = BuildMI(BB, I, DL, TII->get(LoOp), DestReg) + .addReg(DestReg); + if (MO.isJTI()) { + MIB1.addJumpTableIndex(MO.getIndex(), HiFlag); + MIB2.addJumpTableIndex(MO.getIndex(), LoFlag); + } else if (MO.isBlockAddress()) { + MIB1.addBlockAddress(MO.getBlockAddress(), 0, HiFlag); + MIB2.addBlockAddress(MO.getBlockAddress(), 0, LoFlag); + } else { + MIB1.addDisp(MO, 0, HiFlag); + MIB2.addDisp(MO, 0, LoFlag); + 
} + break; + + case LoongArch::LoadAddrLocalRR: //la.local rd, rs, symbol + case LoongArch::LoadAddrGlobalRR: // la.global rd, rs, symbol + case LoongArch::LoadAddrTLS_IE_RR: // la.tls.ie rd, rs, symbol + case LoongArch::LoadAddrTLS_LD_RR: // la.tls.ld rd, rs, symbol + case LoongArch::LoadAddrTLS_GD_RR: // la.tls.gd rd, rs, symbol + TmpReg = MI.getOperand(MI.getNumOperands()-1).getReg(); + MIB2 = BuildMI(BB, I, DL, TII->get(LoOp), TmpReg) + .addReg(TmpReg); + MIB3 = BuildMI(BB, I, DL, TII->get(HigherOp), TmpReg); + MIB4 = BuildMI(BB, I, DL, TII->get(HighestOp), TmpReg) + .addReg(TmpReg); + MIB5 = BuildMI(BB, I, DL, TII->get(LastOp), DestReg) + .addReg(DestReg) + .addReg(TmpReg); + if (MO.isJTI()) { + MIB1.addJumpTableIndex(MO.getIndex(), HiFlag); + MIB2.addJumpTableIndex(MO.getIndex(), LoFlag); + MIB3.addJumpTableIndex(MO.getIndex(), HigherFlag); + MIB4.addJumpTableIndex(MO.getIndex(), HighestFlag); + } else if (MO.isBlockAddress()) { + MIB1.addBlockAddress(MO.getBlockAddress(), 0, HiFlag); + MIB2.addBlockAddress(MO.getBlockAddress(), 0, LoFlag); + MIB3.addBlockAddress(MO.getBlockAddress(), 0, HigherFlag); + MIB4.addBlockAddress(MO.getBlockAddress(), 0, HighestFlag); + } else { + MIB1.addDisp(MO, 0, HiFlag); + MIB2.addDisp(MO, 0, LoFlag); + MIB3.addDisp(MO, 0, HigherFlag); + MIB4.addDisp(MO, 0, HighestFlag); + } + break; + case LoongArch::LoadAddrTLS_LE: // la.tls.le rd, symbol + MIB2 = BuildMI(BB, I, DL, TII->get(LoOp), DestReg) + .addReg(DestReg); + MIB3 = BuildMI(BB, I, DL, TII->get(HigherOp), DestReg); + MIB4 = BuildMI(BB, I, DL, TII->get(HighestOp), DestReg) + .addReg(DestReg); + if (MO.isJTI()) { + MIB1.addJumpTableIndex(MO.getIndex(), HiFlag); + MIB2.addJumpTableIndex(MO.getIndex(), LoFlag); + MIB3.addJumpTableIndex(MO.getIndex(), HigherFlag); + MIB4.addJumpTableIndex(MO.getIndex(), HighestFlag); + } else if (MO.isBlockAddress()) { + MIB1.addBlockAddress(MO.getBlockAddress(), 0, HiFlag); + MIB2.addBlockAddress(MO.getBlockAddress(), 0, LoFlag); + MIB3.addBlockAddress(MO.getBlockAddress(), 0, HigherFlag); + MIB4.addBlockAddress(MO.getBlockAddress(), 0, HighestFlag); + } else { + MIB1.addDisp(MO, 0, HiFlag); + MIB2.addDisp(MO, 0, LoFlag); + MIB3.addDisp(MO, 0, HigherFlag); + MIB4.addDisp(MO, 0, HighestFlag); + } + break; + default: + break; + } + + MI.eraseFromParent(); + + return true; +} + +bool LoongArchExpandPseudo::expandPseudoTailCall( + MachineBasicBlock &BB, MachineBasicBlock::iterator I) { + + MachineInstr &MI = *I; + DebugLoc DL = MI.getDebugLoc(); + + const MachineOperand &MO = MI.getOperand(0); + + unsigned NoFlag = LoongArchII::MO_NO_FLAG; + + MachineInstrBuilder MIB = + BuildMI(BB, I, DL, TII->get(LoongArch::PseudoTailReturn)); + + if (MO.isSymbol()) { + MIB.addExternalSymbol(MO.getSymbolName(), NoFlag); + } else { + MIB.addDisp(MO, 0, NoFlag); + } + + MI.eraseFromParent(); + + return true; +} + +bool LoongArchExpandPseudo::expandPseudoCall(MachineBasicBlock &BB, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI) { + MachineFunction *MF = BB.getParent(); + MachineInstr &MI = *I; + DebugLoc DL = MI.getDebugLoc(); + CodeModel::Model M = MF->getTarget().getCodeModel(); + Reloc::Model RM = MF->getTarget().getRelocationModel(); + + unsigned Ra = LoongArch::RA_64; + const MachineOperand &MO = MI.getOperand(0); + unsigned HiFlag, LoFlag, HigherFlag, HighestFlag, NoFlag; + + NoFlag = LoongArchII::MO_NO_FLAG; + + if (RM == Reloc::Static) { // for jit + MachineInstrBuilder MIB1, MIB2, MIB3, MIB4, MIB5; + + HiFlag = LoongArchII::MO_ABS_HI; + LoFlag = 
LoongArchII::MO_ABS_LO;
+    HigherFlag = LoongArchII::MO_ABS_HIGHER;
+    HighestFlag = LoongArchII::MO_ABS_HIGHEST;
+    // lu12i.w + ori + lu32i.d + lu52i.d + jirl
+
+    MIB1 = BuildMI(BB, I, DL, TII->get(LoongArch::LU12I_W), Ra);
+    MIB2 = BuildMI(BB, I, DL, TII->get(LoongArch::ORI), Ra)
+               .addReg(Ra);
+    MIB3 = BuildMI(BB, I, DL, TII->get(LoongArch::LU32I_D), Ra);
+    MIB4 = BuildMI(BB, I, DL, TII->get(LoongArch::LU52I_D), Ra)
+               .addReg(Ra);
+    MIB5 =
+        BuildMI(BB, I, DL, TII->get(LoongArch::JIRL), Ra).addReg(Ra).addImm(0);
+    if (MO.isSymbol()) {
+      MIB1.addExternalSymbol(MO.getSymbolName(), HiFlag);
+      MIB2.addExternalSymbol(MO.getSymbolName(), LoFlag);
+      MIB3.addExternalSymbol(MO.getSymbolName(), HigherFlag);
+      MIB4.addExternalSymbol(MO.getSymbolName(), HighestFlag);
+    } else {
+      MIB1.addDisp(MO, 0, HiFlag);
+      MIB2.addDisp(MO, 0, LoFlag);
+      MIB3.addDisp(MO, 0, HigherFlag);
+      MIB4.addDisp(MO, 0, HighestFlag);
+    }
+  } else {
+    // bl
+    MachineInstrBuilder MIB1;
+    MIB1 = BuildMI(BB, I, DL, TII->get(LoongArch::BL));
+    if (MO.isSymbol()) {
+      MIB1.addExternalSymbol(MO.getSymbolName(), NoFlag);
+    } else {
+      MIB1.addDisp(MO, 0, NoFlag);
+    }
+  }
+
+  MI.eraseFromParent();
+
+  return true;
+}
+
+bool LoongArchExpandPseudo::expandPseudoTEQ(MachineBasicBlock &BB,
+                                            MachineBasicBlock::iterator I,
+                                            MachineBasicBlock::iterator &NMBBI) {
+  MachineInstr &MI = *I;
+  DebugLoc DL = MI.getDebugLoc();
+
+  unsigned Divisor = MI.getOperand(0).getReg();
+  unsigned BneOp = LoongArch::BNE;
+  unsigned Zero = LoongArch::ZERO_64;
+
+  // bne $Divisor, $zero, 8 (skip the trap when the divisor is non-zero)
+  BuildMI(BB, I, DL, TII->get(BneOp), Divisor)
+      .addReg(Zero)
+      .addImm(8);
+  // break 7
+  BuildMI(BB, I, DL, TII->get(LoongArch::BREAK))
+      .addImm(7);
+
+  MI.eraseFromParent();
+
+  return true;
+}
+
+bool LoongArchExpandPseudo::expandMI(MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator MBBI,
+                                     MachineBasicBlock::iterator &NMBB) {
+
+  bool Modified = false;
+
+  switch (MBBI->getOpcode()) {
+  case LoongArch::PseudoTEQ:
+    return expandPseudoTEQ(MBB, MBBI, NMBB);
+  case LoongArch::PseudoCall:
+    return expandPseudoCall(MBB, MBBI, NMBB);
+  case LoongArch::PseudoTailCall:
+    return expandPseudoTailCall(MBB, MBBI);
+  case LoongArch::LoadAddrLocal:
+  case LoongArch::LoadAddrLocalRR:
+  case LoongArch::LoadAddrGlobal:
+  case LoongArch::LoadAddrGlobalRR:
+  case LoongArch::LoadAddrGlobal_Alias:
+  case LoongArch::LoadAddrTLS_LD:
+  case LoongArch::LoadAddrTLS_LD_RR:
+  case LoongArch::LoadAddrTLS_GD:
+  case LoongArch::LoadAddrTLS_GD_RR:
+  case LoongArch::LoadAddrTLS_IE:
+  case LoongArch::LoadAddrTLS_IE_RR:
+  case LoongArch::LoadAddrTLS_LE:
+    return expandLoadAddr(MBB, MBBI, NMBB);
+  case LoongArch::ATOMIC_CMP_SWAP_I32_POSTRA:
+  case LoongArch::ATOMIC_CMP_SWAP_I64_POSTRA:
+    return expandAtomicCmpSwap(MBB, MBBI, NMBB);
+  case LoongArch::ATOMIC_CMP_SWAP_I8_POSTRA:
+  case LoongArch::ATOMIC_CMP_SWAP_I16_POSTRA:
+    return expandAtomicCmpSwapSubword(MBB, MBBI, NMBB);
+  case LoongArch::ATOMIC_SWAP_I8_POSTRA:
+  case LoongArch::ATOMIC_SWAP_I16_POSTRA:
+  case LoongArch::ATOMIC_LOAD_NAND_I8_POSTRA:
+  case LoongArch::ATOMIC_LOAD_NAND_I16_POSTRA:
+  case LoongArch::ATOMIC_LOAD_ADD_I8_POSTRA:
+  case LoongArch::ATOMIC_LOAD_ADD_I16_POSTRA:
+  case LoongArch::ATOMIC_LOAD_SUB_I8_POSTRA:
+  case LoongArch::ATOMIC_LOAD_SUB_I16_POSTRA:
+  case LoongArch::ATOMIC_LOAD_AND_I8_POSTRA:
+  case LoongArch::ATOMIC_LOAD_AND_I16_POSTRA:
+  case LoongArch::ATOMIC_LOAD_OR_I8_POSTRA:
+  case LoongArch::ATOMIC_LOAD_OR_I16_POSTRA:
+  case LoongArch::ATOMIC_LOAD_XOR_I8_POSTRA:
+  case LoongArch::ATOMIC_LOAD_XOR_I16_POSTRA:
+  case LoongArch::ATOMIC_LOAD_MAX_I8_POSTRA:
+  case LoongArch::ATOMIC_LOAD_MAX_I16_POSTRA:
+  case LoongArch::ATOMIC_LOAD_MIN_I8_POSTRA:
+  case LoongArch::ATOMIC_LOAD_MIN_I16_POSTRA:
+  case LoongArch::ATOMIC_LOAD_UMAX_I8_POSTRA:
+  case LoongArch::ATOMIC_LOAD_UMAX_I16_POSTRA:
+  case LoongArch::ATOMIC_LOAD_UMIN_I8_POSTRA:
+  case LoongArch::ATOMIC_LOAD_UMIN_I16_POSTRA:
+    return expandAtomicBinOpSubword(MBB, MBBI, NMBB);
+  case LoongArch::XINSERT_B_VIDX_PSEUDO_POSTRA:
+  case LoongArch::XINSERT_B_VIDX64_PSEUDO_POSTRA:
+    return expandXINSERT_BOp(MBB, MBBI, NMBB);
+  case LoongArch::INSERT_H_VIDX64_PSEUDO_POSTRA:
+    return expandINSERT_HOp(MBB, MBBI, NMBB);
+  case LoongArch::XINSERT_FW_VIDX_PSEUDO_POSTRA:
+  case LoongArch::XINSERT_FW_VIDX64_PSEUDO_POSTRA:
+    return expandXINSERT_FWOp(MBB, MBBI, NMBB);
+  case LoongArch::ATOMIC_LOAD_ADD_I32_POSTRA:
+  case LoongArch::ATOMIC_LOAD_SUB_I32_POSTRA:
+  case LoongArch::ATOMIC_LOAD_AND_I32_POSTRA:
+  case LoongArch::ATOMIC_LOAD_OR_I32_POSTRA:
+  case LoongArch::ATOMIC_LOAD_XOR_I32_POSTRA:
+  case LoongArch::ATOMIC_LOAD_NAND_I32_POSTRA:
+  case LoongArch::ATOMIC_SWAP_I32_POSTRA:
+  case LoongArch::ATOMIC_LOAD_MAX_I32_POSTRA:
+  case LoongArch::ATOMIC_LOAD_MIN_I32_POSTRA:
+  case LoongArch::ATOMIC_LOAD_UMAX_I32_POSTRA:
+  case LoongArch::ATOMIC_LOAD_UMIN_I32_POSTRA:
+    return expandAtomicBinOp(MBB, MBBI, NMBB, 4);
+  case LoongArch::ATOMIC_LOAD_ADD_I64_POSTRA:
+  case LoongArch::ATOMIC_LOAD_SUB_I64_POSTRA:
+  case LoongArch::ATOMIC_LOAD_AND_I64_POSTRA:
+  case LoongArch::ATOMIC_LOAD_OR_I64_POSTRA:
+  case LoongArch::ATOMIC_LOAD_XOR_I64_POSTRA:
+  case LoongArch::ATOMIC_LOAD_NAND_I64_POSTRA:
+  case LoongArch::ATOMIC_SWAP_I64_POSTRA:
+  case LoongArch::ATOMIC_LOAD_MAX_I64_POSTRA:
+  case LoongArch::ATOMIC_LOAD_MIN_I64_POSTRA:
+  case LoongArch::ATOMIC_LOAD_UMAX_I64_POSTRA:
+  case LoongArch::ATOMIC_LOAD_UMIN_I64_POSTRA:
+    return expandAtomicBinOp(MBB, MBBI, NMBB, 8);
+  default:
+    return Modified;
+  }
+}
+
+bool LoongArchExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
+  bool Modified = false;
+
+  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+  while (MBBI != E) {
+    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
+    Modified |= expandMI(MBB, MBBI, NMBBI);
+    MBBI = NMBBI;
+  }
+
+  return Modified;
+}
+
+bool LoongArchExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
+  STI = &static_cast<const LoongArchSubtarget &>(MF.getSubtarget());
+  TII = STI->getInstrInfo();
+
+  bool Modified = false;
+  for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E;
+       ++MFI)
+    Modified |= expandMBB(*MFI);
+
+  if (Modified)
+    MF.RenumberBlocks();
+
+  return Modified;
+}
+
+/// createLoongArchExpandPseudoPass - returns an instance of the pseudo
+/// instruction expansion pass.
+FunctionPass *llvm::createLoongArchExpandPseudoPass() {
+  return new LoongArchExpandPseudo();
+}
diff --git a/lib/Target/LoongArch/LoongArchFrameLowering.cpp b/lib/Target/LoongArch/LoongArchFrameLowering.cpp
new file mode 100644
index 00000000..e3dfe704
--- /dev/null
+++ b/lib/Target/LoongArch/LoongArchFrameLowering.cpp
@@ -0,0 +1,561 @@
+//===-- LoongArchFrameLowering.cpp - LoongArch Frame Information ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the LoongArch implementation of the TargetFrameLowering
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchFrameLowering.h"
+#include "MCTargetDesc/LoongArchBaseInfo.h"
+#include "MCTargetDesc/LoongArchABIInfo.h"
+#include "LoongArchInstrInfo.h"
+#include "LoongArchMachineFunction.h"
+#include "LoongArchTargetMachine.h"
+#include "LoongArchRegisterInfo.h"
+#include "LoongArchSubtarget.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Function.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetOptions.h"
+#include
+#include
+#include
+#include
+
+using namespace llvm;
+
+// We would like to split the SP adjustment to reduce the prologue/epilogue
+// size, as shown in the instructions below. This way, the offset of each
+// callee-saved register store can fit in a single instruction.
+uint64_t
+LoongArchFrameLowering::getFirstSPAdjustAmount(const MachineFunction &MF,
+                                               bool IsPrologue) const {
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+  uint64_t StackSize = MFI.getStackSize();
+
+  // Return the FirstSPAdjustAmount if the StackSize cannot fit in a signed
+  // 12-bit immediate and there exists a callee-saved register that needs to
+  // be pushed.
+  if (!isInt<12>(StackSize)) {
+    // FirstSPAdjustAmount is chosen as (2048 - StackAlign) because 2048 would
+    // cause "sp = sp + 2048" in the epilogue to be split into multiple
+    // instructions. An offset smaller than 2048 fits in a single load/store
+    // instruction, and we have to stick with the stack alignment.
+    return CSI.size() > 0 ? 2048 - getStackAlign().value()
+                          : (IsPrologue ? 2048 : 0);
+  }
+  return 0;
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Stack Frame Processing methods
+// +----------------------------+
+//
+// The stack is allocated by decrementing the stack pointer on the first
+// instruction of a function prologue. Once decremented, all stack references
+// are done through a positive offset from the stack/frame pointer, so the
+// stack is considered to grow up! Otherwise terrible hacks would have to be
+// made to get this stack ABI compliant :)
+//
+// The stack frame required by the ABI (after call):
+//          Offset
+//
+//          0               ----------
+//          4               Args to pass
+//          .               Alloca allocations
+//          .               Local Area
+//          .               CPU "Callee Saved" Registers
+//          .               saved FP
+//          .               saved RA
+//          .               FPU "Callee Saved" Registers
+//          StackSize       ----------
+//
+// Offset - offset from sp after stack allocation on function prologue
+//
+// The stack pointer is decremented/incremented by the stack size in the
+// prologue/epilogue.
+//
+// References to the previous stack (to obtain arguments) are done
+// with offsets that exceed the stack size: (stacksize + (4 * (num_arg - 1)))
+//
+// Examples:
+// - reference to the actual stack frame
+//   for any local area var there is something like: FI >= 0, StackOffset: 4
+//     st.w REGX, SP, 4
+//
+// - reference to previous stack frame
+//   suppose there's a load to the 5th argument: FI < 0, StackOffset: 16.
+//   The emitted instruction will be something like:
+//     ld.w REGX, SP, 16+StackSize
+//
+// Since the total stack size is unknown on LowerFormalArguments, all
+// stack references (ObjectOffset) created to reference the function
+// arguments are negative numbers. This way, on eliminateFrameIndex it's
+// possible to detect those references and the offsets are adjusted to
+// their real location.
+//
+//===----------------------------------------------------------------------===//
+
+LoongArchFrameLowering::LoongArchFrameLowering(const LoongArchSubtarget &STI)
+    : TargetFrameLowering(StackGrowsDown, STI.getStackAlignment(), 0,
+                          STI.getStackAlignment()), STI(STI) {}
+
+void LoongArchFrameLowering::emitPrologue(MachineFunction &MF,
+                                          MachineBasicBlock &MBB) const {
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  LoongArchFunctionInfo *LoongArchFI = MF.getInfo<LoongArchFunctionInfo>();
+
+  const LoongArchInstrInfo &TII =
+      *static_cast<const LoongArchInstrInfo *>(STI.getInstrInfo());
+  const LoongArchRegisterInfo &RegInfo =
+      *static_cast<const LoongArchRegisterInfo *>(STI.getRegisterInfo());
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  DebugLoc dl;
+  LoongArchABIInfo ABI = STI.getABI();
+  unsigned SP = ABI.GetStackPtr();
+  unsigned FP = ABI.GetFramePtr();
+  unsigned ZERO = ABI.GetNullPtr();
+  unsigned MOVE = ABI.GetGPRMoveOp();
+  unsigned ADDI = ABI.GetPtrAddiOp();
+  unsigned AND = ABI.IsLP64() ? LoongArch::AND : LoongArch::AND32;
+  unsigned SLLI = ABI.IsLP64() ? LoongArch::SLLI_D : LoongArch::SLLI_W;
+
+  const TargetRegisterClass *RC = ABI.ArePtrs64bit() ?
+        &LoongArch::GPR64RegClass : &LoongArch::GPR32RegClass;
+
+  // First, compute final stack size.
+  uint64_t StackSize = MFI.getStackSize();
+  uint64_t RealStackSize = StackSize;
+
+  // No need to allocate space on the stack.
+  if (StackSize == 0 && !MFI.adjustsStack())
+    return;
+
+  uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF, true);
+  uint64_t SecondSPAdjustAmount = RealStackSize - FirstSPAdjustAmount;
+  // Split the SP adjustment to reduce the offsets of callee saved spill.
+  if (FirstSPAdjustAmount)
+    StackSize = FirstSPAdjustAmount;
+
+  // Adjust stack.
+  TII.adjustReg(SP, SP, -StackSize, MBB, MBBI, MachineInstr::FrameSetup);
+  if (FirstSPAdjustAmount != 2048 || SecondSPAdjustAmount == 0) {
+    // Emit ".cfi_def_cfa_offset StackSize".
+    unsigned CFIIndex =
+        MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize));
+    BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+        .addCFIIndex(CFIIndex);
+  }
+
+  MachineModuleInfo &MMI = MF.getMMI();
+  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
+
+  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+
+  if (!CSI.empty()) {
+    // Find the instruction past the last instruction that saves a callee-saved
+    // register to the stack.
+    for (unsigned i = 0; i < CSI.size(); ++i)
+      ++MBBI;
+
+    // Iterate over the list of callee-saved registers and emit .cfi_offset
+    // directives.
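+    // Each .cfi_offset ties a callee-saved register to its spill slot so the
+    // unwinder can restore it, e.g. ".cfi_offset 1, -8" records that $ra
+    // (DWARF register 1) was saved 8 bytes below the CFA.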
+    for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
+         E = CSI.end(); I != E; ++I) {
+      int64_t Offset = MFI.getObjectOffset(I->getFrameIdx());
+      unsigned Reg = I->getReg();
+      unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+          nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
+      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+          .addCFIIndex(CFIIndex);
+    }
+  }
+
+  if (LoongArchFI->callsEhReturn()) {
+    // Insert instructions that spill eh data registers.
+    for (int I = 0; I < 4; ++I) {
+      if (!MBB.isLiveIn(ABI.GetEhDataReg(I)))
+        MBB.addLiveIn(ABI.GetEhDataReg(I));
+      TII.storeRegToStackSlot(MBB, MBBI, ABI.GetEhDataReg(I), false,
+                              LoongArchFI->getEhDataRegFI(I), RC, &RegInfo);
+    }
+
+    // Emit .cfi_offset directives for eh data registers.
+    for (int I = 0; I < 4; ++I) {
+      int64_t Offset = MFI.getObjectOffset(LoongArchFI->getEhDataRegFI(I));
+      unsigned Reg = MRI->getDwarfRegNum(ABI.GetEhDataReg(I), true);
+      unsigned CFIIndex = MF.addFrameInst(
+          MCCFIInstruction::createOffset(nullptr, Reg, Offset));
+      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+          .addCFIIndex(CFIIndex);
+    }
+  }
+
+  // If the frame pointer is enabled, set it to point to the stack pointer on
+  // entry.
+  if (hasFP(MF)) {
+    // Insert instruction "addi.w/d $fp, $sp, StackSize" at this location.
+    TII.adjustReg(FP, SP, StackSize - LoongArchFI->getVarArgsSaveSize(), MBB,
+                  MBBI, MachineInstr::FrameSetup);
+    // Emit ".cfi_def_cfa $fp, $varargs_size".
+    unsigned CFIIndex = MF.addFrameInst(
+        MCCFIInstruction::cfiDefCfa(nullptr, MRI->getDwarfRegNum(FP, true),
+                                    LoongArchFI->getVarArgsSaveSize()));
+    BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+        .addCFIIndex(CFIIndex)
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
+
+  // Emit the second SP adjustment after saving callee saved registers.
+  if (FirstSPAdjustAmount && SecondSPAdjustAmount) {
+    if (hasFP(MF)) {
+      assert(SecondSPAdjustAmount > 0 &&
+             "SecondSPAdjustAmount should be greater than zero");
+      TII.adjustReg(SP, SP, -SecondSPAdjustAmount, MBB, MBBI,
+                    MachineInstr::FrameSetup);
+    } else {
+      // FIXME: RegScavenger will place the spill instruction before the
+      // prologue if a VReg is created in the prologue. This will pollute the
+      // caller's stack data. Therefore, until there is a better way, we just
+      // use the `addi.w/d` instruction for stack adjustment to ensure that
+      // VReg will not be created.
+      for (int Val = SecondSPAdjustAmount; Val > 0; Val -= 2048)
+        BuildMI(MBB, MBBI, dl, TII.get(ADDI), SP)
+            .addReg(SP)
+            .addImm(Val < 2048 ? -Val : -2048)
+            .setMIFlag(MachineInstr::FrameSetup);
+      // If we are using a frame-pointer, and thus emitted ".cfi_def_cfa fp, 0",
+      // don't emit an sp-based .cfi_def_cfa_offset.
+      // Emit ".cfi_def_cfa_offset StackSize"
+      unsigned CFIIndex = MF.addFrameInst(
+          MCCFIInstruction::cfiDefCfaOffset(nullptr, MFI.getStackSize()));
+      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+          .addCFIIndex(CFIIndex)
+          .setMIFlag(MachineInstr::FrameSetup);
+    }
+  }
+
+  // Realign stack.
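+  // For example, with a 16-byte maximum alignment this emits:
+  //   addi.w/d  $vr, $zero, -16
+  //   and       $sp, $sp, $vr
+  // For alignments above 2048 the mask is instead materialized with a shift,
+  // since -Alignment no longer fits in the 12-bit addi immediate.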
+  if (hasFP(MF)) {
+    if (RegInfo.needsStackRealignment(MF)) {
+      // addi.w/d $Reg, $zero, -MaxAlignment
+      // and      $sp, $sp, $Reg
+      unsigned VR = MF.getRegInfo().createVirtualRegister(RC);
+      assert((Log2(MFI.getMaxAlign()) < 16) &&
+             "Function's alignment size requirement is not supported.");
+      int MaxAlign = -(int)MFI.getMaxAlign().value();
+      int Alignment = (int)MFI.getMaxAlign().value();
+
+      if (Alignment <= 2048) {
+        BuildMI(MBB, MBBI, dl, TII.get(ADDI), VR).addReg(ZERO).addImm(MaxAlign);
+        BuildMI(MBB, MBBI, dl, TII.get(AND), SP).addReg(SP).addReg(VR);
+      } else {
+        const unsigned NrBitsToZero = countTrailingZeros((unsigned)Alignment);
+        BuildMI(MBB, MBBI, dl, TII.get(ADDI), VR).addReg(ZERO).addImm(-1);
+        BuildMI(MBB, MBBI, dl, TII.get(SLLI), VR)
+            .addReg(VR)
+            .addImm(NrBitsToZero);
+        BuildMI(MBB, MBBI, dl, TII.get(AND), SP).addReg(SP).addReg(VR);
+      }
+
+      if (hasBP(MF)) {
+        // move $s7, $sp
+        unsigned BP = STI.isABI_LP64() ? LoongArch::S7_64 : LoongArch::S7;
+        BuildMI(MBB, MBBI, dl, TII.get(MOVE), BP).addReg(SP).addReg(ZERO);
+      }
+    }
+  }
+}
+
+void LoongArchFrameLowering::emitEpilogue(MachineFunction &MF,
+                                          MachineBasicBlock &MBB) const {
+  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  LoongArchFunctionInfo *LoongArchFI = MF.getInfo<LoongArchFunctionInfo>();
+
+  const LoongArchInstrInfo &TII =
+      *static_cast<const LoongArchInstrInfo *>(STI.getInstrInfo());
+  const LoongArchRegisterInfo &RegInfo =
+      *static_cast<const LoongArchRegisterInfo *>(STI.getRegisterInfo());
+
+  DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+  LoongArchABIInfo ABI = STI.getABI();
+  unsigned SP = ABI.GetStackPtr();
+  unsigned FP = ABI.GetFramePtr();
+
+  // Get the number of bytes from FrameInfo.
+  uint64_t StackSize = MFI.getStackSize();
+
+  // Restore the stack pointer.
+  if (hasFP(MF) &&
+      (RegInfo.needsStackRealignment(MF) || MFI.hasVarSizedObjects())) {
+    // Find the first instruction that restores a callee-saved register.
+    MachineBasicBlock::iterator I = MBBI;
+    for (unsigned i = 0; i < MFI.getCalleeSavedInfo().size(); ++i)
+      --I;
+    TII.adjustReg(SP, FP, -(StackSize - LoongArchFI->getVarArgsSaveSize()), MBB,
+                  I);
+  }
+
+  uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF);
+  if (FirstSPAdjustAmount) {
+    uint64_t SecondSPAdjustAmount = MFI.getStackSize() - FirstSPAdjustAmount;
+    assert(SecondSPAdjustAmount > 0 &&
+           "SecondSPAdjustAmount should be greater than zero");
+    // Find the first instruction that restores a callee-saved register.
+    MachineBasicBlock::iterator I = MBBI;
+    for (unsigned i = 0; i < MFI.getCalleeSavedInfo().size(); ++i)
+      --I;
+
+    TII.adjustReg(SP, SP, SecondSPAdjustAmount, MBB, I);
+  }
+
+  if (LoongArchFI->callsEhReturn()) {
+    const TargetRegisterClass *RC =
+        ABI.ArePtrs64bit() ? &LoongArch::GPR64RegClass : &LoongArch::GPR32RegClass;
+
+    // Find the first instruction that restores a callee-saved register.
+    MachineBasicBlock::iterator I = MBBI;
+    for (unsigned i = 0; i < MFI.getCalleeSavedInfo().size(); ++i)
+      --I;
+
+    // Insert instructions that restore eh data registers.
+    for (int J = 0; J < 4; ++J)
+      TII.loadRegFromStackSlot(MBB, I, ABI.GetEhDataReg(J),
+                               LoongArchFI->getEhDataRegFI(J), RC, &RegInfo);
+  }
+
+  if (FirstSPAdjustAmount)
+    StackSize = FirstSPAdjustAmount;
+
+  if (!StackSize)
+    return;
+
+  // Final adjust stack.
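+  // A single "addi.w/d $sp, $sp, StackSize" suffices when StackSize fits in a
+  // signed 12-bit immediate; adjustReg is expected to split larger amounts.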
+  TII.adjustReg(SP, SP, StackSize, MBB, MBBI);
+}
+
+StackOffset
+LoongArchFrameLowering::getFrameIndexReference(const MachineFunction &MF,
+                                               int FI,
+                                               Register &FrameReg) const {
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
+  LoongArchABIInfo ABI = STI.getABI();
+  const auto *LoongArchFI = MF.getInfo<LoongArchFunctionInfo>();
+
+  // Callee-saved registers should be referenced relative to the stack
+  // pointer (positive offset), otherwise use the frame pointer (negative
+  // offset).
+  const auto &CSI = MFI.getCalleeSavedInfo();
+  int MinCSFI = 0;
+  int MaxCSFI = -1;
+  StackOffset Offset =
+      StackOffset::getFixed(MFI.getObjectOffset(FI) - getOffsetOfLocalArea() +
+                            MFI.getOffsetAdjustment());
+  uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF);
+
+  if (CSI.size()) {
+    MinCSFI = CSI[0].getFrameIdx();
+    MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
+  }
+
+  bool EhDataRegFI = LoongArchFI->isEhDataRegFI(FI);
+  if ((FI >= MinCSFI && FI <= MaxCSFI) || EhDataRegFI) {
+    FrameReg = ABI.GetStackPtr();
+
+    if (FirstSPAdjustAmount)
+      Offset += StackOffset::getFixed(FirstSPAdjustAmount);
+    else
+      Offset += StackOffset::getFixed(MFI.getStackSize());
+  } else if (RI->needsStackRealignment(MF) && !MFI.isFixedObjectIndex(FI)) {
+    // If the stack was realigned, the frame pointer is set in order to allow
+    // SP to be restored, so we need another base register to record the stack
+    // after realignment.
+    FrameReg = hasBP(MF) ? ABI.GetBasePtr() : ABI.GetStackPtr();
+    Offset += StackOffset::getFixed(MFI.getStackSize());
+  } else {
+    FrameReg = RI->getFrameRegister(MF);
+    if (hasFP(MF))
+      Offset += StackOffset::getFixed(LoongArchFI->getVarArgsSaveSize());
+    else
+      Offset += StackOffset::getFixed(MFI.getStackSize());
+  }
+  return Offset;
+}
+
+bool LoongArchFrameLowering::spillCalleeSavedRegisters(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
+  MachineFunction *MF = MBB.getParent();
+  const TargetInstrInfo &TII = *STI.getInstrInfo();
+
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    // Add the callee-saved register as live-in. Do not add if the register is
+    // RA and the return address is taken, because it has already been added in
+    // method LoongArchTargetLowering::lowerRETURNADDR.
+    // It's killed at the spill, unless the register is RA and the return
+    // address is taken.
+    unsigned Reg = CSI[i].getReg();
+    bool IsRAAndRetAddrIsTaken = (Reg == LoongArch::RA || Reg == LoongArch::RA_64)
+        && MF->getFrameInfo().isReturnAddressTaken();
+    if (!IsRAAndRetAddrIsTaken)
+      MBB.addLiveIn(Reg);
+
+    // Insert the spill to the stack frame.
+    bool IsKill = !IsRAAndRetAddrIsTaken;
+    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+    TII.storeRegToStackSlot(MBB, MI, Reg, IsKill,
+                            CSI[i].getFrameIdx(), RC, TRI);
+  }
+
+  return true;
+}
+
+bool
+LoongArchFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  // Reserve the call frame if the size of the maximum call frame fits into the
+  // 12-bit immediate field and there are no variable sized objects on the
+  // stack. Make sure the second register scavenger spill slot can be accessed
+  // with one instruction.
+  return isInt<12>(MFI.getMaxCallFrameSize() + getStackAlignment()) &&
+         !MFI.hasVarSizedObjects();
+}
+
+/// Mark \p Reg and all registers aliasing it in the bitset.
+static void setAliasRegs(MachineFunction &MF, BitVector &SavedRegs, + unsigned Reg) { + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + SavedRegs.set(*AI); +} + +void LoongArchFrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + LoongArchFunctionInfo *LoongArchFI = MF.getInfo(); + LoongArchABIInfo ABI = STI.getABI(); + unsigned FP = ABI.GetFramePtr(); + unsigned BP = ABI.IsLP64() ? LoongArch::S7_64 : LoongArch::S7; + + // Mark $fp as used if function has dedicated frame pointer. + if (hasFP(MF)) + setAliasRegs(MF, SavedRegs, FP); + // Mark $s7 as used if function has dedicated base pointer. + if (hasBP(MF)) + setAliasRegs(MF, SavedRegs, BP); + + // Create spill slots for eh data registers if function calls eh_return. + if (LoongArchFI->callsEhReturn()) + LoongArchFI->createEhDataRegsFI(); + + // Set scavenging frame index if necessary. + uint64_t MaxSPOffset = estimateStackSize(MF); + + // If there is a variable + // sized object on the stack, the estimation cannot account for it. + if (isIntN(12, MaxSPOffset) && + !MF.getFrameInfo().hasVarSizedObjects()) + return; + + const TargetRegisterClass &RC = + ABI.ArePtrs64bit() ? LoongArch::GPR64RegClass : LoongArch::GPR32RegClass; + int FI = MF.getFrameInfo().CreateStackObject(TRI->getSpillSize(RC), + TRI->getSpillAlign(RC), false); + RS->addScavengingFrameIndex(FI); +} + +// hasFP - Return true if the specified function should have a dedicated frame +// pointer register. This is true if the function has variable sized allocas, +// if it needs dynamic stack realignment, if frame pointer elimination is +// disabled, or if the frame address is taken. +bool LoongArchFrameLowering::hasFP(const MachineFunction &MF) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + + return MF.getTarget().Options.DisableFramePointerElim(MF) || + MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() || + TRI->needsStackRealignment(MF); +} + +bool LoongArchFrameLowering::hasBP(const MachineFunction &MF) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + + return MFI.hasVarSizedObjects() && TRI->needsStackRealignment(MF); +} + +// Estimate the size of the stack, including the incoming arguments. We need to +// account for register spills, local objects, reserved call frame and incoming +// arguments. This is required to determine the largest possible positive offset +// from $sp so that it can be determined if an emergency spill slot for stack +// addresses is required. +uint64_t LoongArchFrameLowering:: +estimateStackSize(const MachineFunction &MF) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); + + int64_t Size = 0; + + // Iterate over fixed sized objects which are incoming arguments. + for (int I = MFI.getObjectIndexBegin(); I != 0; ++I) + if (MFI.getObjectOffset(I) > 0) + Size += MFI.getObjectSize(I); + + // Conservatively assume all callee-saved registers will be saved. 
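+  // (Illustrative: on LP64, saving $ra, $fp and $s0-$s8 makes the loop below
+  // contribute 11 * 8 = 88 bytes; once the total estimate no longer fits in
+  // a simm12, determineCalleeSaves above reserves an emergency scavenging
+  // slot.)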
+  for (const MCPhysReg *R = TRI.getCalleeSavedRegs(&MF); *R; ++R) {
+    unsigned RegSize = TRI.getSpillSize(*TRI.getMinimalPhysRegClass(*R));
+    Size = alignTo(Size + RegSize, RegSize);
+  }
+
+  // Get the size of the rest of the frame objects and any possible reserved
+  // call frame, accounting for alignment.
+  return Size + MFI.estimateStackSize(MF);
+}
+
+// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions
+MachineBasicBlock::iterator LoongArchFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator I) const {
+  unsigned SP = STI.getABI().IsLP64() ? LoongArch::SP_64 : LoongArch::SP;
+
+  if (!hasReservedCallFrame(MF)) {
+    int64_t Amount = I->getOperand(0).getImm();
+    if (I->getOpcode() == LoongArch::ADJCALLSTACKDOWN)
+      Amount = -Amount;
+
+    STI.getInstrInfo()->adjustReg(SP, SP, Amount, MBB, I);
+  }
+
+  return MBB.erase(I);
+}
diff --git a/lib/Target/LoongArch/LoongArchFrameLowering.h b/lib/Target/LoongArch/LoongArchFrameLowering.h
new file mode 100644
index 00000000..74aabaeb
--- /dev/null
+++ b/lib/Target/LoongArch/LoongArchFrameLowering.h
@@ -0,0 +1,71 @@
+//===-- LoongArchFrameLowering.h - Define frame lowering for LoongArch ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the LoongArch implementation of TargetFrameLowering.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHFRAMELOWERING_H
+#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHFRAMELOWERING_H
+
+#include "LoongArch.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+
+namespace llvm {
+  class LoongArchSubtarget;
+
+class LoongArchFrameLowering : public TargetFrameLowering {
+  const LoongArchSubtarget &STI;
+
+public:
+  explicit LoongArchFrameLowering(const LoongArchSubtarget &STI);
+
+  /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+  /// the function.
+  void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+
+  StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
+                                     Register &FrameReg) const override;
+
+  bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                 MachineBasicBlock::iterator MI,
+                                 ArrayRef<CalleeSavedInfo> CSI,
+                                 const TargetRegisterInfo *TRI) const override;
+
+  bool hasReservedCallFrame(const MachineFunction &MF) const override;
+
+  void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
+                            RegScavenger *RS) const override;
+
+  bool hasFP(const MachineFunction &MF) const override;
+
+  bool hasBP(const MachineFunction &MF) const;
+
+  bool enableShrinkWrapping(const MachineFunction &MF) const override {
+    return true;
+  }
+
+  MachineBasicBlock::iterator
+  eliminateCallFramePseudoInstr(MachineFunction &MF,
+                                MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator I) const override;
+
+  // Get the first stack adjustment amount for splitting the SP adjustment.
+  // Returns 0 if we don't want to split the SP adjustment in the prologue
+  // and epilogue.
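+  // (Illustrative: a 2544-byte frame cannot be allocated by one addi.d,
+  // whose immediate is a simm12, but splitting it as 2048 + 496 lets each
+  // stage use a single addi.d while the callee-saved stores stay in range:
+  //   addi.d  $sp, $sp, -2048
+  //   st.d    $ra, $sp, 2040
+  //   addi.d  $sp, $sp, -496)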
+ uint64_t getFirstSPAdjustAmount(const MachineFunction &MF, + bool IsPrologue = false) const; + +protected: + uint64_t estimateStackSize(const MachineFunction &MF) const; +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp new file mode 100644 index 00000000..6149623c --- /dev/null +++ b/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp @@ -0,0 +1,878 @@ +//===-- LoongArchISelDAGToDAG.cpp - A Dag to Dag Inst Selector for LoongArch --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines an instruction selector for the LoongArch target. +// +//===----------------------------------------------------------------------===// + +#include "LoongArchISelDAGToDAG.h" +#include "LoongArch.h" +#include "LoongArchMachineFunction.h" +#include "LoongArchRegisterInfo.h" +#include "MCTargetDesc/LoongArchAnalyzeImmediate.h" +#include "MCTargetDesc/LoongArchBaseInfo.h" +#include "MCTargetDesc/LoongArchMCTargetDesc.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsLoongArch.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +using namespace llvm; + +#define DEBUG_TYPE "loongarch-isel" + +//===----------------------------------------------------------------------===// +// Instruction Selector Implementation +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// LoongArchDAGToDAGISel - LoongArch specific code to select LoongArch machine +// instructions for SelectionDAG operations. 
+//===----------------------------------------------------------------------===// + +void LoongArchDAGToDAGISel::PostprocessISelDAG() { doPeepholeLoadStoreADDI(); } + +void LoongArchDAGToDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + SelectionDAGISel::getAnalysisUsage(AU); +} + +bool LoongArchDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { + Subtarget = &static_cast(MF.getSubtarget()); + bool Ret = SelectionDAGISel::runOnMachineFunction(MF); + + return Ret; +} + +/// Match frameindex +bool LoongArchDAGToDAGISel::selectAddrFrameIndex(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + if (FrameIndexSDNode *FIN = dyn_cast(Addr)) { + EVT ValTy = Addr.getValueType(); + + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); + Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), ValTy); + return true; + } + return false; +} + +/// Match frameindex+offset and frameindex|offset +bool LoongArchDAGToDAGISel::selectAddrFrameIndexOffset( + SDValue Addr, SDValue &Base, SDValue &Offset, unsigned OffsetBits, + unsigned ShiftAmount = 0) const { + if (CurDAG->isBaseWithConstantOffset(Addr)) { + ConstantSDNode *CN = dyn_cast(Addr.getOperand(1)); + if (isIntN(OffsetBits + ShiftAmount, CN->getSExtValue())) { + EVT ValTy = Addr.getValueType(); + + // If the first operand is a FI, get the TargetFI Node + if (FrameIndexSDNode *FIN = + dyn_cast(Addr.getOperand(0))) + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); + else { + Base = Addr.getOperand(0); + // If base is a FI, additional offset calculation is done in + // eliminateFrameIndex, otherwise we need to check the alignment + const Align Alignment(1ULL << ShiftAmount); + if (!isAligned(Alignment, CN->getZExtValue())) + return false; + } + + Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(Addr), + ValTy); + return true; + } + } + return false; +} + +/// ComplexPattern used on LoongArchInstrInfo +/// Used on LoongArch Load/Store instructions +bool LoongArchDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + // if Address is FI, get the TargetFrameIndex. 
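+  // (e.g. an access to a fixed stack slot, (load (FrameIndex:i64<0>)), is
+  // matched here with Base = TargetFrameIndex and Offset = 0, becoming a
+  // plain ld.d/st.d once frame indices are eliminated.)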
+ if (selectAddrFrameIndex(Addr, Base, Offset)) + return true; + + if (!TM.isPositionIndependent()) { + if ((Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress)) + return false; + } + + // Addresses of the form FI+const or FI|const + if (selectAddrFrameIndexOffset(Addr, Base, Offset, 12)) + return true; + + return false; +} + +/// ComplexPattern used on LoongArchInstrInfo +/// Used on LoongArch Load/Store instructions +bool LoongArchDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + Base = Addr; + Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Addr.getValueType()); + return true; +} + +bool LoongArchDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + return selectAddrRegImm(Addr, Base, Offset) || + selectAddrDefault(Addr, Base, Offset); +} + +bool LoongArchDAGToDAGISel::selectAddrRegImm12(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + if (selectAddrFrameIndex(Addr, Base, Offset)) + return true; + + if (selectAddrFrameIndexOffset(Addr, Base, Offset, 12)) + return true; + + return false; +} + +bool LoongArchDAGToDAGISel::selectIntAddrSImm12(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + if (selectAddrFrameIndex(Addr, Base, Offset)) + return true; + + if (selectAddrFrameIndexOffset(Addr, Base, Offset, 12)) + return true; + + return selectAddrDefault(Addr, Base, Offset); +} + +bool LoongArchDAGToDAGISel::selectIntAddrSImm10Lsl1(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + if (selectAddrFrameIndex(Addr, Base, Offset)) + return true; + + if (selectAddrFrameIndexOffset(Addr, Base, Offset, 10, 1)) + return true; + + return selectAddrDefault(Addr, Base, Offset); +} + +bool LoongArchDAGToDAGISel::selectIntAddrSImm10(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + if (selectAddrFrameIndex(Addr, Base, Offset)) + return true; + + if (selectAddrFrameIndexOffset(Addr, Base, Offset, 10)) + return true; + + return selectAddrDefault(Addr, Base, Offset); +} + +bool LoongArchDAGToDAGISel::selectIntAddrSImm10Lsl2(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + if (selectAddrFrameIndex(Addr, Base, Offset)) + return true; + + if (selectAddrFrameIndexOffset(Addr, Base, Offset, 10, 2)) + return true; + + return selectAddrDefault(Addr, Base, Offset); +} + +bool LoongArchDAGToDAGISel::selectIntAddrSImm11Lsl1(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + if (selectAddrFrameIndex(Addr, Base, Offset)) + return true; + + if (selectAddrFrameIndexOffset(Addr, Base, Offset, 11, 1)) + return true; + + return selectAddrDefault(Addr, Base, Offset); +} + +bool LoongArchDAGToDAGISel::selectIntAddrSImm9Lsl3(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + if (selectAddrFrameIndex(Addr, Base, Offset)) + return true; + + if (selectAddrFrameIndexOffset(Addr, Base, Offset, 9, 3)) + return true; + + return selectAddrDefault(Addr, Base, Offset); +} + +bool LoongArchDAGToDAGISel::selectIntAddrSImm14Lsl2(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + if (selectAddrFrameIndex(Addr, Base, Offset)) + return true; + + if (selectAddrFrameIndexOffset(Addr, Base, Offset, 14, 2)) + return true; + + return false; +} + +bool LoongArchDAGToDAGISel::selectIntAddrSImm10Lsl3(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + if (selectAddrFrameIndex(Addr, Base, Offset)) + return true; + + if (selectAddrFrameIndexOffset(Addr, Base, Offset, 10, 3)) + return true; + + return selectAddrDefault(Addr, Base, Offset); +} + +// Select constant 
vector splats. +// +// Returns true and sets Imm if: +// * LSX is enabled +// * N is a ISD::BUILD_VECTOR representing a constant splat +bool LoongArchDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm, + unsigned MinSizeInBits) const { + if (!(Subtarget->hasLSX() || Subtarget->hasLASX())) + return false; + + BuildVectorSDNode *Node = dyn_cast(N); + + if (!Node) + return false; + + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + + if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, + MinSizeInBits)) + return false; + + Imm = SplatValue; + + return true; +} + +// Select constant vector splats. +// +// In addition to the requirements of selectVSplat(), this function returns +// true and sets Imm if: +// * The splat value is the same width as the elements of the vector +// * The splat value fits in an integer with the specified signed-ness and +// width. +// +// This function looks through ISD::BITCAST nodes. +// TODO: This might not be appropriate for big-endian LSX since BITCAST is +// sometimes a shuffle in big-endian mode. +// +// It's worth noting that this function is not used as part of the selection +// of [v/xv]ldi.[bhwd] since it does not permit using the wrong-typed +// [v/xv]ldi.[bhwd] instruction to achieve the desired bit pattern. +// [v/xv]ldi.[bhwd] is selected in LoongArchDAGToDAGISel::selectNode. +bool LoongArchDAGToDAGISel::selectVSplatCommon(SDValue N, SDValue &Imm, + bool Signed, + unsigned ImmBitSize) const { + APInt ImmValue; + EVT EltTy = N->getValueType(0).getVectorElementType(); + + if (N->getOpcode() == ISD::BITCAST) + N = N->getOperand(0); + + if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && + ImmValue.getBitWidth() == EltTy.getSizeInBits()) { + + if ((Signed && ImmValue.isSignedIntN(ImmBitSize)) || + (!Signed && ImmValue.isIntN(ImmBitSize))) { + Imm = CurDAG->getTargetConstant(ImmValue, SDLoc(N), EltTy); + return true; + } + } + + return false; +} + +// Select constant vector splats. +bool LoongArchDAGToDAGISel::selectVSplatUimm1(SDValue N, SDValue &Imm) const { + return selectVSplatCommon(N, Imm, false, 1); +} + +bool LoongArchDAGToDAGISel::selectVSplatUimm2(SDValue N, SDValue &Imm) const { + return selectVSplatCommon(N, Imm, false, 2); +} + +bool LoongArchDAGToDAGISel::selectVSplatUimm3(SDValue N, SDValue &Imm) const { + return selectVSplatCommon(N, Imm, false, 3); +} + +bool LoongArchDAGToDAGISel::selectVSplatUimm4(SDValue N, SDValue &Imm) const { + return selectVSplatCommon(N, Imm, false, 4); +} + +bool LoongArchDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &Imm) const { + return selectVSplatCommon(N, Imm, false, 5); +} + +bool LoongArchDAGToDAGISel::selectVSplatUimm6(SDValue N, SDValue &Imm) const { + return selectVSplatCommon(N, Imm, false, 6); +} + +bool LoongArchDAGToDAGISel::selectVSplatUimm8(SDValue N, SDValue &Imm) const { + return selectVSplatCommon(N, Imm, false, 8); +} + +bool LoongArchDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &Imm) const { + return selectVSplatCommon(N, Imm, true, 5); +} + +// Select constant vector splats whose value is a power of 2. +// +// In addition to the requirements of selectVSplat(), this function returns +// true and sets Imm if: +// * The splat value is the same width as the elements of the vector +// * The splat value is a power of two. +// +// This function looks through ISD::BITCAST nodes. +// TODO: This might not be appropriate for big-endian LSX since BITCAST is +// sometimes a shuffle in big-endian mode. 
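+// (Illustrative: a v4i32 splat of 8 sets Imm = 3, the log2 used by the
+// shift/bit-manipulation patterns; a splat of 6 fails exactLogBase2() and
+// falls back to non-immediate selection.)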
+bool LoongArchDAGToDAGISel::selectVSplatUimmPow2(SDValue N, + SDValue &Imm) const { + APInt ImmValue; + EVT EltTy = N->getValueType(0).getVectorElementType(); + + if (N->getOpcode() == ISD::BITCAST) + N = N->getOperand(0); + + if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && + ImmValue.getBitWidth() == EltTy.getSizeInBits()) { + int32_t Log2 = ImmValue.exactLogBase2(); + + if (Log2 != -1) { + Imm = CurDAG->getTargetConstant(Log2, SDLoc(N), EltTy); + return true; + } + } + + return false; +} + +bool LoongArchDAGToDAGISel::selectVSplatUimmInvPow2(SDValue N, + SDValue &Imm) const { + APInt ImmValue; + EVT EltTy = N->getValueType(0).getVectorElementType(); + + if (N->getOpcode() == ISD::BITCAST) + N = N->getOperand(0); + + if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && + ImmValue.getBitWidth() == EltTy.getSizeInBits()) { + int32_t Log2 = (~ImmValue).exactLogBase2(); + + if (Log2 != -1) { + Imm = CurDAG->getTargetConstant(Log2, SDLoc(N), EltTy); + return true; + } + } + + return false; +} + +// Select constant vector splats whose value only has a consecutive sequence +// of left-most bits set (e.g. 0b11...1100...00). +// +// In addition to the requirements of selectVSplat(), this function returns +// true and sets Imm if: +// * The splat value is the same width as the elements of the vector +// * The splat value is a consecutive sequence of left-most bits. +// +// This function looks through ISD::BITCAST nodes. +// TODO: This might not be appropriate for big-endian LSX since BITCAST is +// sometimes a shuffle in big-endian mode. +bool LoongArchDAGToDAGISel::selectVSplatMaskL(SDValue N, SDValue &Imm) const { + APInt ImmValue; + EVT EltTy = N->getValueType(0).getVectorElementType(); + + if (N->getOpcode() == ISD::BITCAST) + N = N->getOperand(0); + + if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && + ImmValue.getBitWidth() == EltTy.getSizeInBits()) { + // Extract the run of set bits starting with bit zero from the bitwise + // inverse of ImmValue, and test that the inverse of this is the same + // as the original value. + if (ImmValue == ~(~ImmValue & ~(~ImmValue + 1))) { + + Imm = CurDAG->getTargetConstant(ImmValue.countPopulation() - 1, SDLoc(N), + EltTy); + return true; + } + } + + return false; +} + +// Select constant vector splats whose value only has a consecutive sequence +// of right-most bits set (e.g. 0b00...0011...11). +// +// In addition to the requirements of selectVSplat(), this function returns +// true and sets Imm if: +// * The splat value is the same width as the elements of the vector +// * The splat value is a consecutive sequence of right-most bits. +// +// This function looks through ISD::BITCAST nodes. +// TODO: This might not be appropriate for big-endian LSX since BITCAST is +// sometimes a shuffle in big-endian mode. 
+bool LoongArchDAGToDAGISel::selectVSplatMaskR(SDValue N, SDValue &Imm) const { + APInt ImmValue; + EVT EltTy = N->getValueType(0).getVectorElementType(); + + if (N->getOpcode() == ISD::BITCAST) + N = N->getOperand(0); + + if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && + ImmValue.getBitWidth() == EltTy.getSizeInBits()) { + // Extract the run of set bits starting with bit zero, and test that the + // result is the same as the original value + if (ImmValue == (ImmValue & ~(ImmValue + 1))) { + Imm = CurDAG->getTargetConstant(ImmValue.countPopulation() - 1, SDLoc(N), + EltTy); + return true; + } + } + + return false; +} + +bool LoongArchDAGToDAGISel::trySelect(SDNode *Node) { + unsigned Opcode = Node->getOpcode(); + SDLoc DL(Node); + + /// + // Instruction Selection not handled by the auto-generated + // tablegen selection should be handled here. + /// + switch(Opcode) { + default: break; + case ISD::ConstantFP: { + ConstantFPSDNode *CN = dyn_cast(Node); + if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) { + if (Subtarget->is64Bit()) { + SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, + LoongArch::ZERO_64, MVT::i64); + ReplaceNode(Node, + CurDAG->getMachineNode(LoongArch::MOVGR2FR_D, DL, MVT::f64, Zero)); + } + return true; + } + break; + } + + case ISD::Constant: { + const ConstantSDNode *CN = dyn_cast(Node); + MVT VT = CN->getSimpleValueType(0); + int64_t Imm = CN->getSExtValue(); + LoongArchAnalyzeImmediate::InstSeq Seq = + LoongArchAnalyzeImmediate::generateInstSeq(Imm, VT == MVT::i64); + SDLoc DL(CN); + SDNode *Result = nullptr; + SDValue SrcReg = CurDAG->getRegister( + VT == MVT::i64 ? LoongArch::ZERO_64 : LoongArch::ZERO, VT); + + // The instructions in the sequence are handled here. + for (LoongArchAnalyzeImmediate::Inst &Inst : Seq) { + SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, VT); + if (Inst.Opc == LoongArch::LU12I_W || Inst.Opc == LoongArch::LU12I_W32) + Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SDImm); + else + Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SrcReg, SDImm); + SrcReg = SDValue(Result, 0); + } + ReplaceNode(Node, Result); + return true; + } + + case ISD::BUILD_VECTOR: { + // Select appropriate vldi.[bhwd] instructions for constant splats of + // 128-bit when LSX is enabled. Select appropriate xvldi.[bhwd] instructions + // for constant splats of 256-bit when LASX is enabled. Fixup any register + // class mismatches that occur as a result. + // + // This allows the compiler to use a wider range of immediates than would + // otherwise be allowed. If, for example, v4i32 could only use [v/xv]ldi.h + // then it would not be possible to load { 0x01010101, 0x01010101, + // 0x01010101, 0x01010101 } without using a constant pool. This would be + // sub-optimal when // '[v/xv]ldi.b vd, 1' is capable of producing that + // bit-pattern in the same set/ of registers. Similarly, [v/xv]ldi.h isn't + // capable of producing { 0x00000000, 0x00000001, 0x00000000, 0x00000001 } + // but '[v/xv]ldi.d vd, 1' can. 
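+    //
+    // (Illustrative: a v2i64 splat of 0x12345 does not fit any ldi payload,
+    // so it is synthesized in a GPR, e.g. "lu12i.w $rd, 0x12" followed by
+    // "ori $rd, $rd, 0x345", and then broadcast with [x]vreplgr2vr.d.)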
+
+    const LoongArchABIInfo &ABI =
+        static_cast<const LoongArchTargetMachine &>(TM).getABI();
+
+    BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Node);
+    APInt SplatValue, SplatUndef;
+    unsigned SplatBitSize;
+    bool HasAnyUndefs;
+    unsigned LdiOp;
+    EVT ResVecTy = BVN->getValueType(0);
+    EVT ViaVecTy;
+
+    if ((!Subtarget->hasLSX() || !BVN->getValueType(0).is128BitVector()) &&
+        (!Subtarget->hasLASX() || !BVN->getValueType(0).is256BitVector()))
+      return false;
+
+    if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
+                              HasAnyUndefs, 8))
+      return false;
+
+    bool IsLASX256 = BVN->getValueType(0).is256BitVector();
+
+    switch (SplatBitSize) {
+    default:
+      return false;
+    case 8:
+      LdiOp = IsLASX256 ? LoongArch::XVLDI_B : LoongArch::VLDI_B;
+      ViaVecTy = IsLASX256 ? MVT::v32i8 : MVT::v16i8;
+      break;
+    case 16:
+      LdiOp = IsLASX256 ? LoongArch::XVLDI_H : LoongArch::VLDI_H;
+      ViaVecTy = IsLASX256 ? MVT::v16i16 : MVT::v8i16;
+      break;
+    case 32:
+      LdiOp = IsLASX256 ? LoongArch::XVLDI_W : LoongArch::VLDI_W;
+      ViaVecTy = IsLASX256 ? MVT::v8i32 : MVT::v4i32;
+      break;
+    case 64:
+      LdiOp = IsLASX256 ? LoongArch::XVLDI_D : LoongArch::VLDI_D;
+      ViaVecTy = IsLASX256 ? MVT::v4i64 : MVT::v2i64;
+      break;
+    }
+
+    SDNode *Res;
+
+    // If we have a signed 10-bit value, it fits the [v/xv]ldi payload and we
+    // can splat it directly.
+    //
+    // If we have something bigger we can synthesize the value into a GPR and
+    // splat from there.
+    if (SplatValue.isSignedIntN(10)) {
+      SDValue Imm = CurDAG->getTargetConstant(SplatValue, DL,
+                                              ViaVecTy.getVectorElementType());
+
+      Res = CurDAG->getMachineNode(LdiOp, DL, ViaVecTy, Imm);
+    } else if (SplatValue.isSignedIntN(12)) {
+      bool Is32BitSplat = SplatBitSize < 64;
+      const unsigned ADDIOp =
+          Is32BitSplat ? LoongArch::ADDI_W : LoongArch::ADDI_D;
+      const MVT SplatMVT = Is32BitSplat ? MVT::i32 : MVT::i64;
+      SDValue ZeroVal = CurDAG->getRegister(
+          Is32BitSplat ? LoongArch::ZERO : LoongArch::ZERO_64, SplatMVT);
+
+      const unsigned FILLOp =
+          (SplatBitSize == 16)
+              ? (IsLASX256 ? LoongArch::XVREPLGR2VR_H : LoongArch::VREPLGR2VR_H)
+              : (SplatBitSize == 32
+                     ? (IsLASX256 ? LoongArch::XVREPLGR2VR_W
+                                  : LoongArch::VREPLGR2VR_W)
+                     : (SplatBitSize == 64
+                            ? (IsLASX256 ? LoongArch::XVREPLGR2VR_D
+                                         : LoongArch::VREPLGR2VR_D)
+                            : 0));
+
+      assert(FILLOp != 0 && "Unknown FILL Op for splat synthesis!");
+
+      short Lo = SplatValue.getLoBits(12).getSExtValue();
+      SDValue LoVal = CurDAG->getTargetConstant(Lo, DL, SplatMVT);
+
+      Res = CurDAG->getMachineNode(ADDIOp, DL, SplatMVT, ZeroVal, LoVal);
+      Res = CurDAG->getMachineNode(FILLOp, DL, ViaVecTy, SDValue(Res, 0));
+    } else if (SplatValue.isSignedIntN(16) && SplatBitSize == 16) {
+      const unsigned Lo = SplatValue.getLoBits(12).getZExtValue();
+      const unsigned Hi = SplatValue.lshr(12).getLoBits(4).getZExtValue();
+      SDValue ZeroVal = CurDAG->getRegister(LoongArch::ZERO, MVT::i32);
+
+      SDValue LoVal = CurDAG->getTargetConstant(Lo, DL, MVT::i32);
+      SDValue HiVal = CurDAG->getTargetConstant(Hi, DL, MVT::i32);
+      if (Hi)
+        Res = CurDAG->getMachineNode(LoongArch::LU12I_W32, DL, MVT::i32, HiVal);
+
+      if (Lo)
+        Res = CurDAG->getMachineNode(LoongArch::ORI32, DL, MVT::i32,
+                                     Hi ? SDValue(Res, 0) : ZeroVal, LoVal);
+
+      assert((Hi || Lo) && "Zero case reached 32 bit case splat synthesis!");
+      const unsigned FILLOp =
+          IsLASX256 ? LoongArch::XVREPLGR2VR_H : LoongArch::VREPLGR2VR_H;
+      EVT FILLTy = IsLASX256 ?
MVT::v16i16 : MVT::v8i16; + Res = CurDAG->getMachineNode(FILLOp, DL, FILLTy, SDValue(Res, 0)); + } else if (SplatValue.isSignedIntN(32) && SplatBitSize == 32) { + // Only handle the cases where the splat size agrees with the size + // of the SplatValue here. + const unsigned Lo = SplatValue.getLoBits(12).getZExtValue(); + const unsigned Hi = SplatValue.lshr(12).getLoBits(20).getZExtValue(); + SDValue ZeroVal = CurDAG->getRegister(LoongArch::ZERO, MVT::i32); + + SDValue LoVal = CurDAG->getTargetConstant(Lo, DL, MVT::i32); + SDValue HiVal = CurDAG->getTargetConstant(Hi, DL, MVT::i32); + if (Hi) + Res = CurDAG->getMachineNode(LoongArch::LU12I_W32, DL, MVT::i32, HiVal); + + if (Lo) + Res = CurDAG->getMachineNode(LoongArch::ORI32, DL, MVT::i32, + Hi ? SDValue(Res, 0) : ZeroVal, LoVal); + + assert((Hi || Lo) && "Zero case reached 32 bit case splat synthesis!"); + const unsigned FILLOp = + IsLASX256 ? LoongArch::XVREPLGR2VR_W : LoongArch::VREPLGR2VR_W; + EVT FILLTy = IsLASX256 ? MVT::v8i32 : MVT::v4i32; + Res = CurDAG->getMachineNode(FILLOp, DL, FILLTy, SDValue(Res, 0)); + + } else if ((SplatValue.isSignedIntN(32) && SplatBitSize == 64 && + ABI.IsLP64D()) || + (SplatValue.isSignedIntN(64))) { + + int64_t Imm = SplatValue.getSExtValue(); + LoongArchAnalyzeImmediate::InstSeq Seq = + LoongArchAnalyzeImmediate::generateInstSeq(Imm, true); + SDValue SrcReg = CurDAG->getRegister(LoongArch::ZERO_64, MVT::i64); + + for (LoongArchAnalyzeImmediate::Inst &Inst : Seq) { + SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, MVT::i64); + if (Inst.Opc == LoongArch::LU12I_W || Inst.Opc == LoongArch::LU12I_W32) + Res = CurDAG->getMachineNode(Inst.Opc, DL, MVT::i64, SDImm); + else + Res = CurDAG->getMachineNode(Inst.Opc, DL, MVT::i64, SrcReg, SDImm); + SrcReg = SDValue(Res, 0); + } + + const unsigned FILLOp = + IsLASX256 ? LoongArch::XVREPLGR2VR_D : LoongArch::VREPLGR2VR_D; + EVT FILLTy = IsLASX256 ? MVT::v4i64 : MVT::v2i64; + Res = CurDAG->getMachineNode(FILLOp, DL, FILLTy, SDValue(Res, 0)); + + } else + return false; + + if (ResVecTy != ViaVecTy) { + // If LdiOp is writing to a different register class to ResVecTy, then + // fix it up here. This COPY_TO_REGCLASS should never cause a move.v + // since the source and destination register sets contain the same + // registers. + const TargetLowering *TLI = getTargetLowering(); + MVT ResVecTySimple = ResVecTy.getSimpleVT(); + const TargetRegisterClass *RC = TLI->getRegClassFor(ResVecTySimple); + Res = CurDAG->getMachineNode( + LoongArch::COPY_TO_REGCLASS, DL, ResVecTy, SDValue(Res, 0), + CurDAG->getTargetConstant(RC->getID(), DL, MVT::i32)); + } + + ReplaceNode(Node, Res); + return true; + } + } + + return false; +} + +/// Select instructions not customized! Used for +/// expanded, promoted and normal instructions +void LoongArchDAGToDAGISel::Select(SDNode *Node) { + // If we have a custom node, we already have selected! + if (Node->isMachineOpcode()) { + LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); + Node->setNodeId(-1); + return; + } + + // See if subclasses can handle this node. + if (trySelect(Node)) + return; + + // Select the default instruction + SelectCode(Node); +} + +bool LoongArchDAGToDAGISel:: +SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, + std::vector &OutOps) { + SDValue Base, Offset; + + switch(ConstraintID) { + default: + llvm_unreachable("Unexpected asm memory constraint"); + // All memory constraints can at least accept raw pointers. 
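+  // (e.g. for "m"(*(p + 1)) the reg+simm12 form below folds the displacement
+  // into the ld/st offset; "ZC" requires the simm14<<2 form used by ll/sc,
+  // and "ZB" always passes the raw pointer with a 0 offset.)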
+ case InlineAsm::Constraint_i: + OutOps.push_back(Op); + OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); + return false; + case InlineAsm::Constraint_m: + if (selectAddrRegImm12(Op, Base, Offset)) { + OutOps.push_back(Base); + OutOps.push_back(Offset); + return false; + } + OutOps.push_back(Op); + OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); + return false; + case InlineAsm::Constraint_R: + if (selectAddrRegImm12(Op, Base, Offset)) { + OutOps.push_back(Base); + OutOps.push_back(Offset); + return false; + } + OutOps.push_back(Op); + OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); + return false; + case InlineAsm::Constraint_ZC: + if (selectIntAddrSImm14Lsl2(Op, Base, Offset)) { + OutOps.push_back(Base); + OutOps.push_back(Offset); + return false; + } + OutOps.push_back(Op); + OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); + return false; + case InlineAsm::Constraint_ZB: + OutOps.push_back(Op); + OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); + return false; + } + return true; +} + +// This optimisation is ported from RISCV. +// Merge an ADDI into the offset of a load/store instruction where possible. +// (load (addi base, off1), off2) -> (load base, off1+off2) +// (store val, (addi base, off1), off2) -> (store val, base, off1+off2) +// This is possible when off1+off2 fits a 12-bit immediate. +void LoongArchDAGToDAGISel::doPeepholeLoadStoreADDI() { + SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode()); + ++Position; + + while (Position != CurDAG->allnodes_begin()) { + SDNode *N = &*--Position; + // Skip dead nodes and any non-machine opcodes. + if (N->use_empty() || !N->isMachineOpcode()) + continue; + + int OffsetOpIdx; + int BaseOpIdx; + + // TODO: handle more instructions. + switch (N->getMachineOpcode()) { + default: + continue; + case LoongArch::LD_B: + case LoongArch::LD_B32: + case LoongArch::LD_BU: + case LoongArch::LD_BU32: + case LoongArch::LD_H: + case LoongArch::LD_H32: + case LoongArch::LD_HU: + case LoongArch::LD_HU32: + case LoongArch::LD_W: + case LoongArch::LD_W32: + case LoongArch::LD_WU: + case LoongArch::LD_D: + BaseOpIdx = 0; + OffsetOpIdx = 1; + break; + case LoongArch::ST_B: + case LoongArch::ST_B32: + case LoongArch::ST_H: + case LoongArch::ST_H32: + case LoongArch::ST_W: + case LoongArch::ST_W32: + case LoongArch::ST_D: + BaseOpIdx = 1; + OffsetOpIdx = 2; + break; + } + + if (!isa(N->getOperand(OffsetOpIdx))) + continue; + + SDValue Base = N->getOperand(BaseOpIdx); + + // If the base is an ADDI, we can merge it in to the load/store. + // TODO: handle more instructions, i.e. ADDI_W. + if (!Base.isMachineOpcode() || Base.getMachineOpcode() != LoongArch::ADDI_D) + continue; + + SDValue ImmOperand = Base.getOperand(1); + uint64_t Offset2 = N->getConstantOperandVal(OffsetOpIdx); + + if (auto *Const = dyn_cast(ImmOperand)) { + int64_t Offset1 = Const->getSExtValue(); + int64_t CombinedOffset = Offset1 + Offset2; + if (!isInt<12>(CombinedOffset)) + continue; + ImmOperand = CurDAG->getTargetConstant(CombinedOffset, SDLoc(ImmOperand), + ImmOperand.getValueType()); + // TODO: handle below cases. +#if 0 + } else if (auto *GA = dyn_cast(ImmOperand)) { + // If the off1 in (addi base, off1) is a global variable's address (its + // low part, really), then we can rely on the alignment of that variable + // to provide a margin of safety before off1 can overflow the 12 bits. + // Check if off2 falls within that margin; if so off1+off2 can't overflow. 
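+      // (Illustrative: if the global has pointer alignment 16, its low 12-bit
+      // part is a multiple of 16, so any 0 < off2 < 16 can be folded without
+      // carrying into the high part of the address.)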
+ const DataLayout &DL = CurDAG->getDataLayout(); + Align Alignment = GA->getGlobal()->getPointerAlignment(DL); + if (Offset2 != 0 && Alignment <= Offset2) + continue; + int64_t Offset1 = GA->getOffset(); + int64_t CombinedOffset = Offset1 + Offset2; + ImmOperand = CurDAG->getTargetGlobalAddress( + GA->getGlobal(), SDLoc(ImmOperand), ImmOperand.getValueType(), + CombinedOffset, GA->getTargetFlags()); + } else if (auto *CP = dyn_cast(ImmOperand)) { + // Ditto. + Align Alignment = CP->getAlign(); + if (Offset2 != 0 && Alignment <= Offset2) + continue; + int64_t Offset1 = CP->getOffset(); + int64_t CombinedOffset = Offset1 + Offset2; + ImmOperand = CurDAG->getTargetConstantPool( + CP->getConstVal(), ImmOperand.getValueType(), CP->getAlign(), + CombinedOffset, CP->getTargetFlags()); +#endif + } else { + continue; + } + + LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: "); + LLVM_DEBUG(Base->dump(CurDAG)); + LLVM_DEBUG(dbgs() << "\nN: "); + LLVM_DEBUG(N->dump(CurDAG)); + LLVM_DEBUG(dbgs() << "\n"); + + // Modify the offset operand of the load/store. + if (BaseOpIdx == 0) // Load + CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand, + N->getOperand(2)); + else // Store + CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0), + ImmOperand, N->getOperand(3)); + + // The add-immediate may now be dead, in which case remove it. + if (Base.getNode()->use_empty()) + CurDAG->RemoveDeadNode(Base.getNode()); + } +} + +FunctionPass *llvm::createLoongArchISelDag(LoongArchTargetMachine &TM, + CodeGenOpt::Level OptLevel) { + return new LoongArchDAGToDAGISel(TM, OptLevel); +} diff --git a/lib/Target/LoongArch/LoongArchISelDAGToDAG.h b/lib/Target/LoongArch/LoongArchISelDAGToDAG.h new file mode 100644 index 00000000..76549731 --- /dev/null +++ b/lib/Target/LoongArch/LoongArchISelDAGToDAG.h @@ -0,0 +1,151 @@ +//===---- LoongArchISelDAGToDAG.h - A Dag to Dag Inst Selector for LoongArch --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines an instruction selector for the LoongArch target. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHISELDAGTODAG_H +#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHISELDAGTODAG_H + +#include "LoongArch.h" +#include "LoongArchSubtarget.h" +#include "LoongArchTargetMachine.h" +#include "llvm/CodeGen/SelectionDAGISel.h" + +//===----------------------------------------------------------------------===// +// Instruction Selector Implementation +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// LoongArchDAGToDAGISel - LoongArch specific code to select LoongArch machine +// instructions for SelectionDAG operations. 
+//===----------------------------------------------------------------------===// +namespace llvm { + +class LoongArchDAGToDAGISel : public SelectionDAGISel { +public: + explicit LoongArchDAGToDAGISel(LoongArchTargetMachine &TM, CodeGenOpt::Level OL) + : SelectionDAGISel(TM, OL), Subtarget(nullptr) {} + + // Pass Name + StringRef getPassName() const override { + return "LoongArch DAG->DAG Pattern Instruction Selection"; + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + void PostprocessISelDAG() override; + + void getAnalysisUsage(AnalysisUsage &AU) const override; + +private: + /// Keep a pointer to the LoongArchSubtarget around so that we can make the right + /// decision when generating code for different targets. + const LoongArchSubtarget *Subtarget; + // Include the pieces autogenerated from the target description. + #include "LoongArchGenDAGISel.inc" + + void doPeepholeLoadStoreADDI(); + + bool selectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset) const; + + bool selectAddrFrameIndexOffset(SDValue Addr, SDValue &Base, SDValue &Offset, + unsigned OffsetBits, + unsigned ShiftAmount) const; + + // Complex Pattern. + /// (reg + imm). + bool selectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset) const; + + /// Fall back on this function if all else fails. + bool selectAddrDefault(SDValue Addr, SDValue &Base, SDValue &Offset) const; + + /// Match integer address pattern. + bool selectIntAddr(SDValue Addr, SDValue &Base, SDValue &Offset) const; + + bool selectAddrRegImm12(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + /// Match addr+simm12 and addr + bool selectIntAddrSImm12(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + bool selectIntAddrSImm10(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + bool selectIntAddrSImm10Lsl1(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + bool selectIntAddrSImm10Lsl2(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + bool selectIntAddrSImm9Lsl3(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + bool selectIntAddrSImm11Lsl1(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + bool selectIntAddrSImm14Lsl2(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + bool selectIntAddrSImm10Lsl3(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + /// Select constant vector splats. + bool selectVSplat(SDNode *N, APInt &Imm, unsigned MinSizeInBits) const; + /// Select constant vector splats whose value fits in a given integer. + bool selectVSplatCommon(SDValue N, SDValue &Imm, bool Signed, + unsigned ImmBitSize) const; + /// Select constant vector splats whose value fits in a uimm1. + bool selectVSplatUimm1(SDValue N, SDValue &Imm) const; + /// Select constant vector splats whose value fits in a uimm2. + bool selectVSplatUimm2(SDValue N, SDValue &Imm) const; + /// Select constant vector splats whose value fits in a uimm3. + bool selectVSplatUimm3(SDValue N, SDValue &Imm) const; + /// Select constant vector splats whose value fits in a uimm4. + bool selectVSplatUimm4(SDValue N, SDValue &Imm) const; + /// Select constant vector splats whose value fits in a uimm5. + bool selectVSplatUimm5(SDValue N, SDValue &Imm) const; + /// Select constant vector splats whose value fits in a uimm6. + bool selectVSplatUimm6(SDValue N, SDValue &Imm) const; + /// Select constant vector splats whose value fits in a uimm8. + bool selectVSplatUimm8(SDValue N, SDValue &Imm) const; + /// Select constant vector splats whose value fits in a simm5. 
+ bool selectVSplatSimm5(SDValue N, SDValue &Imm) const; + /// Select constant vector splats whose value is a power of 2. + bool selectVSplatUimmPow2(SDValue N, SDValue &Imm) const; + /// Select constant vector splats whose value is the inverse of a + /// power of 2. + bool selectVSplatUimmInvPow2(SDValue N, SDValue &Imm) const; + /// Select constant vector splats whose value is a run of set bits + /// ending at the most significant bit + bool selectVSplatMaskL(SDValue N, SDValue &Imm) const; + /// Select constant vector splats whose value is a run of set bits + /// starting at bit zero. + bool selectVSplatMaskR(SDValue N, SDValue &Imm) const; + + void Select(SDNode *N) override; + + bool trySelect(SDNode *Node); + + // getImm - Return a target constant with the specified value. + inline SDValue getImm(const SDNode *Node, uint64_t Imm) { + return CurDAG->getTargetConstant(Imm, SDLoc(Node), Node->getValueType(0)); + } + + bool SelectInlineAsmMemoryOperand(const SDValue &Op, + unsigned ConstraintID, + std::vector &OutOps) override; +}; + +FunctionPass *createLoongArchISelDag(LoongArchTargetMachine &TM, + CodeGenOpt::Level OptLevel); +} + +#endif diff --git a/lib/Target/LoongArch/LoongArchISelLowering.cpp b/lib/Target/LoongArch/LoongArchISelLowering.cpp new file mode 100644 index 00000000..49bc719a --- /dev/null +++ b/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -0,0 +1,8208 @@ +//===- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that LoongArch uses to lower LLVM code into a +// selection DAG. 
+// +//===----------------------------------------------------------------------===// + +#include "LoongArchISelLowering.h" +#include "MCTargetDesc/LoongArchBaseInfo.h" +#include "MCTargetDesc/LoongArchInstPrinter.h" +#include "MCTargetDesc/LoongArchMCTargetDesc.h" +#include "LoongArchCCState.h" +#include "LoongArchInstrInfo.h" +#include "LoongArchMachineFunction.h" +#include "LoongArchRegisterInfo.h" +#include "LoongArchSubtarget.h" +#include "LoongArchTargetMachine.h" +#include "LoongArchTargetObjectFile.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsLoongArch.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "loongarch-lower" + +STATISTIC(NumTailCalls, "Number of tail calls"); + +static cl::opt +NoZeroDivCheck("mnocheck-zero-division", cl::Hidden, + cl::desc("LoongArch: Don't trap on integer division by zero."), + cl::init(false)); + +static const MCPhysReg LoongArch64DPRegs[8] = { + LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64, + LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64 +}; + +// If I is a shifted mask, set the size (SMSize) and the first bit of the +// mask (SMLsb), and return true. +// For example, if I is 0x003ff800, (SMLsb, SMSize) = (11, 11). 
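+// (These feed the BSTRPICK/BSTRINS combines; e.g. with the mask above,
+// (srl (and x, 0x003ff800), 11) can be selected as
+// "bstrpick.w $rd, $rj, 21, 11".)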
+static bool isShiftedMask(uint64_t I, uint64_t &SMLsb, uint64_t &SMSize) { + if (!isShiftedMask_64(I)) + return false; + + SMSize = countPopulation(I); + SMLsb = countTrailingZeros(I); + return true; +} + +SDValue LoongArchTargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty, + SelectionDAG &DAG, + unsigned Flag) const { + return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty, 0, Flag); +} + +SDValue LoongArchTargetLowering::getTargetNode(ExternalSymbolSDNode *N, EVT Ty, + SelectionDAG &DAG, + unsigned Flag) const { + return DAG.getTargetExternalSymbol(N->getSymbol(), Ty, Flag); +} + +SDValue LoongArchTargetLowering::getTargetNode(BlockAddressSDNode *N, EVT Ty, + SelectionDAG &DAG, + unsigned Flag) const { + return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(), Flag); +} + +SDValue LoongArchTargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty, + SelectionDAG &DAG, + unsigned Flag) const { + return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag); +} + +SDValue LoongArchTargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty, + SelectionDAG &DAG, + unsigned Flag) const { + return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(), + N->getOffset(), Flag); +} + +const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { + switch ((LoongArchISD::NodeType)Opcode) { + case LoongArchISD::FIRST_NUMBER: break; + case LoongArchISD::JmpLink: return "LoongArchISD::JmpLink"; + case LoongArchISD::TailCall: return "LoongArchISD::TailCall"; + case LoongArchISD::GlobalAddress: return "LoongArchISD::GlobalAddress"; + case LoongArchISD::Ret: return "LoongArchISD::Ret"; + case LoongArchISD::ERet: return "LoongArchISD::ERet"; + case LoongArchISD::EH_RETURN: return "LoongArchISD::EH_RETURN"; + case LoongArchISD::FPBrcond: return "LoongArchISD::FPBrcond"; + case LoongArchISD::FPCmp: return "LoongArchISD::FPCmp"; + case LoongArchISD::CMovFP_T: return "LoongArchISD::CMovFP_T"; + case LoongArchISD::CMovFP_F: return "LoongArchISD::CMovFP_F"; + case LoongArchISD::TruncIntFP: return "LoongArchISD::TruncIntFP"; + case LoongArchISD::DBAR: return "LoongArchISD::DBAR"; + case LoongArchISD::BSTRPICK: return "LoongArchISD::BSTRPICK"; + case LoongArchISD::BSTRINS: return "LoongArchISD::BSTRINS"; + case LoongArchISD::VALL_ZERO: + return "LoongArchISD::VALL_ZERO"; + case LoongArchISD::VANY_ZERO: + return "LoongArchISD::VANY_ZERO"; + case LoongArchISD::VALL_NONZERO: + return "LoongArchISD::VALL_NONZERO"; + case LoongArchISD::VANY_NONZERO: + return "LoongArchISD::VANY_NONZERO"; + case LoongArchISD::VEXTRACT_SEXT_ELT: + return "LoongArchISD::VEXTRACT_SEXT_ELT"; + case LoongArchISD::VEXTRACT_ZEXT_ELT: + return "LoongArchISD::VEXTRACT_ZEXT_ELT"; + case LoongArchISD::VNOR: + return "LoongArchISD::VNOR"; + case LoongArchISD::VSHF: + return "LoongArchISD::VSHF"; + case LoongArchISD::SHF: + return "LoongArchISD::SHF"; + case LoongArchISD::VPACKEV: + return "LoongArchISD::VPACKEV"; + case LoongArchISD::VPACKOD: + return "LoongArchISD::VPACKOD"; + case LoongArchISD::VILVH: + return "LoongArchISD::VILVH"; + case LoongArchISD::VILVL: + return "LoongArchISD::VILVL"; + case LoongArchISD::VPICKEV: + return "LoongArchISD::VPICKEV"; + case LoongArchISD::VPICKOD: + return "LoongArchISD::VPICKOD"; + case LoongArchISD::INSVE: + return "LoongArchISD::INSVE"; + case LoongArchISD::VROR: + return "LoongArchISD::VROR"; + case LoongArchISD::VRORI: + return "LoongArchISD::VRORI"; + case LoongArchISD::XVBROADCAST: + return "LoongArchISD::XVBROADCAST"; + case 
LoongArchISD::VBROADCAST: + return "LoongArchISD::VBROADCAST"; + case LoongArchISD::VABSD: + return "LoongArchISD::VABSD"; + case LoongArchISD::UVABSD: + return "LoongArchISD::UVABSD"; + case LoongArchISD::XVPICKVE: + return "LoongArchISD::XVPICKVE"; + case LoongArchISD::XVPERMI: + return "LoongArchISD::XVPERMI"; + case LoongArchISD::XVSHUF4I: + return "LoongArchISD::XVSHUF4I"; + case LoongArchISD::REVBD: + return "LoongArchISD::REVBD"; + case LoongArchISD::FSEL: + return "LoongArchISD::FSEL"; + } + return nullptr; +} + +LoongArchTargetLowering::LoongArchTargetLowering(const LoongArchTargetMachine &TM, + const LoongArchSubtarget &STI) + : TargetLowering(TM), Subtarget(STI), ABI(TM.getABI()) { + // Set up the register classes + addRegisterClass(MVT::i32, &LoongArch::GPR32RegClass); + + if (Subtarget.is64Bit()) + addRegisterClass(MVT::i64, &LoongArch::GPR64RegClass); + + // LoongArch does not have i1 type, so use i32 for + // setcc operations results (slt, sgt, ...). + setBooleanContents(ZeroOrOneBooleanContent); + setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); + + // Load extented operations for i1 types must be promoted + for (MVT VT : MVT::integer_valuetypes()) { + setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); + } + + // LoongArch doesn't have extending float->double load/store. Set LoadExtAction + // for f32, f16 + for (MVT VT : MVT::fp_valuetypes()) { + setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand); + } + + // Set LoadExtAction for f16 vectors to Expand + for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) { + MVT F16VT = MVT::getVectorVT(MVT::f16, VT.getVectorNumElements()); + if (F16VT.isValid()) + setLoadExtAction(ISD::EXTLOAD, VT, F16VT, Expand); + } + + setTruncStoreAction(MVT::f32, MVT::f16, Expand); + setTruncStoreAction(MVT::f64, MVT::f16, Expand); + + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + + // Used by legalize types to correctly generate the setcc result. + // Without this, every float setcc comes with a AND/OR with the result, + // we don't want this, since the fpcmp result goes to a flag register, + // which is used implicitly by brcond and select operations. 
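+  // (Illustrative: (brcond (setcc f32 a, b, setolt)) lowers to FPCmp feeding
+  // FPBrcond, i.e. "fcmp.clt.s $fcc0, $fj, $fk" + "bcnez $fcc0, ...", so no
+  // i1 value is ever materialized in a GPR.)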
+ AddPromotedToType(ISD::SETCC, MVT::i1, MVT::i32); + + // LoongArch Custom Operations + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); + setOperationAction(ISD::BlockAddress, MVT::i32, Custom); + setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); + setOperationAction(ISD::JumpTable, MVT::i32, Custom); + setOperationAction(ISD::ConstantPool, MVT::i32, Custom); + setOperationAction(ISD::SELECT, MVT::f32, Custom); + setOperationAction(ISD::SELECT, MVT::f64, Custom); + setOperationAction(ISD::SELECT, MVT::i32, Custom); + setOperationAction(ISD::SETCC, MVT::f32, Custom); + setOperationAction(ISD::SETCC, MVT::f64, Custom); + setOperationAction(ISD::BRCOND, MVT::Other, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + + if (Subtarget.is64Bit()) { + setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); + setOperationAction(ISD::BlockAddress, MVT::i64, Custom); + setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); + setOperationAction(ISD::JumpTable, MVT::i64, Custom); + setOperationAction(ISD::ConstantPool, MVT::i64, Custom); + setOperationAction(ISD::SELECT, MVT::i64, Custom); + setOperationAction(ISD::LOAD, MVT::i64, Legal); + setOperationAction(ISD::STORE, MVT::i64, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); + setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom); + setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom); + setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom); + } + + if (!Subtarget.is64Bit()) { + setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); + setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); + setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); + } + + setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom); + if (Subtarget.is64Bit()) + setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom); + + setOperationAction(ISD::SDIV, MVT::i32, Expand); + setOperationAction(ISD::SREM, MVT::i32, Expand); + setOperationAction(ISD::UDIV, MVT::i32, Expand); + setOperationAction(ISD::UREM, MVT::i32, Expand); + setOperationAction(ISD::SDIV, MVT::i64, Expand); + setOperationAction(ISD::SREM, MVT::i64, Expand); + setOperationAction(ISD::UDIV, MVT::i64, Expand); + setOperationAction(ISD::UREM, MVT::i64, Expand); + + // Operations not directly supported by LoongArch. 
+ setOperationAction(ISD::BR_CC, MVT::f32, Expand); + setOperationAction(ISD::BR_CC, MVT::f64, Expand); + setOperationAction(ISD::BR_CC, MVT::i32, Expand); + setOperationAction(ISD::BR_CC, MVT::i64, Expand); + setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); + setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + setOperationAction(ISD::CTPOP, MVT::i32, Expand); + setOperationAction(ISD::CTPOP, MVT::i64, Expand); + setOperationAction(ISD::ROTL, MVT::i32, Expand); + setOperationAction(ISD::ROTL, MVT::i64, Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); + + setOperationAction(ISD::FSIN, MVT::f32, Expand); + setOperationAction(ISD::FSIN, MVT::f64, Expand); + setOperationAction(ISD::FCOS, MVT::f32, Expand); + setOperationAction(ISD::FCOS, MVT::f64, Expand); + setOperationAction(ISD::FSINCOS, MVT::f32, Expand); + setOperationAction(ISD::FSINCOS, MVT::f64, Expand); + setOperationAction(ISD::FPOW, MVT::f32, Expand); + setOperationAction(ISD::FPOW, MVT::f64, Expand); + setOperationAction(ISD::FLOG, MVT::f32, Expand); + setOperationAction(ISD::FRINT, MVT::f32, Legal); + setOperationAction(ISD::FRINT, MVT::f64, Legal); + + setOperationAction(ISD::FLOG10, MVT::f32, Expand); + setOperationAction(ISD::FEXP, MVT::f32, Expand); + setOperationAction(ISD::FMA, MVT::f32, Legal); + setOperationAction(ISD::FMA, MVT::f64, Legal); + setOperationAction(ISD::FREM, MVT::f32, Expand); + setOperationAction(ISD::FREM, MVT::f64, Expand); + + setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal); + setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal); + setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal); + setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal); + + // Lower f16 conversion operations into library calls + setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); + setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); + setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); + setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); + + setOperationAction(ISD::EH_RETURN, MVT::Other, Custom); + + setOperationAction(ISD::VASTART, MVT::Other, Custom); + setOperationAction(ISD::VAARG, MVT::Other, Custom); + setOperationAction(ISD::VACOPY, MVT::Other, Expand); + setOperationAction(ISD::VAEND, MVT::Other, Expand); + + // Use the default for now + setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); + + if (!Subtarget.is64Bit()) { + setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand); + setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); + } + + if (Subtarget.is64Bit()) { + setLoadExtAction(ISD::EXTLOAD, MVT::i64, MVT::i32, Custom); + setTruncStoreAction(MVT::i64, MVT::i32, Custom); + } + + setOperationAction(ISD::TRAP, MVT::Other, Legal); + setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); + setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); + + setTargetDAGCombine(ISD::SELECT); + setTargetDAGCombine(ISD::AND); + setTargetDAGCombine(ISD::OR); + setTargetDAGCombine(ISD::AssertZext); + setTargetDAGCombine(ISD::SHL); + 
setTargetDAGCombine(ISD::SIGN_EXTEND); + setTargetDAGCombine(ISD::ZERO_EXTEND); + setTargetDAGCombine(ISD::ADD); + setTargetDAGCombine(ISD::SUB); + setTargetDAGCombine(ISD::MUL); + setTargetDAGCombine(ISD::SRL); + setTargetDAGCombine(ISD::SRA); + + if (ABI.IsILP32D() || ABI.IsILP32F() || ABI.IsILP32S()) { + // TODO + llvm_unreachable("Unimplemented ABI"); + } + + if (Subtarget.hasLSX() || Subtarget.hasLASX()) { + // Expand all truncating stores and extending loads. + for (MVT VT0 : MVT::vector_valuetypes()) { + for (MVT VT1 : MVT::vector_valuetypes()) { + setTruncStoreAction(VT0, VT1, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT0, VT1, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT0, VT1, Expand); + setLoadExtAction(ISD::EXTLOAD, VT0, VT1, Expand); + } + } + } + + if (Subtarget.hasLSX()) { + addLSXIntType(MVT::v16i8, &LoongArch::LSX128BRegClass); + addLSXIntType(MVT::v8i16, &LoongArch::LSX128HRegClass); + addLSXIntType(MVT::v4i32, &LoongArch::LSX128WRegClass); + addLSXIntType(MVT::v2i64, &LoongArch::LSX128DRegClass); + addLSXFloatType(MVT::v4f32, &LoongArch::LSX128WRegClass); + addLSXFloatType(MVT::v2f64, &LoongArch::LSX128DRegClass); + + // f16 is a storage-only type, always promote it to f32. + setOperationAction(ISD::SETCC, MVT::f16, Promote); + setOperationAction(ISD::BR_CC, MVT::f16, Promote); + setOperationAction(ISD::SELECT_CC, MVT::f16, Promote); + setOperationAction(ISD::SELECT, MVT::f16, Promote); + setOperationAction(ISD::FADD, MVT::f16, Promote); + setOperationAction(ISD::FSUB, MVT::f16, Promote); + setOperationAction(ISD::FMUL, MVT::f16, Promote); + setOperationAction(ISD::FDIV, MVT::f16, Promote); + setOperationAction(ISD::FREM, MVT::f16, Promote); + setOperationAction(ISD::FMA, MVT::f16, Promote); + setOperationAction(ISD::FNEG, MVT::f16, Promote); + setOperationAction(ISD::FABS, MVT::f16, Promote); + setOperationAction(ISD::FCEIL, MVT::f16, Promote); + setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote); + setOperationAction(ISD::FCOS, MVT::f16, Promote); + setOperationAction(ISD::FP_EXTEND, MVT::f16, Promote); + setOperationAction(ISD::FFLOOR, MVT::f16, Promote); + setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote); + setOperationAction(ISD::FPOW, MVT::f16, Promote); + setOperationAction(ISD::FPOWI, MVT::f16, Promote); + setOperationAction(ISD::FRINT, MVT::f16, Promote); + setOperationAction(ISD::FSIN, MVT::f16, Promote); + setOperationAction(ISD::FSINCOS, MVT::f16, Promote); + setOperationAction(ISD::FSQRT, MVT::f16, Promote); + setOperationAction(ISD::FEXP, MVT::f16, Promote); + setOperationAction(ISD::FEXP2, MVT::f16, Promote); + setOperationAction(ISD::FLOG, MVT::f16, Promote); + setOperationAction(ISD::FLOG2, MVT::f16, Promote); + setOperationAction(ISD::FLOG10, MVT::f16, Promote); + setOperationAction(ISD::FROUND, MVT::f16, Promote); + setOperationAction(ISD::FTRUNC, MVT::f16, Promote); + setOperationAction(ISD::FMINNUM, MVT::f16, Promote); + setOperationAction(ISD::FMAXNUM, MVT::f16, Promote); + setOperationAction(ISD::FMINIMUM, MVT::f16, Promote); + setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote); + + setTargetDAGCombine(ISD::AND); + setTargetDAGCombine(ISD::OR); + setTargetDAGCombine(ISD::SRA); + setTargetDAGCombine(ISD::VSELECT); + setTargetDAGCombine(ISD::XOR); + } + + if (Subtarget.hasLASX()) { + addLASXIntType(MVT::v32i8, &LoongArch::LASX256BRegClass); + addLASXIntType(MVT::v16i16, &LoongArch::LASX256HRegClass); + addLASXIntType(MVT::v8i32, &LoongArch::LASX256WRegClass); + addLASXIntType(MVT::v4i64, &LoongArch::LASX256DRegClass); + 
addLASXFloatType(MVT::v8f32, &LoongArch::LASX256WRegClass); + addLASXFloatType(MVT::v4f64, &LoongArch::LASX256DRegClass); + + // f16 is a storage-only type, always promote it to f32. + setOperationAction(ISD::SETCC, MVT::f16, Promote); + setOperationAction(ISD::BR_CC, MVT::f16, Promote); + setOperationAction(ISD::SELECT_CC, MVT::f16, Promote); + setOperationAction(ISD::SELECT, MVT::f16, Promote); + setOperationAction(ISD::FADD, MVT::f16, Promote); + setOperationAction(ISD::FSUB, MVT::f16, Promote); + setOperationAction(ISD::FMUL, MVT::f16, Promote); + setOperationAction(ISD::FDIV, MVT::f16, Promote); + setOperationAction(ISD::FREM, MVT::f16, Promote); + setOperationAction(ISD::FMA, MVT::f16, Promote); + setOperationAction(ISD::FNEG, MVT::f16, Promote); + setOperationAction(ISD::FABS, MVT::f16, Promote); + setOperationAction(ISD::FCEIL, MVT::f16, Promote); + setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote); + setOperationAction(ISD::FCOS, MVT::f16, Promote); + setOperationAction(ISD::FP_EXTEND, MVT::f16, Promote); + setOperationAction(ISD::FFLOOR, MVT::f16, Promote); + setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote); + setOperationAction(ISD::FPOW, MVT::f16, Promote); + setOperationAction(ISD::FPOWI, MVT::f16, Promote); + setOperationAction(ISD::FRINT, MVT::f16, Promote); + setOperationAction(ISD::FSIN, MVT::f16, Promote); + setOperationAction(ISD::FSINCOS, MVT::f16, Promote); + setOperationAction(ISD::FSQRT, MVT::f16, Promote); + setOperationAction(ISD::FEXP, MVT::f16, Promote); + setOperationAction(ISD::FEXP2, MVT::f16, Promote); + setOperationAction(ISD::FLOG, MVT::f16, Promote); + setOperationAction(ISD::FLOG2, MVT::f16, Promote); + setOperationAction(ISD::FLOG10, MVT::f16, Promote); + setOperationAction(ISD::FROUND, MVT::f16, Promote); + setOperationAction(ISD::FTRUNC, MVT::f16, Promote); + setOperationAction(ISD::FMINNUM, MVT::f16, Promote); + setOperationAction(ISD::FMAXNUM, MVT::f16, Promote); + setOperationAction(ISD::FMINIMUM, MVT::f16, Promote); + setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote); + + setTargetDAGCombine(ISD::AND); + setTargetDAGCombine(ISD::OR); + setTargetDAGCombine(ISD::SRA); + setTargetDAGCombine(ISD::VSELECT); + setTargetDAGCombine(ISD::XOR); + } + + if (Subtarget.hasBasicF()) + addRegisterClass(MVT::f32, &LoongArch::FGR32RegClass); + + if (Subtarget.hasBasicD()) + addRegisterClass(MVT::f64, &LoongArch::FGR64RegClass); + + setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom); + setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom); + + if (Subtarget.is64Bit()) + setOperationAction(ISD::MUL, MVT::i64, Custom); + + if (Subtarget.is64Bit()) { + setOperationAction(ISD::SMUL_LOHI, MVT::i64, Custom); + setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom); + setOperationAction(ISD::SDIVREM, MVT::i64, Custom); + setOperationAction(ISD::UDIVREM, MVT::i64, Custom); + } + + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); + + setOperationAction(ISD::SDIVREM, MVT::i32, Custom); + setOperationAction(ISD::UDIVREM, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); + setOperationAction(ISD::LOAD, MVT::i32, Legal); + setOperationAction(ISD::STORE, MVT::i32, Legal); + + setTargetDAGCombine(ISD::MUL); + + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); + + // Replace the accumulator-based multiplies with a + // 
three-register instruction.
+  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+  setOperationAction(ISD::MUL, MVT::i32, Legal);
+  setOperationAction(ISD::MULHS, MVT::i32, Legal);
+  setOperationAction(ISD::MULHU, MVT::i32, Legal);
+
+  // Replace the accumulator-based division/remainder with separate
+  // three-register division and remainder instructions.
+  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+  setOperationAction(ISD::SDIV, MVT::i32, Legal);
+  setOperationAction(ISD::UDIV, MVT::i32, Legal);
+  setOperationAction(ISD::SREM, MVT::i32, Legal);
+  setOperationAction(ISD::UREM, MVT::i32, Legal);
+
+  // Replace the accumulator-based multiplies with a
+  // three-register instruction.
+  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+  setOperationAction(ISD::MUL, MVT::i64, Legal);
+  setOperationAction(ISD::MULHS, MVT::i64, Legal);
+  setOperationAction(ISD::MULHU, MVT::i64, Legal);
+
+  // Replace the accumulator-based division/remainder with separate
+  // three-register division and remainder instructions.
+  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
+  setOperationAction(ISD::SDIV, MVT::i64, Legal);
+  setOperationAction(ISD::UDIV, MVT::i64, Legal);
+  setOperationAction(ISD::SREM, MVT::i64, Legal);
+  setOperationAction(ISD::UREM, MVT::i64, Legal);
+
+  MaxGluedStoresPerMemcpy = 4;
+
+  setMinFunctionAlignment(Subtarget.is64Bit() ? Align(8) : Align(4));
+
+  // The arguments on the stack are defined in terms of 4-byte slots on 32-bit
+  // targets and 8-byte slots on 64-bit targets.
+  setMinStackArgumentAlignment(Subtarget.is64Bit() ? Align(8) : Align(4));
+
+  setStackPointerRegisterToSaveRestore(Subtarget.is64Bit() ? LoongArch::SP_64
+                                                           : LoongArch::SP);
+
+  if (Subtarget.hasLASX()) {
+    // = 16*32/2; the smallest memcpy;
+    MaxStoresPerMemcpy = 16;
+  } else if (Subtarget.hasLSX()) {
+    MaxStoresPerMemcpy = 65535;
+  } else {
+    MaxStoresPerMemcpy = 16;
+  }
+
+  computeRegisterProperties(Subtarget.getRegisterInfo());
+}
+
+// Enable LSX support for the given integer type and Register class.
+void LoongArchTargetLowering::addLSXIntType(MVT::SimpleValueType Ty,
+                                            const TargetRegisterClass *RC) {
+  addRegisterClass(Ty, RC);
+
+  // Expand all builtin opcodes.
+ for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) + setOperationAction(Opc, Ty, Expand); + + setOperationAction(ISD::BITCAST, Ty, Legal); + setOperationAction(ISD::LOAD, Ty, Legal); + setOperationAction(ISD::STORE, Ty, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal); + setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); + setOperationAction(ISD::ABS, Ty, Legal); + setOperationAction(ISD::UNDEF, Ty, Legal); + setOperationAction(ISD::EXTRACT_SUBVECTOR, Ty, Legal); + setOperationAction(ISD::CONCAT_VECTORS, Ty, Legal); + + if (Ty == MVT::v4i32 || Ty == MVT::v2i64) { + setOperationAction(ISD::FP_TO_SINT, Ty, Custom); + setOperationAction(ISD::FP_TO_UINT, Ty, Custom); + } + + setOperationAction(ISD::ADD, Ty, Legal); + setOperationAction(ISD::AND, Ty, Legal); + setOperationAction(ISD::CTLZ, Ty, Legal); + setOperationAction(ISD::CTPOP, Ty, Legal); + setOperationAction(ISD::MUL, Ty, Legal); + setOperationAction(ISD::OR, Ty, Legal); + setOperationAction(ISD::SDIV, Ty, Legal); + setOperationAction(ISD::SREM, Ty, Legal); + setOperationAction(ISD::SHL, Ty, Legal); + setOperationAction(ISD::SRA, Ty, Legal); + setOperationAction(ISD::SRL, Ty, Legal); + setOperationAction(ISD::SUB, Ty, Legal); + setOperationAction(ISD::SMAX, Ty, Legal); + setOperationAction(ISD::SMIN, Ty, Legal); + setOperationAction(ISD::UDIV, Ty, Legal); + setOperationAction(ISD::UREM, Ty, Legal); + setOperationAction(ISD::UMAX, Ty, Legal); + setOperationAction(ISD::UMIN, Ty, Legal); + setOperationAction(ISD::VECTOR_SHUFFLE, Ty, Custom); + setOperationAction(ISD::VSELECT, Ty, Legal); + setOperationAction(ISD::XOR, Ty, Legal); + setOperationAction(ISD::MULHS, Ty, Legal); + setOperationAction(ISD::MULHU, Ty, Legal); + + if (Ty == MVT::v4i32 || Ty == MVT::v2i64) { + setOperationAction(ISD::SINT_TO_FP, Ty, Custom); + setOperationAction(ISD::UINT_TO_FP, Ty, Custom); + } + + setOperationAction(ISD::SETCC, Ty, Legal); + setCondCodeAction(ISD::SETNE, Ty, Expand); + setCondCodeAction(ISD::SETGE, Ty, Expand); + setCondCodeAction(ISD::SETGT, Ty, Expand); + setCondCodeAction(ISD::SETUGE, Ty, Expand); + setCondCodeAction(ISD::SETUGT, Ty, Expand); +} + +// Enable LASX support for the given integer type and Register class. +void LoongArchTargetLowering::addLASXIntType(MVT::SimpleValueType Ty, + const TargetRegisterClass *RC) { + addRegisterClass(Ty, RC); + + // Expand all builtin opcodes. 
+ for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) + setOperationAction(Opc, Ty, Expand); + + // FIXME + setOperationAction(ISD::BITCAST, Ty, Legal); + setOperationAction(ISD::LOAD, Ty, Legal); + setOperationAction(ISD::STORE, Ty, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Custom); + setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); + setOperationAction(ISD::CONCAT_VECTORS, Ty, Legal); + setOperationAction(ISD::UNDEF, Ty, Legal); + setOperationAction(ISD::UADDSAT, Ty, Legal); + setOperationAction(ISD::SADDSAT, Ty, Legal); + setOperationAction(ISD::USUBSAT, Ty, Legal); + setOperationAction(ISD::SSUBSAT, Ty, Legal); + setOperationAction(ISD::ABS, Ty, Legal); + + setOperationAction(ISD::ADD, Ty, Legal); + setOperationAction(ISD::AND, Ty, Legal); + setOperationAction(ISD::CTLZ, Ty, Legal); + setOperationAction(ISD::CTPOP, Ty, Legal); + setOperationAction(ISD::MUL, Ty, Legal); + setOperationAction(ISD::OR, Ty, Legal); + setOperationAction(ISD::SDIV, Ty, Legal); + setOperationAction(ISD::SREM, Ty, Legal); + setOperationAction(ISD::SHL, Ty, Legal); + setOperationAction(ISD::SRA, Ty, Legal); + setOperationAction(ISD::SRL, Ty, Legal); + setOperationAction(ISD::SUB, Ty, Legal); + setOperationAction(ISD::SMAX, Ty, Legal); + setOperationAction(ISD::SMIN, Ty, Legal); + setOperationAction(ISD::UDIV, Ty, Legal); + setOperationAction(ISD::UREM, Ty, Legal); + setOperationAction(ISD::UMAX, Ty, Legal); + setOperationAction(ISD::UMIN, Ty, Legal); + setOperationAction(ISD::VECTOR_SHUFFLE, Ty, Custom); + setOperationAction(ISD::VSELECT, Ty, Legal); + setOperationAction(ISD::XOR, Ty, Legal); + setOperationAction(ISD::INSERT_SUBVECTOR, Ty, Legal); + setOperationAction(ISD::MULHS, Ty, Legal); + setOperationAction(ISD::MULHU, Ty, Legal); + + setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, Ty, Legal); + setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, Ty, Legal); + + setOperationAction(ISD::SIGN_EXTEND, Ty, Legal); + setOperationAction(ISD::ZERO_EXTEND, Ty, Legal); + + if (Ty == MVT::v8i32 || Ty == MVT::v4i64) { + setOperationAction(ISD::SINT_TO_FP, Ty, Custom); + setOperationAction(ISD::UINT_TO_FP, Ty, Custom); + } + + setTargetDAGCombine(ISD::CONCAT_VECTORS); + + setOperationAction(ISD::SETCC, Ty, Legal); + setCondCodeAction(ISD::SETNE, Ty, Expand); + setCondCodeAction(ISD::SETGE, Ty, Expand); + setCondCodeAction(ISD::SETGT, Ty, Expand); + setCondCodeAction(ISD::SETUGE, Ty, Expand); + setCondCodeAction(ISD::SETUGT, Ty, Expand); +} + +// Enable LSX support for the given floating-point type and Register class. +void LoongArchTargetLowering::addLSXFloatType(MVT::SimpleValueType Ty, + const TargetRegisterClass *RC) { + addRegisterClass(Ty, RC); + + // Expand all builtin opcodes. 
+ for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) + setOperationAction(Opc, Ty, Expand); + + setOperationAction(ISD::LOAD, Ty, Legal); + setOperationAction(ISD::STORE, Ty, Legal); + setOperationAction(ISD::BITCAST, Ty, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal); + setOperationAction(ISD::UNDEF, Ty, Legal); + setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); + setOperationAction(ISD::CONCAT_VECTORS, Ty, Legal); + + if (Ty == MVT::v4f32 || Ty == MVT::v2f64) { + setOperationAction(ISD::FP_TO_SINT, Ty, Custom); + setOperationAction(ISD::FP_TO_UINT, Ty, Custom); + } + + setOperationAction(ISD::FADD, Ty, Legal); + setOperationAction(ISD::FDIV, Ty, Legal); + setOperationAction(ISD::FMA, Ty, Legal); + setOperationAction(ISD::FMUL, Ty, Legal); + setOperationAction(ISD::FSQRT, Ty, Legal); + setOperationAction(ISD::FSUB, Ty, Legal); + setOperationAction(ISD::VSELECT, Ty, Legal); + setOperationAction(ISD::FNEG, Ty, Legal); + setOperationAction(ISD::FRINT, Ty, Legal); + + setOperationAction(ISD::SETCC, Ty, Legal); + setCondCodeAction(ISD::SETOGE, Ty, Expand); + setCondCodeAction(ISD::SETOGT, Ty, Expand); + setCondCodeAction(ISD::SETUGE, Ty, Expand); + setCondCodeAction(ISD::SETUGT, Ty, Expand); + setCondCodeAction(ISD::SETGE, Ty, Expand); + setCondCodeAction(ISD::SETGT, Ty, Expand); +} + +// Enable LASX support for the given floating-point type and Register class. +void LoongArchTargetLowering::addLASXFloatType(MVT::SimpleValueType Ty, + const TargetRegisterClass *RC) { + addRegisterClass(Ty, RC); + + // Expand all builtin opcodes. + for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) + setOperationAction(Opc, Ty, Expand); + + setOperationAction(ISD::LOAD, Ty, Legal); + setOperationAction(ISD::STORE, Ty, Legal); + setOperationAction(ISD::BITCAST, Ty, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal); + setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); + setOperationAction(ISD::UNDEF, Ty, Legal); + setOperationAction(ISD::CONCAT_VECTORS, Ty, Legal); + + setOperationAction(ISD::FADD, Ty, Legal); + setOperationAction(ISD::FDIV, Ty, Legal); + setOperationAction(ISD::FMA, Ty, Legal); + setOperationAction(ISD::FMUL, Ty, Legal); + setOperationAction(ISD::FSQRT, Ty, Legal); + setOperationAction(ISD::FSUB, Ty, Legal); + setOperationAction(ISD::VSELECT, Ty, Legal); + setOperationAction(ISD::FNEG, Ty, Legal); + setOperationAction(ISD::FRINT, Ty, Legal); + + if (Ty == MVT::v8f32 || Ty == MVT::v4f64) { + setOperationAction(ISD::FP_TO_SINT, Ty, Custom); + setOperationAction(ISD::FP_TO_UINT, Ty, Custom); + } + + setOperationAction(ISD::SETCC, Ty, Legal); + setCondCodeAction(ISD::SETOGE, Ty, Expand); + setCondCodeAction(ISD::SETOGT, Ty, Expand); + setCondCodeAction(ISD::SETUGE, Ty, Expand); + setCondCodeAction(ISD::SETUGT, Ty, Expand); + setCondCodeAction(ISD::SETGE, Ty, Expand); + setCondCodeAction(ISD::SETGT, Ty, Expand); +} + +bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses( + EVT VT, unsigned AddrSpace, unsigned Alignment, MachineMemOperand::Flags Flags, + bool *Fast) const { + if (!Subtarget.allowUnalignedAccess()) + return false; + if (Fast) + *Fast = true; + return true; +} + +EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &, + EVT VT) const { + if (!VT.isVector()) + return MVT::i32; + return VT.changeVectorElementTypeToInteger(); +} + +static LoongArch::CondCode condCodeToFCC(ISD::CondCode CC) { + 
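+  // The NaN-agnostic codes (SETEQ, SETLT, ...) are mapped to their ordered
+  // counterparts here; whether the resulting compare must additionally be
+  // inverted is decided separately by invertFPCondCodeUser().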
+  switch (CC) {
+  default: llvm_unreachable("Unknown fp condition code!");
+  case ISD::SETEQ:
+  case ISD::SETOEQ: return LoongArch::FCOND_OEQ;
+  case ISD::SETUNE: return LoongArch::FCOND_UNE;
+  case ISD::SETLT:
+  case ISD::SETOLT: return LoongArch::FCOND_OLT;
+  case ISD::SETGT:
+  case ISD::SETOGT: return LoongArch::FCOND_OGT;
+  case ISD::SETLE:
+  case ISD::SETOLE: return LoongArch::FCOND_OLE;
+  case ISD::SETGE:
+  case ISD::SETOGE: return LoongArch::FCOND_OGE;
+  case ISD::SETULT: return LoongArch::FCOND_ULT;
+  case ISD::SETULE: return LoongArch::FCOND_ULE;
+  case ISD::SETUGT: return LoongArch::FCOND_UGT;
+  case ISD::SETUGE: return LoongArch::FCOND_UGE;
+  case ISD::SETUO:  return LoongArch::FCOND_UN;
+  case ISD::SETO:   return LoongArch::FCOND_OR;
+  case ISD::SETNE:
+  case ISD::SETONE: return LoongArch::FCOND_ONE;
+  case ISD::SETUEQ: return LoongArch::FCOND_UEQ;
+  }
+}
+
+/// This function returns true if the floating point conditional branches and
+/// conditional moves which use condition code CC should be inverted.
+static bool invertFPCondCodeUser(LoongArch::CondCode CC) {
+  if (CC >= LoongArch::FCOND_F && CC <= LoongArch::FCOND_SUNE)
+    return false;
+
+  assert((CC >= LoongArch::FCOND_T && CC <= LoongArch::FCOND_GT) &&
+         "Illegal Condition Code");
+
+  return true;
+}
+
+// Creates and returns an FPCmp node from a setcc node.
+// Returns Op if setcc is not a floating point comparison.
+static SDValue createFPCmp(SelectionDAG &DAG, const SDValue &Op) {
+  // Must be a SETCC node.
+  if (Op.getOpcode() != ISD::SETCC)
+    return Op;
+
+  SDValue LHS = Op.getOperand(0);
+
+  if (!LHS.getValueType().isFloatingPoint())
+    return Op;
+
+  SDValue RHS = Op.getOperand(1);
+  SDLoc DL(Op);
+
+  // Assume the 3rd operand is a CondCodeSDNode. Add code to check the type of
+  // node if necessary.
+  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+
+  return DAG.getNode(LoongArchISD::FPCmp, DL, MVT::Glue, LHS, RHS,
+                     DAG.getConstant(condCodeToFCC(CC), DL, MVT::i32));
+}
+
+// Creates and returns a CMovFPT/F node.
+static SDValue createCMovFP(SelectionDAG &DAG, SDValue Cond, SDValue True,
+                            SDValue False, const SDLoc &DL) {
+  ConstantSDNode *CC = cast<ConstantSDNode>(Cond.getOperand(2));
+  bool invert = invertFPCondCodeUser((LoongArch::CondCode)CC->getSExtValue());
+  SDValue FCC0 = DAG.getRegister(LoongArch::FCC0, MVT::i32);
+
+  return DAG.getNode((invert ? LoongArchISD::CMovFP_F : LoongArchISD::CMovFP_T),
+                     DL, True.getValueType(), True, FCC0, False, Cond);
+}
+
+static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
+                                    TargetLowering::DAGCombinerInfo &DCI,
+                                    const LoongArchSubtarget &Subtarget) {
+  if (DCI.isBeforeLegalizeOps())
+    return SDValue();
+
+  SDValue SetCC = N->getOperand(0);
+
+  if ((SetCC.getOpcode() != ISD::SETCC) ||
+      !SetCC.getOperand(0).getValueType().isInteger())
+    return SDValue();
+
+  SDValue False = N->getOperand(2);
+  EVT FalseTy = False.getValueType();
+
+  if (!FalseTy.isInteger())
+    return SDValue();
+
+  ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(False);
+
+  // If the RHS (False) is 0, we swap the order of the operands
+  // of ISD::SELECT (obviously also inverting the condition) so that we can
+  // take advantage of conditional moves that produce a zero
+  // (maskeqz/masknez).
+  // Example:
+  //   return (a != 0) ? x : 0;
+  //     load $reg, x
+  //     maskeqz $reg, $reg, a
+  if (!FalseC)
+    return SDValue();
+
+  const SDLoc DL(N);
+
+  if (!FalseC->getZExtValue()) {
+    ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
+    SDValue True = N->getOperand(1);
+
+    SetCC = DAG.getSetCC(DL, SetCC.getValueType(), SetCC.getOperand(0),
+                         SetCC.getOperand(1),
+                         ISD::getSetCCInverse(CC, SetCC.getValueType()));
+
+    return DAG.getNode(ISD::SELECT, DL, FalseTy, SetCC, False, True);
+  }
+
+  // If both operands are integer constants there's a possibility that we
+  // can do some interesting optimizations.
+  SDValue True = N->getOperand(1);
+  ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(True);
+
+  if (!TrueC || !True.getValueType().isInteger())
+    return SDValue();
+
+  // We'll also ignore MVT::i64 operands as this optimization proves
+  // to be ineffective because of the required sign extensions as the result
+  // of a SETCC operator is always MVT::i32 for non-vector types.
+  if (True.getValueType() == MVT::i64)
+    return SDValue();
+
+  int64_t Diff = TrueC->getSExtValue() - FalseC->getSExtValue();
+
+  // 1) (a < x) ? y : y-1
+  //      slti $reg1, a, x
+  //      addi.w $reg2, $reg1, y-1
+  if (Diff == 1)
+    return DAG.getNode(ISD::ADD, DL, SetCC.getValueType(), SetCC, False);
+
+  // 2) (a < x) ? y-1 : y
+  //      slti $reg1, a, x
+  //      xori $reg1, $reg1, 1
+  //      addi.w $reg2, $reg1, y-1
+  if (Diff == -1) {
+    ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
+    SetCC = DAG.getSetCC(DL, SetCC.getValueType(), SetCC.getOperand(0),
+                         SetCC.getOperand(1),
+                         ISD::getSetCCInverse(CC, SetCC.getValueType()));
+    return DAG.getNode(ISD::ADD, DL, SetCC.getValueType(), SetCC, True);
+  }
+
+  // Could not optimize.
+  return SDValue();
+}
+
+static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
+                                 TargetLowering::DAGCombinerInfo &DCI,
+                                 const LoongArchSubtarget &Subtarget) {
+
+  if (Subtarget.hasLSX()) {
+
+    // Fold zero extensions into LoongArchISD::VEXTRACT_[SZ]EXT_ELT
+    //
+    // Performs the following transformations:
+    // - Changes LoongArchISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its
+    //   sign/zero-extension is completely overwritten by the new one performed
+    //   by the ISD::AND.
+    // - Removes redundant zero extensions performed by an ISD::AND.
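+    //
+    // For example (illustrative): in
+    //   (and (VEXTRACT_SEXT_ELT $v, $idx, i8), 255)
+    // the AND keeps exactly the low 8 bits, overwriting the sign extension
+    // from bit 8, so the whole expression is equivalent to
+    //   (VEXTRACT_ZEXT_ELT $v, $idx, i8)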
+    SDValue Op0 = N->getOperand(0);
+    SDValue Op1 = N->getOperand(1);
+    unsigned Op0Opcode = Op0->getOpcode();
+
+    // (and (LoongArchVExtract[SZ]Ext $a, $b, $c), imm:$d)
+    // where $d + 1 == 2^n and n == 32
+    // or    $d + 1 == 2^n and n <= 32 and ZExt
+    // -> (LoongArchVExtractZExt $a, $b, $c)
+    if (Op0Opcode == LoongArchISD::VEXTRACT_SEXT_ELT ||
+        Op0Opcode == LoongArchISD::VEXTRACT_ZEXT_ELT) {
+      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Op1);
+
+      if (Mask) {
+
+        int32_t Log2IfPositive = (Mask->getAPIntValue() + 1).exactLogBase2();
+
+        if (Log2IfPositive > 0) {
+          SDValue Op0Op2 = Op0->getOperand(2);
+          EVT ExtendTy = cast<VTSDNode>(Op0Op2)->getVT();
+          unsigned ExtendTySize = ExtendTy.getSizeInBits();
+          unsigned Log2 = Log2IfPositive;
+
+          if ((Op0Opcode == LoongArchISD::VEXTRACT_ZEXT_ELT &&
+               Log2 >= ExtendTySize) ||
+              Log2 == ExtendTySize) {
+            SDValue Ops[] = {Op0->getOperand(0), Op0->getOperand(1), Op0Op2};
+            return DAG.getNode(LoongArchISD::VEXTRACT_ZEXT_ELT, SDLoc(Op0),
+                               Op0->getVTList(),
+                               makeArrayRef(Ops, Op0->getNumOperands()));
+          }
+        }
+      }
+    }
+  }
+
+  if (DCI.isBeforeLegalizeOps())
+    return SDValue();
+
+  SDValue FirstOperand = N->getOperand(0);
+  unsigned FirstOperandOpc = FirstOperand.getOpcode();
+  SDValue Mask = N->getOperand(1);
+  EVT ValTy = N->getValueType(0);
+  SDLoc DL(N);
+
+  uint64_t Lsb = 0, SMLsb, SMSize;
+  ConstantSDNode *CN;
+  SDValue NewOperand;
+  unsigned Opc;
+
+  // Op's second operand must be a shifted mask.
+  if (!(CN = dyn_cast<ConstantSDNode>(Mask)) ||
+      !isShiftedMask(CN->getZExtValue(), SMLsb, SMSize))
+    return SDValue();
+
+  if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
+    // Pattern match BSTRPICK.
+    //   $dst = and ((sra or srl) $src, lsb), (2**size - 1)
+    //   => bstrpick $dst, $src, lsb+size-1, lsb
+
+    // The second operand of the shift must be an immediate.
+    if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
+      return SDValue();
+
+    Lsb = CN->getZExtValue();
+
+    // Return if the shifted mask does not start at bit 0 or the sum of its
+    // size and Lsb exceeds the word's size.
+    if (SMLsb != 0 || Lsb + SMSize > ValTy.getSizeInBits())
+      return SDValue();
+
+    Opc = LoongArchISD::BSTRPICK;
+    NewOperand = FirstOperand.getOperand(0);
+  } else {
+    // Pattern match BSTRPICK.
+    //   $dst = and $src, (2**size - 1), if size > 12
+    //   => bstrpick $dst, $src, lsb+size-1, lsb, with lsb = 0
+
+    // If the mask is <= 0xfff, andi can be used instead.
+    if (CN->getZExtValue() <= 0xfff)
+      return SDValue();
+    // Return if the mask doesn't start at position 0.
+    if (SMLsb)
+      return SDValue();
+
+    Opc = LoongArchISD::BSTRPICK;
+    NewOperand = FirstOperand;
+  }
+  return DAG.getNode(Opc, DL, ValTy, NewOperand,
+                     DAG.getConstant((Lsb + SMSize - 1), DL, MVT::i32),
+                     DAG.getConstant(Lsb, DL, MVT::i32));
+}
+
+// Determine if the specified node is a constant vector splat.
+//
+// Returns true and sets Imm if:
+// * N is an ISD::BUILD_VECTOR representing a constant splat
+static bool isVSplat(SDValue N, APInt &Imm) {
+  BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(N.getNode());
+
+  if (!Node)
+    return false;
+
+  APInt SplatValue, SplatUndef;
+  unsigned SplatBitSize;
+  bool HasAnyUndefs;
+
+  if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
+                             8))
+    return false;
+
+  Imm = SplatValue;
+
+  return true;
+}
+
+// Test whether the given node is an all-ones build_vector.
+static bool isVectorAllOnes(SDValue N) {
+  // Look through bitcasts. Endianness doesn't matter because we are looking
+  // for an all-ones value.
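+  // For instance, a v4i32 splat of -1 bitcast to v8i16 still reads as all
+  // ones, so it is safe to strip the bitcast before examining the splat.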
+  if (N->getOpcode() == ISD::BITCAST)
+    N = N->getOperand(0);
+
+  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N);
+
+  if (!BVN)
+    return false;
+
+  APInt SplatValue, SplatUndef;
+  unsigned SplatBitSize;
+  bool HasAnyUndefs;
+
+  // Endianness doesn't matter in this context because we are looking for
+  // an all-ones value.
+  if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs))
+    return SplatValue.isAllOnesValue();
+
+  return false;
+}
+
+// Test whether N is the bitwise inverse of OfNode.
+static bool isBitwiseInverse(SDValue N, SDValue OfNode) {
+  if (N->getOpcode() != ISD::XOR)
+    return false;
+
+  if (isVectorAllOnes(N->getOperand(0)))
+    return N->getOperand(1) == OfNode;
+
+  if (isVectorAllOnes(N->getOperand(1)))
+    return N->getOperand(0) == OfNode;
+
+  return false;
+}
+
+static SDValue performSet(SDNode *N, SelectionDAG &DAG,
+                          TargetLowering::DAGCombinerInfo &DCI,
+                          const LoongArchSubtarget &Subtarget) {
+
+  SDValue Op0 = N->getOperand(0);
+  SDValue Op1 = N->getOperand(1);
+  SDValue N1, N2;
+  if (Op0->getOpcode() == ISD::BUILD_VECTOR &&
+      (Op1->getValueType(0).is128BitVector() ||
+       Op1->getValueType(0).is256BitVector())) {
+    N1 = Op0;
+    N2 = Op1;
+  } else if (Op1->getOpcode() == ISD::BUILD_VECTOR &&
+             (Op0->getValueType(0).is128BitVector() ||
+              Op0->getValueType(0).is256BitVector())) {
+    N1 = Op1;
+    N2 = Op0;
+  } else
+    return SDValue();
+
+  APInt Mask1;
+  if (!isVSplat(N1, Mask1))
+    return SDValue();
+
+  if (!N1->getValueType(0).isSimple())
+    return SDValue();
+
+  ConstantSDNode *C1;
+  uint64_t Imm;
+  unsigned ImmL;
+  if (!(C1 = dyn_cast<ConstantSDNode>(N1.getOperand(0))) ||
+      !isPowerOf2_64(C1->getZExtValue()))
+    return SDValue();
+
+  Imm = C1->getZExtValue();
+  ImmL = Log2_64(Imm);
+  MVT VT = N1->getSimpleValueType(0);
+
+  SDNode *Res;
+
+  if (Subtarget.hasLASX() && N->getValueType(0).is256BitVector()) {
+    if (VT == MVT::v32i8 && ImmL < 8)
+      Res = DAG.getMachineNode(LoongArch::XVBITSETI_B, SDLoc(N), VT, N2,
+                               DAG.getTargetConstant(ImmL, SDLoc(N), MVT::i32));
+    else if (VT == MVT::v16i16 && ImmL < 16)
+      Res = DAG.getMachineNode(LoongArch::XVBITSETI_H, SDLoc(N), VT, N2,
+                               DAG.getTargetConstant(ImmL, SDLoc(N), MVT::i32));
+    else if (VT == MVT::v8i32 && ImmL < 32)
+      Res = DAG.getMachineNode(LoongArch::XVBITSETI_W, SDLoc(N), VT, N2,
+                               DAG.getTargetConstant(ImmL, SDLoc(N), MVT::i32));
+    else if (VT == MVT::v4i64 && ImmL < 64)
+      Res = DAG.getMachineNode(LoongArch::XVBITSETI_D, SDLoc(N), VT, N2,
+                               DAG.getTargetConstant(ImmL, SDLoc(N), MVT::i32));
+    else
+      return SDValue();
+  } else if (N->getValueType(0).is128BitVector()) {
+    if (VT == MVT::v16i8 && ImmL < 8)
+      Res = DAG.getMachineNode(LoongArch::VBITSETI_B, SDLoc(N), VT, N2,
+                               DAG.getTargetConstant(ImmL, SDLoc(N), MVT::i32));
+    else if (VT == MVT::v8i16 && ImmL < 16)
+      Res = DAG.getMachineNode(LoongArch::VBITSETI_H, SDLoc(N), VT, N2,
+                               DAG.getTargetConstant(ImmL, SDLoc(N), MVT::i32));
+    else if (VT == MVT::v4i32 && ImmL < 32)
+      Res = DAG.getMachineNode(LoongArch::VBITSETI_W, SDLoc(N), VT, N2,
+                               DAG.getTargetConstant(ImmL, SDLoc(N), MVT::i32));
+    else if (VT == MVT::v2i64 && ImmL < 64)
+      Res = DAG.getMachineNode(LoongArch::VBITSETI_D, SDLoc(N), VT, N2,
+                               DAG.getTargetConstant(ImmL, SDLoc(N), MVT::i32));
+    else
+      return SDValue();
+
+  } else
+    return SDValue();
+
+  return SDValue(Res, 0);
+}
+
+static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
+                                TargetLowering::DAGCombinerInfo &DCI,
+                                const LoongArchSubtarget &Subtarget) {
+
+  SDValue Res;
+  if (Subtarget.hasLSX() && (N->getValueType(0).is128BitVector() ||
+                             N->getValueType(0).is256BitVector())) {
+    SDValue Op0 = N->getOperand(0);
+    SDValue Op1 = N->getOperand(1);
+
+    if (Op0->getOpcode() == ISD::AND && Op1->getOpcode() == ISD::AND) {
+      SDValue Op0Op0 = Op0->getOperand(0);
+      SDValue Op0Op1 = Op0->getOperand(1);
+      SDValue Op1Op0 = Op1->getOperand(0);
+      SDValue Op1Op1 = Op1->getOperand(1);
+
+      SDValue IfSet, IfClr, Cond;
+      bool IsConstantMask = false;
+      APInt Mask, InvMask;
+
+      // If Op0Op0 is an appropriate mask, try to find its inverse in either
+      // Op1Op0, or Op1Op1. Keep track of the Cond, IfSet, and IfClr nodes,
+      // while looking. IfClr will be set if we find a valid match.
+      if (isVSplat(Op0Op0, Mask)) {
+        Cond = Op0Op0;
+        IfSet = Op0Op1;
+
+        if (isVSplat(Op1Op0, InvMask) &&
+            Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
+          IfClr = Op1Op1;
+        else if (isVSplat(Op1Op1, InvMask) &&
+                 Mask.getBitWidth() == InvMask.getBitWidth() &&
+                 Mask == ~InvMask)
+          IfClr = Op1Op0;
+
+        IsConstantMask = true;
+      }
+
+      // If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the
+      // same thing again using this mask. IfClr will be set if we find a valid
+      // match.
+      if (!IfClr.getNode() && isVSplat(Op0Op1, Mask)) {
+        Cond = Op0Op1;
+        IfSet = Op0Op0;
+
+        if (isVSplat(Op1Op0, InvMask) &&
+            Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
+          IfClr = Op1Op1;
+        else if (isVSplat(Op1Op1, InvMask) &&
+                 Mask.getBitWidth() == InvMask.getBitWidth() &&
+                 Mask == ~InvMask)
+          IfClr = Op1Op0;
+
+        IsConstantMask = true;
+      }
+
+      // If IfClr is not yet set, try looking for a non-constant match.
+      // IfClr will be set if we find a valid match amongst the eight
+      // possibilities.
+      if (!IfClr.getNode()) {
+        if (isBitwiseInverse(Op0Op0, Op1Op0)) {
+          Cond = Op1Op0;
+          IfSet = Op1Op1;
+          IfClr = Op0Op1;
+        } else if (isBitwiseInverse(Op0Op1, Op1Op0)) {
+          Cond = Op1Op0;
+          IfSet = Op1Op1;
+          IfClr = Op0Op0;
+        } else if (isBitwiseInverse(Op0Op0, Op1Op1)) {
+          Cond = Op1Op1;
+          IfSet = Op1Op0;
+          IfClr = Op0Op1;
+        } else if (isBitwiseInverse(Op0Op1, Op1Op1)) {
+          Cond = Op1Op1;
+          IfSet = Op1Op0;
+          IfClr = Op0Op0;
+        } else if (isBitwiseInverse(Op1Op0, Op0Op0)) {
+          Cond = Op0Op0;
+          IfSet = Op0Op1;
+          IfClr = Op1Op1;
+        } else if (isBitwiseInverse(Op1Op1, Op0Op0)) {
+          Cond = Op0Op0;
+          IfSet = Op0Op1;
+          IfClr = Op1Op0;
+        } else if (isBitwiseInverse(Op1Op0, Op0Op1)) {
+          Cond = Op0Op1;
+          IfSet = Op0Op0;
+          IfClr = Op1Op1;
+        } else if (isBitwiseInverse(Op1Op1, Op0Op1)) {
+          Cond = Op0Op1;
+          IfSet = Op0Op0;
+          IfClr = Op1Op0;
+        }
+      }
+
+      // At this point, IfClr will be set if we have a valid match.
+      if (IfClr.getNode()) {
+        assert(Cond.getNode() && IfSet.getNode());
+
+        // Fold degenerate cases.
+        if (IsConstantMask) {
+          if (Mask.isAllOnesValue())
+            return IfSet;
+          else if (Mask == 0)
+            return IfClr;
+        }
+
+        // Transform the DAG into an equivalent VSELECT.
+        return DAG.getNode(ISD::VSELECT, SDLoc(N), N->getValueType(0), Cond,
+                           IfSet, IfClr);
+      }
+    }
+
+    if ((Res = performSet(N, DAG, DCI, Subtarget)))
+      return Res;
+  }
+
+  // Pattern match BSTRINS.
+  //   $dst = or (and $src1, mask0), (and (shl $src, lsb), mask1),
+  //   where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
+  //   => bstrins $dst, $src, lsb+size-1, lsb, $src1
+  if (DCI.isBeforeLegalizeOps())
+    return SDValue();
+
+  SDValue And0 = N->getOperand(0), And1 = N->getOperand(1);
+  uint64_t SMLsb0, SMSize0, SMLsb1, SMSize1;
+  ConstantSDNode *CN, *CN1;
+
+  // See if Op's first operand matches (and $src1, mask0).
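+  // (Illustration of the overall pattern: with lsb = 8 and size = 8,
+  //    or (and $src1, ~0xff00), (and (shl $src, 8), 0xff00)
+  //  => bstrins $dst, $src, 15, 8, $src1.)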
+  if (And0.getOpcode() != ISD::AND)
+    return SDValue();
+
+  if (!(CN = dyn_cast<ConstantSDNode>(And0.getOperand(1))) ||
+      !isShiftedMask(~CN->getSExtValue(), SMLsb0, SMSize0))
+    return SDValue();
+
+  // See if Op's second operand matches (and (shl $src, lsb), mask1).
+  if (And1.getOpcode() == ISD::AND &&
+      And1.getOperand(0).getOpcode() == ISD::SHL) {
+
+    if (!(CN = dyn_cast<ConstantSDNode>(And1.getOperand(1))) ||
+        !isShiftedMask(CN->getZExtValue(), SMLsb1, SMSize1))
+      return SDValue();
+
+    // The shift masks must have the same least significant bit and size.
+    if (SMLsb0 != SMLsb1 || SMSize0 != SMSize1)
+      return SDValue();
+
+    SDValue Shl = And1.getOperand(0);
+
+    if (!(CN = dyn_cast<ConstantSDNode>(Shl.getOperand(1))))
+      return SDValue();
+
+    unsigned Shamt = CN->getZExtValue();
+
+    // Return if the shift amount and the first bit position of the mask are
+    // not the same.
+    EVT ValTy = N->getValueType(0);
+    if ((Shamt != SMLsb0) || (SMLsb0 + SMSize0 > ValTy.getSizeInBits()))
+      return SDValue();
+
+    SDLoc DL(N);
+    return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, Shl.getOperand(0),
+                       DAG.getConstant((SMLsb0 + SMSize0 - 1), DL, MVT::i32),
+                       DAG.getConstant(SMLsb0, DL, MVT::i32),
+                       And0.getOperand(0));
+  } else {
+    // Pattern match BSTRINS.
+    //   $dst = or (and $src, mask0), mask1
+    //   where mask0 = ((1 << SMSize0) - 1) << SMLsb0
+    //   => bstrins $dst, $src, SMLsb0+SMSize0-1, SMLsb0
+    if (~CN->getSExtValue() == ((((int64_t)1 << SMSize0) - 1) << SMLsb0) &&
+        (SMSize0 + SMLsb0 <= 64)) {
+      // Check if the AND instruction has a constant as its argument.
+      bool isConstCase = And1.getOpcode() != ISD::AND;
+      if (And1.getOpcode() == ISD::AND) {
+        if (!(CN1 = dyn_cast<ConstantSDNode>(And1->getOperand(1))))
+          return SDValue();
+      } else {
+        if (!(CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1))))
+          return SDValue();
+      }
+      // Don't generate BSTRINS if the constant OR operand doesn't fit into
+      // the bits cleared by the constant AND operand.
+      if (CN->getSExtValue() & CN1->getSExtValue())
+        return SDValue();
+
+      SDLoc DL(N);
+      EVT ValTy = N->getOperand(0)->getValueType(0);
+      SDValue Const1;
+      SDValue SrlX;
+      if (!isConstCase) {
+        Const1 = DAG.getConstant(SMLsb0, DL, MVT::i32);
+        SrlX = DAG.getNode(ISD::SRL, DL, And1->getValueType(0), And1, Const1);
+      }
+      return DAG.getNode(
+          LoongArchISD::BSTRINS, DL, N->getValueType(0),
+          isConstCase
+              ? DAG.getConstant(CN1->getSExtValue() >> SMLsb0, DL, ValTy)
+              : SrlX,
+          DAG.getConstant(ValTy.getSizeInBits() / 8 < 8
+                              ? (SMLsb0 + (SMSize0 & 31) - 1)
+                              : (SMLsb0 + SMSize0 - 1),
+                          DL, MVT::i32),
+          DAG.getConstant(SMLsb0, DL, MVT::i32),
+          And0->getOperand(0));
+    }
+    return SDValue();
+  }
+}
+
+static bool
+shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT,
+                                   SelectionDAG &DAG,
+                                   const LoongArchSubtarget &Subtarget) {
+  // Estimate the number of operations the below transform will turn a
+  // constant multiply into. The number is approximately equal to the minimal
+  // number of powers of two that constant can be broken down to by adding
+  // or subtracting them.
+  //
+  // If we have taken more than 10[1] / 8[2] steps to attempt the
+  // optimization for a native sized value, it is more than likely that this
+  // optimization will make things worse.
+  //
+  // [1] LA64 requires 4 instructions at most to materialize any constant,
+  //     multiplication requires at least 4 cycles, but another cycle (or two)
+  //     to retrieve the result from corresponding registers.
+  //
+  // [2] LA32 requires 2 instructions at most to materialize any constant,
+  //     multiplication requires at least 4 cycles, but another cycle (or two)
+  //     to retrieve the result from corresponding registers.
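+  //
+  // For example (illustrative): C = 10 breaks down as 8 + 2, i.e. two steps,
+  // and genConstMult below will emit it as (x << 3) + (x << 1).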
+  //
+  // TODO:
+  // - MaxSteps needs to consider the `VT` of the constant for the current
+  //   target.
+  // - Consider to perform this optimization after type legalization.
+  //   That allows to remove a workaround for types not supported natively.
+  // - Take into account `-Os, -Oz` flags because this optimization
+  //   increases code size.
+  unsigned MaxSteps = Subtarget.is64Bit() ? 10 : 8;
+
+  SmallVector<APInt, 16> WorkStack(1, C);
+  unsigned Steps = 0;
+  unsigned BitWidth = C.getBitWidth();
+
+  while (!WorkStack.empty()) {
+    APInt Val = WorkStack.pop_back_val();
+
+    if (Val == 0 || Val == 1)
+      continue;
+
+    if (Steps >= MaxSteps)
+      return false;
+
+    if (Val.isPowerOf2()) {
+      ++Steps;
+      continue;
+    }
+
+    APInt Floor = APInt(BitWidth, 1) << Val.logBase2();
+    APInt Ceil = Val.isNegative() ? APInt(BitWidth, 0)
+                                  : APInt(BitWidth, 1) << Val.ceilLogBase2();
+
+    if ((Val - Floor).ule(Ceil - Val)) {
+      WorkStack.push_back(Floor);
+      WorkStack.push_back(Val - Floor);
+    } else {
+      WorkStack.push_back(Ceil);
+      WorkStack.push_back(Ceil - Val);
+    }
+
+    ++Steps;
+  }
+
+  // If the value being multiplied is not supported natively, we have to pay
+  // an additional legalization cost, conservatively assume an increase in the
+  // cost of 3 instructions per step. The values for this heuristic were
+  // determined experimentally.
+  unsigned RegisterSize = DAG.getTargetLoweringInfo()
+                              .getRegisterType(*DAG.getContext(), VT)
+                              .getSizeInBits();
+  Steps *= (VT.getSizeInBits() != RegisterSize) * 3;
+  if (Steps > 27)
+    return false;
+
+  return true;
+}
+
+static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT,
+                            EVT ShiftTy, SelectionDAG &DAG) {
+  // Return 0.
+  if (C == 0)
+    return DAG.getConstant(0, DL, VT);
+
+  // Return x.
+  if (C == 1)
+    return X;
+
+  // If c is power of 2, return (shl x, log2(c)).
+  if (C.isPowerOf2())
+    return DAG.getNode(ISD::SHL, DL, VT, X,
+                       DAG.getConstant(C.logBase2(), DL, ShiftTy));
+
+  unsigned BitWidth = C.getBitWidth();
+  APInt Floor = APInt(BitWidth, 1) << C.logBase2();
+  APInt Ceil = C.isNegative() ? APInt(BitWidth, 0)
+                              : APInt(BitWidth, 1) << C.ceilLogBase2();
+
+  // If |c - floor_c| <= |c - ceil_c|,
+  // where floor_c = pow(2, floor(log2(c))) and ceil_c = pow(2, ceil(log2(c))),
+  // return (add constMult(x, floor_c), constMult(x, c - floor_c)).
+  if ((C - Floor).ule(Ceil - C)) {
+    SDValue Op0 = genConstMult(X, Floor, DL, VT, ShiftTy, DAG);
+    SDValue Op1 = genConstMult(X, C - Floor, DL, VT, ShiftTy, DAG);
+    return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
+  }
+
+  // If |c - floor_c| > |c - ceil_c|,
+  // return (sub constMult(x, ceil_c), constMult(x, ceil_c - c)).
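+  // For example (illustrative): c = 7 gives floor_c = 4 and ceil_c = 8;
+  // since |7 - 8| < |7 - 4|, this emits (sub (shl x, 3), x).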
+  SDValue Op0 = genConstMult(X, Ceil, DL, VT, ShiftTy, DAG);
+  SDValue Op1 = genConstMult(X, Ceil - C, DL, VT, ShiftTy, DAG);
+  return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
+}
+
+static SDValue performLogicCombine(SDNode *N, SelectionDAG &DAG,
+                                   const LoongArchSubtarget &Subtarget) {
+
+  SDLoc DL(N);
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  if (!(N0->getOpcode() == ISD::TRUNCATE && N1->getOpcode() == ISD::TRUNCATE))
+    return SDValue();
+
+  if (!(N->getValueType(0).isSimple() && N0->getValueType(0).isSimple() &&
+        N1->getValueType(0).isSimple() &&
+        N0->getOperand(0)->getValueType(0).isSimple() &&
+        N1->getOperand(0)->getValueType(0).isSimple()))
+    return SDValue();
+
+  if (!(N->getSimpleValueType(0).SimpleTy == MVT::i32 &&
+        N0->getSimpleValueType(0).SimpleTy == MVT::i32 &&
+        N1->getSimpleValueType(0).SimpleTy == MVT::i32))
+    return SDValue();
+
+  if (!(N0->getOperand(0)->getSimpleValueType(0).SimpleTy == MVT::i64 &&
+        N1->getOperand(0)->getSimpleValueType(0).SimpleTy == MVT::i64))
+    return SDValue();
+
+  SDValue SubReg = DAG.getTargetConstant(LoongArch::sub_32, DL, MVT::i32);
+  SDValue Val0 = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
+                                            N0->getValueType(0),
+                                            N0->getOperand(0), SubReg),
+                         0);
+  SDValue Val1 = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
+                                            N1->getValueType(0),
+                                            N1->getOperand(0), SubReg),
+                         0);
+
+  return DAG.getNode(N->getOpcode(), DL, N0->getValueType(0), Val0, Val1);
+}
+
+static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
+                                 const TargetLowering::DAGCombinerInfo &DCI,
+                                 const LoongArchTargetLowering *TL,
+                                 const LoongArchSubtarget &Subtarget) {
+  EVT VT = N->getValueType(0);
+
+  SDValue Res;
+  if ((Res = performLogicCombine(N, DAG, Subtarget)))
+    return Res;
+
+  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+    if (!VT.isVector() && shouldTransformMulToShiftsAddsSubs(
+                              C->getAPIntValue(), VT, DAG, Subtarget))
+      return genConstMult(N->getOperand(0), C->getAPIntValue(), SDLoc(N), VT,
+                          TL->getScalarShiftAmountTy(DAG.getDataLayout(), VT),
+                          DAG);
+
+  return SDValue(N, 0);
+}
+
+// Fold sign-extensions into LoongArchISD::VEXTRACT_[SZ]EXT_ELT for LSX.
+//
+// Performs the following transformations:
+// - Changes LoongArchISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its
+//   sign/zero-extension is completely overwritten by the new one performed by
+//   the ISD::SRA and ISD::SHL nodes.
+// - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL
+//   sequence.
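+//
+// For example (illustrative):
+//   (sra (shl (VEXTRACT_SEXT_ELT $v, $i, i16), 16), 16)
+// redoes the sign extension from bit 16 that the extract already performed,
+// so it folds to (VEXTRACT_SEXT_ELT $v, $i, i16).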
+static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
+                                 TargetLowering::DAGCombinerInfo &DCI,
+                                 const LoongArchSubtarget &Subtarget) {
+
+  SDValue Res;
+  if ((Res = performLogicCombine(N, DAG, Subtarget)))
+    return Res;
+
+  if (Subtarget.hasLSX() || Subtarget.hasLASX()) {
+    SDValue Op0 = N->getOperand(0);
+    SDValue Op1 = N->getOperand(1);
+
+    // (sra (shl (LoongArchVExtract[SZ]Ext $a, $b, $c), imm:$d), imm:$d)
+    // where $d + sizeof($c) == 32
+    // or    $d + sizeof($c) <= 32 and SExt
+    // -> (LoongArchVExtractSExt $a, $b, $c)
+    if (Op0->getOpcode() == ISD::SHL && Op1 == Op0->getOperand(1)) {
+      SDValue Op0Op0 = Op0->getOperand(0);
+      ConstantSDNode *ShAmount = dyn_cast<ConstantSDNode>(Op1);
+
+      if (!ShAmount)
+        return SDValue();
+
+      if (Op0Op0->getOpcode() != LoongArchISD::VEXTRACT_SEXT_ELT &&
+          Op0Op0->getOpcode() != LoongArchISD::VEXTRACT_ZEXT_ELT)
+        return SDValue();
+
+      EVT ExtendTy = cast<VTSDNode>(Op0Op0->getOperand(2))->getVT();
+      unsigned TotalBits = ShAmount->getZExtValue() + ExtendTy.getSizeInBits();
+
+      if (TotalBits == 32 ||
+          (Op0Op0->getOpcode() == LoongArchISD::VEXTRACT_SEXT_ELT &&
+           TotalBits <= 32)) {
+        SDValue Ops[] = {Op0Op0->getOperand(0), Op0Op0->getOperand(1),
+                         Op0Op0->getOperand(2)};
+        return DAG.getNode(LoongArchISD::VEXTRACT_SEXT_ELT, SDLoc(Op0Op0),
+                           Op0Op0->getVTList(),
+                           makeArrayRef(Ops, Op0Op0->getNumOperands()));
+      }
+    }
+  }
+
+  return SDValue();
+}
+
+// Combine vsub/vslt/vbitsel.v into vabsd.
+static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) {
+  assert((N->getOpcode() == ISD::VSELECT) && "Need ISD::VSELECT");
+
+  SDLoc dl(N);
+  SDValue Cond = N->getOperand(0);
+  SDValue TrueOpnd = N->getOperand(1);
+  SDValue FalseOpnd = N->getOperand(2);
+
+  if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
+      FalseOpnd.getOpcode() != ISD::SUB)
+    return SDValue();
+
+  if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
+    return SDValue();
+
+  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+
+  switch (CC) {
+  default:
+    return SDValue();
+  case ISD::SETUGT:
+  case ISD::SETUGE:
+  case ISD::SETGT:
+  case ISD::SETGE:
+    break;
+  case ISD::SETULT:
+  case ISD::SETULE:
+  case ISD::SETLT:
+  case ISD::SETLE:
+    std::swap(TrueOpnd, FalseOpnd);
+    break;
+  }
+
+  SDValue Op1 = Cond.getOperand(0);
+  SDValue Op2 = Cond.getOperand(1);
+
+  if (TrueOpnd.getOperand(0) == Op1 && TrueOpnd.getOperand(1) == Op2 &&
+      FalseOpnd.getOperand(0) == Op2 && FalseOpnd.getOperand(1) == Op1) {
+    if (ISD::isSignedIntSetCC(CC)) {
+      return DAG.getNode(LoongArchISD::VABSD, dl,
+                         N->getOperand(1).getValueType(), Op1, Op2,
+                         DAG.getTargetConstant(0, dl, MVT::i32));
+    } else {
+      return DAG.getNode(LoongArchISD::UVABSD, dl,
+                         N->getOperand(1).getValueType(), Op1, Op2,
+                         DAG.getTargetConstant(0, dl, MVT::i32));
+    }
+  }
+  return SDValue();
+}
+
+static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
+                                 const LoongArchSubtarget &Subtarget) {
+
+  EVT Ty = N->getValueType(0);
+
+  if ((Subtarget.hasLSX() && Ty.is128BitVector() && Ty.isInteger()) ||
+      (Subtarget.hasLASX() && Ty.is256BitVector() && Ty.isInteger())) {
+    // Try the following combines:
+    //   (xor (or $a, $b), (build_vector allones))
+    //   (xor (or $a, $b), (bitcast (build_vector allones)))
+    SDValue Op0 = N->getOperand(0);
+    SDValue Op1 = N->getOperand(1);
+    SDValue NotOp;
+
+    if (ISD::isBuildVectorAllOnes(Op0.getNode()))
+      NotOp = Op1;
+    else if (ISD::isBuildVectorAllOnes(Op1.getNode()))
+      NotOp = Op0;
+    else
+      return SDValue();
+
+    if (NotOp->getOpcode() == ISD::OR)
+      return DAG.getNode(LoongArchISD::VNOR, SDLoc(N), Ty,
+                         NotOp->getOperand(0), NotOp->getOperand(1));
+  }
+
+  return SDValue();
+}
+
+// When using a 256-bit vector is less expensive than using a 128-bit vector,
+// use this function to convert a 128-bit vector to a 256-bit vector.
+static SDValue
+performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
+                             TargetLowering::DAGCombinerInfo &DCI,
+                             const LoongArchSubtarget &Subtarget) {
+
+  assert((N->getOpcode() == ISD::CONCAT_VECTORS) && "Need ISD::CONCAT_VECTORS");
+  if (DCI.isAfterLegalizeDAG())
+    return SDValue();
+
+  SDLoc DL(N);
+  SDValue Top0 = N->getOperand(0);
+  SDValue Top1 = N->getOperand(1);
+
+  // Check for cheaper optimizations.
+  if (!((Top0->getOpcode() == ISD::SIGN_EXTEND) &&
+        (Top1->getOpcode() == ISD::SIGN_EXTEND)))
+    return SDValue();
+  if (!((Top0->getOperand(0)->getOpcode() == ISD::ADD) &&
+        (Top1->getOperand(0)->getOpcode() == ISD::ADD)))
+    return SDValue();
+
+  SDValue Op_a0 = Top0->getOperand(0);
+  SDValue Op_a1 = Top1->getOperand(0);
+  for (int i = 0; i < 2; i++) {
+    if (!((Op_a0->getOperand(i)->getOpcode() == ISD::BUILD_VECTOR) &&
+          (Op_a1->getOperand(i)->getOpcode() == ISD::BUILD_VECTOR)))
+      return SDValue();
+  }
+
+  SDValue Ops_b[] = {Op_a0->getOperand(0), Op_a0->getOperand(1),
+                     Op_a1->getOperand(0), Op_a1->getOperand(1)};
+  for (int i = 0; i < 4; i++) {
+    if (Ops_b[i]->getNumOperands() != 2)
+      return SDValue();
+  }
+
+  // Currently only a single case is handled, and more optimization scenarios
+  // will be added in the future.
+  SDValue Ops_e[] = {Ops_b[0]->getOperand(0), Ops_b[0]->getOperand(1),
+                     Ops_b[2]->getOperand(0), Ops_b[2]->getOperand(1),
+                     Ops_b[1]->getOperand(0), Ops_b[1]->getOperand(1),
+                     Ops_b[3]->getOperand(0), Ops_b[3]->getOperand(1)};
+  for (int i = 0; i < 8; i++) {
+    if (dyn_cast<ConstantSDNode>(Ops_e[i]))
+      return SDValue();
+    if (i < 4) {
+      if (cast<ConstantSDNode>(Ops_e[i]->getOperand(1))->getSExtValue() !=
+          (2 * i))
+        return SDValue();
+    } else {
+      if (cast<ConstantSDNode>(Ops_e[i]->getOperand(1))->getSExtValue() !=
+          (2 * i - 7))
+        return SDValue();
+    }
+  }
+
+  for (int i = 0; i < 5; i = i + 4) {
+    if (!((Ops_e[i]->getOperand(0) == Ops_e[i + 1]->getOperand(0)) &&
+          (Ops_e[i + 1]->getOperand(0) == Ops_e[i + 2]->getOperand(0)) &&
+          (Ops_e[i + 2]->getOperand(0) == Ops_e[i + 3]->getOperand(0))))
+      return SDValue();
+  }
+  return SDValue(DAG.getMachineNode(LoongArch::XVHADDW_D_W, DL, MVT::v4i64,
+                                    Ops_e[6]->getOperand(0),
+                                    Ops_e[0]->getOperand(0)),
+                 0);
+}
+
+static SDValue performParity(SDNode *N, SelectionDAG &DAG,
+                             TargetLowering::DAGCombinerInfo &DCI,
+                             const LoongArchSubtarget &Subtarget) {
+
+  SDLoc DL(N);
+  SDValue T = N->getOperand(0);
+  if (!(N->getValueType(0).isSimple() && T->getValueType(0).isSimple()))
+    return SDValue();
+
+  if (DCI.isAfterLegalizeDAG())
+    return SDValue();
+
+  SDValue Ops[4];
+  bool pos_e = false;
+  bool pos_o = false;
+
+  for (int i = 0; i < 4; i++) {
+    Ops[i] = T->getOperand(i);
+    if (!Ops[i]->getValueType(0).isSimple())
+      return SDValue();
+    if (Ops[i]->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+      return SDValue();
+
+    if (!dyn_cast<ConstantSDNode>(Ops[i]->getOperand(1)))
+      return SDValue();
+
+    if (cast<ConstantSDNode>(Ops[i]->getOperand(1))->getSExtValue() ==
+        (2 * i)) {
+      pos_e = true;
+    } else if (cast<ConstantSDNode>(Ops[i]->getOperand(1))->getSExtValue() ==
+               (2 * i + 1)) {
+      pos_o = true;
+    } else
+      return SDValue();
+  }
+
+  if (!(N->getSimpleValueType(0).SimpleTy == MVT::v4i64 &&
+        T->getSimpleValueType(0).SimpleTy == MVT::v4i32))
+    return SDValue();
+
+  for (int j = 0; j < 3; j++) {
+    if (Ops[j]->getOperand(0) != Ops[j + 1]->getOperand(0))
+      return SDValue();
+  }
+  if (pos_e) {
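+    // pos_e: all four extracts read the even lanes 0/2/4/6 of the source, so
+    // the extend of this BUILD_VECTOR matches an even-lane widening add/sub
+    // (xvaddwev.d.w/xvsubwev.d.w).
+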
if (N->getOpcode() == ISD::SIGN_EXTEND) { + if (Ops[0]->getOperand(0)->getOpcode() == ISD::ADD) + return SDValue(DAG.getMachineNode(LoongArch::XVADDWEV_D_W, DL, + MVT::v4i64, + Ops[0]->getOperand(0)->getOperand(1), + Ops[0]->getOperand(0)->getOperand(0)), + 0); + else if (Ops[0]->getOperand(0)->getOpcode() == ISD::SUB) + return SDValue(DAG.getMachineNode(LoongArch::XVSUBWEV_D_W, DL, + MVT::v4i64, + Ops[0]->getOperand(0)->getOperand(0), + Ops[0]->getOperand(0)->getOperand(1)), + 0); + } else if (N->getOpcode() == ISD::ZERO_EXTEND) { + if (Ops[0]->getOperand(0)->getOpcode() == ISD::ADD) + return SDValue(DAG.getMachineNode(LoongArch::XVADDWEV_D_WU, DL, + MVT::v4i64, + Ops[0]->getOperand(0)->getOperand(1), + Ops[0]->getOperand(0)->getOperand(0)), + 0); + else if (Ops[0]->getOperand(0)->getOpcode() == ISD::SUB) + return SDValue(DAG.getMachineNode(LoongArch::XVSUBWEV_D_WU, DL, + MVT::v4i64, + Ops[0]->getOperand(0)->getOperand(0), + Ops[0]->getOperand(0)->getOperand(1)), + 0); + } + } else if (pos_o) { + if (N->getOpcode() == ISD::SIGN_EXTEND) { + if (Ops[0]->getOperand(0)->getOpcode() == ISD::ADD) + return SDValue(DAG.getMachineNode(LoongArch::XVADDWOD_D_W, DL, + MVT::v4i64, + Ops[0]->getOperand(0)->getOperand(1), + Ops[0]->getOperand(0)->getOperand(0)), + 0); + else if (Ops[0]->getOperand(0)->getOpcode() == ISD::SUB) + return SDValue(DAG.getMachineNode(LoongArch::XVSUBWOD_D_W, DL, + MVT::v4i64, + Ops[0]->getOperand(0)->getOperand(0), + Ops[0]->getOperand(0)->getOperand(1)), + 0); + } else if (N->getOpcode() == ISD::ZERO_EXTEND) { + if (Ops[0]->getOperand(0)->getOpcode() == ISD::ADD) + return SDValue(DAG.getMachineNode(LoongArch::XVADDWOD_D_WU, DL, + MVT::v4i64, + Ops[0]->getOperand(0)->getOperand(1), + Ops[0]->getOperand(0)->getOperand(0)), + 0); + else if (Ops[0]->getOperand(0)->getOpcode() == ISD::SUB) + return SDValue(DAG.getMachineNode(LoongArch::XVSUBWOD_D_WU, DL, + MVT::v4i64, + Ops[0]->getOperand(0)->getOperand(0), + Ops[0]->getOperand(0)->getOperand(1)), + 0); + } + } else + return SDValue(); + + return SDValue(); +} + +// Optimize zero extension and sign extension of data +static SDValue performExtend(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const LoongArchSubtarget &Subtarget) { + + if (!Subtarget.hasLASX()) + return SDValue(); + + SDLoc DL(N); + SDValue T = N->getOperand(0); + + if (T->getOpcode() == ISD::BUILD_VECTOR) + return performParity(N, DAG, DCI, Subtarget); + + if (T->getOpcode() != ISD::ADD && T->getOpcode() != ISD::SUB) + return SDValue(); + + SDValue T0 = T->getOperand(0); + SDValue T1 = T->getOperand(1); + + if (!(T0->getOpcode() == ISD::BUILD_VECTOR && + T1->getOpcode() == ISD::BUILD_VECTOR)) + return SDValue(); + + if (DCI.isAfterLegalizeDAG()) + return SDValue(); + + if (!(T->getValueType(0).isSimple() && T0->getValueType(0).isSimple() && + T1->getValueType(0).isSimple() && N->getValueType(0).isSimple())) + return SDValue(); + + if (!(N->getSimpleValueType(0).SimpleTy == MVT::v4i64 && + T->getSimpleValueType(0).SimpleTy == MVT::v4i32 && + T0->getSimpleValueType(0).SimpleTy == MVT::v4i32 && + T1->getSimpleValueType(0).SimpleTy == MVT::v4i32)) + return SDValue(); + + SDValue Opse0[4]; + SDValue Opse1[4]; + + for (int i = 0; i < 4; i++) { + if (T->getOpcode() == ISD::ADD) { + Opse0[i] = T1->getOperand(i); + Opse1[i] = T0->getOperand(i); + } else if (T->getOpcode() == ISD::SUB) { + Opse0[i] = T0->getOperand(i); + Opse1[i] = T1->getOperand(i); + } + + if (Opse0[i]->getOpcode() != ISD::EXTRACT_VECTOR_ELT || + Opse1[i]->getOpcode() != 
ISD::EXTRACT_VECTOR_ELT)
+      return SDValue();
+
+    if (!(dyn_cast<ConstantSDNode>(Opse0[i]->getOperand(1)) &&
+          dyn_cast<ConstantSDNode>(Opse1[i]->getOperand(1))))
+      return SDValue();
+
+    if (cast<ConstantSDNode>(Opse0[i]->getOperand(1))->getSExtValue() !=
+            (2 * i + 1) ||
+        cast<ConstantSDNode>(Opse1[i]->getOperand(1))->getSExtValue() !=
+            (2 * i))
+      return SDValue();
+
+    if (i > 0 && (Opse0[i]->getOperand(0) != Opse0[i - 1]->getOperand(0) ||
+                  Opse1[i]->getOperand(0) != Opse1[i - 1]->getOperand(0)))
+      return SDValue();
+  }
+
+  if (N->getOpcode() == ISD::SIGN_EXTEND) {
+    if (T->getOpcode() == ISD::ADD)
+      return SDValue(DAG.getMachineNode(LoongArch::XVHADDW_D_W, DL, MVT::v4i64,
+                                        Opse0[0]->getOperand(0),
+                                        Opse1[0]->getOperand(0)),
+                     0);
+    else if (T->getOpcode() == ISD::SUB)
+      return SDValue(DAG.getMachineNode(LoongArch::XVHSUBW_D_W, DL, MVT::v4i64,
+                                        Opse0[0]->getOperand(0),
+                                        Opse1[0]->getOperand(0)),
+                     0);
+  } else if (N->getOpcode() == ISD::ZERO_EXTEND) {
+    if (T->getOpcode() == ISD::ADD)
+      return SDValue(DAG.getMachineNode(LoongArch::XVHADDW_DU_WU, DL,
+                                        MVT::v4i64, Opse0[0]->getOperand(0),
+                                        Opse1[0]->getOperand(0)),
+                     0);
+    else if (T->getOpcode() == ISD::SUB)
+      return SDValue(DAG.getMachineNode(LoongArch::XVHSUBW_DU_WU, DL,
+                                        MVT::v4i64, Opse0[0]->getOperand(0),
+                                        Opse1[0]->getOperand(0)),
+                     0);
+  }
+
+  return SDValue();
+}
+
+static SDValue performSIGN_EXTENDCombine(SDNode *N, SelectionDAG &DAG,
+                                         TargetLowering::DAGCombinerInfo &DCI,
+                                         const LoongArchSubtarget &Subtarget) {
+
+  assert((N->getOpcode() == ISD::SIGN_EXTEND) && "Need ISD::SIGN_EXTEND");
+
+  SDLoc DL(N);
+  SDValue Top = N->getOperand(0);
+
+  SDValue Res;
+  if ((Res = performExtend(N, DAG, DCI, Subtarget)))
+    return Res;
+
+  if (Top->getOpcode() != ISD::CopyFromReg)
+    return SDValue();
+
+  if ((Top->getOperand(0)->getOpcode() == ISD::EntryToken) &&
+      (N->getValueType(0) == MVT::i64)) {
+
+    SDValue SubReg = DAG.getTargetConstant(LoongArch::sub_32, DL, MVT::i32);
+    SDNode *Undef = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
+                                       MVT::i64);
+
+    SDNode *Ins = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::i64,
+                                     SDValue(Undef, 0), Top, SubReg);
+
+    return SDValue(Ins, 0);
+  }
+
+  return SDValue();
+}
+
+static SDValue performZERO_EXTENDCombine(SDNode *N, SelectionDAG &DAG,
+                                         TargetLowering::DAGCombinerInfo &DCI,
+                                         const LoongArchSubtarget &Subtarget) {
+
+  assert((N->getOpcode() == ISD::ZERO_EXTEND) && "Need ISD::ZERO_EXTEND");
+
+  SDLoc DL(N);
+
+  SDValue Res;
+  if ((Res = performExtend(N, DAG, DCI, Subtarget)))
+    return Res;
+
+  return SDValue();
+}
+
+SDValue LoongArchTargetLowering::
+PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
+  SDValue Val;
+
+  switch (N->getOpcode()) {
+  default: break;
+  case ISD::AND:
+    return performANDCombine(N, DAG, DCI, Subtarget);
+  case ISD::OR:
+    return performORCombine(N, DAG, DCI, Subtarget);
+  case ISD::XOR:
+    return performXORCombine(N, DAG, Subtarget);
+  case ISD::MUL:
+    return performMULCombine(N, DAG, DCI, this, Subtarget);
+  case ISD::SRA:
+    return performSRACombine(N, DAG, DCI, Subtarget);
+  case ISD::SELECT:
+    return performSELECTCombine(N, DAG, DCI, Subtarget);
+  case ISD::VSELECT:
+    return performVSELECTCombine(N, DAG);
+  case ISD::CONCAT_VECTORS:
+    return performCONCAT_VECTORSCombine(N, DAG, DCI, Subtarget);
+  case ISD::SIGN_EXTEND:
+    return performSIGN_EXTENDCombine(N, DAG, DCI, Subtarget);
+  case ISD::ZERO_EXTEND:
+    return performZERO_EXTENDCombine(N, DAG, DCI, Subtarget);
+  case ISD::ADD:
+  case ISD::SUB:
+  case ISD::SHL:
+  case ISD::SRL:
+    return performLogicCombine(N, DAG, Subtarget);
+  }
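+
+  // No target-specific combine matched; hand the node back to the generic
+  // DAG combiner unchanged.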
+SDValue LoongArchTargetLowering::
+PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
+  SDValue Val;
+
+  switch (N->getOpcode()) {
+  default: break;
+  case ISD::AND:
+    return performANDCombine(N, DAG, DCI, Subtarget);
+  case ISD::OR:
+    return performORCombine(N, DAG, DCI, Subtarget);
+  case ISD::XOR:
+    return performXORCombine(N, DAG, Subtarget);
+  case ISD::MUL:
+    return performMULCombine(N, DAG, DCI, this, Subtarget);
+  case ISD::SRA:
+    return performSRACombine(N, DAG, DCI, Subtarget);
+  case ISD::SELECT:
+    return performSELECTCombine(N, DAG, DCI, Subtarget);
+  case ISD::VSELECT:
+    return performVSELECTCombine(N, DAG);
+  case ISD::CONCAT_VECTORS:
+    return performCONCAT_VECTORSCombine(N, DAG, DCI, Subtarget);
+  case ISD::SIGN_EXTEND:
+    return performSIGN_EXTENDCombine(N, DAG, DCI, Subtarget);
+  case ISD::ZERO_EXTEND:
+    return performZERO_EXTENDCombine(N, DAG, DCI, Subtarget);
+  case ISD::ADD:
+  case ISD::SUB:
+  case ISD::SHL:
+  case ISD::SRL:
+    return performLogicCombine(N, DAG, Subtarget);
+  }
+
+  return SDValue();
+}
+
+static SDValue lowerLSXSplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) {
+  EVT ResVecTy = Op->getValueType(0);
+  EVT ViaVecTy = ResVecTy;
+  SDLoc DL(Op);
+
+  // When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and
+  // LaneB is the lower 32 bits. Otherwise LaneA and LaneB are alternating
+  // lanes.
+  SDValue LaneA = Op->getOperand(OpNr);
+  SDValue LaneB;
+
+  if (ResVecTy == MVT::v2i64) {
+    // In case of the index being passed as an immediate value, set the upper
+    // lane to 0 so that the splati.d instruction can be matched.
+    if (isa<ConstantSDNode>(LaneA))
+      LaneB = DAG.getConstant(0, DL, MVT::i32);
+    // Having the index passed in a register, set the upper lane to the same
+    // value as the lower - this results in the BUILD_VECTOR node not being
+    // expanded through the stack. This way we are able to pattern match the
+    // set of nodes created here to splat.d.
+    else
+      LaneB = LaneA;
+    ViaVecTy = MVT::v4i32;
+  } else
+    LaneB = LaneA;
+
+  SDValue Ops[16] = {LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB,
+                     LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB};
+
+  SDValue Result = DAG.getBuildVector(
+      ViaVecTy, DL, makeArrayRef(Ops, ViaVecTy.getVectorNumElements()));
+
+  if (ViaVecTy != ResVecTy) {
+    SDValue One = DAG.getConstant(1, DL, ViaVecTy);
+    Result = DAG.getNode(ISD::BITCAST, DL, ResVecTy,
+                         DAG.getNode(ISD::AND, DL, ViaVecTy, Result, One));
+  }
+
+  return Result;
+}
+
+static SDValue lowerLSXSplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG,
+                                bool IsSigned = false) {
+  return DAG.getConstant(
+      APInt(Op->getValueType(0).getScalarType().getSizeInBits(),
+            Op->getConstantOperandVal(ImmOp), IsSigned),
+      SDLoc(Op), Op->getValueType(0));
+}
+
+static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue,
+                                   SelectionDAG &DAG) {
+  EVT ViaVecTy = VecTy;
+  SDValue SplatValueA = SplatValue;
+  SDValue SplatValueB = SplatValue;
+  SDLoc DL(SplatValue);
+
+  if (VecTy == MVT::v2i64) {
+    // v2i64 BUILD_VECTOR must be performed via v4i32 so split into i32's.
+    ViaVecTy = MVT::v4i32;
+
+    SplatValueA = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValue);
+    SplatValueB = DAG.getNode(ISD::SRL, DL, MVT::i64, SplatValue,
+                              DAG.getConstant(32, DL, MVT::i32));
+    SplatValueB = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValueB);
+  }
+
+  SDValue Ops[32] = {SplatValueA, SplatValueB, SplatValueA, SplatValueB,
+                     SplatValueA, SplatValueB, SplatValueA, SplatValueB,
+                     SplatValueA, SplatValueB, SplatValueA, SplatValueB,
+                     SplatValueA, SplatValueB, SplatValueA, SplatValueB,
+                     SplatValueA, SplatValueB, SplatValueA, SplatValueB,
+                     SplatValueA, SplatValueB, SplatValueA, SplatValueB,
+                     SplatValueA, SplatValueB, SplatValueA, SplatValueB,
+                     SplatValueA, SplatValueB, SplatValueA, SplatValueB};
+
+  SDValue Result = DAG.getBuildVector(
+      ViaVecTy, DL, makeArrayRef(Ops, ViaVecTy.getVectorNumElements()));
+
+  if (VecTy != ViaVecTy)
+    Result = DAG.getNode(ISD::BITCAST, DL, VecTy, Result);
+
+  return Result;
+}
+
+static SDValue truncateVecElts(SDValue Op, SelectionDAG &DAG) {
+  SDLoc DL(Op);
+  EVT ResTy = Op->getValueType(0);
+  SDValue Vec = Op->getOperand(2);
+  MVT ResEltTy =
+      (ResTy == MVT::v2i64 || ResTy == MVT::v4i64) ?
MVT::i64 : MVT::i32; + SDValue ConstValue = + DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResEltTy); + SDValue SplatVec = getBuildVectorSplat(ResTy, ConstValue, DAG); + + return DAG.getNode(ISD::AND, DL, ResTy, Vec, SplatVec); +} + +static SDValue lowerLSXBitClear(SDValue Op, SelectionDAG &DAG) { + EVT ResTy = Op->getValueType(0); + SDLoc DL(Op); + SDValue One = DAG.getConstant(1, DL, ResTy); + SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Op, DAG)); + + return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), + DAG.getNOT(DL, Bit, ResTy)); +} + +static SDValue lowerLSXLoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, + const LoongArchSubtarget &Subtarget) { + SDLoc DL(Op); + SDValue ChainIn = Op->getOperand(0); + SDValue Address = Op->getOperand(2); + SDValue Offset = Op->getOperand(3); + EVT ResTy = Op->getValueType(0); + EVT PtrTy = Address->getValueType(0); + + // For LP64 addresses have the underlying type MVT::i64. This intrinsic + // however takes an i32 signed constant offset. The actual type of the + // intrinsic is a scaled signed i12. + if (Subtarget.isABI_LP64D()) + Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); + + Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); + return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(), + /* Alignment = */ 16); +} + +static SDValue lowerLASXLoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, + const LoongArchSubtarget &Subtarget) { + SDLoc DL(Op); + SDValue ChainIn = Op->getOperand(0); + SDValue Address = Op->getOperand(2); + SDValue Offset = Op->getOperand(3); + EVT ResTy = Op->getValueType(0); + EVT PtrTy = Address->getValueType(0); + + // For LP64 addresses have the underlying type MVT::i64. This intrinsic + // however takes an i32 signed constant offset. The actual type of the + // intrinsic is a scaled signed i12. + if (Subtarget.isABI_LP64D()) + Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); + + Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); + return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(), + /* Alignment = */ 32); +} + +static SDValue lowerLASXVLDRIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, + const LoongArchSubtarget &Subtarget) { + SDLoc DL(Op); + SDValue ChainIn = Op->getOperand(0); + SDValue Address = Op->getOperand(2); + SDValue Offset = Op->getOperand(3); + EVT ResTy = Op->getValueType(0); + EVT PtrTy = Address->getValueType(0); + + // For LP64 addresses have the underlying type MVT::i64. This intrinsic + // however takes an i32 signed constant offset. The actual type of the + // intrinsic is a scaled signed i12. + if (Subtarget.isABI_LP64D()) + Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); + + Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); + SDValue Load = DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(), + /* Alignment = */ 32); + return DAG.getNode(LoongArchISD::XVBROADCAST, DL, + DAG.getVTList(ResTy, MVT::Other), Load); +} + +static SDValue lowerLSXVLDRIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, + const LoongArchSubtarget &Subtarget) { + SDLoc DL(Op); + SDValue ChainIn = Op->getOperand(0); + SDValue Address = Op->getOperand(2); + SDValue Offset = Op->getOperand(3); + EVT ResTy = Op->getValueType(0); + EVT PtrTy = Address->getValueType(0); + + // For LP64 addresses have the underlying type MVT::i64. This intrinsic + // however takes an i32 signed constant offset. The actual type of the + // intrinsic is a scaled signed i12. 
+ if (Subtarget.isABI_LP64D()) + Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); + + Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); + SDValue Load = DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(), + /* Alignment = */ 16); + return DAG.getNode(LoongArchISD::VBROADCAST, DL, + DAG.getVTList(ResTy, MVT::Other), Load); +} + +static SDValue lowerLSXStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, + const LoongArchSubtarget &Subtarget) { + SDLoc DL(Op); + SDValue ChainIn = Op->getOperand(0); + SDValue Value = Op->getOperand(2); + SDValue Address = Op->getOperand(3); + SDValue Offset = Op->getOperand(4); + EVT PtrTy = Address->getValueType(0); + + // For LP64 addresses have the underlying type MVT::i64. This intrinsic + // however takes an i32 signed constant offset. The actual type of the + // intrinsic is a scaled signed i12. + if (Subtarget.isABI_LP64D()) + Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); + + Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); + + return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(), + /* Alignment = */ 16); +} + +static SDValue lowerLASXStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, + const LoongArchSubtarget &Subtarget) { + SDLoc DL(Op); + SDValue ChainIn = Op->getOperand(0); + SDValue Value = Op->getOperand(2); + SDValue Address = Op->getOperand(3); + SDValue Offset = Op->getOperand(4); + EVT PtrTy = Address->getValueType(0); + + // For LP64 addresses have the underlying type MVT::i64. This intrinsic + // however takes an i32 signed constant offset. The actual type of the + // intrinsic is a scaled signed i12. + if (Subtarget.isABI_LP64D()) + Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); + + Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); + + return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(), + /* Alignment = */ 32); +} + +static SDValue LowerSUINT_TO_FP(unsigned ExtOpcode, SDValue Op, SelectionDAG &DAG) { + + EVT ResTy = Op->getValueType(0); + SDValue Op0 = Op->getOperand(0); + EVT ViaTy = Op0->getValueType(0); + SDLoc DL(Op); + + if (!ResTy.isVector()) { + if(ResTy.getScalarSizeInBits() == ViaTy.getScalarSizeInBits()) + return DAG.getNode(ISD::BITCAST, DL, ResTy, Op0); + else if(ResTy.getScalarSizeInBits() > ViaTy.getScalarSizeInBits()) { + Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op0); + return DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Op0); + } else { + Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Op0); + return DAG.getNode(ISD::TRUNCATE, DL, MVT::f32, Op0); + } + + } + + if (ResTy.getScalarSizeInBits() == ViaTy.getScalarSizeInBits()) { + // v4i32 => v4f32 v8i32 => v8f32 + // v2i64 => v2f64 v4i64 => v4f64 + // do nothing + } else if (ResTy.getScalarSizeInBits() > ViaTy.getScalarSizeInBits()) { + // v4i32 => v4i64 => v4f64 + Op0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i32, {Op0, Op0}); + Op0 = DAG.getNode(ExtOpcode, DL, MVT::v4i64, Op0); + } else { + // v4i64 => v4f32 + SDValue Ops[4]; + for (unsigned i = 0; i < 4; i++) { + SDValue I64 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Op0, + DAG.getConstant(i, DL, MVT::i32)); + Ops[i] = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, I64); + } + Op0 = DAG.getBuildVector(MVT::v4i32, DL, makeArrayRef(Ops, 4)); + } + + return Op0; +} + +static SDValue LowerFP_TO_SUINT(unsigned FPToSUI, unsigned ExtOpcode, + SDValue Op, SelectionDAG &DAG) { + + EVT ResTy = Op->getValueType(0); + SDValue Op0 = Op->getOperand(0); + EVT ViaTy = Op0->getValueType(0); + SDLoc DL(Op); + + 
+  if (ResTy.getScalarSizeInBits() == ViaTy.getScalarSizeInBits()) {
+    // v4f32 => v4i32   v8f32 => v8i32
+    // v2f64 => v2i64   v4f64 => v4i64
+    // do nothing
+    Op0 = DAG.getNode(FPToSUI, DL, ResTy, Op0);
+  } else if (ResTy.getScalarSizeInBits() > ViaTy.getScalarSizeInBits()) {
+    // v4f32 => v4i32 => v4i64
+    Op0 = DAG.getNode(FPToSUI, DL, MVT::v4i32, Op0);
+    Op0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i32, {Op0, Op0});
+    Op0 = DAG.getNode(ExtOpcode, DL, MVT::v4i64, Op0);
+  } else {
+    SDValue Ops[4];
+    Ops[0] = DAG.getNode(FPToSUI, DL, MVT::i32,
+                         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f64, Op0,
+                                     DAG.getConstant(0, DL, MVT::i64)));
+    Ops[1] = DAG.getNode(FPToSUI, DL, MVT::i32,
+                         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f64, Op0,
+                                     DAG.getConstant(1, DL, MVT::i64)));
+    Ops[2] = DAG.getNode(FPToSUI, DL, MVT::i32,
+                         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f64, Op0,
+                                     DAG.getConstant(2, DL, MVT::i64)));
+    Ops[3] = DAG.getNode(FPToSUI, DL, MVT::i32,
+                         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f64, Op0,
+                                     DAG.getConstant(3, DL, MVT::i64)));
+
+    Op0 = DAG.getBuildVector(MVT::v4i32, DL, makeArrayRef(Ops, 4));
+  }
+
+  return Op0;
+}
+
+// Lower VECTOR_SHUFFLE into SHF (if possible).
+//
+// SHF splits the vector into blocks of four elements, then shuffles these
+// elements according to a <4 x i2> constant (encoded as an integer immediate).
+//
+// It is therefore possible to lower into SHF when the mask takes the form:
+//   <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
+// When undef's appear they are treated as if they were whatever value is
+// necessary in order to fit the above form.
+//
+// For example:
+//   %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
+//                      <8 x i32> <i32 3, i32 2, i32 1, i32 0,
+//                                 i32 7, i32 6, i32 5, i32 4>
+// is lowered to:
+//   (VSHUF4I_H $v0, $v1, 27)
+// where the 27 comes from:
+//   3 + (2 << 2) + (1 << 4) + (0 << 6)
+static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy,
+                                       SmallVector<int, 16> Indices,
+                                       SelectionDAG &DAG) {
+  int SHFIndices[4] = {-1, -1, -1, -1};
+
+  if (Indices.size() < 4)
+    return SDValue();
+
+  for (unsigned i = 0; i < 4; ++i) {
+    for (unsigned j = i; j < Indices.size(); j += 4) {
+      int Idx = Indices[j];
+
+      // Convert from vector index to 4-element subvector index
+      // If an index refers to an element outside of the subvector then give up
+      if (Idx != -1) {
+        Idx -= 4 * (j / 4);
+        if (Idx < 0 || Idx >= 4)
+          return SDValue();
+      }
+
+      // If the mask has an undef, replace it with the current index.
+      // Note that it might still be undef if the current index is also undef
+      if (SHFIndices[i] == -1)
+        SHFIndices[i] = Idx;
+
+      // Check that non-undef values are the same as in the mask. If they
+      // aren't then give up
+      if (!(Idx == -1 || Idx == SHFIndices[i]))
+        return SDValue();
+    }
+  }
+
+  // Calculate the immediate. Replace any remaining undefs with zero
+  APInt Imm(32, 0);
+  for (int i = 3; i >= 0; --i) {
+    int Idx = SHFIndices[i];
+
+    if (Idx == -1)
+      Idx = 0;
+
+    Imm <<= 2;
+    Imm |= Idx & 0x3;
+  }
+
+  SDLoc DL(Op);
+  return DAG.getNode(LoongArchISD::SHF, DL, ResTy,
+                     DAG.getConstant(Imm, DL, MVT::i32), Op->getOperand(0));
+}
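To make the immediate encoding above concrete, here is a minimal standalone
sketch (our own illustration, not part of the patch) that reproduces the
packing loop for the <3, 2, 1, 0, 7, 6, 5, 4> example from the comment:

  #include <cassert>
  #include <cstdint>

  // Pack four 2-bit lane selectors into an 8-bit SHF immediate, exactly as
  // the final loop in lowerVECTOR_SHUFFLE_SHF does: idx[3] lands in the
  // high bits first and is shifted up as idx[2..0] are folded in.
  static uint32_t packSHFImm(const int idx[4]) {
    uint32_t imm = 0;
    for (int i = 3; i >= 0; --i) {
      imm <<= 2;
      imm |= idx[i] & 0x3;
    }
    return imm;
  }

  int main() {
    const int idx[4] = {3, 2, 1, 0};
    assert(packSHFImm(idx) == 27); // 3 + (2 << 2) + (1 << 4) + (0 << 6)
    return 0;
  }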
+/// Determine whether a range fits a regular pattern of values.
+/// This function accounts for the possibility of jumping over the End iterator.
+template <typename ValType>
+static bool
+fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
+                   unsigned CheckStride,
+                   typename SmallVectorImpl<ValType>::const_iterator End,
+                   ValType ExpectedIndex, unsigned ExpectedIndexStride) {
+  auto &I = Begin;
+
+  while (I != End) {
+    if (*I != -1 && *I != ExpectedIndex)
+      return false;
+    ExpectedIndex += ExpectedIndexStride;
+
+    // Incrementing past End is undefined behaviour so we must increment one
+    // step at a time and check for End at each step.
+    for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
+      ; // Empty loop body.
+  }
+  return true;
+}
+
+// Determine whether VECTOR_SHUFFLE is a VREPLVEI.
+//
+// It is a VREPLVEI when the mask is:
+//   <x, x, x, ...>
+// where x is any valid index.
+//
+// When undef's appear in the mask they are treated as if they were whatever
+// value is necessary in order to fit the above form.
+static bool isVECTOR_SHUFFLE_VREPLVEI(SDValue Op, EVT ResTy,
+                                      SmallVector<int, 16> Indices,
+                                      SelectionDAG &DAG) {
+  assert((Indices.size() % 2) == 0);
+
+  int SplatIndex = -1;
+  for (const auto &V : Indices) {
+    if (V != -1) {
+      SplatIndex = V;
+      break;
+    }
+  }
+
+  return fitsRegularPattern<int>(Indices.begin(), 1, Indices.end(), SplatIndex,
+                                 0);
+}
+
+// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
+//
+// VPACKEV interleaves the even elements from each vector.
+//
+// It is possible to lower into VPACKEV when the mask consists of two of the
+// following forms interleaved:
+//   <0, 2, 4, ...>
+//   <n, n+2, n+4, ...>
+// where n is the number of elements in the vector.
+// For example:
+//   <0, 0, 2, 2, 4, 4, ...>
+//   <0, n, 2, n+2, 4, n+4, ...>
+//
+// When undef's appear in the mask they are treated as if they were whatever
+// value is necessary in order to fit the above forms.
+static SDValue lowerVECTOR_SHUFFLE_VPACKEV(SDValue Op, EVT ResTy,
+                                           SmallVector<int, 16> Indices,
+                                           SelectionDAG &DAG) {
+  assert((Indices.size() % 2) == 0);
+
+  SDValue Vj;
+  SDValue Vk;
+  const auto &Begin = Indices.begin();
+  const auto &End = Indices.end();
+
+  // Check even elements are taken from the even elements of one half or the
+  // other and pick an operand accordingly.
+  if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
+    Vj = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 2))
+    Vj = Op->getOperand(1);
+  else
+    return SDValue();
+
+  // Check odd elements are taken from the even elements of one half or the
+  // other and pick an operand accordingly.
+  if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
+    Vk = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 2))
+    Vk = Op->getOperand(1);
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VPACKEV, SDLoc(Op), ResTy, Vk, Vj);
+}
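As a quick illustration of how two fitsRegularPattern queries classify a
mask, the following self-contained sketch (our own, using a simplified
index-based re-implementation of the helper over a plain array) checks the
VPACKEV example mask <0, n, 2, n+2, ...> for n = 8:

  #include <cassert>
  #include <cstddef>

  // Simplified model of fitsRegularPattern: starting at position 'start',
  // visit every 'checkStride'-th element and require it to match expected,
  // expected+step, ... (-1 entries are undef and match anything).
  static bool fitsPattern(const int *mask, size_t size, size_t start,
                          size_t checkStride, int expected, int step) {
    for (size_t i = start; i < size; i += checkStride, expected += step)
      if (mask[i] != -1 && mask[i] != expected)
        return false;
    return true;
  }

  int main() {
    const int mask[8] = {0, 8, 2, 10, 4, 12, 6, 14}; // <0, n, 2, n+2, ...>
    // Even positions hold 0, 2, 4, 6: even lanes of operand 0.
    assert(fitsPattern(mask, 8, 0, 2, 0, 2));
    // Odd positions hold 8, 10, 12, 14: even lanes of operand 1 (offset n).
    assert(fitsPattern(mask, 8, 1, 2, 8, 2));
    return 0;
  }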
+// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
+//
+// VPACKOD interleaves the odd elements from each vector.
+//
+// It is possible to lower into VPACKOD when the mask consists of two of the
+// following forms interleaved:
+//   <1, 3, 5, ...>
+//   <n+1, n+3, n+5, ...>
+// where n is the number of elements in the vector.
+// For example:
+//   <1, 1, 3, 3, 5, 5, ...>
+//   <1, n+1, 3, n+3, 5, n+5, ...>
+//
+// When undef's appear in the mask they are treated as if they were whatever
+// value is necessary in order to fit the above forms.
+static SDValue lowerVECTOR_SHUFFLE_VPACKOD(SDValue Op, EVT ResTy,
+                                           SmallVector<int, 16> Indices,
+                                           SelectionDAG &DAG) {
+  assert((Indices.size() % 2) == 0);
+
+  SDValue Vj;
+  SDValue Vk;
+  const auto &Begin = Indices.begin();
+  const auto &End = Indices.end();
+
+  // Check even elements are taken from the odd elements of one half or the
+  // other and pick an operand accordingly.
+  if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
+    Vj = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + 1, 2))
+    Vj = Op->getOperand(1);
+  else
+    return SDValue();
+
+  // Check odd elements are taken from the odd elements of one half or the
+  // other and pick an operand accordingly.
+  if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
+    Vk = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + 1, 2))
+    Vk = Op->getOperand(1);
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VPACKOD, SDLoc(Op), ResTy, Vk, Vj);
+}
+
+// Lower VECTOR_SHUFFLE into VILVL (if possible).
+//
+// VILVL interleaves consecutive elements from the right (lowest-indexed) half
+// of each vector.
+//
+// It is possible to lower into VILVL when the mask consists of two of the
+// following forms interleaved:
+//   <0, 1, 2, ...>
+//   <n, n+1, n+2, ...>
+// where n is the number of elements in the vector.
+// For example:
+//   <0, 0, 1, 1, 2, 2, ...>
+//   <0, n, 1, n+1, 2, n+2, ...>
+//
+// When undef's appear in the mask they are treated as if they were whatever
+// value is necessary in order to fit the above forms.
+static SDValue lowerVECTOR_SHUFFLE_VILVL(SDValue Op, EVT ResTy,
+                                         SmallVector<int, 16> Indices,
+                                         SelectionDAG &DAG) {
+  assert((Indices.size() % 2) == 0);
+
+  SDValue Vj;
+  SDValue Vk;
+  const auto &Begin = Indices.begin();
+  const auto &End = Indices.end();
+
+  // Check even elements are taken from the right (lowest-indexed) elements of
+  // one half or the other and pick an operand accordingly.
+  if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
+    Vj = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 1))
+    Vj = Op->getOperand(1);
+  else
+    return SDValue();
+
+  // Check odd elements are taken from the right (lowest-indexed) elements of
+  // one half or the other and pick an operand accordingly.
+  if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
+    Vk = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 1))
+    Vk = Op->getOperand(1);
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VILVL, SDLoc(Op), ResTy, Vk, Vj);
+}
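A scalar model of the interleave may help here as well. This is our own
illustrative sketch, written at the level of the shuffle semantics the
lowering matches (mask form <0, n, 1, n+1, ...>), not a statement about the
hardware's operand encoding:

  #include <cassert>

  // Shuffle-level model of the VILVL case: for an n-element result,
  // dst[2*i] takes lane i of operand a and dst[2*i+1] takes lane i of
  // operand b, i.e. the low halves of the two sources are interleaved.
  static void vilvlModel(int dst[], const int a[], const int b[], int n) {
    for (int i = 0; i < n / 2; ++i) {
      dst[2 * i] = a[i];
      dst[2 * i + 1] = b[i];
    }
  }

  int main() {
    const int a[4] = {0, 1, 2, 3}, b[4] = {4, 5, 6, 7};
    int d[4];
    vilvlModel(d, a, b, 4);
    assert(d[0] == 0 && d[1] == 4 && d[2] == 1 && d[3] == 5);
    return 0;
  }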
+// Lower VECTOR_SHUFFLE into VILVH (if possible).
+//
+// VILVH interleaves consecutive elements from the left (highest-indexed) half
+// of each vector.
+//
+// It is possible to lower into VILVH when the mask consists of two of the
+// following forms interleaved:
+//   <x, x+1, x+2, ...>
+//   <n+x, n+x+1, n+x+2, ...>
+// where n is the number of elements in the vector and x is half n.
+// For example:
+//   <x, x, x+1, x+1, x+2, x+2, ...>
+//   <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
+//
+// When undef's appear in the mask they are treated as if they were whatever
+// value is necessary in order to fit the above forms.
+static SDValue lowerVECTOR_SHUFFLE_VILVH(SDValue Op, EVT ResTy,
+                                         SmallVector<int, 16> Indices,
+                                         SelectionDAG &DAG) {
+  assert((Indices.size() % 2) == 0);
+
+  unsigned HalfSize = Indices.size() / 2;
+  SDValue Vj;
+  SDValue Vk;
+  const auto &Begin = Indices.begin();
+  const auto &End = Indices.end();
+
+  // Check even elements are taken from the left (highest-indexed) elements of
+  // one half or the other and pick an operand accordingly.
+  if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
+    Vj = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + HalfSize,
+                                   1))
+    Vj = Op->getOperand(1);
+  else
+    return SDValue();
+
+  // Check odd elements are taken from the left (highest-indexed) elements of
+  // one half or the other and pick an operand accordingly.
+  if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
+    Vk = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + HalfSize,
+                                   1))
+    Vk = Op->getOperand(1);
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VILVH, SDLoc(Op), ResTy, Vk, Vj);
+}
+
+// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
+//
+// VPICKEV copies the even elements of each vector into the result vector.
+//
+// It is possible to lower into VPICKEV when the mask consists of two of the
+// following forms concatenated:
+//   <0, 2, 4, ...>
+//   <n, n+2, n+4, ...>
+// where n is the number of elements in the vector.
+// For example:
+//   <0, 2, 4, ..., 0, 2, 4, ...>
+//   <0, 2, 4, ..., n, n+2, n+4, ...>
+//
+// When undef's appear in the mask they are treated as if they were whatever
+// value is necessary in order to fit the above forms.
+static SDValue lowerVECTOR_SHUFFLE_VPICKEV(SDValue Op, EVT ResTy,
+                                           SmallVector<int, 16> Indices,
+                                           SelectionDAG &DAG) {
+  assert((Indices.size() % 2) == 0);
+
+  SDValue Vj;
+  SDValue Vk;
+  const auto &Begin = Indices.begin();
+  const auto &Mid = Indices.begin() + Indices.size() / 2;
+  const auto &End = Indices.end();
+
+  if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
+    Vj = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size(), 2))
+    Vj = Op->getOperand(1);
+  else
+    return SDValue();
+
+  if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
+    Vk = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size(), 2))
+    Vk = Op->getOperand(1);
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VPICKEV, SDLoc(Op), ResTy, Vk, Vj);
+}
+
+// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
+//
+// VPICKOD copies the odd elements of each vector into the result vector.
+//
+// It is possible to lower into VPICKOD when the mask consists of two of the
+// following forms concatenated:
+//   <1, 3, 5, ...>
+//   <n+1, n+3, n+5, ...>
+// where n is the number of elements in the vector.
+// For example:
+//   <1, 3, 5, ..., 1, 3, 5, ...>
+//   <1, 3, 5, ..., n+1, n+3, n+5, ...>
+//
+// When undef's appear in the mask they are treated as if they were whatever
+// value is necessary in order to fit the above forms.
+static SDValue lowerVECTOR_SHUFFLE_VPICKOD(SDValue Op, EVT ResTy,
+                                           SmallVector<int, 16> Indices,
+                                           SelectionDAG &DAG) {
+  assert((Indices.size() % 2) == 0);
+
+  SDValue Vj;
+  SDValue Vk;
+  const auto &Begin = Indices.begin();
+  const auto &Mid = Indices.begin() + Indices.size() / 2;
+  const auto &End = Indices.end();
+
+  if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
+    Vj = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size() + 1, 2))
+    Vj = Op->getOperand(1);
+  else
+    return SDValue();
+
+  if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
+    Vk = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size() + 1, 2))
+    Vk = Op->getOperand(1);
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VPICKOD, SDLoc(Op), ResTy, Vk, Vj);
+}
+// Lower VECTOR_SHUFFLE into VSHF.
+//
+// This mostly consists of converting the shuffle indices in Indices into a
+// BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is
+// also code to eliminate unused operands of the VECTOR_SHUFFLE. For example,
+// if the type is v8i16 and all the indices are less than 8 then the second
+// operand is unused and can be replaced with anything. We choose to replace it
+// with the used operand since this reduces the number of instructions overall.
+static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy,
+                                        SmallVector<int, 16> Indices,
+                                        SelectionDAG &DAG) {
+  SmallVector<SDValue, 16> Ops;
+  SDValue Op0;
+  SDValue Op1;
+  EVT MaskVecTy = ResTy.changeVectorElementTypeToInteger();
+  EVT MaskEltTy = MaskVecTy.getVectorElementType();
+  bool Using1stVec = false;
+  bool Using2ndVec = false;
+  SDLoc DL(Op);
+  int ResTyNumElts = ResTy.getVectorNumElements();
+
+  for (int i = 0; i < ResTyNumElts; ++i) {
+    // Idx == -1 means UNDEF
+    int Idx = Indices[i];
+
+    if (0 <= Idx && Idx < ResTyNumElts)
+      Using1stVec = true;
+    if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2)
+      Using2ndVec = true;
+  }
+
+  for (SmallVector<int, 16>::iterator I = Indices.begin(); I != Indices.end();
+       ++I)
+    Ops.push_back(DAG.getTargetConstant(*I, DL, MaskEltTy));
+
+  SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
+
+  if (Using1stVec && Using2ndVec) {
+    Op0 = Op->getOperand(0);
+    Op1 = Op->getOperand(1);
+  } else if (Using1stVec)
+    Op0 = Op1 = Op->getOperand(0);
+  else if (Using2ndVec)
+    Op0 = Op1 = Op->getOperand(1);
+  else
+    llvm_unreachable("shuffle vector mask references neither vector operand?");
+
+  // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
+  //   <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
+  // VSHF concatenates the vectors in a bitwise fashion:
+  //   <0b00, 0b01> + <0b10, 0b11> ->
+  //   0b0100       + 0b1110       -> 0b01001110
+  //                                    <0b10, 0b11, 0b00, 0b01>
+  // We must therefore swap the operands to get the correct result.
+  return DAG.getNode(LoongArchISD::VSHF, DL, ResTy, MaskVec, Op1, Op0);
+}
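Since the operand swap above is easy to get backwards, here is a tiny
self-contained model (ours, not the patch's) of the index semantics the
comment describes: VSHF resolves mask values in [0, n) against the vector
passed last and values in [n, 2n) against the one passed before it, which is
the reverse of how VECTOR_SHUFFLE numbers its two operands.

  #include <cassert>

  // Value-level model for a 4-element vector: with operands (mask, va, vb),
  // mask entries in [0, 4) select from vb and entries in [4, 8) from va.
  static void vshfModel(int dst[4], const int m[4], const int va[4],
                        const int vb[4]) {
    for (int i = 0; i < 4; ++i)
      dst[i] = (m[i] < 4) ? vb[m[i]] : va[m[i] - 4];
  }

  int main() {
    const int mask[4] = {0, 1, 2, 3};
    const int va[4] = {10, 11, 12, 13}, vb[4] = {20, 21, 22, 23};
    int d[4];
    vshfModel(d, mask, va, vb);
    // An identity mask selects the operand passed last; this is why the
    // lowering emits (MaskVec, Op1, Op0), so that indices [0, n) reach the
    // shuffle's original first operand Op0.
    assert(d[0] == 20 && d[3] == 23);
    return 0;
  }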
+static SDValue lowerVECTOR_SHUFFLE_XVILVL(SDValue Op, EVT ResTy,
+                                          SmallVector<int, 16> Indices,
+                                          SelectionDAG &DAG) {
+  assert((Indices.size() % 2) == 0);
+
+  SDValue Xj;
+  SDValue Xk;
+  const auto &Begin = Indices.begin();
+  const auto &End = Indices.end();
+  unsigned HalfSize = Indices.size() / 2;
+
+  if (fitsRegularPattern<int>(Begin, 2, End, 0, 1) &&
+      fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
+    Xj = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 1) &&
+           fitsRegularPattern<int>(Begin + HalfSize, 2, End,
+                                   Indices.size() + HalfSize, 1))
+    Xj = Op->getOperand(1);
+  else
+    return SDValue();
+
+  if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1) &&
+      fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
+    Xk = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 1) &&
+           fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
+                                   Indices.size() + HalfSize, 1))
+    Xk = Op->getOperand(1);
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VILVL, SDLoc(Op), ResTy, Xk, Xj);
+}
+
+static SDValue lowerVECTOR_SHUFFLE_XVILVH(SDValue Op, EVT ResTy,
+                                          SmallVector<int, 16> Indices,
+                                          SelectionDAG &DAG) {
+  assert((Indices.size() % 2) == 0);
+
+  unsigned HalfSize = Indices.size() / 2;
+  unsigned LeftSize = HalfSize / 2;
+  SDValue Xj;
+  SDValue Xk;
+  const auto &Begin = Indices.begin();
+  const auto &End = Indices.end();
+
+  if (fitsRegularPattern<int>(Begin, 2, End, HalfSize - LeftSize, 1) &&
+      fitsRegularPattern<int>(Begin + HalfSize + LeftSize, 2, End,
+                              HalfSize + LeftSize, 1))
+    Xj = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Begin, 2, End,
+                                   Indices.size() + HalfSize - LeftSize, 1) &&
+           fitsRegularPattern<int>(Begin + HalfSize + LeftSize, 2, End,
+                                   Indices.size() + HalfSize + LeftSize, 1))
+    Xj = Op->getOperand(1);
+  else
+    return SDValue();
+
+  if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1) &&
+      fitsRegularPattern<int>(Begin + 1 + HalfSize + LeftSize, 2, End,
+                              HalfSize + LeftSize, 1))
+    Xk = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + HalfSize,
+                                   1) &&
+           fitsRegularPattern<int>(Begin + 1 + HalfSize + LeftSize, 2, End,
+                                   Indices.size() + HalfSize + LeftSize, 1))
+    Xk = Op->getOperand(1);
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VILVH, SDLoc(Op), ResTy, Xk, Xj);
+}
+
+static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(SDValue Op, EVT ResTy,
+                                            SmallVector<int, 16> Indices,
+                                            SelectionDAG &DAG) {
+  assert((Indices.size() % 2) == 0);
+
+  SDValue Xj;
+  SDValue Xk;
+  const auto &Begin = Indices.begin();
+  const auto &End = Indices.end();
+  unsigned HalfSize = Indices.size() / 2;
+
+  if (fitsRegularPattern<int>(Begin, 2, End, 0, 2) &&
+      fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 2))
+    Xj = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 2) &&
+           fitsRegularPattern<int>(Begin + HalfSize, 2, End,
+                                   Indices.size() + HalfSize, 2))
+    Xj = Op->getOperand(1);
+  else
+    return SDValue();
+
+  if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2) &&
+      fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 2))
+    Xk = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 2) &&
+           fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
+                                   Indices.size() + HalfSize, 2))
+    Xk = Op->getOperand(1);
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VPACKEV, SDLoc(Op), ResTy, Xk, Xj);
+}
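The XV* variants repeat each pattern query once per 128-bit half, which is
why HalfSize appears in every check above: LASX instructions such as
xvpackev.w operate on the two 128-bit lanes of a 256-bit register
independently. A scalar model of that assumption (our own sketch, written at
the level of the shuffle masks the lowering accepts):

  // Model of the XVPACKEV case on an n-element 256-bit vector, treated as
  // two independent n/2-element halves; within each half, the even lanes of
  // the two sources are interleaved.
  static void xvpackevModel(int dst[], const int a[], const int b[], int n) {
    int half = n / 2;
    for (int h = 0; h < 2; ++h)          // once per 128-bit half
      for (int i = 0; i < half / 2; ++i) {
        dst[h * half + 2 * i] = a[h * half + 2 * i];
        dst[h * half + 2 * i + 1] = b[h * half + 2 * i];
      }
  }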
+static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(SDValue Op, EVT ResTy,
+                                            SmallVector<int, 16> Indices,
+                                            SelectionDAG &DAG) {
+  assert((Indices.size() % 2) == 0);
+
+  SDValue Xj;
+  SDValue Xk;
+  const auto &Begin = Indices.begin();
+  const auto &End = Indices.end();
+  unsigned HalfSize = Indices.size() / 2;
+
+  if (fitsRegularPattern<int>(Begin, 2, End, 1, 2) &&
+      fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + 1, 2))
+    Xj = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + 1, 2) &&
+           fitsRegularPattern<int>(Begin + HalfSize, 2, End,
+                                   Indices.size() + HalfSize + 1, 2))
+    Xj = Op->getOperand(1);
+  else
+    return SDValue();
+
+  if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2) &&
+      fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + 1, 2))
+    Xk = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + 1, 2) &&
+           fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
+                                   Indices.size() + HalfSize + 1, 2))
+    Xk = Op->getOperand(1);
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VPACKOD, SDLoc(Op), ResTy, Xk, Xj);
+}
+
+static bool isVECTOR_SHUFFLE_XVREPLVEI(SDValue Op, EVT ResTy,
+                                       SmallVector<int, 16> Indices,
+                                       SelectionDAG &DAG) {
+  assert((Indices.size() % 2) == 0);
+  unsigned HalfSize = Indices.size() / 2;
+
+  for (unsigned i = 0; i < HalfSize; i++) {
+    if (Indices[i] == -1 || Indices[HalfSize + i] == -1)
+      return false;
+    if (Indices[0] != Indices[i] || Indices[HalfSize] != Indices[HalfSize + i])
+      return false;
+  }
+  return true;
+}
+
+static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(SDValue Op, EVT ResTy,
+                                            SmallVector<int, 16> Indices,
+                                            SelectionDAG &DAG) {
+  assert((Indices.size() % 2) == 0);
+
+  SDValue Xj;
+  SDValue Xk;
+  const auto &Begin = Indices.begin();
+  const auto &LeftMid = Indices.begin() + Indices.size() / 4;
+  const auto &End = Indices.end();
+  const auto &RightMid = Indices.end() - Indices.size() / 4;
+  const auto &Mid = Indices.begin() + Indices.size() / 2;
+  unsigned HalfSize = Indices.size() / 2;
+
+  if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
+      fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
+    Xj = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Indices.size(), 2) &&
+           fitsRegularPattern<int>(Mid, 1, RightMid, Indices.size() + HalfSize,
+                                   2))
+    Xj = Op->getOperand(1);
+  else
+    return SDValue();
+
+  if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
+      fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
+    Xk = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Indices.size(), 2) &&
+           fitsRegularPattern<int>(RightMid, 1, End, Indices.size() + HalfSize,
+                                   2))
+    Xk = Op->getOperand(1);
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VPICKEV, SDLoc(Op), ResTy, Xk, Xj);
+}
+static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(SDValue Op, EVT ResTy,
+                                            SmallVector<int, 16> Indices,
+                                            SelectionDAG &DAG) {
+  assert((Indices.size() % 2) == 0);
+
+  SDValue Xj;
+  SDValue Xk;
+  const auto &Begin = Indices.begin();
+  const auto &LeftMid = Indices.begin() + Indices.size() / 4;
+  const auto &Mid = Indices.begin() + Indices.size() / 2;
+  const auto &RightMid = Indices.end() - Indices.size() / 4;
+  const auto &End = Indices.end();
+  unsigned HalfSize = Indices.size() / 2;
+
+  if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
+      fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
+    Xj = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Indices.size() + 1, 2) &&
+           fitsRegularPattern<int>(Mid, 1, RightMid,
+                                   Indices.size() + HalfSize + 1, 2))
+    Xj = Op->getOperand(1);
+  else
+    return SDValue();
+
+  if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
+      fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
+    Xk = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Indices.size() + 1, 2) &&
+           fitsRegularPattern<int>(RightMid, 1, End,
+                                   Indices.size() + HalfSize + 1, 2))
+    Xk = Op->getOperand(1);
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VPICKOD, SDLoc(Op), ResTy, Xk, Xj);
+}
+
+static SDValue lowerVECTOR_SHUFFLE_XSHF(SDValue Op, EVT ResTy,
+                                        SmallVector<int, 16> Indices,
+                                        SelectionDAG &DAG) {
+  int SHFIndices[4] = {-1, -1, -1, -1};
+
+  if (Indices.size() < 4)
+    return SDValue();
+
+  int HalfSize = Indices.size() / 2;
+  for (int i = 0; i < 4; ++i) {
+    for (int j = i; j < HalfSize; j += 4) {
+      int Idx = Indices[j];
+      // check mxshf
+      if (Idx + HalfSize != Indices[j + HalfSize])
+        return SDValue();
+
+      // Convert from vector index to 4-element subvector index
+      // If an index refers to an element outside of the subvector then give up
+      if (Idx != -1) {
+        Idx -= 4 * (j / 4);
+        if (Idx < 0 || Idx >= 4)
+          return SDValue();
+      }
+
+      // If the mask has an undef, replace it with the current index.
+      // Note that it might still be undef if the current index is also undef
+      if (SHFIndices[i] == -1)
+        SHFIndices[i] = Idx;
+
+      // Check that non-undef values are the same as in the mask. If they
+      // aren't then give up
+      if (!(Idx == -1 || Idx == SHFIndices[i]))
+        return SDValue();
+    }
+  }
+
+  // Calculate the immediate. Replace any remaining undefs with zero
+  APInt Imm(32, 0);
+  for (int i = 3; i >= 0; --i) {
+    int Idx = SHFIndices[i];
+
+    if (Idx == -1)
+      Idx = 0;
+
+    Imm <<= 2;
+    Imm |= Idx & 0x3;
+  }
+  SDLoc DL(Op);
+  return DAG.getNode(LoongArchISD::SHF, DL, ResTy,
+                     DAG.getConstant(Imm, DL, MVT::i32), Op->getOperand(0));
+}
+
+static bool isConstantOrUndef(const SDValue Op) {
+  if (Op->isUndef())
+    return true;
+  if (isa<ConstantSDNode>(Op))
+    return true;
+  if (isa<ConstantFPSDNode>(Op))
+    return true;
+  return false;
+}
+
+static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
+  for (unsigned i = 0; i < Op->getNumOperands(); ++i)
+    if (isConstantOrUndef(Op->getOperand(i)))
+      return true;
+  return false;
+}
+
+static bool isLASXBySplatBitSize(unsigned SplatBitSize, EVT &ViaVecTy) {
+  switch (SplatBitSize) {
+  default:
+    return false;
+  case 8:
+    ViaVecTy = MVT::v32i8;
+    break;
+  case 16:
+    ViaVecTy = MVT::v16i16;
+    break;
+  case 32:
+    ViaVecTy = MVT::v8i32;
+    break;
+  case 64:
+    ViaVecTy = MVT::v4i64;
+    break;
+  case 128:
+    // There's no fill.q to fall back on for 128-bit values
+    return false;
+  }
+
+  return true;
+}
+
+static bool isLSXBySplatBitSize(unsigned SplatBitSize, EVT &ViaVecTy) {
+  switch (SplatBitSize) {
+  default:
+    return false;
+  case 8:
+    ViaVecTy = MVT::v16i8;
+    break;
+  case 16:
+    ViaVecTy = MVT::v8i16;
+    break;
+  case 32:
+    ViaVecTy = MVT::v4i32;
+    break;
+  case 64:
+    // There's no fill.d to fall back on for 64-bit values
+    return false;
+  }
+
+  return true;
+}
+
+bool LoongArchTargetLowering::isCheapToSpeculateCttz() const { return true; }
+
+bool LoongArchTargetLowering::isCheapToSpeculateCtlz() const { return true; }
+
+void LoongArchTargetLowering::LowerOperationWrapper(
+    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
+  SDValue Res = LowerOperation(SDValue(N, 0), DAG);
+
+  for (unsigned I = 0, E = Res->getNumValues(); I != E; ++I)
+    Results.push_back(Res.getValue(I));
+}
+
+void LoongArchTargetLowering::ReplaceNodeResults(
+    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
+  return LowerOperationWrapper(N, Results, DAG);
+}
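For orientation: LowerOperation and ReplaceNodeResults are only reached for
node/type pairs the target marks Custom while constructing its
TargetLowering (earlier in this file, outside this hunk). A representative
sketch of that hookup, using the standard TargetLowering API; the exact set
of pairs this backend registers is an assumption here, not a quote:

  // In the LoongArchTargetLowering constructor: ask legalization to call
  // LowerOperation for these operations instead of expanding them.
  setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
  setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);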
+SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
+                                                SelectionDAG &DAG) const {
+  switch (Op.getOpcode()) {
+  case ISD::STORE:
+    return lowerSTORE(Op, DAG);
+  case ISD::INTRINSIC_WO_CHAIN:
+    return lowerINTRINSIC_WO_CHAIN(Op, DAG);
+  case ISD::INTRINSIC_W_CHAIN:
+    return lowerINTRINSIC_W_CHAIN(Op, DAG);
+  case ISD::INTRINSIC_VOID:
+    return lowerINTRINSIC_VOID(Op, DAG);
+  case ISD::EXTRACT_VECTOR_ELT:
+    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
+  case ISD::INSERT_VECTOR_ELT:
+    return lowerINSERT_VECTOR_ELT(Op, DAG);
+  case ISD::BUILD_VECTOR:
+    return lowerBUILD_VECTOR(Op, DAG);
+  case ISD::VECTOR_SHUFFLE:
+    return lowerVECTOR_SHUFFLE(Op, DAG);
+  case ISD::UINT_TO_FP:
+    return lowerUINT_TO_FP(Op, DAG);
+  case ISD::SINT_TO_FP:
+    return lowerSINT_TO_FP(Op, DAG);
+  case ISD::FP_TO_UINT:
+    return lowerFP_TO_UINT(Op, DAG);
+  case ISD::FP_TO_SINT:
+    return lowerFP_TO_SINT(Op, DAG);
+  case ISD::BRCOND:
+    return lowerBRCOND(Op, DAG);
+  case ISD::ConstantPool:
+    return lowerConstantPool(Op, DAG);
+  case ISD::GlobalAddress:
+    return lowerGlobalAddress(Op, DAG);
+  case ISD::BlockAddress:
+    return lowerBlockAddress(Op, DAG);
+  case ISD::GlobalTLSAddress:
+    return lowerGlobalTLSAddress(Op, DAG);
+  case ISD::JumpTable:
+    return lowerJumpTable(Op, DAG);
+  case ISD::SELECT:
+    return lowerSELECT(Op, DAG);
+  case ISD::SETCC:
+    return lowerSETCC(Op, DAG);
+  case ISD::VASTART:
+    return lowerVASTART(Op, DAG);
+  case ISD::VAARG:
+    return lowerVAARG(Op, DAG);
+  case ISD::FRAMEADDR:
+    return lowerFRAMEADDR(Op, DAG);
+  case ISD::RETURNADDR:
+    return lowerRETURNADDR(Op, DAG);
+  case ISD::EH_RETURN:
+    return lowerEH_RETURN(Op, DAG);
+  case ISD::ATOMIC_FENCE:
+    return lowerATOMIC_FENCE(Op, DAG);
+  case ISD::SHL_PARTS:
+    return lowerShiftLeftParts(Op, DAG);
+  case ISD::SRA_PARTS:
+    return lowerShiftRightParts(Op, DAG, true);
+  case ISD::SRL_PARTS:
+    return lowerShiftRightParts(Op, DAG, false);
+  case ISD::EH_DWARF_CFA:
+    return lowerEH_DWARF_CFA(Op, DAG);
+  }
+  return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+//  Lower helper functions
+//===----------------------------------------------------------------------===//
+
+template <class NodeTy>
+SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
+                                         bool IsLocal) const {
+  SDLoc DL(N);
+  EVT Ty = getPointerTy(DAG.getDataLayout());
+
+  if (isPositionIndependent()) {
+    SDValue Addr = getTargetNode(N, Ty, DAG, 0U);
+    if (IsLocal)
+      // Use PC-relative addressing to access the symbol.
+      return SDValue(DAG.getMachineNode(LoongArch::LoadAddrLocal, DL, Ty, Addr),
+                     0);
+
+    // Use PC-relative addressing to access the GOT for this symbol, then load
+    // the address from the GOT.
+    return SDValue(DAG.getMachineNode(LoongArch::LoadAddrGlobal, DL, Ty, Addr),
+                   0);
+  }
+
+  SDValue Addr = getTargetNode(N, Ty, DAG, 0U);
+  return SDValue(DAG.getMachineNode(LoongArch::LoadAddrLocal, DL, Ty, Addr), 0);
+}
+
+// addLiveIn - This helper function adds the specified physical register to the
+// MachineFunction as a live in value. It also creates a corresponding
+// virtual register for it.
+static unsigned addLiveIn(MachineFunction &MF, unsigned PReg, + const TargetRegisterClass *RC) { + unsigned VReg = MF.getRegInfo().createVirtualRegister(RC); + MF.getRegInfo().addLiveIn(PReg, VReg); + return VReg; +} + +static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI, + MachineBasicBlock &MBB, + const TargetInstrInfo &TII, + bool Is64Bit) { + if (NoZeroDivCheck) + return &MBB; + + // Insert pseudo instruction(PseudoTEQ), will expand: + // beq $divisor_reg, $zero, 8 + // break 7 + MachineBasicBlock::iterator I(MI); + MachineInstrBuilder MIB; + MachineOperand &Divisor = MI.getOperand(2); + unsigned TeqOp = LoongArch::PseudoTEQ; + + MIB = BuildMI(MBB, std::next(I), MI.getDebugLoc(), TII.get(TeqOp)) + .addReg(Divisor.getReg(), getKillRegState(Divisor.isKill())); + + // Use the 32-bit sub-register if this is a 64-bit division. + //if (Is64Bit) + // MIB->getOperand(0).setSubReg(LoongArch::sub_32); + + // Clear Divisor's kill flag. + Divisor.setIsKill(false); + + // We would normally delete the original instruction here but in this case + // we only needed to inject an additional instruction rather than replace it. + + return &MBB; +} + +MachineBasicBlock * +LoongArchTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, + MachineBasicBlock *BB) const { + switch (MI.getOpcode()) { + default: + llvm_unreachable("Unexpected instr type to insert"); + case LoongArch::FILL_FW_PSEUDO: + return emitFILL_FW(MI, BB); + case LoongArch::FILL_FD_PSEUDO: + return emitFILL_FD(MI, BB); + case LoongArch::SNZ_B_PSEUDO: + return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETALLNEZ_B); + case LoongArch::SNZ_H_PSEUDO: + return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETALLNEZ_H); + case LoongArch::SNZ_W_PSEUDO: + return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETALLNEZ_W); + case LoongArch::SNZ_D_PSEUDO: + return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETALLNEZ_D); + case LoongArch::SNZ_V_PSEUDO: + return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETNEZ_V); + case LoongArch::SZ_B_PSEUDO: + return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETANYEQZ_B); + case LoongArch::SZ_H_PSEUDO: + return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETANYEQZ_H); + case LoongArch::SZ_W_PSEUDO: + return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETANYEQZ_W); + case LoongArch::SZ_D_PSEUDO: + return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETANYEQZ_D); + case LoongArch::SZ_V_PSEUDO: + return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETEQZ_V); + case LoongArch::XSNZ_B_PSEUDO: + return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETALLNEZ_B); + case LoongArch::XSNZ_H_PSEUDO: + return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETALLNEZ_H); + case LoongArch::XSNZ_W_PSEUDO: + return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETALLNEZ_W); + case LoongArch::XSNZ_D_PSEUDO: + return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETALLNEZ_D); + case LoongArch::XSNZ_V_PSEUDO: + return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETNEZ_V); + case LoongArch::XSZ_B_PSEUDO: + return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETANYEQZ_B); + case LoongArch::XSZ_H_PSEUDO: + return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETANYEQZ_H); + case LoongArch::XSZ_W_PSEUDO: + return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETANYEQZ_W); + case LoongArch::XSZ_D_PSEUDO: + return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETANYEQZ_D); + case LoongArch::XSZ_V_PSEUDO: + return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETEQZ_V); + case LoongArch::INSERT_FW_PSEUDO: + return emitINSERT_FW(MI, BB); + case LoongArch::INSERT_FD_PSEUDO: + return 
emitINSERT_FD(MI, BB); + case LoongArch::XINSERT_H_PSEUDO: + return emitXINSERT_BH(MI, BB, 2); + case LoongArch::XCOPY_FW_PSEUDO: + return emitXCOPY_FW(MI, BB); + case LoongArch::XCOPY_FD_PSEUDO: + return emitXCOPY_FD(MI, BB); + case LoongArch::XINSERT_FW_PSEUDO: + return emitXINSERT_FW(MI, BB); + case LoongArch::COPY_FW_PSEUDO: + return emitCOPY_FW(MI, BB); + case LoongArch::XFILL_FW_PSEUDO: + return emitXFILL_FW(MI, BB); + case LoongArch::XFILL_FD_PSEUDO: + return emitXFILL_FD(MI, BB); + case LoongArch::COPY_FD_PSEUDO: + return emitCOPY_FD(MI, BB); + case LoongArch::XINSERT_FD_PSEUDO: + return emitXINSERT_FD(MI, BB); + case LoongArch::XINSERT_B_PSEUDO: + return emitXINSERT_BH(MI, BB, 1); + case LoongArch::CONCAT_VECTORS_B_PSEUDO: + return emitCONCAT_VECTORS(MI, BB, 1); + case LoongArch::CONCAT_VECTORS_H_PSEUDO: + return emitCONCAT_VECTORS(MI, BB, 2); + case LoongArch::CONCAT_VECTORS_W_PSEUDO: + case LoongArch::CONCAT_VECTORS_FW_PSEUDO: + return emitCONCAT_VECTORS(MI, BB, 4); + case LoongArch::CONCAT_VECTORS_D_PSEUDO: + case LoongArch::CONCAT_VECTORS_FD_PSEUDO: + return emitCONCAT_VECTORS(MI, BB, 8); + case LoongArch::XCOPY_FW_GPR_PSEUDO: + return emitXCOPY_FW_GPR(MI, BB); + + case LoongArch::ATOMIC_LOAD_ADD_I8: + return emitAtomicBinaryPartword(MI, BB, 1); + case LoongArch::ATOMIC_LOAD_ADD_I16: + return emitAtomicBinaryPartword(MI, BB, 2); + case LoongArch::ATOMIC_LOAD_ADD_I32: + return emitAtomicBinary(MI, BB); + case LoongArch::ATOMIC_LOAD_ADD_I64: + return emitAtomicBinary(MI, BB); + + case LoongArch::ATOMIC_LOAD_AND_I8: + return emitAtomicBinaryPartword(MI, BB, 1); + case LoongArch::ATOMIC_LOAD_AND_I16: + return emitAtomicBinaryPartword(MI, BB, 2); + case LoongArch::ATOMIC_LOAD_AND_I32: + return emitAtomicBinary(MI, BB); + case LoongArch::ATOMIC_LOAD_AND_I64: + return emitAtomicBinary(MI, BB); + + case LoongArch::ATOMIC_LOAD_OR_I8: + return emitAtomicBinaryPartword(MI, BB, 1); + case LoongArch::ATOMIC_LOAD_OR_I16: + return emitAtomicBinaryPartword(MI, BB, 2); + case LoongArch::ATOMIC_LOAD_OR_I32: + return emitAtomicBinary(MI, BB); + case LoongArch::ATOMIC_LOAD_OR_I64: + return emitAtomicBinary(MI, BB); + + case LoongArch::ATOMIC_LOAD_XOR_I8: + return emitAtomicBinaryPartword(MI, BB, 1); + case LoongArch::ATOMIC_LOAD_XOR_I16: + return emitAtomicBinaryPartword(MI, BB, 2); + case LoongArch::ATOMIC_LOAD_XOR_I32: + return emitAtomicBinary(MI, BB); + case LoongArch::ATOMIC_LOAD_XOR_I64: + return emitAtomicBinary(MI, BB); + + case LoongArch::ATOMIC_LOAD_NAND_I8: + return emitAtomicBinaryPartword(MI, BB, 1); + case LoongArch::ATOMIC_LOAD_NAND_I16: + return emitAtomicBinaryPartword(MI, BB, 2); + case LoongArch::ATOMIC_LOAD_NAND_I32: + return emitAtomicBinary(MI, BB); + case LoongArch::ATOMIC_LOAD_NAND_I64: + return emitAtomicBinary(MI, BB); + + case LoongArch::ATOMIC_LOAD_SUB_I8: + return emitAtomicBinaryPartword(MI, BB, 1); + case LoongArch::ATOMIC_LOAD_SUB_I16: + return emitAtomicBinaryPartword(MI, BB, 2); + case LoongArch::ATOMIC_LOAD_SUB_I32: + return emitAtomicBinary(MI, BB); + case LoongArch::ATOMIC_LOAD_SUB_I64: + return emitAtomicBinary(MI, BB); + + case LoongArch::ATOMIC_SWAP_I8: + return emitAtomicBinaryPartword(MI, BB, 1); + case LoongArch::ATOMIC_SWAP_I16: + return emitAtomicBinaryPartword(MI, BB, 2); + case LoongArch::ATOMIC_SWAP_I32: + return emitAtomicBinary(MI, BB); + case LoongArch::ATOMIC_SWAP_I64: + return emitAtomicBinary(MI, BB); + + case LoongArch::XINSERT_B_VIDX_PSEUDO: + case LoongArch::XINSERT_B_VIDX64_PSEUDO: + return emitXINSERT_B(MI, BB); + case 
LoongArch::INSERT_H_VIDX64_PSEUDO: + return emitINSERT_H_VIDX(MI, BB); + case LoongArch::XINSERT_FW_VIDX_PSEUDO: + return emitXINSERT_DF_VIDX(MI, BB, false); + case LoongArch::XINSERT_FW_VIDX64_PSEUDO: + return emitXINSERT_DF_VIDX(MI, BB, true); + + case LoongArch::ATOMIC_LOAD_MAX_I8: + return emitAtomicBinaryPartword(MI, BB, 1); + case LoongArch::ATOMIC_LOAD_MAX_I16: + return emitAtomicBinaryPartword(MI, BB, 2); + case LoongArch::ATOMIC_LOAD_MAX_I32: + return emitAtomicBinary(MI, BB); + case LoongArch::ATOMIC_LOAD_MAX_I64: + return emitAtomicBinary(MI, BB); + + case LoongArch::ATOMIC_LOAD_MIN_I8: + return emitAtomicBinaryPartword(MI, BB, 1); + case LoongArch::ATOMIC_LOAD_MIN_I16: + return emitAtomicBinaryPartword(MI, BB, 2); + case LoongArch::ATOMIC_LOAD_MIN_I32: + return emitAtomicBinary(MI, BB); + case LoongArch::ATOMIC_LOAD_MIN_I64: + return emitAtomicBinary(MI, BB); + + case LoongArch::ATOMIC_LOAD_UMAX_I8: + return emitAtomicBinaryPartword(MI, BB, 1); + case LoongArch::ATOMIC_LOAD_UMAX_I16: + return emitAtomicBinaryPartword(MI, BB, 2); + case LoongArch::ATOMIC_LOAD_UMAX_I32: + return emitAtomicBinary(MI, BB); + case LoongArch::ATOMIC_LOAD_UMAX_I64: + return emitAtomicBinary(MI, BB); + + case LoongArch::ATOMIC_LOAD_UMIN_I8: + return emitAtomicBinaryPartword(MI, BB, 1); + case LoongArch::ATOMIC_LOAD_UMIN_I16: + return emitAtomicBinaryPartword(MI, BB, 2); + case LoongArch::ATOMIC_LOAD_UMIN_I32: + return emitAtomicBinary(MI, BB); + case LoongArch::ATOMIC_LOAD_UMIN_I64: + return emitAtomicBinary(MI, BB); + + case LoongArch::ATOMIC_CMP_SWAP_I8: + return emitAtomicCmpSwapPartword(MI, BB, 1); + case LoongArch::ATOMIC_CMP_SWAP_I16: + return emitAtomicCmpSwapPartword(MI, BB, 2); + case LoongArch::ATOMIC_CMP_SWAP_I32: + return emitAtomicCmpSwap(MI, BB); + case LoongArch::ATOMIC_CMP_SWAP_I64: + return emitAtomicCmpSwap(MI, BB); + + case LoongArch::PseudoSELECT_I: + case LoongArch::PseudoSELECT_I64: + case LoongArch::PseudoSELECT_S: + case LoongArch::PseudoSELECT_D64: + return emitPseudoSELECT(MI, BB, false, LoongArch::BNE32); + + case LoongArch::PseudoSELECTFP_T_I: + case LoongArch::PseudoSELECTFP_T_I64: + return emitPseudoSELECT(MI, BB, true, LoongArch::BCNEZ); + + case LoongArch::PseudoSELECTFP_F_I: + case LoongArch::PseudoSELECTFP_F_I64: + return emitPseudoSELECT(MI, BB, true, LoongArch::BCEQZ); + case LoongArch::DIV_W: + case LoongArch::DIV_WU: + case LoongArch::MOD_W: + case LoongArch::MOD_WU: + return insertDivByZeroTrap(MI, *BB, *Subtarget.getInstrInfo(), false); + case LoongArch::DIV_D: + case LoongArch::DIV_DU: + case LoongArch::MOD_D: + case LoongArch::MOD_DU: + return insertDivByZeroTrap(MI, *BB, *Subtarget.getInstrInfo(), true); + } +} + +MachineBasicBlock *LoongArchTargetLowering::emitXINSERT_DF_VIDX( + MachineInstr &MI, MachineBasicBlock *BB, bool IsGPR64) const { + + MachineFunction *MF = BB->getParent(); + MachineRegisterInfo &RegInfo = MF->getRegInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + + unsigned insertOp; + insertOp = IsGPR64 ? 
LoongArch::XINSERT_FW_VIDX64_PSEUDO_POSTRA + : LoongArch::XINSERT_FW_VIDX_PSEUDO_POSTRA; + + unsigned DstReg = MI.getOperand(0).getReg(); + unsigned SrcVecReg = MI.getOperand(1).getReg(); + unsigned LaneReg = MI.getOperand(2).getReg(); + unsigned SrcValReg = MI.getOperand(3).getReg(); + unsigned Dest = RegInfo.createVirtualRegister(RegInfo.getRegClass(DstReg)); + + MachineBasicBlock::iterator II(MI); + + unsigned VecCopy = + RegInfo.createVirtualRegister(RegInfo.getRegClass(SrcVecReg)); + unsigned LaneCopy = + RegInfo.createVirtualRegister(RegInfo.getRegClass(LaneReg)); + unsigned ValCopy = + RegInfo.createVirtualRegister(RegInfo.getRegClass(SrcValReg)); + + const TargetRegisterClass *RC = + IsGPR64 ? &LoongArch::GPR64RegClass : &LoongArch::GPR32RegClass; + unsigned RI = RegInfo.createVirtualRegister(RC); + + unsigned Rj = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); + unsigned Xj = RegInfo.createVirtualRegister(&LoongArch::LASX256WRegClass); + BuildMI(*BB, II, DL, TII->get(LoongArch::SUBREG_TO_REG), Xj) + .addImm(0) + .addReg(SrcValReg) + .addImm(LoongArch::sub_lo); + BuildMI(*BB, II, DL, TII->get(LoongArch::XVPICKVE2GR_W), Rj) + .addReg(Xj) + .addImm(0); + + BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), VecCopy).addReg(SrcVecReg); + BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), LaneCopy).addReg(LaneReg); + BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), ValCopy).addReg(SrcValReg); + + BuildMI(*BB, II, DL, TII->get(insertOp)) + .addReg(DstReg, RegState::Define | RegState::EarlyClobber) + .addReg(VecCopy) + .addReg(LaneCopy) + .addReg(ValCopy) + .addReg(Dest, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(RI, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(Rj, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead); + + MI.eraseFromParent(); + + return BB; +} + +MachineBasicBlock * +LoongArchTargetLowering::emitINSERT_H_VIDX(MachineInstr &MI, + MachineBasicBlock *BB) const { + + MachineFunction *MF = BB->getParent(); + MachineRegisterInfo &RegInfo = MF->getRegInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + + unsigned insertOp; + unsigned isGP64 = 0; + switch (MI.getOpcode()) { + case LoongArch::INSERT_H_VIDX64_PSEUDO: + isGP64 = 1; + insertOp = LoongArch::INSERT_H_VIDX64_PSEUDO_POSTRA; + break; + default: + llvm_unreachable("Unknown pseudo vector for replacement!"); + } + + unsigned DstReg = MI.getOperand(0).getReg(); + unsigned SrcVecReg = MI.getOperand(1).getReg(); + unsigned LaneReg = MI.getOperand(2).getReg(); + unsigned SrcValReg = MI.getOperand(3).getReg(); + unsigned Dest = RegInfo.createVirtualRegister(RegInfo.getRegClass(DstReg)); + + MachineBasicBlock::iterator II(MI); + + unsigned VecCopy = + RegInfo.createVirtualRegister(RegInfo.getRegClass(SrcVecReg)); + unsigned LaneCopy = + RegInfo.createVirtualRegister(RegInfo.getRegClass(LaneReg)); + unsigned ValCopy = + RegInfo.createVirtualRegister(RegInfo.getRegClass(SrcValReg)); + + const TargetRegisterClass *RC = + isGP64 ? 
&LoongArch::GPR64RegClass : &LoongArch::GPR32RegClass; + unsigned RI = RegInfo.createVirtualRegister(RC); + + BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), VecCopy).addReg(SrcVecReg); + BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), LaneCopy).addReg(LaneReg); + BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), ValCopy).addReg(SrcValReg); + + BuildMI(*BB, II, DL, TII->get(insertOp)) + .addReg(DstReg, RegState::Define | RegState::EarlyClobber) + .addReg(VecCopy) + .addReg(LaneCopy) + .addReg(ValCopy) + .addReg(Dest, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(RI, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead); + + MI.eraseFromParent(); + + return BB; +} + +MachineBasicBlock * +LoongArchTargetLowering::emitXINSERT_B(MachineInstr &MI, + MachineBasicBlock *BB) const { + + MachineFunction *MF = BB->getParent(); + MachineRegisterInfo &RegInfo = MF->getRegInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + + unsigned insertOp; + unsigned isGP64 = 0; + switch (MI.getOpcode()) { + case LoongArch::XINSERT_B_VIDX64_PSEUDO: + isGP64 = 1; + insertOp = LoongArch::XINSERT_B_VIDX64_PSEUDO_POSTRA; + break; + case LoongArch::XINSERT_B_VIDX_PSEUDO: + insertOp = LoongArch::XINSERT_B_VIDX_PSEUDO_POSTRA; + break; + default: + llvm_unreachable("Unknown pseudo vector for replacement!"); + } + + unsigned DstReg = MI.getOperand(0).getReg(); + unsigned SrcVecReg = MI.getOperand(1).getReg(); + unsigned LaneReg = MI.getOperand(2).getReg(); + unsigned SrcValReg = MI.getOperand(3).getReg(); + unsigned Dest = RegInfo.createVirtualRegister(RegInfo.getRegClass(DstReg)); + + MachineBasicBlock::iterator II(MI); + + unsigned VecCopy = + RegInfo.createVirtualRegister(RegInfo.getRegClass(SrcVecReg)); + unsigned LaneCopy = + RegInfo.createVirtualRegister(RegInfo.getRegClass(LaneReg)); + unsigned ValCopy = + RegInfo.createVirtualRegister(RegInfo.getRegClass(SrcValReg)); + const TargetRegisterClass *RC = + isGP64 ? 
&LoongArch::GPR64RegClass : &LoongArch::GPR32RegClass; + unsigned Rimm = RegInfo.createVirtualRegister(RC); + unsigned R4r = RegInfo.createVirtualRegister(RC); + unsigned Rib = RegInfo.createVirtualRegister(RC); + unsigned Ris = RegInfo.createVirtualRegister(RC); + unsigned R7b1 = RegInfo.createVirtualRegister(RC); + unsigned R7b2 = RegInfo.createVirtualRegister(RC); + unsigned R7b3 = RegInfo.createVirtualRegister(RC); + unsigned RI = RegInfo.createVirtualRegister(RC); + + unsigned R7r80_3 = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); + unsigned R7r80l_3 = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); + unsigned R7r81_3 = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); + unsigned R7r81l_3 = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); + unsigned R7r82_3 = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); + unsigned R7r82l_3 = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); + unsigned R70 = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); + unsigned tmp_Dst73 = + RegInfo.createVirtualRegister(&LoongArch::LASX256BRegClass); + + BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), VecCopy).addReg(SrcVecReg); + BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), LaneCopy).addReg(LaneReg); + BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), ValCopy).addReg(SrcValReg); + + BuildMI(*BB, II, DL, TII->get(insertOp)) + .addReg(DstReg, RegState::Define | RegState::EarlyClobber) + .addReg(VecCopy) + .addReg(LaneCopy) + .addReg(ValCopy) + .addReg(Dest, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(R4r, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(Rib, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(Ris, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(R7b1, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(R7b2, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(R7b3, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(R7r80_3, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(R7r80l_3, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(R7r81_3, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(R7r81l_3, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(R7r82_3, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(R7r82l_3, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(RI, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(tmp_Dst73, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(Rimm, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(R70, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead); + + MI.eraseFromParent(); + + return BB; +} + +const TargetRegisterClass * +LoongArchTargetLowering::getRepRegClassFor(MVT VT) const { + return TargetLowering::getRepRegClassFor(VT); +} + +// This function also handles LoongArch::ATOMIC_SWAP_I32 (when BinOpcode == 0), and +// LoongArch::ATOMIC_LOAD_NAND_I32 (when Nand == true) 
+MachineBasicBlock *
+LoongArchTargetLowering::emitAtomicBinary(MachineInstr &MI,
+ MachineBasicBlock *BB) const {
+
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned AtomicOp;
+ switch (MI.getOpcode()) {
+ case LoongArch::ATOMIC_LOAD_ADD_I32:
+ AtomicOp = LoongArch::ATOMIC_LOAD_ADD_I32_POSTRA;
+ break;
+ case LoongArch::ATOMIC_LOAD_SUB_I32:
+ AtomicOp = LoongArch::ATOMIC_LOAD_SUB_I32_POSTRA;
+ break;
+ case LoongArch::ATOMIC_LOAD_AND_I32:
+ AtomicOp = LoongArch::ATOMIC_LOAD_AND_I32_POSTRA;
+ break;
+ case LoongArch::ATOMIC_LOAD_OR_I32:
+ AtomicOp = LoongArch::ATOMIC_LOAD_OR_I32_POSTRA;
+ break;
+ case LoongArch::ATOMIC_LOAD_XOR_I32:
+ AtomicOp = LoongArch::ATOMIC_LOAD_XOR_I32_POSTRA;
+ break;
+ case LoongArch::ATOMIC_LOAD_NAND_I32:
+ AtomicOp = LoongArch::ATOMIC_LOAD_NAND_I32_POSTRA;
+ break;
+ case LoongArch::ATOMIC_SWAP_I32:
+ AtomicOp = LoongArch::ATOMIC_SWAP_I32_POSTRA;
+ break;
+ case LoongArch::ATOMIC_LOAD_MAX_I32:
+ AtomicOp = LoongArch::ATOMIC_LOAD_MAX_I32_POSTRA;
+ break;
+ case LoongArch::ATOMIC_LOAD_MIN_I32:
+ AtomicOp = LoongArch::ATOMIC_LOAD_MIN_I32_POSTRA;
+ break;
+ case LoongArch::ATOMIC_LOAD_UMAX_I32:
+ AtomicOp = LoongArch::ATOMIC_LOAD_UMAX_I32_POSTRA;
+ break;
+ case LoongArch::ATOMIC_LOAD_UMIN_I32:
+ AtomicOp = LoongArch::ATOMIC_LOAD_UMIN_I32_POSTRA;
+ break;
+ case LoongArch::ATOMIC_LOAD_ADD_I64:
+ AtomicOp = LoongArch::ATOMIC_LOAD_ADD_I64_POSTRA;
+ break;
+ case LoongArch::ATOMIC_LOAD_SUB_I64:
+ AtomicOp = LoongArch::ATOMIC_LOAD_SUB_I64_POSTRA;
+ break;
+ case LoongArch::ATOMIC_LOAD_AND_I64:
+ AtomicOp = LoongArch::ATOMIC_LOAD_AND_I64_POSTRA;
+ break;
+ case LoongArch::ATOMIC_LOAD_OR_I64:
+ AtomicOp = LoongArch::ATOMIC_LOAD_OR_I64_POSTRA;
+ break;
+ case LoongArch::ATOMIC_LOAD_XOR_I64:
+ AtomicOp = LoongArch::ATOMIC_LOAD_XOR_I64_POSTRA;
+ break;
+ case LoongArch::ATOMIC_LOAD_NAND_I64:
+ AtomicOp = LoongArch::ATOMIC_LOAD_NAND_I64_POSTRA;
+ break;
+ case LoongArch::ATOMIC_SWAP_I64:
+ AtomicOp = LoongArch::ATOMIC_SWAP_I64_POSTRA;
+ break;
+ case LoongArch::ATOMIC_LOAD_MAX_I64:
+ AtomicOp = LoongArch::ATOMIC_LOAD_MAX_I64_POSTRA;
+ break;
+ case LoongArch::ATOMIC_LOAD_MIN_I64:
+ AtomicOp = LoongArch::ATOMIC_LOAD_MIN_I64_POSTRA;
+ break;
+ case LoongArch::ATOMIC_LOAD_UMAX_I64:
+ AtomicOp = LoongArch::ATOMIC_LOAD_UMAX_I64_POSTRA;
+ break;
+ case LoongArch::ATOMIC_LOAD_UMIN_I64:
+ AtomicOp = LoongArch::ATOMIC_LOAD_UMIN_I64_POSTRA;
+ break;
+ default:
+ llvm_unreachable("Unknown pseudo atomic for replacement!");
+ }
+
+ unsigned OldVal = MI.getOperand(0).getReg();
+ unsigned Ptr = MI.getOperand(1).getReg();
+ unsigned Incr = MI.getOperand(2).getReg();
+ unsigned Scratch = RegInfo.createVirtualRegister(RegInfo.getRegClass(OldVal));
+
+ MachineBasicBlock::iterator II(MI);
+
+ // The scratch registers here with the EarlyClobber | Define | Implicit
+ // flags are used to persuade the register allocator and the machine
+ // verifier to accept the usage of this register. This has to be a real
+ // register which has an UNDEF value but is dead after the instruction and
+ // which is unique among the registers chosen for the instruction.
+
+ // The EarlyClobber flag has the semantic property that the operand it is
+ // attached to is clobbered before the rest of the inputs are read. Hence it
+ // must be unique among the operands to the instruction.
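+ // (For example, without EarlyClobber the allocator could assign Scratch
+ // the same physical register as PtrCopy or IncrCopy below, and the
+ // expanded LL/SC loop would clobber its own inputs; a hypothetical
+ // failure mode, given for illustration.)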
+ // The Define flag is needed to convince the machine verifier that an Undef
+ // value isn't a problem.
+ // The Dead flag is needed as the value in scratch isn't used by any other
+ // instruction. Kill isn't used as Dead is more precise.
+ // The Implicit flag is here due to the interaction between the other flags
+ // and the machine verifier.
+
+ // For correctness purposes, a new pseudo is introduced here. We need this
+ // new pseudo, so that FastRegisterAllocator does not see an ll/sc sequence
+ // that is spread over more than one basic block. A register allocator (or
+ // any codegen, in fact) that introduces a store can violate the
+ // expectations of the hardware.
+ //
+ // An atomic read-modify-write sequence starts with a linked load
+ // instruction and ends with a store conditional instruction. The atomic
+ // read-modify-write sequence fails if any of the following conditions
+ // occur between the execution of ll and sc:
+ // * A coherent store is completed by another process or coherent I/O
+ // module into the block of synchronizable physical memory containing
+ // the word. The size and alignment of the block is
+ // implementation-dependent.
+ // * A coherent store is executed between an LL and SC sequence on the
+ // same processor to the block of synchronizable physical memory
+ // containing the word.
+
+ unsigned PtrCopy = RegInfo.createVirtualRegister(RegInfo.getRegClass(Ptr));
+ unsigned IncrCopy = RegInfo.createVirtualRegister(RegInfo.getRegClass(Incr));
+
+ BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), IncrCopy).addReg(Incr);
+ BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), PtrCopy).addReg(Ptr);
+
+ BuildMI(*BB, II, DL, TII->get(AtomicOp))
+ .addReg(OldVal, RegState::Define | RegState::EarlyClobber)
+ .addReg(PtrCopy)
+ .addReg(IncrCopy)
+ .addReg(Scratch, RegState::Define | RegState::EarlyClobber |
+ RegState::Implicit | RegState::Dead);
+
+ if (MI.getOpcode() == LoongArch::ATOMIC_LOAD_NAND_I32 ||
+ MI.getOpcode() == LoongArch::ATOMIC_LOAD_NAND_I64) {
+ BuildMI(*BB, II, DL, TII->get(LoongArch::DBAR)).addImm(DBAR_HINT);
+ }
+
+ MI.eraseFromParent();
+
+ return BB;
+}
+
+MachineBasicBlock *LoongArchTargetLowering::emitSignExtendToI32InReg(
+ MachineInstr &MI, MachineBasicBlock *BB, unsigned Size, unsigned DstReg,
+ unsigned SrcReg) const {
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ const DebugLoc &DL = MI.getDebugLoc();
+ if (Size == 1) {
+ BuildMI(BB, DL, TII->get(LoongArch::EXT_W_B32), DstReg).addReg(SrcReg);
+ return BB;
+ }
+
+ if (Size == 2) {
+ BuildMI(BB, DL, TII->get(LoongArch::EXT_W_H32), DstReg).addReg(SrcReg);
+ return BB;
+ }
+
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+ unsigned ScrReg = RegInfo.createVirtualRegister(RC);
+
+ assert(Size < 32);
+ int64_t ShiftImm = 32 - (Size * 8);
+
+ BuildMI(BB, DL, TII->get(LoongArch::SLLI_W), ScrReg).addReg(SrcReg).addImm(ShiftImm);
+ BuildMI(BB, DL, TII->get(LoongArch::SRAI_W), DstReg).addReg(ScrReg).addImm(ShiftImm);
+
+ return BB;
+}
+
+MachineBasicBlock *LoongArchTargetLowering::emitAtomicBinaryPartword(
+ MachineInstr &MI, MachineBasicBlock *BB, unsigned Size) const {
+ assert((Size == 1 || Size == 2) &&
+ "Unsupported size for EmitAtomicBinaryPartial.");
+
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+ const bool ArePtrs64bit = ABI.ArePtrs64bit();
+ const TargetRegisterClass *RCp =
+ getRegClassFor(ArePtrs64bit ? MVT::i64 : MVT::i32); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + + unsigned Dest = MI.getOperand(0).getReg(); + unsigned Ptr = MI.getOperand(1).getReg(); + unsigned Incr = MI.getOperand(2).getReg(); + + unsigned AlignedAddr = RegInfo.createVirtualRegister(RCp); + unsigned ShiftAmt = RegInfo.createVirtualRegister(RC); + unsigned Mask = RegInfo.createVirtualRegister(RC); + unsigned Mask2 = RegInfo.createVirtualRegister(RC); + unsigned Incr2 = RegInfo.createVirtualRegister(RC); + unsigned MaskLSB2 = RegInfo.createVirtualRegister(RCp); + unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC); + unsigned MaskUpper = RegInfo.createVirtualRegister(RC); + unsigned MaskUppest = RegInfo.createVirtualRegister(RC); + unsigned Scratch = RegInfo.createVirtualRegister(RC); + unsigned Scratch2 = RegInfo.createVirtualRegister(RC); + unsigned Scratch3 = RegInfo.createVirtualRegister(RC); + unsigned Scratch4 = RegInfo.createVirtualRegister(RC); + unsigned Scratch5 = RegInfo.createVirtualRegister(RC); + + unsigned AtomicOp = 0; + switch (MI.getOpcode()) { + case LoongArch::ATOMIC_LOAD_NAND_I8: + AtomicOp = LoongArch::ATOMIC_LOAD_NAND_I8_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_NAND_I16: + AtomicOp = LoongArch::ATOMIC_LOAD_NAND_I16_POSTRA; + break; + case LoongArch::ATOMIC_SWAP_I8: + AtomicOp = LoongArch::ATOMIC_SWAP_I8_POSTRA; + break; + case LoongArch::ATOMIC_SWAP_I16: + AtomicOp = LoongArch::ATOMIC_SWAP_I16_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_MAX_I8: + AtomicOp = LoongArch::ATOMIC_LOAD_MAX_I8_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_MAX_I16: + AtomicOp = LoongArch::ATOMIC_LOAD_MAX_I16_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_MIN_I8: + AtomicOp = LoongArch::ATOMIC_LOAD_MIN_I8_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_MIN_I16: + AtomicOp = LoongArch::ATOMIC_LOAD_MIN_I16_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_UMAX_I8: + AtomicOp = LoongArch::ATOMIC_LOAD_UMAX_I8_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_UMAX_I16: + AtomicOp = LoongArch::ATOMIC_LOAD_UMAX_I16_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_UMIN_I8: + AtomicOp = LoongArch::ATOMIC_LOAD_UMIN_I8_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_UMIN_I16: + AtomicOp = LoongArch::ATOMIC_LOAD_UMIN_I16_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_ADD_I8: + AtomicOp = LoongArch::ATOMIC_LOAD_ADD_I8_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_ADD_I16: + AtomicOp = LoongArch::ATOMIC_LOAD_ADD_I16_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_SUB_I8: + AtomicOp = LoongArch::ATOMIC_LOAD_SUB_I8_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_SUB_I16: + AtomicOp = LoongArch::ATOMIC_LOAD_SUB_I16_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_AND_I8: + AtomicOp = LoongArch::ATOMIC_LOAD_AND_I8_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_AND_I16: + AtomicOp = LoongArch::ATOMIC_LOAD_AND_I16_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_OR_I8: + AtomicOp = LoongArch::ATOMIC_LOAD_OR_I8_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_OR_I16: + AtomicOp = LoongArch::ATOMIC_LOAD_OR_I16_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_XOR_I8: + AtomicOp = LoongArch::ATOMIC_LOAD_XOR_I8_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_XOR_I16: + AtomicOp = LoongArch::ATOMIC_LOAD_XOR_I16_POSTRA; + break; + default: + llvm_unreachable("Unknown subword atomic pseudo for expansion!"); + } + + // insert new blocks after the current block + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 
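+ // Everything in BB after MI is spliced into exitMBB below, so BB ends with
+ // the setup code plus the post-RA atomic pseudo and exitMBB carries on from
+ // there. A sketch of the resulting CFG (illustrative only):
+ //
+ //   BB:      mask/shift setup; DBAR; ATOMIC_*_POSTRA
+ //   exitMBB: everything that used to follow MI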
+ MachineFunction::iterator It = ++BB->getIterator();
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ BB->addSuccessor(exitMBB, BranchProbability::getOne());
+
+ // thisMBB:
+ // addiu masklsb2,$0,-4 # 0xfffffffc
+ // and alignedaddr,ptr,masklsb2
+ // andi ptrlsb2,ptr,3
+ // sll shiftamt,ptrlsb2,3
+ // ori maskupper,$0,255 # 0xff
+ // sll mask,maskupper,shiftamt
+ // nor mask2,$0,mask
+ // sll incr2,incr,shiftamt
+
+ int64_t MaskImm = (Size == 1) ? 255 : 4095;
+ BuildMI(BB, DL, TII->get(ABI.GetPtrAddiOp()), MaskLSB2)
+ .addReg(ABI.GetNullPtr()).addImm(-4);
+ BuildMI(BB, DL, TII->get(ABI.GetPtrAndOp()), AlignedAddr)
+ .addReg(Ptr).addReg(MaskLSB2);
+ BuildMI(BB, DL, TII->get(LoongArch::ANDI32), PtrLSB2)
+ .addReg(Ptr, 0, ArePtrs64bit ? LoongArch::sub_32 : 0).addImm(3);
+ BuildMI(BB, DL, TII->get(LoongArch::SLLI_W), ShiftAmt).addReg(PtrLSB2).addImm(3);
+
+ if (MaskImm == 4095) {
+ BuildMI(BB, DL, TII->get(LoongArch::LU12I_W32), MaskUppest).addImm(0xf);
+ BuildMI(BB, DL, TII->get(LoongArch::ORI32), MaskUpper)
+ .addReg(MaskUppest).addImm(MaskImm);
+ } else {
+ BuildMI(BB, DL, TII->get(LoongArch::ORI32), MaskUpper)
+ .addReg(LoongArch::ZERO).addImm(MaskImm);
+ }
+
+ BuildMI(BB, DL, TII->get(LoongArch::SLL_W), Mask)
+ .addReg(MaskUpper).addReg(ShiftAmt);
+ BuildMI(BB, DL, TII->get(LoongArch::NOR32), Mask2).addReg(LoongArch::ZERO).addReg(Mask);
+ BuildMI(BB, DL, TII->get(LoongArch::SLL_W), Incr2).addReg(Incr).addReg(ShiftAmt);
+
+ // The purposes of the flags on the scratch registers are explained in
+ // emitAtomicBinary. In summary, we need a scratch register which is going
+ // to be undef and which is unique among the registers chosen for the
+ // instruction.
+
+ BuildMI(BB, DL, TII->get(LoongArch::DBAR)).addImm(0);
+ BuildMI(BB, DL, TII->get(AtomicOp))
+ .addReg(Dest, RegState::Define | RegState::EarlyClobber)
+ .addReg(AlignedAddr)
+ .addReg(Incr2)
+ .addReg(Mask)
+ .addReg(Mask2)
+ .addReg(ShiftAmt)
+ .addReg(Scratch, RegState::EarlyClobber | RegState::Define |
+ RegState::Dead | RegState::Implicit)
+ .addReg(Scratch2, RegState::EarlyClobber | RegState::Define |
+ RegState::Dead | RegState::Implicit)
+ .addReg(Scratch3, RegState::EarlyClobber | RegState::Define |
+ RegState::Dead | RegState::Implicit)
+ .addReg(Scratch4, RegState::EarlyClobber | RegState::Define |
+ RegState::Dead | RegState::Implicit)
+ .addReg(Scratch5, RegState::EarlyClobber | RegState::Define |
+ RegState::Dead | RegState::Implicit);
+
+ MI.eraseFromParent(); // The instruction is gone now.
+
+ return exitMBB;
+}
+
+// Lower atomic compare and swap to a pseudo instruction, taking care to
+// define a scratch register for the pseudo instruction's expansion. The
+// instruction is expanded after the register allocator so as to prevent
+// the insertion of stores between the linked load and the store conditional.
+
+MachineBasicBlock *
+LoongArchTargetLowering::emitAtomicCmpSwap(MachineInstr &MI,
+ MachineBasicBlock *BB) const {
+ assert((MI.getOpcode() == LoongArch::ATOMIC_CMP_SWAP_I32 ||
+ MI.getOpcode() == LoongArch::ATOMIC_CMP_SWAP_I64) &&
+ "Unsupported atomic pseudo for EmitAtomicCmpSwap.");
+
+ const unsigned Size = MI.getOpcode() == LoongArch::ATOMIC_CMP_SWAP_I32 ?
4 : 8;
+
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8));
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned AtomicOp = MI.getOpcode() == LoongArch::ATOMIC_CMP_SWAP_I32
+ ? LoongArch::ATOMIC_CMP_SWAP_I32_POSTRA
+ : LoongArch::ATOMIC_CMP_SWAP_I64_POSTRA;
+ unsigned Dest = MI.getOperand(0).getReg();
+ unsigned Ptr = MI.getOperand(1).getReg();
+ unsigned OldVal = MI.getOperand(2).getReg();
+ unsigned NewVal = MI.getOperand(3).getReg();
+
+ unsigned Scratch = MRI.createVirtualRegister(RC);
+ MachineBasicBlock::iterator II(MI);
+
+ // We need to create copies of the various registers and kill them at the
+ // atomic pseudo. If the copies are not made, when the atomic is expanded
+ // after fast register allocation, the spills will end up outside of the
+ // blocks that their values are defined in, causing livein errors.
+
+ unsigned PtrCopy = MRI.createVirtualRegister(MRI.getRegClass(Ptr));
+ unsigned OldValCopy = MRI.createVirtualRegister(MRI.getRegClass(OldVal));
+ unsigned NewValCopy = MRI.createVirtualRegister(MRI.getRegClass(NewVal));
+
+ BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), PtrCopy).addReg(Ptr);
+ BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), OldValCopy).addReg(OldVal);
+ BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), NewValCopy).addReg(NewVal);
+
+ // The purposes of the flags on the scratch registers are explained in
+ // emitAtomicBinary. In summary, we need a scratch register which is going
+ // to be undef and which is unique among the registers chosen for the
+ // instruction.
+
+ BuildMI(*BB, II, DL, TII->get(LoongArch::DBAR)).addImm(0);
+ BuildMI(*BB, II, DL, TII->get(AtomicOp))
+ .addReg(Dest, RegState::Define | RegState::EarlyClobber)
+ .addReg(PtrCopy, RegState::Kill)
+ .addReg(OldValCopy, RegState::Kill)
+ .addReg(NewValCopy, RegState::Kill)
+ .addReg(Scratch, RegState::EarlyClobber | RegState::Define |
+ RegState::Dead | RegState::Implicit);
+
+ BuildMI(*BB, II, DL, TII->get(LoongArch::DBAR)).addImm(DBAR_HINT);
+
+ MI.eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
+MachineBasicBlock *LoongArchTargetLowering::emitAtomicCmpSwapPartword(
+ MachineInstr &MI, MachineBasicBlock *BB, unsigned Size) const {
+ assert((Size == 1 || Size == 2) &&
+ "Unsupported size for EmitAtomicCmpSwapPartial.");
+
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+ const bool ArePtrs64bit = ABI.ArePtrs64bit();
+ const TargetRegisterClass *RCp =
+ getRegClassFor(ArePtrs64bit ?
MVT::i64 : MVT::i32);
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned Dest = MI.getOperand(0).getReg();
+ unsigned Ptr = MI.getOperand(1).getReg();
+ unsigned CmpVal = MI.getOperand(2).getReg();
+ unsigned NewVal = MI.getOperand(3).getReg();
+
+ unsigned AlignedAddr = RegInfo.createVirtualRegister(RCp);
+ unsigned ShiftAmt = RegInfo.createVirtualRegister(RC);
+ unsigned Mask = RegInfo.createVirtualRegister(RC);
+ unsigned Mask2 = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftedCmpVal = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftedNewVal = RegInfo.createVirtualRegister(RC);
+ unsigned MaskLSB2 = RegInfo.createVirtualRegister(RCp);
+ unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC);
+ unsigned MaskUpper = RegInfo.createVirtualRegister(RC);
+ unsigned MaskUppest = RegInfo.createVirtualRegister(RC);
+ unsigned Mask3 = RegInfo.createVirtualRegister(RC);
+ unsigned MaskedCmpVal = RegInfo.createVirtualRegister(RC);
+ unsigned MaskedNewVal = RegInfo.createVirtualRegister(RC);
+ unsigned AtomicOp = MI.getOpcode() == LoongArch::ATOMIC_CMP_SWAP_I8
+ ? LoongArch::ATOMIC_CMP_SWAP_I8_POSTRA
+ : LoongArch::ATOMIC_CMP_SWAP_I16_POSTRA;
+
+ // The scratch registers here with the EarlyClobber | Define | Dead | Implicit
+ // flags are used to persuade the register allocator and the machine verifier
+ // to accept the usage of these registers.
+ // The EarlyClobber flag has the semantic property that the operand it is
+ // attached to is clobbered before the rest of the inputs are read. Hence it
+ // must be unique among the operands to the instruction.
+ // The Define flag is needed to convince the machine verifier that an Undef
+ // value isn't a problem.
+ // The Dead flag is needed as the value in scratch isn't used by any other
+ // instruction. Kill isn't used as Dead is more precise.
+ unsigned Scratch = RegInfo.createVirtualRegister(RC);
+ unsigned Scratch2 = RegInfo.createVirtualRegister(RC);
+
+ // insert new blocks after the current block
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineFunction::iterator It = ++BB->getIterator();
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ BB->addSuccessor(exitMBB, BranchProbability::getOne());
+
+ // thisMBB:
+ // addiu masklsb2,$0,-4 # 0xfffffffc
+ // and alignedaddr,ptr,masklsb2
+ // andi ptrlsb2,ptr,3
+ // xori ptrlsb2,ptrlsb2,3 # Only for BE
+ // sll shiftamt,ptrlsb2,3
+ // ori maskupper,$0,255 # 0xff
+ // sll mask,maskupper,shiftamt
+ // nor mask2,$0,mask
+ // andi maskedcmpval,cmpval,255
+ // sll shiftedcmpval,maskedcmpval,shiftamt
+ // andi maskednewval,newval,255
+ // sll shiftednewval,maskednewval,shiftamt
+
+ int64_t MaskImm = (Size == 1) ? 255 : 4095;
+ BuildMI(BB, DL, TII->get(ArePtrs64bit ? LoongArch::ADDI_D : LoongArch::ADDI_W), MaskLSB2)
+ .addReg(ABI.GetNullPtr()).addImm(-4);
+ BuildMI(BB, DL, TII->get(ArePtrs64bit ? LoongArch::AND : LoongArch::AND32), AlignedAddr)
+ .addReg(Ptr).addReg(MaskLSB2);
+ BuildMI(BB, DL, TII->get(LoongArch::ANDI32), PtrLSB2)
+ .addReg(Ptr, 0, ArePtrs64bit ?
LoongArch::sub_32 : 0).addImm(3);
+ BuildMI(BB, DL, TII->get(LoongArch::SLLI_W), ShiftAmt).addReg(PtrLSB2).addImm(3);
+
+ if (MaskImm == 4095) {
+ BuildMI(BB, DL, TII->get(LoongArch::LU12I_W32), MaskUppest).addImm(0xf);
+ BuildMI(BB, DL, TII->get(LoongArch::ORI32), MaskUpper)
+ .addReg(MaskUppest).addImm(MaskImm);
+ } else {
+ BuildMI(BB, DL, TII->get(LoongArch::ORI32), MaskUpper)
+ .addReg(LoongArch::ZERO).addImm(MaskImm);
+ }
+
+ BuildMI(BB, DL, TII->get(LoongArch::SLL_W), Mask)
+ .addReg(MaskUpper).addReg(ShiftAmt);
+ BuildMI(BB, DL, TII->get(LoongArch::NOR32), Mask2).addReg(LoongArch::ZERO).addReg(Mask);
+ if (MaskImm == 4095) {
+ BuildMI(BB, DL, TII->get(LoongArch::ORI32), Mask3)
+ .addReg(MaskUppest).addImm(MaskImm);
+ BuildMI(BB, DL, TII->get(LoongArch::AND32), MaskedCmpVal)
+ .addReg(CmpVal).addReg(Mask3);
+ BuildMI(BB, DL, TII->get(LoongArch::SLL_W), ShiftedCmpVal)
+ .addReg(MaskedCmpVal).addReg(ShiftAmt);
+ BuildMI(BB, DL, TII->get(LoongArch::AND32), MaskedNewVal)
+ .addReg(NewVal).addReg(Mask3);
+ } else {
+ BuildMI(BB, DL, TII->get(LoongArch::ANDI32), MaskedCmpVal)
+ .addReg(CmpVal).addImm(MaskImm);
+ BuildMI(BB, DL, TII->get(LoongArch::SLL_W), ShiftedCmpVal)
+ .addReg(MaskedCmpVal).addReg(ShiftAmt);
+ BuildMI(BB, DL, TII->get(LoongArch::ANDI32), MaskedNewVal)
+ .addReg(NewVal).addImm(MaskImm);
+ }
+ BuildMI(BB, DL, TII->get(LoongArch::SLL_W), ShiftedNewVal)
+ .addReg(MaskedNewVal).addReg(ShiftAmt);
+
+ // The purposes of the flags on the scratch registers are explained in
+ // emitAtomicBinary. In summary, we need a scratch register which is going
+ // to be undef and which is unique among the registers chosen for the
+ // instruction.
+
+ BuildMI(BB, DL, TII->get(LoongArch::DBAR)).addImm(0);
+ BuildMI(BB, DL, TII->get(AtomicOp))
+ .addReg(Dest, RegState::Define | RegState::EarlyClobber)
+ .addReg(AlignedAddr)
+ .addReg(Mask)
+ .addReg(ShiftedCmpVal)
+ .addReg(Mask2)
+ .addReg(ShiftedNewVal)
+ .addReg(ShiftAmt)
+ .addReg(Scratch, RegState::EarlyClobber | RegState::Define |
+ RegState::Dead | RegState::Implicit)
+ .addReg(Scratch2, RegState::EarlyClobber | RegState::Define |
+ RegState::Dead | RegState::Implicit);
+
+ MI.eraseFromParent(); // The instruction is gone now.
+
+ return exitMBB;
+}
+
+SDValue LoongArchTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
+ // The first operand is the chain, the second is the condition, the third is
+ // the block to branch to if the condition is true.
+ SDValue Chain = Op.getOperand(0);
+ SDValue Dest = Op.getOperand(2);
+ SDLoc DL(Op);
+
+ SDValue CondRes = createFPCmp(DAG, Op.getOperand(1));
+
+ // Return if flag is not set by a floating point comparison.
+ if (CondRes.getOpcode() != LoongArchISD::FPCmp)
+ return Op;
+
+ SDValue CCNode = CondRes.getOperand(2);
+ LoongArch::CondCode CC =
+ (LoongArch::CondCode)cast<ConstantSDNode>(CCNode)->getZExtValue();
+ unsigned Opc = invertFPCondCodeUser(CC) ? LoongArch::BRANCH_F : LoongArch::BRANCH_T;
+ SDValue BrCode = DAG.getConstant(Opc, DL, MVT::i32);
+ SDValue FCC0 = DAG.getRegister(LoongArch::FCC0, MVT::i32);
+ return DAG.getNode(LoongArchISD::FPBrcond, DL, Op.getValueType(), Chain, BrCode,
+ FCC0, Dest, CondRes);
+}
+
+SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Cond = createFPCmp(DAG, Op.getOperand(0));
+
+ // Return if flag is not set by a floating point comparison.
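+ // (For instance, a select fed by an integer setcc keeps the generic
+ // lowering; only selects whose condition is an FP compare are turned
+ // into FSEL below.)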
+ if (Cond.getOpcode() != LoongArchISD::FPCmp)
+ return Op;
+
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2 = Op.getOperand(2);
+ SDLoc DL(Op);
+
+ ConstantSDNode *CC = cast<ConstantSDNode>(Cond.getOperand(2));
+ bool invert = invertFPCondCodeUser((LoongArch::CondCode)CC->getSExtValue());
+ SDValue FCC = DAG.getRegister(LoongArch::FCC0, MVT::i32);
+
+ if (Op->getSimpleValueType(0).SimpleTy == MVT::f64 ||
+ Op->getSimpleValueType(0).SimpleTy == MVT::f32) {
+ if (invert)
+ return DAG.getNode(LoongArchISD::FSEL, DL, N1.getValueType(), N1, FCC, N2,
+ Cond);
+ else
+ return DAG.getNode(LoongArchISD::FSEL, DL, N1.getValueType(), N2, FCC, N1,
+ Cond);
+ } else
+ return Op;
+}
+
+SDValue LoongArchTargetLowering::lowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Cond = createFPCmp(DAG, Op);
+
+ assert(Cond.getOpcode() == LoongArchISD::FPCmp &&
+ "Floating point operand expected.");
+
+ SDLoc DL(Op);
+ SDValue True = DAG.getConstant(1, DL, MVT::i32);
+ SDValue False = DAG.getConstant(0, DL, MVT::i32);
+
+ return createCMovFP(DAG, Cond, True, False, DL);
+}
+
+SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
+
+ const GlobalValue *GV = N->getGlobal();
+ bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
+ SDValue Addr = getAddr(N, DAG, IsLocal);
+
+ return Addr;
+}
+
+SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
+
+ return getAddr(N, DAG);
+}
+
+SDValue LoongArchTargetLowering::
+lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
+{
+ GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+ if (DAG.getTarget().useEmulatedTLS())
+ return LowerToTLSEmulatedModel(GA, DAG);
+
+ SDLoc DL(GA);
+ const GlobalValue *GV = GA->getGlobal();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+
+ TLSModel::Model model = getTargetMachine().getTLSModel(GV);
+
+ if (model == TLSModel::GeneralDynamic || model == TLSModel::LocalDynamic) {
+ // General Dynamic TLS Model && Local Dynamic TLS Model
+ unsigned PtrSize = PtrVT.getSizeInBits();
+ IntegerType *PtrTy = Type::getIntNTy(*DAG.getContext(), PtrSize);
+ // SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, PtrTy, 0, 0);
+ SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0U);
+ SDValue Load = SDValue(DAG.getMachineNode(LoongArch::LoadAddrTLS_GD,
+ DL, PtrVT, Addr), 0);
+ SDValue TlsGetAddr = DAG.getExternalSymbol("__tls_get_addr", PtrVT);
+
+ ArgListTy Args;
+ ArgListEntry Entry;
+ Entry.Node = Load;
+ Entry.Ty = PtrTy;
+ Args.push_back(Entry);
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(DL)
+ .setChain(DAG.getEntryNode())
+ .setLibCallee(CallingConv::C, PtrTy, TlsGetAddr, std::move(Args));
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+
+ SDValue Ret = CallResult.first;
+
+ return Ret;
+ }
+
+ SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0U);
+ SDValue Offset;
+ if (model == TLSModel::InitialExec) {
+ // Initial Exec TLS Model
+ Offset = SDValue(DAG.getMachineNode(LoongArch::LoadAddrTLS_IE, DL,
+ PtrVT, Addr), 0);
+ } else {
+ // Local Exec TLS Model
+ assert(model == TLSModel::LocalExec);
+ Offset = SDValue(DAG.getMachineNode(LoongArch::LoadAddrTLS_LE, DL,
+ PtrVT, Addr), 0);
+ }
+
+ SDValue ThreadPointer = DAG.getRegister((PtrVT == MVT::i32)
+ ?
LoongArch::TP
+ : LoongArch::TP_64, PtrVT);
+ return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadPointer, Offset);
+}
+
+SDValue LoongArchTargetLowering::
+lowerJumpTable(SDValue Op, SelectionDAG &DAG) const
+{
+ JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
+
+ return getAddr(N, DAG);
+}
+
+SDValue LoongArchTargetLowering::
+lowerConstantPool(SDValue Op, SelectionDAG &DAG) const
+{
+ ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
+
+ return getAddr(N, DAG);
+}
+
+SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ LoongArchFunctionInfo *FuncInfo = MF.getInfo<LoongArchFunctionInfo>();
+
+ SDLoc DL(Op);
+ SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
+ getPointerTy(MF.getDataLayout()));
+
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
+ MachinePointerInfo(SV));
+}
+
+SDValue LoongArchTargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const {
+ SDNode *Node = Op.getNode();
+ EVT VT = Node->getValueType(0);
+ SDValue Chain = Node->getOperand(0);
+ SDValue VAListPtr = Node->getOperand(1);
+ const Align Align =
+ llvm::MaybeAlign(Node->getConstantOperandVal(3)).valueOrOne();
+ const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+ SDLoc DL(Node);
+ unsigned ArgSlotSizeInBytes = Subtarget.is64Bit() ? 8 : 4;
+
+ SDValue VAListLoad = DAG.getLoad(getPointerTy(DAG.getDataLayout()), DL, Chain,
+ VAListPtr, MachinePointerInfo(SV));
+ SDValue VAList = VAListLoad;
+
+ // Re-align the pointer if necessary.
+ // It should only ever be necessary for 64-bit types on ILP32D/ILP32F/ILP32S
+ // since the minimum argument alignment is the same as the maximum type
+ // alignment for LP64D/LP64S/LP64F.
+ //
+ // FIXME: We currently align too often. The code generator doesn't notice
+ // when the pointer is still aligned from the last va_arg (or pair of
+ // va_args for the i64 on ILP32D/ILP32F/ILP32S case).
+ if (Align > getMinStackArgumentAlignment()) {
+ VAList = DAG.getNode(
+ ISD::ADD, DL, VAList.getValueType(), VAList,
+ DAG.getConstant(Align.value() - 1, DL, VAList.getValueType()));
+
+ VAList = DAG.getNode(
+ ISD::AND, DL, VAList.getValueType(), VAList,
+ DAG.getConstant(-(int64_t)Align.value(), DL, VAList.getValueType()));
+ }
+
+ // Increment the pointer, VAList, to the next vaarg.
+ auto &TD = DAG.getDataLayout();
+ unsigned ArgSizeInBytes =
+ TD.getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext()));
+ SDValue Tmp3 =
+ DAG.getNode(ISD::ADD, DL, VAList.getValueType(), VAList,
+ DAG.getConstant(alignTo(ArgSizeInBytes, ArgSlotSizeInBytes),
+ DL, VAList.getValueType()));
+ // Store the incremented VAList to the legalized pointer.
+ Chain = DAG.getStore(VAListLoad.getValue(1), DL, Tmp3, VAListPtr,
+ MachinePointerInfo(SV));
+
+ // Load the actual argument out of the pointer VAList.
+ return DAG.getLoad(VT, DL, Chain, VAList, MachinePointerInfo());
+}
+
+SDValue LoongArchTargetLowering::
+lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
+ // check the depth
+ assert((cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() == 0) &&
+ "Frame address can only be determined for current frame.");
+
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI.setFrameAddressIsTaken(true);
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+ SDValue FrameAddr = DAG.getCopyFromReg(
+ DAG.getEntryNode(), DL,
+ Subtarget.is64Bit() ?
LoongArch::FP_64 : LoongArch::FP, VT);
+ return FrameAddr;
+}
+
+SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ if (verifyReturnAddressArgumentIsConstant(Op, DAG))
+ return SDValue();
+
+ // check the depth
+ assert((cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() == 0) &&
+ "Return address can be determined only for current frame.");
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ MVT VT = Op.getSimpleValueType();
+ unsigned RA = Subtarget.is64Bit() ? LoongArch::RA_64 : LoongArch::RA;
+ MFI.setReturnAddressIsTaken(true);
+
+ // Return RA, which contains the return address. Mark it an implicit live-in.
+ unsigned Reg = MF.addLiveIn(RA, getRegClassFor(VT));
+ return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, VT);
+}
+
+// An EH_RETURN is the result of lowering llvm.eh.return which in turn is
+// generated from __builtin_eh_return (offset, handler).
+// The effect of this is to adjust the stack pointer by "offset"
+// and then branch to "handler".
+SDValue LoongArchTargetLowering::lowerEH_RETURN(SDValue Op, SelectionDAG &DAG)
+ const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ LoongArchFunctionInfo *LoongArchFI = MF.getInfo<LoongArchFunctionInfo>();
+
+ LoongArchFI->setCallsEhReturn();
+ SDValue Chain = Op.getOperand(0);
+ SDValue Offset = Op.getOperand(1);
+ SDValue Handler = Op.getOperand(2);
+ SDLoc DL(Op);
+ EVT Ty = Subtarget.is64Bit() ? MVT::i64 : MVT::i32;
+
+ // Store stack offset in A1, store jump target in A0. Glue CopyToReg and
+ // EH_RETURN nodes, so that instructions are emitted back-to-back.
+ unsigned OffsetReg = Subtarget.is64Bit() ? LoongArch::A1_64 : LoongArch::A1;
+ unsigned AddrReg = Subtarget.is64Bit() ? LoongArch::A0_64 : LoongArch::A0;
+ Chain = DAG.getCopyToReg(Chain, DL, OffsetReg, Offset, SDValue());
+ Chain = DAG.getCopyToReg(Chain, DL, AddrReg, Handler, Chain.getValue(1));
+ return DAG.getNode(LoongArchISD::EH_RETURN, DL, MVT::Other, Chain,
+ DAG.getRegister(OffsetReg, Ty),
+ DAG.getRegister(AddrReg, getPointerTy(MF.getDataLayout())),
+ Chain.getValue(1));
+}
+
+SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
+ SelectionDAG &DAG) const {
+ // FIXME: Need pseudo-fence for 'singlethread' fences
+ // FIXME: Set SType for weaker fences where supported/appropriate.
+ unsigned SType = 0;
+ SDLoc DL(Op);
+ return DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(SType, DL, MVT::i32));
+}
+
+SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ MVT VT = Subtarget.is64Bit() ?
MVT::i64 : MVT::i32; + + SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1); + SDValue Shamt = Op.getOperand(2); + // if shamt < (VT.bits): + // lo = (shl lo, shamt) + // hi = (or (shl hi, shamt) (srl (srl lo, 1), ~shamt)) + // else: + // lo = 0 + // hi = (shl lo, shamt[4:0]) + SDValue Not = DAG.getNode(ISD::XOR, DL, MVT::i32, Shamt, + DAG.getConstant(-1, DL, MVT::i32)); + SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, + DAG.getConstant(1, DL, VT)); + SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, Not); + SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); + SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); + SDValue ShiftLeftLo = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); + SDValue Cond = DAG.getNode(ISD::AND, DL, MVT::i32, Shamt, + DAG.getConstant(VT.getSizeInBits(), DL, MVT::i32)); + Lo = DAG.getNode(ISD::SELECT, DL, VT, Cond, + DAG.getConstant(0, DL, VT), ShiftLeftLo); + Hi = DAG.getNode(ISD::SELECT, DL, VT, Cond, ShiftLeftLo, Or); + + SDValue Ops[2] = {Lo, Hi}; + return DAG.getMergeValues(Ops, DL); +} + +SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, + bool IsSRA) const { + SDLoc DL(Op); + SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1); + SDValue Shamt = Op.getOperand(2); + MVT VT = Subtarget.is64Bit() ? MVT::i64 : MVT::i32; + + // if shamt < (VT.bits): + // lo = (or (shl (shl hi, 1), ~shamt) (srl lo, shamt)) + // if isSRA: + // hi = (sra hi, shamt) + // else: + // hi = (srl hi, shamt) + // else: + // if isSRA: + // lo = (sra hi, shamt[4:0]) + // hi = (sra hi, 31) + // else: + // lo = (srl hi, shamt[4:0]) + // hi = 0 + SDValue Not = DAG.getNode(ISD::XOR, DL, MVT::i32, Shamt, + DAG.getConstant(-1, DL, MVT::i32)); + SDValue ShiftLeft1Hi = DAG.getNode(ISD::SHL, DL, VT, Hi, + DAG.getConstant(1, DL, VT)); + SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, ShiftLeft1Hi, Not); + SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt); + SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); + SDValue ShiftRightHi = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, + DL, VT, Hi, Shamt); + SDValue Cond = DAG.getNode(ISD::AND, DL, MVT::i32, Shamt, + DAG.getConstant(VT.getSizeInBits(), DL, MVT::i32)); + SDValue Ext = DAG.getNode(ISD::SRA, DL, VT, Hi, + DAG.getConstant(VT.getSizeInBits() - 1, DL, VT)); + Lo = DAG.getNode(ISD::SELECT, DL, VT, Cond, ShiftRightHi, Or); + Hi = DAG.getNode(ISD::SELECT, DL, VT, Cond, + IsSRA ? Ext : DAG.getConstant(0, DL, VT), ShiftRightHi); + + SDValue Ops[2] = {Lo, Hi}; + return DAG.getMergeValues(Ops, DL); +} + +// Lower (store (fp_to_sint $fp) $ptr) to (store (TruncIntFP $fp), $ptr). 
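+// For example, C++ code along the lines of
+//   *p = (int)f;
+// would otherwise round-trip the converted value through a GPR; matching the
+// TruncIntFP node here lets the truncating convert feed the store directly.
+// (A sketch of the intent; the exact pattern matched is the DAG form above.)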
+static SDValue lowerFP_TO_SINT_STORE(StoreSDNode *SD, SelectionDAG &DAG,
+ bool SingleFloat) {
+ SDValue Val = SD->getValue();
+
+ if (Val.getOpcode() != ISD::FP_TO_SINT ||
+ (Val.getValueSizeInBits() > 32 && SingleFloat))
+ return SDValue();
+
+ EVT FPTy = EVT::getFloatingPointVT(Val.getValueSizeInBits());
+ SDValue Tr = DAG.getNode(LoongArchISD::TruncIntFP, SDLoc(Val), FPTy,
+ Val.getOperand(0));
+ return DAG.getStore(SD->getChain(), SDLoc(SD), Tr, SD->getBasePtr(),
+ SD->getPointerInfo(), SD->getAlignment(),
+ SD->getMemOperand()->getFlags());
+}
+
+SDValue LoongArchTargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+ StoreSDNode *SD = cast<StoreSDNode>(Op);
+ return lowerFP_TO_SINT_STORE(
+ SD, DAG, (Subtarget.hasBasicF() && !Subtarget.hasBasicD()));
+}
+
+SDValue LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ unsigned Intrinsic = cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue();
+ switch (Intrinsic) {
+ default:
+ return SDValue();
+ case Intrinsic::loongarch_lsx_vaddi_bu:
+ case Intrinsic::loongarch_lsx_vaddi_hu:
+ case Intrinsic::loongarch_lsx_vaddi_wu:
+ case Intrinsic::loongarch_lsx_vaddi_du:
+ return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1),
+ lowerLSXSplatImm(Op, 2, DAG));
+ case Intrinsic::loongarch_lsx_vand_v:
+ case Intrinsic::loongarch_lasx_xvand_v:
+ return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2));
+ case Intrinsic::loongarch_lsx_vbitclr_b:
+ case Intrinsic::loongarch_lsx_vbitclr_h:
+ case Intrinsic::loongarch_lsx_vbitclr_w:
+ case Intrinsic::loongarch_lsx_vbitclr_d:
+ return lowerLSXBitClear(Op, DAG);
+ case Intrinsic::loongarch_lsx_vdiv_b:
+ case Intrinsic::loongarch_lsx_vdiv_h:
+ case Intrinsic::loongarch_lsx_vdiv_w:
+ case Intrinsic::loongarch_lsx_vdiv_d:
+ return DAG.getNode(ISD::SDIV, DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2));
+ case Intrinsic::loongarch_lsx_vdiv_bu:
+ case Intrinsic::loongarch_lsx_vdiv_hu:
+ case Intrinsic::loongarch_lsx_vdiv_wu:
+ case Intrinsic::loongarch_lsx_vdiv_du:
+ return DAG.getNode(ISD::UDIV, DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2));
+ case Intrinsic::loongarch_lsx_vfdiv_s:
+ case Intrinsic::loongarch_lsx_vfdiv_d:
+ return DAG.getNode(ISD::FDIV, DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2));
+ case Intrinsic::loongarch_lsx_vffint_s_wu:
+ case Intrinsic::loongarch_lsx_vffint_d_lu:
+ return DAG.getNode(ISD::UINT_TO_FP, DL, Op->getValueType(0),
+ Op->getOperand(1));
+ case Intrinsic::loongarch_lsx_vffint_s_w:
+ case Intrinsic::loongarch_lsx_vffint_d_l:
+ return DAG.getNode(ISD::SINT_TO_FP, DL, Op->getValueType(0),
+ Op->getOperand(1));
+ case Intrinsic::loongarch_lsx_vfmul_s:
+ case Intrinsic::loongarch_lsx_vfmul_d:
+ return DAG.getNode(ISD::FMUL, DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2));
+ case Intrinsic::loongarch_lsx_vfrint_s:
+ case Intrinsic::loongarch_lsx_vfrint_d:
+ return DAG.getNode(ISD::FRINT, DL, Op->getValueType(0), Op->getOperand(1));
+ case Intrinsic::loongarch_lsx_vfsqrt_s:
+ case Intrinsic::loongarch_lsx_vfsqrt_d:
+ return DAG.getNode(ISD::FSQRT, DL, Op->getValueType(0), Op->getOperand(1));
+ case Intrinsic::loongarch_lsx_vftintrz_wu_s:
+ case Intrinsic::loongarch_lsx_vftintrz_lu_d:
+ return DAG.getNode(ISD::FP_TO_UINT, DL, Op->getValueType(0),
+ Op->getOperand(1));
+ case Intrinsic::loongarch_lsx_vpackev_b:
+ case Intrinsic::loongarch_lsx_vpackev_h:
+ case Intrinsic::loongarch_lsx_vpackev_w:
+ case
Intrinsic::loongarch_lsx_vpackev_d: + return DAG.getNode(LoongArchISD::VPACKEV, DL, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::loongarch_lsx_vilvh_b: + case Intrinsic::loongarch_lsx_vilvh_h: + case Intrinsic::loongarch_lsx_vilvh_w: + case Intrinsic::loongarch_lsx_vilvh_d: + return DAG.getNode(LoongArchISD::VILVH, DL, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::loongarch_lsx_vpackod_b: + case Intrinsic::loongarch_lsx_vpackod_h: + case Intrinsic::loongarch_lsx_vpackod_w: + case Intrinsic::loongarch_lsx_vpackod_d: + return DAG.getNode(LoongArchISD::VPACKOD, DL, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::loongarch_lsx_vilvl_b: + case Intrinsic::loongarch_lsx_vilvl_h: + case Intrinsic::loongarch_lsx_vilvl_w: + case Intrinsic::loongarch_lsx_vilvl_d: + return DAG.getNode(LoongArchISD::VILVL, DL, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::loongarch_lsx_vmadd_b: + case Intrinsic::loongarch_lsx_vmadd_h: + case Intrinsic::loongarch_lsx_vmadd_w: + case Intrinsic::loongarch_lsx_vmadd_d: { + EVT ResTy = Op->getValueType(0); + return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1), + DAG.getNode(ISD::MUL, SDLoc(Op), ResTy, + Op->getOperand(2), Op->getOperand(3))); + } + case Intrinsic::loongarch_lsx_vmax_b: + case Intrinsic::loongarch_lsx_vmax_h: + case Intrinsic::loongarch_lsx_vmax_w: + case Intrinsic::loongarch_lsx_vmax_d: + return DAG.getNode(ISD::SMAX, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); + case Intrinsic::loongarch_lsx_vmax_bu: + case Intrinsic::loongarch_lsx_vmax_hu: + case Intrinsic::loongarch_lsx_vmax_wu: + case Intrinsic::loongarch_lsx_vmax_du: + return DAG.getNode(ISD::UMAX, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); + case Intrinsic::loongarch_lsx_vmin_b: + case Intrinsic::loongarch_lsx_vmin_h: + case Intrinsic::loongarch_lsx_vmin_w: + case Intrinsic::loongarch_lsx_vmin_d: + return DAG.getNode(ISD::SMIN, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); + case Intrinsic::loongarch_lsx_vmin_bu: + case Intrinsic::loongarch_lsx_vmin_hu: + case Intrinsic::loongarch_lsx_vmin_wu: + case Intrinsic::loongarch_lsx_vmin_du: + return DAG.getNode(ISD::UMIN, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); + case Intrinsic::loongarch_lsx_vmini_bu: + case Intrinsic::loongarch_lsx_vmini_hu: + case Intrinsic::loongarch_lsx_vmini_wu: + case Intrinsic::loongarch_lsx_vmini_du: + return DAG.getNode(ISD::UMIN, DL, Op->getValueType(0), Op->getOperand(1), + lowerLSXSplatImm(Op, 2, DAG)); + case Intrinsic::loongarch_lsx_vmod_b: + case Intrinsic::loongarch_lsx_vmod_h: + case Intrinsic::loongarch_lsx_vmod_w: + case Intrinsic::loongarch_lsx_vmod_d: + return DAG.getNode(ISD::SREM, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); + case Intrinsic::loongarch_lsx_vmod_bu: + case Intrinsic::loongarch_lsx_vmod_hu: + case Intrinsic::loongarch_lsx_vmod_wu: + case Intrinsic::loongarch_lsx_vmod_du: + return DAG.getNode(ISD::UREM, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); + case Intrinsic::loongarch_lsx_vmul_b: + case Intrinsic::loongarch_lsx_vmul_h: + case Intrinsic::loongarch_lsx_vmul_w: + case Intrinsic::loongarch_lsx_vmul_d: + return DAG.getNode(ISD::MUL, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); + case Intrinsic::loongarch_lsx_vmsub_b: + case Intrinsic::loongarch_lsx_vmsub_h: + case Intrinsic::loongarch_lsx_vmsub_w: + case 
Intrinsic::loongarch_lsx_vmsub_d: { + EVT ResTy = Op->getValueType(0); + return DAG.getNode(ISD::SUB, SDLoc(Op), ResTy, Op->getOperand(1), + DAG.getNode(ISD::MUL, SDLoc(Op), ResTy, + Op->getOperand(2), Op->getOperand(3))); + } + case Intrinsic::loongarch_lsx_vclz_b: + case Intrinsic::loongarch_lsx_vclz_h: + case Intrinsic::loongarch_lsx_vclz_w: + case Intrinsic::loongarch_lsx_vclz_d: + return DAG.getNode(ISD::CTLZ, DL, Op->getValueType(0), Op->getOperand(1)); + case Intrinsic::loongarch_lsx_vnor_v: + case Intrinsic::loongarch_lasx_xvnor_v: { + SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + return DAG.getNOT(DL, Res, Res->getValueType(0)); + } + case Intrinsic::loongarch_lsx_vor_v: + case Intrinsic::loongarch_lasx_xvor_v: + return DAG.getNode(ISD::OR, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); + case Intrinsic::loongarch_lsx_vpickev_b: + case Intrinsic::loongarch_lsx_vpickev_h: + case Intrinsic::loongarch_lsx_vpickev_w: + case Intrinsic::loongarch_lsx_vpickev_d: + return DAG.getNode(LoongArchISD::VPICKEV, DL, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::loongarch_lsx_vpickod_b: + case Intrinsic::loongarch_lsx_vpickod_h: + case Intrinsic::loongarch_lsx_vpickod_w: + case Intrinsic::loongarch_lsx_vpickod_d: + return DAG.getNode(LoongArchISD::VPICKOD, DL, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::loongarch_lsx_vpcnt_b: + case Intrinsic::loongarch_lsx_vpcnt_h: + case Intrinsic::loongarch_lsx_vpcnt_w: + case Intrinsic::loongarch_lsx_vpcnt_d: + return DAG.getNode(ISD::CTPOP, DL, Op->getValueType(0), Op->getOperand(1)); + case Intrinsic::loongarch_lsx_vsat_b: + case Intrinsic::loongarch_lsx_vsat_h: + case Intrinsic::loongarch_lsx_vsat_w: + case Intrinsic::loongarch_lsx_vsat_d: + case Intrinsic::loongarch_lsx_vsat_bu: + case Intrinsic::loongarch_lsx_vsat_hu: + case Intrinsic::loongarch_lsx_vsat_wu: + case Intrinsic::loongarch_lsx_vsat_du: { + // Report an error for out of range values. 
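+ // (vsat.b/vsat.bu encode a 3-bit immediate, vsat.h/hu a 4-bit one,
+ // vsat.w/wu a 5-bit one and vsat.d/du a 6-bit one, which is where the
+ // Max values below come from.)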
+ int64_t Max;
+ switch (Intrinsic) {
+ case Intrinsic::loongarch_lsx_vsat_b:
+ case Intrinsic::loongarch_lsx_vsat_bu:
+ Max = 7;
+ break;
+ case Intrinsic::loongarch_lsx_vsat_h:
+ case Intrinsic::loongarch_lsx_vsat_hu:
+ Max = 15;
+ break;
+ case Intrinsic::loongarch_lsx_vsat_w:
+ case Intrinsic::loongarch_lsx_vsat_wu:
+ Max = 31;
+ break;
+ case Intrinsic::loongarch_lsx_vsat_d:
+ case Intrinsic::loongarch_lsx_vsat_du:
+ Max = 63;
+ break;
+ default:
+ llvm_unreachable("Unmatched intrinsic");
+ }
+ int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
+ if (Value < 0 || Value > Max)
+ report_fatal_error("Immediate out of range");
+ return SDValue();
+ }
+ case Intrinsic::loongarch_lsx_vshuf4i_b:
+ case Intrinsic::loongarch_lsx_vshuf4i_h:
+ case Intrinsic::loongarch_lsx_vshuf4i_w:
+ // case Intrinsic::loongarch_lsx_vshuf4i_d:
+ {
+ int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
+ if (Value < 0 || Value > 255)
+ report_fatal_error("Immediate out of range");
+ return DAG.getNode(LoongArchISD::SHF, DL, Op->getValueType(0),
+ Op->getOperand(2), Op->getOperand(1));
+ }
+ case Intrinsic::loongarch_lsx_vsll_b:
+ case Intrinsic::loongarch_lsx_vsll_h:
+ case Intrinsic::loongarch_lsx_vsll_w:
+ case Intrinsic::loongarch_lsx_vsll_d:
+ return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1),
+ truncateVecElts(Op, DAG));
+ case Intrinsic::loongarch_lsx_vslli_b:
+ case Intrinsic::loongarch_lsx_vslli_h:
+ case Intrinsic::loongarch_lsx_vslli_w:
+ case Intrinsic::loongarch_lsx_vslli_d:
+ return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1),
+ lowerLSXSplatImm(Op, 2, DAG));
+ case Intrinsic::loongarch_lsx_vreplve_b:
+ case Intrinsic::loongarch_lsx_vreplve_h:
+ case Intrinsic::loongarch_lsx_vreplve_w:
+ case Intrinsic::loongarch_lsx_vreplve_d:
+ // We can't lower via VECTOR_SHUFFLE because it requires constant shuffle
+ // masks, nor can we lower via BUILD_VECTOR & EXTRACT_VECTOR_ELT because
+ // EXTRACT_VECTOR_ELT can't extract i64's on LoongArch32.
+ // Instead we lower to LoongArchISD::VSHF and match from there.
+ return DAG.getNode(LoongArchISD::VSHF, DL, Op->getValueType(0),
+ lowerLSXSplatZExt(Op, 2, DAG), Op->getOperand(1),
+ Op->getOperand(1));
+ case Intrinsic::loongarch_lsx_vreplvei_b:
+ case Intrinsic::loongarch_lsx_vreplvei_h:
+ case Intrinsic::loongarch_lsx_vreplvei_w:
+ case Intrinsic::loongarch_lsx_vreplvei_d:
+ return DAG.getNode(LoongArchISD::VSHF, DL, Op->getValueType(0),
+ lowerLSXSplatImm(Op, 2, DAG), Op->getOperand(1),
+ Op->getOperand(1));
+ case Intrinsic::loongarch_lsx_vsra_b:
+ case Intrinsic::loongarch_lsx_vsra_h:
+ case Intrinsic::loongarch_lsx_vsra_w:
+ case Intrinsic::loongarch_lsx_vsra_d:
+ return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), Op->getOperand(1),
+ truncateVecElts(Op, DAG));
+ case Intrinsic::loongarch_lsx_vsrari_b:
+ case Intrinsic::loongarch_lsx_vsrari_h:
+ case Intrinsic::loongarch_lsx_vsrari_w:
+ case Intrinsic::loongarch_lsx_vsrari_d: {
+ // Report an error for out of range values.
+ int64_t Max;
+ switch (Intrinsic) {
+ case Intrinsic::loongarch_lsx_vsrari_b:
+ Max = 7;
+ break;
+ case Intrinsic::loongarch_lsx_vsrari_h:
+ Max = 15;
+ break;
+ case Intrinsic::loongarch_lsx_vsrari_w:
+ Max = 31;
+ break;
+ case Intrinsic::loongarch_lsx_vsrari_d:
+ Max = 63;
+ break;
+ default:
+ llvm_unreachable("Unmatched intrinsic");
+ }
+ int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
+ if (Value < 0 || Value > Max)
+ report_fatal_error("Immediate out of range");
+ return SDValue();
+ }
+ case Intrinsic::loongarch_lsx_vsrl_b:
+ case Intrinsic::loongarch_lsx_vsrl_h:
+ case Intrinsic::loongarch_lsx_vsrl_w:
+ case Intrinsic::loongarch_lsx_vsrl_d:
+ return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1),
+ truncateVecElts(Op, DAG));
+ case Intrinsic::loongarch_lsx_vsrli_b:
+ case Intrinsic::loongarch_lsx_vsrli_h:
+ case Intrinsic::loongarch_lsx_vsrli_w:
+ case Intrinsic::loongarch_lsx_vsrli_d:
+ return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1),
+ lowerLSXSplatImm(Op, 2, DAG));
+ case Intrinsic::loongarch_lsx_vsrlri_b:
+ case Intrinsic::loongarch_lsx_vsrlri_h:
+ case Intrinsic::loongarch_lsx_vsrlri_w:
+ case Intrinsic::loongarch_lsx_vsrlri_d: {
+ // Report an error for out of range values.
+ int64_t Max;
+ switch (Intrinsic) {
+ case Intrinsic::loongarch_lsx_vsrlri_b:
+ Max = 7;
+ break;
+ case Intrinsic::loongarch_lsx_vsrlri_h:
+ Max = 15;
+ break;
+ case Intrinsic::loongarch_lsx_vsrlri_w:
+ Max = 31;
+ break;
+ case Intrinsic::loongarch_lsx_vsrlri_d:
+ Max = 63;
+ break;
+ default:
+ llvm_unreachable("Unmatched intrinsic");
+ }
+ int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
+ if (Value < 0 || Value > Max)
+ report_fatal_error("Immediate out of range");
+ return SDValue();
+ }
+ case Intrinsic::loongarch_lsx_vsubi_bu:
+ case Intrinsic::loongarch_lsx_vsubi_hu:
+ case Intrinsic::loongarch_lsx_vsubi_wu:
+ case Intrinsic::loongarch_lsx_vsubi_du:
+ return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), Op->getOperand(1),
+ lowerLSXSplatImm(Op, 2, DAG));
+ case Intrinsic::loongarch_lsx_vshuf_h:
+ case Intrinsic::loongarch_lsx_vshuf_w:
+ case Intrinsic::loongarch_lsx_vshuf_d:
+ case Intrinsic::loongarch_lasx_xvshuf_h:
+ case Intrinsic::loongarch_lasx_xvshuf_w:
+ case Intrinsic::loongarch_lasx_xvshuf_d:
+ return DAG.getNode(LoongArchISD::VSHF, DL, Op->getValueType(0),
+ Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
+ case Intrinsic::loongarch_lsx_vxor_v:
+ case Intrinsic::loongarch_lasx_xvxor_v:
+ return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2));
+ case Intrinsic::loongarch_lsx_vrotr_b:
+ case Intrinsic::loongarch_lsx_vrotr_h:
+ case Intrinsic::loongarch_lsx_vrotr_w:
+ case Intrinsic::loongarch_lsx_vrotr_d:
+ return DAG.getNode(LoongArchISD::VROR, DL, Op->getValueType(0),
+ Op->getOperand(1), Op->getOperand(2));
+ case Intrinsic::loongarch_lsx_vrotri_b:
+ case Intrinsic::loongarch_lsx_vrotri_h:
+ case Intrinsic::loongarch_lsx_vrotri_w:
+ case Intrinsic::loongarch_lsx_vrotri_d:
+ return DAG.getNode(LoongArchISD::VRORI, DL, Op->getValueType(0),
+ Op->getOperand(1), Op->getOperand(2));
+ case Intrinsic::thread_pointer: {
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ if (PtrVT == MVT::i64)
+ return DAG.getRegister(LoongArch::TP_64, MVT::i64);
+ return DAG.getRegister(LoongArch::TP, MVT::i32);
+ }
+ }
+}
+
+SDValue
+LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
+ switch (Intr)
{
+ default:
+ return SDValue();
+ case Intrinsic::loongarch_lsx_vld:
+ return lowerLSXLoadIntr(Op, DAG, Intr, Subtarget);
+ case Intrinsic::loongarch_lasx_xvld:
+ return lowerLASXLoadIntr(Op, DAG, Intr, Subtarget);
+ case Intrinsic::loongarch_lasx_xvldrepl_b:
+ case Intrinsic::loongarch_lasx_xvldrepl_h:
+ case Intrinsic::loongarch_lasx_xvldrepl_w:
+ case Intrinsic::loongarch_lasx_xvldrepl_d:
+ return lowerLASXVLDRIntr(Op, DAG, Intr, Subtarget);
+ case Intrinsic::loongarch_lsx_vldrepl_b:
+ case Intrinsic::loongarch_lsx_vldrepl_h:
+ case Intrinsic::loongarch_lsx_vldrepl_w:
+ case Intrinsic::loongarch_lsx_vldrepl_d:
+ return lowerLSXVLDRIntr(Op, DAG, Intr, Subtarget);
+ }
+}
+
+SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
+ SelectionDAG &DAG) const {
+ unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
+ switch (Intr) {
+ default:
+ return SDValue();
+ case Intrinsic::loongarch_lsx_vst:
+ return lowerLSXStoreIntr(Op, DAG, Intr, Subtarget);
+ case Intrinsic::loongarch_lasx_xvst:
+ return lowerLASXStoreIntr(Op, DAG, Intr, Subtarget);
+ }
+}
+
+// Lower ISD::EXTRACT_VECTOR_ELT into LoongArchISD::VEXTRACT_SEXT_ELT.
+//
+// The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We
+// choose to sign-extend but we could have equally chosen zero-extend. The
+// DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT
+// result into this node later (possibly changing it to a zero-extend in the
+// process).
+SDValue
+LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT ResTy = Op->getValueType(0);
+ SDValue Op0 = Op->getOperand(0);
+ EVT VecTy = Op0->getValueType(0);
+
+ if (!VecTy.is128BitVector() && !VecTy.is256BitVector())
+ return SDValue();
+
+ if (ResTy.isInteger()) {
+ SDValue Op1 = Op->getOperand(1);
+ EVT EltTy = VecTy.getVectorElementType();
+ if (VecTy.is128BitVector())
+ return DAG.getNode(LoongArchISD::VEXTRACT_SEXT_ELT, DL, ResTy, Op0, Op1,
+ DAG.getValueType(EltTy));
+
+ ConstantSDNode *cn = dyn_cast<ConstantSDNode>(Op1);
+ if (!cn)
+ return SDValue();
+
+ if (EltTy == MVT::i32 || EltTy == MVT::i64)
+ return DAG.getNode(LoongArchISD::VEXTRACT_SEXT_ELT, DL, ResTy, Op0, Op1,
+ DAG.getValueType(EltTy));
+ }
+
+ return SDValue();
+}
+
+SDValue
+LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+
+ MVT VT = Op.getSimpleValueType();
+ MVT EltVT = VT.getVectorElementType();
+
+ SDLoc DL(Op);
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Op2 = Op.getOperand(2);
+
+ if (!EltVT.isInteger())
+ return Op;
+
+ if (!isa<ConstantSDNode>(Op2)) {
+ if (EltVT == MVT::i8 || EltVT == MVT::i16)
+ return Op; // Handled by the insert pseudos rather than via the stack.
+ return Op;
+ }
+
+ if (VT.is128BitVector())
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Op0, Op1, Op2);
+
+ if (VT.is256BitVector()) {
+
+ if (EltVT == MVT::i32 || EltVT == MVT::i64)
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Op0, Op1, Op2);
+
+ return Op;
+ }
+
+ return SDValue();
+}
+
+// Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the
+// backend.
+//
+// Lowers according to the following rules:
+// - Constant splats are legal as-is as long as the SplatBitSize is a power of
+// 2 less than or equal to 64 and the value fits into a signed 10-bit
+// immediate.
+// - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize
+// is a power of 2 less than or equal to 64 and the value does not fit into a
+// signed 10-bit immediate.
+// - Non-constant splats are legal as-is.
+// - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT.
+// - All others are illegal and must be expanded.
+SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
+ EVT ResTy = Op->getValueType(0);
+ SDLoc DL(Op);
+ APInt SplatValue, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+
+ if ((!Subtarget.hasLSX() || !ResTy.is128BitVector()) &&
+ (!Subtarget.hasLASX() || !ResTy.is256BitVector()))
+ return SDValue();
+
+ if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
+ 8) &&
+ SplatBitSize <= 64) {
+ // We can only cope with 8, 16, 32, or 64-bit elements.
+ if ((ResTy.is128BitVector() && SplatBitSize != 8 && SplatBitSize != 16 &&
+ SplatBitSize != 32 && SplatBitSize != 64) ||
+ (ResTy.is256BitVector() && SplatBitSize != 8 && SplatBitSize != 16 &&
+ SplatBitSize != 32 && SplatBitSize != 64))
+ return SDValue();
+
+ // If the value isn't an integer type we will have to bitcast
+ // from an integer type first. Also, if there are any undefs, we must
+ // lower them to defined values first.
+ if (ResTy.isInteger() && !HasAnyUndefs)
+ return Op;
+
+ EVT ViaVecTy;
+
+ if ((ResTy.is128BitVector() &&
+ !isLSXBySplatBitSize(SplatBitSize, ViaVecTy)) ||
+ (ResTy.is256BitVector() &&
+ !isLASXBySplatBitSize(SplatBitSize, ViaVecTy)))
+ return SDValue();
+
+ // SelectionDAG::getConstant will promote SplatValue appropriately.
+ SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
+
+ // Bitcast to the type we originally wanted.
+ if (ViaVecTy != ResTy)
+ Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
+
+ return Result;
+ } else if (DAG.isSplatValue(Op, /* AllowUndefs */ false))
+ return Op;
+ else if (!isConstantOrUndefBUILD_VECTOR(Node)) {
+ // Use INSERT_VECTOR_ELT operations rather than expand to stores.
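+ // (For example, a non-constant, non-splat v4i32 {a,b,c,d} becomes four
+ // chained insert_vector_elt nodes into an undef vector, built by the
+ // loop below.)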
+ // The resulting code is the same length as the expansion, but it doesn't
+ // use memory operations.
+ EVT ResTy = Node->getValueType(0);
+
+ assert(ResTy.isVector());
+
+ unsigned NumElts = ResTy.getVectorNumElements();
+ SDValue Vector = DAG.getUNDEF(ResTy);
+ for (unsigned i = 0; i < NumElts; ++i) {
+ Vector =
+ DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
+ Node->getOperand(i), DAG.getConstant(i, DL, MVT::i32));
+ }
+ return Vector;
+ }
+
+ return SDValue();
+}
+
+SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT ResTy = Op->getValueType(0);
+ Op = LowerSUINT_TO_FP(ISD::ZERO_EXTEND_VECTOR_INREG, Op, DAG);
+ if (!ResTy.isVector())
+ return Op;
+ return DAG.getNode(ISD::UINT_TO_FP, DL, ResTy, Op);
+}
+
+SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT ResTy = Op->getValueType(0);
+ Op = LowerSUINT_TO_FP(ISD::SIGN_EXTEND_VECTOR_INREG, Op, DAG);
+ if (!ResTy.isVector())
+ return Op;
+ return DAG.getNode(ISD::SINT_TO_FP, DL, ResTy, Op);
+}
+
+SDValue LoongArchTargetLowering::lowerFP_TO_UINT(SDValue Op,
+ SelectionDAG &DAG) const {
+ if (!Op->getValueType(0).isVector())
+ return SDValue();
+ return LowerFP_TO_SUINT(ISD::FP_TO_UINT, ISD::ZERO_EXTEND_VECTOR_INREG, Op,
+ DAG);
+}
+
+SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
+ SelectionDAG &DAG) const {
+ if (Op->getValueType(0).isVector())
+ return LowerFP_TO_SUINT(ISD::FP_TO_SINT, ISD::SIGN_EXTEND_VECTOR_INREG, Op,
+ DAG);
+
+ if (Op.getValueSizeInBits() > 32 &&
+ (Subtarget.hasBasicF() && !Subtarget.hasBasicD()))
+ return SDValue();
+
+ EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
+ SDValue Trunc =
+ DAG.getNode(LoongArchISD::TruncIntFP, SDLoc(Op), FPTy, Op.getOperand(0));
+ return DAG.getNode(ISD::BITCAST, SDLoc(Op), Op.getValueType(), Trunc);
+}
+
+static bool checkUndef(ArrayRef<int> Mask, int Lo, int Hi) {
+
+ for (int i = Lo, end = Hi; i != end; i++, Hi++)
+ if (!((Mask[i] == -1) || (Mask[i] == Hi)))
+ return false;
+ return true;
+}
+
+static bool CheckRev(ArrayRef<int> Mask) {
+
+ int Num = Mask.size() - 1;
+ for (long unsigned int i = 0; i < Mask.size(); i++, Num--)
+ if (Mask[i] != Num)
+ return false;
+ return true;
+}
+
+static bool checkHalf(ArrayRef<int> Mask, int Lo, int Hi, int base) {
+
+ for (int i = Lo; i < Hi; i++)
+ if (Mask[i] != (base + i))
+ return false;
+ return true;
+}
+
+static SDValue lowerHalfHalf(const SDLoc &DL, MVT VT, SDValue Op1, SDValue Op2,
+ ArrayRef<int> Mask, SelectionDAG &DAG) {
+
+ int Num = VT.getVectorNumElements();
+ int HalfNum = Num / 2;
+
+ if (Op1->isUndef() || Op2->isUndef() || Mask.size() > (long unsigned int)Num)
+ return SDValue();
+
+ if (checkHalf(Mask, HalfNum, Num, Num) && checkHalf(Mask, 0, HalfNum, 0)) {
+ return SDValue(DAG.getMachineNode(LoongArch::XVPERMI_Q, DL, VT, Op2, Op1,
+ DAG.getTargetConstant(48, DL, MVT::i32)),
+ 0);
+ }
+
+ return SDValue();
+}
+
+static bool checkHalfUndef(ArrayRef<int> Mask, int Lo, int Hi) {
+
+ for (int i = Lo; i < Hi; i++)
+ if (Mask[i] != -1)
+ return false;
+ return true;
+}
+
+// Lower vectors with half-undef data using EXTRACT_SUBVECTOR and
+// INSERT_SUBVECTOR instead of VECTOR_SHUFFLE.
+static SDValue lowerHalfUndef(const SDLoc &DL, MVT VT, SDValue Op1, SDValue Op2,
+ ArrayRef<int> Mask, SelectionDAG &DAG) {
+
+ int Num = VT.getVectorNumElements();
+ int HalfNum = Num / 2;
+ MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), HalfNum);
+ MVT VT1 = Op1.getSimpleValueType();
+ SDValue Op;
+
+ bool
check1 = Op1->isUndef() && (!Op2->isUndef()); + bool check2 = Op2->isUndef() && (!Op1->isUndef()); + + if ((check1 || check2) && (VT1 == VT)) { + if (check1) { + Op = DAG.getNode(ISD::BITCAST, DL, MVT::v4i64, Op2); + } else if (check2) { + Op = DAG.getNode(ISD::BITCAST, DL, MVT::v4i64, Op1); + } + + if (VT == MVT::v32i8 && CheckRev(Mask)) { + SDValue Vector; + SDValue Rev[4]; + SDValue Ext[4]; + for (int i = 0; i < 4; i++) { + Ext[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Op, + DAG.getConstant(i, DL, MVT::i32)); + Rev[i] = DAG.getNode(LoongArchISD::REVBD, DL, MVT::i64, Ext[i]); + } + + Vector = + DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i64, DAG.getUNDEF(VT), + Rev[3], DAG.getConstant(3, DL, MVT::i32)); + Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i64, Vector, + Rev[2], DAG.getConstant(2, DL, MVT::i32)); + Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i64, Vector, + Rev[1], DAG.getConstant(1, DL, MVT::i32)); + Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i64, Vector, + Rev[0], DAG.getConstant(0, DL, MVT::i32)); + + Vector = DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, Vector); + + return Vector; + } + } + + if (checkHalfUndef(Mask, HalfNum, Num) && checkUndef(Mask, 0, HalfNum)) { + SDValue High = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, Op1, + DAG.getConstant(HalfNum, DL, MVT::i64)); + return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), High, + DAG.getConstant(0, DL, MVT::i64)); + } + + if (checkHalfUndef(Mask, HalfNum, Num) && (VT == MVT::v8i32) && + (Mask[0] == 0) && (Mask[1] == 1) && (Mask[2] == (Num + 2)) && + (Mask[3] == (Num + 3))) { + + SDValue Val1 = + SDValue(DAG.getMachineNode(LoongArch::XVPERMI_Q, DL, VT, Op2, Op1, + DAG.getTargetConstant(32, DL, MVT::i32)), + 0); + + SDValue Val2 = + SDValue(DAG.getMachineNode(LoongArch::XVPERMI_D, DL, VT, Val1, + DAG.getTargetConstant(12, DL, MVT::i32)), + 0); + + SDValue Val3 = SDValue( + DAG.getMachineNode(LoongArch::XVPERMI_Q, DL, VT, Val2, DAG.getUNDEF(VT), + DAG.getTargetConstant(2, DL, MVT::i32)), + 0); + return Val3; + } + + if (checkHalfUndef(Mask, 0, HalfNum) && checkUndef(Mask, HalfNum, Num)) { + SDValue Low = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, Op1, + DAG.getConstant(0, DL, MVT::i32)); + return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Low, + DAG.getConstant(HalfNum, DL, MVT::i32)); + } + + if (checkHalfUndef(Mask, 0, HalfNum) && (VT == MVT::v8i32) && + (Mask[HalfNum] == HalfNum) && (Mask[HalfNum + 1] == (HalfNum + 1)) && + (Mask[HalfNum + 2] == (2 * Num - 2)) && + (Mask[HalfNum + 3] == (2 * Num - 1))) { + + SDValue Val1 = + SDValue(DAG.getMachineNode(LoongArch::XVPERMI_Q, DL, VT, Op2, Op1, + DAG.getTargetConstant(49, DL, MVT::i32)), + 0); + + SDValue Val2 = + SDValue(DAG.getMachineNode(LoongArch::XVPERMI_D, DL, VT, Val1, + DAG.getTargetConstant(12, DL, MVT::i32)), + 0); + + SDValue Val3 = SDValue( + DAG.getMachineNode(LoongArch::XVPERMI_Q, DL, VT, Val2, DAG.getUNDEF(VT), + DAG.getTargetConstant(32, DL, MVT::i32)), + 0); + return Val3; + } + + if ((VT == MVT::v8i32) || (VT == MVT::v4i64)) { + int def = 0; + int j = 0; + int ext[3]; + int ins[3]; + bool useOp1[3] = {true, true, true}; + bool checkdef = true; + + for (int i = 0; i < Num; i++) { + if (def > 2) { + checkdef = false; + break; + } + if (Mask[i] != -1) { + def++; + ins[j] = i; + if (Mask[i] >= Num) { + ext[j] = Mask[i] - Num; + useOp1[j] = false; + } else { + ext[j] = Mask[i]; + } + j++; + } + } + + if (checkdef) { + SDValue Vector = DAG.getUNDEF(VT); + EVT EltTy = 
VT.getVectorElementType();
+      SDValue Ext[2];
+
+      if (check1 || check2) {
+        for (int i = 0; i < def; i++) {
+          if (check1) {
+            Ext[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, Op2,
+                                 DAG.getConstant(ext[i], DL, MVT::i32));
+            Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vector, Ext[i],
+                                 DAG.getConstant(ins[i], DL, MVT::i32));
+          } else if (check2) {
+            Ext[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, Op1,
+                                 DAG.getConstant(ext[i], DL, MVT::i32));
+            Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vector, Ext[i],
+                                 DAG.getConstant(ins[i], DL, MVT::i32));
+          }
+        }
+        return Vector;
+      } else {
+        for (int i = 0; i < def; i++) {
+          if (!useOp1[i]) {
+            Ext[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, Op2,
+                                 DAG.getConstant(ext[i], DL, MVT::i32));
+            Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vector, Ext[i],
+                                 DAG.getConstant(ins[i], DL, MVT::i32));
+          } else {
+            Ext[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, Op1,
+                                 DAG.getConstant(ext[i], DL, MVT::i32));
+            Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vector, Ext[i],
+                                 DAG.getConstant(ins[i], DL, MVT::i32));
+          }
+        }
+        return Vector;
+      }
+    }
+  }
+
+  return SDValue();
+}
+
+static SDValue lowerHalfUndef_LSX(const SDLoc &DL, EVT ResTy, MVT VT,
+                                  SDValue Op1, SDValue Op2, ArrayRef<int> Mask,
+                                  SelectionDAG &DAG) {
+
+  MVT VT1 = Op1.getSimpleValueType();
+
+  bool check1 = Op1->isUndef() && (!Op2->isUndef());
+  bool check2 = Op2->isUndef() && (!Op1->isUndef());
+
+  if ((check1 || check2) && (VT1 == VT)) {
+    SDValue Op;
+
+    if (VT == MVT::v16i8 && CheckRev(Mask)) {
+
+      if (check1) {
+        Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op2);
+      } else if (check2) {
+        Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op1);
+      }
+
+      SDValue Vector;
+      SDValue Rev[2];
+      SDValue Ext[2];
+      for (int i = 0; i < 2; i++) {
+        Ext[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Op,
+                             DAG.getConstant(i, DL, MVT::i32));
+        Rev[i] = DAG.getNode(LoongArchISD::REVBD, DL, MVT::i64, Ext[i]);
+      }
+
+      Vector =
+          DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i64, DAG.getUNDEF(VT),
+                      Rev[1], DAG.getConstant(1, DL, MVT::i32));
+      Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i64, Vector,
+                           Rev[0], DAG.getConstant(0, DL, MVT::i32));
+
+      Vector = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Vector);
+
+      return Vector;
+    }
+  }
+
+  return SDValue();
+}
+
+// Use an SDNode of LoongArchISD::INSVE instead of
+// a series of EXTRACT_VECTOR_ELT and INSERT_VECTOR_ELT.
+static SDValue lowerVECTOR_SHUFFLE_INSVE(const SDLoc &DL, MVT VT, EVT ResTy,
+                                         SDValue Op1, SDValue Op2,
+                                         ArrayRef<int> Mask,
+                                         SelectionDAG &DAG) {
+
+  int Num = VT.getVectorNumElements();
+  if (ResTy == MVT::v16i16 || ResTy == MVT::v32i8)
+    return SDValue();
+
+  int CheckOne = 0;
+  int CheckOther = 0;
+  int Idx;
+
+  for (int i = 0; i < Num; i++) {
+    if ((Mask[i] == i) || (Mask[i] == -1)) {
+      CheckOther++;
+    } else if (Mask[i] == Num) {
+      CheckOne++;
+      Idx = i;
+    } else
+      return SDValue();
+  }
+
+  if ((CheckOne != 1) || (CheckOther != (Num - 1)))
+    return SDValue();
+  else {
+    return DAG.getNode(LoongArchISD::INSVE, DL, ResTy, Op1, Op2,
+                       DAG.getConstant(Idx, DL, MVT::i32));
+  }
+
+  return SDValue();
+}
+
+static SDValue lowerVECTOR_SHUFFLE_XVPICKVE(const SDLoc &DL, MVT VT, EVT ResTy,
+                                            SDValue Op1, SDValue Op2,
+                                            ArrayRef<int> Mask,
+                                            SelectionDAG &DAG) {
+
+  int Num = VT.getVectorNumElements();
+  if (ResTy == MVT::v16i16 || ResTy == MVT::v32i8 ||
+      (!ISD::isBuildVectorAllZeros(Op1.getNode())))
+    return SDValue();
+
+  bool CheckV = true;
+
+  if ((Mask[0] < Num) || (Mask[0] > (2 * Num - 1)))
+    CheckV = false;
+
+  for (int i = 1; i < Num; i++) {
+    if (Mask[i] != 0) {
+      CheckV = false;
+      break;
+    }
+  }
+
+  if (!CheckV)
+    return SDValue();
+  else {
+    return DAG.getNode(LoongArchISD::XVPICKVE, DL, ResTy, Op1, Op2,
+                       DAG.getConstant(Mask[0] - Num, DL, MVT::i32));
+  }
+
+  return SDValue();
+}
+
+static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, MVT VT, EVT ResTy,
+                                          SDValue Op1, SDValue Op2,
+                                          ArrayRef<int> Mask,
+                                          SelectionDAG &DAG) {
+
+  if (VT == MVT::v4i64) {
+    int Num = VT.getVectorNumElements();
+
+    bool CheckV = true;
+    for (int i = 0; i < Num; i++) {
+      if (Mask[i] != (i * 2)) {
+        CheckV = false;
+        break;
+      }
+    }
+
+    if (!CheckV)
+      return SDValue();
+    else {
+      SDValue Res = DAG.getNode(LoongArchISD::XVSHUF4I, DL, ResTy, Op1, Op2,
+                                DAG.getConstant(8, DL, MVT::i32));
+      return DAG.getNode(LoongArchISD::XVPERMI, DL, ResTy, Res,
+                         DAG.getConstant(0xD8, DL, MVT::i32));
+    }
+  } else
+    return SDValue();
+}
+
+// Lower VECTOR_SHUFFLE into one of a number of instructions depending on the
+// indices in the shuffle.
+SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
+                                                     SelectionDAG &DAG) const {
+  ShuffleVectorSDNode *Node = cast<ShuffleVectorSDNode>(Op);
+  EVT ResTy = Op->getValueType(0);
+  ArrayRef<int> Mask = Node->getMask();
+  SDValue Op1 = Op.getOperand(0);
+  SDValue Op2 = Op.getOperand(1);
+  MVT VT = Op.getSimpleValueType();
+  SDLoc DL(Op);
+
+  if (ResTy.is128BitVector()) {
+
+    int ResTyNumElts = ResTy.getVectorNumElements();
+    SmallVector<int, 16> Indices;
+
+    for (int i = 0; i < ResTyNumElts; ++i)
+      Indices.push_back(Node->getMaskElt(i));
+
+    SDValue Result;
+    if (isVECTOR_SHUFFLE_VREPLVEI(Op, ResTy, Indices, DAG))
+      return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG);
+    if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(Op, ResTy, Indices, DAG)))
+      return Result;
+    if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(Op, ResTy, Indices, DAG)))
+      return Result;
+    if ((Result = lowerVECTOR_SHUFFLE_VILVH(Op, ResTy, Indices, DAG)))
+      return Result;
+    if ((Result = lowerVECTOR_SHUFFLE_VILVL(Op, ResTy, Indices, DAG)))
+      return Result;
+    if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(Op, ResTy, Indices, DAG)))
+      return Result;
+    if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(Op, ResTy, Indices, DAG)))
+      return Result;
+    if ((Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG)))
+      return Result;
+    if ((Result = lowerHalfUndef_LSX(DL, ResTy, VT, Op1, Op2, Mask, DAG)))
+      return Result;
+    return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG);
+
+  } else if (ResTy.is256BitVector()) {
+    int ResTyNumElts = ResTy.getVectorNumElements();
+    SmallVector<int, 16> Indices;
+
+    for (int i = 0; i < ResTyNumElts; ++i)
+      Indices.push_back(Node->getMaskElt(i));
+
+    SDValue Result;
+    if ((Result = lowerHalfHalf(DL, VT, Op1, Op2, Mask, DAG)))
+      return Result;
+    if ((Result = lowerHalfUndef(DL, VT, Op1, Op2, Mask, DAG)))
+      return Result;
+    if (isVECTOR_SHUFFLE_XVREPLVEI(Op, ResTy, Indices, DAG))
+      return SDValue();
+    if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(Op, ResTy, Indices, DAG)))
+      return Result;
+    if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(Op, ResTy, Indices, DAG)))
+      return Result;
+    if ((Result = lowerVECTOR_SHUFFLE_XVILVH(Op, ResTy, Indices, DAG)))
+      return Result;
+    if ((Result = lowerVECTOR_SHUFFLE_XVILVL(Op, ResTy, Indices, DAG)))
+      return Result;
+    if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(Op, ResTy, Indices, DAG)))
+      return Result;
+    if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(Op, ResTy, Indices, DAG)))
+      return Result;
+    if ((Result = lowerVECTOR_SHUFFLE_XSHF(Op, ResTy, Indices, DAG)))
+      return Result;
+    if ((Result =
lowerVECTOR_SHUFFLE_INSVE(DL, VT, ResTy, Op1, Op2, Mask, DAG))) + return Result; + if ((Result = + lowerVECTOR_SHUFFLE_XVPICKVE(DL, VT, ResTy, Op1, Op2, Mask, DAG))) + return Result; + if ((Result = + lowerVECTOR_SHUFFLE_XVSHUF(DL, VT, ResTy, Op1, Op2, Mask, DAG))) + return Result; + } + + return SDValue(); +} + +SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op, + SelectionDAG &DAG) const { + + // Return a fixed StackObject with offset 0 which points to the old stack + // pointer. + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); + EVT ValTy = Op->getValueType(0); + int FI = MFI.CreateFixedObject(Op.getValueSizeInBits() / 8, 0, false); + return DAG.getFrameIndex(FI, ValTy); +} + +// Check whether the tail call optimization conditions are met +bool LoongArchTargetLowering::isEligibleForTailCallOptimization( + const CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, + unsigned NextStackOffset, const LoongArchFunctionInfo &FI) const { + + auto CalleeCC = CLI.CallConv; + auto IsVarArg = CLI.IsVarArg; + auto &Outs = CLI.Outs; + auto &Caller = MF.getFunction(); + auto CallerCC = Caller.getCallingConv(); + + if (Caller.getFnAttribute("disable-tail-calls").getValueAsString() == "true") + return false; + + if (Caller.hasFnAttribute("interrupt")) + return false; + + if (IsVarArg) + return false; + + if (getTargetMachine().getCodeModel() == CodeModel::Large) + return false; + + if (getTargetMachine().getRelocationModel() == Reloc::Static) + return false; + + // Do not tail call optimize if the stack is used to pass parameters. + if (CCInfo.getNextStackOffset() != 0) + return false; + + // Do not tail call optimize functions with byval parameters. + for (auto &Arg : Outs) + if (Arg.Flags.isByVal()) + return false; + + // Do not tail call optimize if either caller or callee uses structret + // semantics. + auto IsCallerStructRet = Caller.hasStructRetAttr(); + auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); + if (IsCallerStructRet || IsCalleeStructRet) + return false; + + // The callee has to preserve all registers the caller needs to preserve. + const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo(); + const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); + if (CalleeCC != CallerCC) { + const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); + if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) + return false; + } + + // Return false if either the callee or caller has a byval argument. + if (CCInfo.getInRegsParamsCount() > 0 || FI.hasByvalArg()) + return false; + + // Return true if the callee's argument area is no larger than the + // caller's. + return NextStackOffset <= FI.getIncomingArgSize(); +} + +//===----------------------------------------------------------------------===// +// Calling Convention Implementation +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// TODO: Implement a generic logic using tblgen that can support this. +// LoongArch 32-bit ABI rules: +// --- +// i32 - Passed in A0, A1, A2, A3 and stack +// f32 - Only passed in f32 registers if no int reg has been used yet to hold +// an argument. Otherwise, passed in A1, A2, A3 and stack. +// f64 - Only passed in two aliased f32 registers if no int reg has been used +// yet to hold an argument. Otherwise, use A2, A3 and stack. If A1 is +// not used, it must be shadowed. 
If only A3 is available, shadow it and
+//       go to stack.
+// vXiX - Received as scalarized i32s, passed in A0 - A3 and the stack.
+// vXf32 - Passed in either a pair of registers {A0, A1}, {A2, A3} or {A0 - A3}
+//         with the remainder spilled to the stack.
+// vXf64 - Passed in either {A0, A1, A2, A3} or {A2, A3} and in both cases
+//         spilling the remainder to the stack.
+//
+// For vararg functions, all arguments are passed in A0, A1, A2, A3 and stack.
+//===----------------------------------------------------------------------===//
+
+static bool CC_LoongArchILP32(unsigned ValNo, MVT ValVT, MVT LocVT,
+                              CCValAssign::LocInfo LocInfo,
+                              ISD::ArgFlagsTy ArgFlags, CCState &State,
+                              ArrayRef<MCPhysReg> F64Regs) {
+  static const MCPhysReg IntRegs[] = {LoongArch::A0, LoongArch::A1,
+                                      LoongArch::A2, LoongArch::A3};
+
+  const LoongArchCCState *LoongArchState =
+      static_cast<const LoongArchCCState *>(&State);
+
+  static const MCPhysReg F32Regs[] = {LoongArch::F12, LoongArch::F14};
+
+  static const MCPhysReg FloatVectorIntRegs[] = {LoongArch::A0, LoongArch::A2};
+
+  // Do not process byval args here.
+  if (ArgFlags.isByVal())
+    return true;
+
+  // Promote i8 and i16.
+  if (LocVT == MVT::i8 || LocVT == MVT::i16) {
+    LocVT = MVT::i32;
+    if (ArgFlags.isSExt())
+      LocInfo = CCValAssign::SExt;
+    else if (ArgFlags.isZExt())
+      LocInfo = CCValAssign::ZExt;
+    else
+      LocInfo = CCValAssign::AExt;
+  }
+
+  unsigned Reg;
+
+  // f32 and f64 are allocated in A0, A1, A2, A3 when any of the following
+  // is true: the function is vararg, the argument is 3rd or higher, or there
+  // is a previous argument which is not f32 or f64.
+  bool AllocateFloatsInIntReg = State.isVarArg() || ValNo > 1 ||
+                                State.getFirstUnallocated(F32Regs) != ValNo;
+  Align OrigAlign = ArgFlags.getNonZeroOrigAlign();
+  bool isI64 = (ValVT == MVT::i32 && OrigAlign == Align(8));
+  bool isVectorFloat = LoongArchState->WasOriginalArgVectorFloat(ValNo);
+
+  // The LoongArch vector ABI for floats passes them in a pair of registers.
+  if (ValVT == MVT::i32 && isVectorFloat) {
+    // This is the start of a vector that was scalarized into an unknown number
+    // of components. It doesn't matter how many there are. Allocate one of the
+    // notional 8-byte aligned registers which map onto the argument stack, and
+    // shadow the register lost to alignment requirements.
+    if (ArgFlags.isSplit()) {
+      Reg = State.AllocateReg(FloatVectorIntRegs);
+      if (Reg == LoongArch::A2)
+        State.AllocateReg(LoongArch::A1);
+      else if (Reg == 0)
+        State.AllocateReg(LoongArch::A3);
+    } else {
+      // If we're an intermediate component of the split, we can just attempt
+      // to allocate a register directly.
+      Reg = State.AllocateReg(IntRegs);
+    }
+  } else if (ValVT == MVT::i32 ||
+             (ValVT == MVT::f32 && AllocateFloatsInIntReg)) {
+    Reg = State.AllocateReg(IntRegs);
+    // If this is the first part of an i64 arg,
+    // the allocated register must be either A0 or A2.
+    if (isI64 && (Reg == LoongArch::A1 || Reg == LoongArch::A3))
+      Reg = State.AllocateReg(IntRegs);
+    LocVT = MVT::i32;
+  } else if (ValVT == MVT::f64 && AllocateFloatsInIntReg) {
+    // Allocate an int register and shadow the next int register. If the first
+    // available register is LoongArch::A1 or LoongArch::A3, shadow it too.
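+    // Worked example (illustrative): for f(int, double) the int takes A0 and
+    // the f64 then draws A1; since A1 starts an unaligned pair, the second
+    // AllocateReg moves it to A2 and the trailing AllocateReg shadows A3, so
+    // the double effectively occupies {A2, A3}.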
+ Reg = State.AllocateReg(IntRegs); + if (Reg == LoongArch::A1 || Reg == LoongArch::A3) + Reg = State.AllocateReg(IntRegs); + State.AllocateReg(IntRegs); + LocVT = MVT::i32; + } else if (ValVT.isFloatingPoint() && !AllocateFloatsInIntReg) { + // we are guaranteed to find an available float register + if (ValVT == MVT::f32) { + Reg = State.AllocateReg(F32Regs); + // Shadow int register + State.AllocateReg(IntRegs); + } else { + Reg = State.AllocateReg(F64Regs); + // Shadow int registers + unsigned Reg2 = State.AllocateReg(IntRegs); + if (Reg2 == LoongArch::A1 || Reg2 == LoongArch::A3) + State.AllocateReg(IntRegs); + State.AllocateReg(IntRegs); + } + } else + llvm_unreachable("Cannot handle this ValVT."); + + if (!Reg) { + unsigned Offset = State.AllocateStack(ValVT.getStoreSize(), OrigAlign); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + } else + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + + return false; +} + +static bool CC_LoongArchILP32_FP32(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + static const MCPhysReg F64Regs[] = {LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, \ + LoongArch::F3_64, LoongArch::F4_64, LoongArch::F5_64, \ + LoongArch::F6_64, LoongArch::F7_64 }; + + return CC_LoongArchILP32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, F64Regs); +} + +static bool CC_LoongArchILP32_FP64(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + static const MCPhysReg F64Regs[] = {LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, \ + LoongArch::F3_64, LoongArch::F4_64, LoongArch::F5_64, \ + LoongArch::F6_64, LoongArch::F7_64 }; + + return CC_LoongArchILP32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, F64Regs); +} + +static bool CC_LoongArch_F128(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) LLVM_ATTRIBUTE_UNUSED; + +static bool CC_LoongArch_F128(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + + static const MCPhysReg ArgRegs[8] = { + LoongArch::A0_64, LoongArch::A1_64, LoongArch::A2_64, LoongArch::A3_64, + LoongArch::A4_64, LoongArch::A5_64, LoongArch::A6_64, LoongArch::A7_64}; + + unsigned Idx = State.getFirstUnallocated(ArgRegs); + // Skip 'odd' register if necessary. 
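+  // In other words, an f128 piece must start at an even index into
+  // A0_64..A7_64; when the next free index is odd, one register is burned so
+  // the pair stays aligned.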
+  if (!ArgFlags.isSplitEnd() && Idx != array_lengthof(ArgRegs) && Idx % 2 == 1)
+    State.AllocateReg(ArgRegs);
+  return true;
+}
+
+static bool CC_LoongArchILP32(unsigned ValNo, MVT ValVT, MVT LocVT,
+                              CCValAssign::LocInfo LocInfo,
+                              ISD::ArgFlagsTy ArgFlags,
+                              CCState &State) LLVM_ATTRIBUTE_UNUSED;
+
+#include "LoongArchGenCallingConv.inc"
+
+CCAssignFn *LoongArchTargetLowering::CCAssignFnForCall() const {
+  return CC_LoongArch;
+}
+
+CCAssignFn *LoongArchTargetLowering::CCAssignFnForReturn() const {
+  return RetCC_LoongArch;
+}
+
+//===----------------------------------------------------------------------===//
+// Call Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+SDValue LoongArchTargetLowering::passArgOnStack(SDValue StackPtr,
+                                                unsigned Offset, SDValue Chain,
+                                                SDValue Arg, const SDLoc &DL,
+                                                bool IsTailCall,
+                                                SelectionDAG &DAG) const {
+  if (!IsTailCall) {
+    SDValue PtrOff =
+        DAG.getNode(ISD::ADD, DL, getPointerTy(DAG.getDataLayout()), StackPtr,
+                    DAG.getIntPtrConstant(Offset, DL));
+    return DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo());
+  }
+
+  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+  int FI = MFI.CreateFixedObject(Arg.getValueSizeInBits() / 8, Offset, false);
+  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
+  return DAG.getStore(Chain, DL, Arg, FIN, MachinePointerInfo(),
+                      /* Alignment = */ 0, MachineMemOperand::MOVolatile);
+}
+
+void LoongArchTargetLowering::getOpndList(
+    SmallVectorImpl<SDValue> &Ops,
+    std::deque<std::pair<unsigned, SDValue>> &RegsToPass, bool IsPICCall,
+    bool GlobalOrExternal, bool IsCallReloc, CallLoweringInfo &CLI,
+    SDValue Callee, SDValue Chain, bool IsTailCall) const {
+  // Build a sequence of copy-to-reg nodes chained together with token
+  // chain and flag operands which copy the outgoing args into registers.
+  // The InFlag is necessary since all emitted instructions must be
+  // stuck together.
+  SDValue InFlag;
+
+  Ops.push_back(Callee);
+
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+    Chain = CLI.DAG.getCopyToReg(Chain, CLI.DL, RegsToPass[i].first,
+                                 RegsToPass[i].second, InFlag);
+    InFlag = Chain.getValue(1);
+  }
+
+  // Add argument registers to the end of the list so that they are
+  // known live into the call.
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+    Ops.push_back(CLI.DAG.getRegister(RegsToPass[i].first,
+                                      RegsToPass[i].second.getValueType()));
+
+  if (!IsTailCall) {
+    // Add a register mask operand representing the call-preserved registers.
+    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+    const uint32_t *Mask =
+        TRI->getCallPreservedMask(CLI.DAG.getMachineFunction(), CLI.CallConv);
+    assert(Mask && "Missing call preserved mask for calling convention");
+    Ops.push_back(CLI.DAG.getRegisterMask(Mask));
+  }
+
+  if (InFlag.getNode())
+    Ops.push_back(InFlag);
+}
+
+void LoongArchTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
+                                                            SDNode *Node) const {
+  switch (MI.getOpcode()) {
+  default:
+    return;
+  }
+}
+
+/// LowerCall - function arguments are copied from virtual regs to
+/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
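+/// The overall sequence is: analyze the operands with CC_LoongArch, decide
+/// whether the call is eligible for tail-call optimization, copy register
+/// arguments and store stack arguments, then emit a JmpLink (or TailCall)
+/// node and unpack the results via LowerCallResult.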
+SDValue
+LoongArchTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
+                                   SmallVectorImpl<SDValue> &InVals) const {
+  SelectionDAG &DAG = CLI.DAG;
+  SDLoc DL = CLI.DL;
+  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
+  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
+  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
+  SDValue Chain = CLI.Chain;
+  SDValue Callee = CLI.Callee;
+  bool &IsTailCall = CLI.IsTailCall;
+  CallingConv::ID CallConv = CLI.CallConv;
+  bool IsVarArg = CLI.IsVarArg;
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  const TargetFrameLowering *TFL = Subtarget.getFrameLowering();
+  bool IsPIC = isPositionIndependent();
+
+  // Analyze operands of the call, assigning locations to each operand.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  LoongArchCCState CCInfo(
+      CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext(),
+      LoongArchCCState::getSpecialCallingConvForCallee(Callee.getNode(),
+                                                       Subtarget));
+
+  const ExternalSymbolSDNode *ES =
+      dyn_cast_or_null<ExternalSymbolSDNode>(Callee.getNode());
+
+  // There is one case where CALLSEQ_START..CALLSEQ_END can be nested, which
+  // is during the lowering of a call with a byval argument which produces
+  // a call to memcpy. For the ILP32D/ILP32F/ILP32S case, this causes the
+  // caller to allocate stack space for the reserved argument area for the
+  // callee, then recursively again for the memcpy call. In the NEWABI case,
+  // this doesn't occur as those ABIs mandate that the callee allocates the
+  // reserved argument area. We do still produce nested
+  // CALLSEQ_START..CALLSEQ_END with zero space though.
+  //
+  // If the callee has a byval argument and memcpy is used, we are mandated
+  // to already have produced a reserved argument area for the callee for
+  // ILP32D/ILP32F/ILP32S. Therefore, the reserved argument area can be reused
+  // for both calls.
+  //
+  // Other cases of calling memcpy cannot have a chain with a CALLSEQ_START
+  // present, as we have yet to hook that node onto the chain.
+  //
+  // Hence, the CALLSEQ_START and CALLSEQ_END nodes can be eliminated in this
+  // case. GCC does a similar trick, in that wherever possible, it calculates
+  // the maximum outgoing argument area (including the reserved area), and
+  // preallocates the stack space on entrance to the caller.
+  //
+  // FIXME: We should do the same for efficiency and space.
+
+  bool MemcpyInByVal = ES &&
+                       StringRef(ES->getSymbol()) == StringRef("memcpy") &&
+                       Chain.getOpcode() == ISD::CALLSEQ_START;
+
+  CCInfo.AnalyzeCallOperands(Outs, CC_LoongArch, CLI.getArgs(),
+                             ES ? ES->getSymbol() : nullptr);
+
+  // Get a count of how many bytes are to be pushed on the stack.
+  unsigned NextStackOffset = CCInfo.getNextStackOffset();
+
+  // Check if it's really possible to do a tail call. Restrict it to functions
+  // that are part of this compilation unit.
+  if (IsTailCall) {
+    IsTailCall = isEligibleForTailCallOptimization(
+        CCInfo, CLI, MF, NextStackOffset, *MF.getInfo<LoongArchFunctionInfo>());
+    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+      if (G->getGlobal()->hasExternalWeakLinkage())
+        IsTailCall = false;
+    }
+  }
+  if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall())
+    report_fatal_error("failed to perform tail call elimination on a call "
+                       "site marked musttail");
+
+  if (IsTailCall)
+    ++NumTailCalls;
+
+  // Chain is the output chain of the last Load/Store or CopyToReg node.
+  // ByValChain is the output chain of the last Memcpy node created for copying
+  // byval arguments to the stack.
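+  // Round the outgoing argument area up to the stack alignment before
+  // emitting CALLSEQ_START so the callee always sees an aligned stack.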
+  unsigned StackAlignment = TFL->getStackAlignment();
+  NextStackOffset = alignTo(NextStackOffset, StackAlignment);
+  SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, DL, true);
+
+  if (!(IsTailCall || MemcpyInByVal))
+    Chain = DAG.getCALLSEQ_START(Chain, NextStackOffset, 0, DL);
+
+  SDValue StackPtr = DAG.getCopyFromReg(
+      Chain, DL, Subtarget.is64Bit() ? LoongArch::SP_64 : LoongArch::SP,
+      getPointerTy(DAG.getDataLayout()));
+
+  std::deque<std::pair<unsigned, SDValue>> RegsToPass;
+  SmallVector<SDValue, 8> MemOpChains;
+
+  CCInfo.rewindByValRegsInfo();
+
+  // Walk the register/memloc assignments, inserting copies/loads.
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    SDValue Arg = OutVals[i];
+    CCValAssign &VA = ArgLocs[i];
+    MVT ValVT = VA.getValVT(), LocVT = VA.getLocVT();
+    ISD::ArgFlagsTy Flags = Outs[i].Flags;
+    bool UseUpperBits = false;
+
+    // ByVal Arg.
+    if (Flags.isByVal()) {
+      unsigned FirstByValReg, LastByValReg;
+      unsigned ByValIdx = CCInfo.getInRegsParamsProcessed();
+      CCInfo.getInRegsParamInfo(ByValIdx, FirstByValReg, LastByValReg);
+
+      assert(Flags.getByValSize() &&
+             "ByVal args of size 0 should have been ignored by front-end.");
+      assert(ByValIdx < CCInfo.getInRegsParamsCount());
+      assert(!IsTailCall &&
+             "Do not tail-call optimize if there is a byval argument.");
+      passByValArg(Chain, DL, RegsToPass, MemOpChains, StackPtr, MFI, DAG, Arg,
+                   FirstByValReg, LastByValReg, Flags, VA);
+      CCInfo.nextInRegsParam();
+      continue;
+    }
+
+    // Promote the value if needed.
+    switch (VA.getLocInfo()) {
+    default:
+      llvm_unreachable("Unknown loc info!");
+    case CCValAssign::Full:
+      if (VA.isRegLoc()) {
+        if ((ValVT == MVT::f32 && LocVT == MVT::i32) ||
+            (ValVT == MVT::f64 && LocVT == MVT::i64) ||
+            (ValVT == MVT::i64 && LocVT == MVT::f64))
+          Arg = DAG.getNode(ISD::BITCAST, DL, LocVT, Arg);
+      }
+      break;
+    case CCValAssign::BCvt:
+      Arg = DAG.getNode(ISD::BITCAST, DL, LocVT, Arg);
+      break;
+    case CCValAssign::SExtUpper:
+      UseUpperBits = true;
+      LLVM_FALLTHROUGH;
+    case CCValAssign::SExt:
+      Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, LocVT, Arg);
+      break;
+    case CCValAssign::ZExtUpper:
+      UseUpperBits = true;
+      LLVM_FALLTHROUGH;
+    case CCValAssign::ZExt:
+      Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, LocVT, Arg);
+      break;
+    case CCValAssign::AExtUpper:
+      UseUpperBits = true;
+      LLVM_FALLTHROUGH;
+    case CCValAssign::AExt:
+      Arg = DAG.getNode(ISD::ANY_EXTEND, DL, LocVT, Arg);
+      break;
+    }
+
+    if (UseUpperBits) {
+      unsigned ValSizeInBits = Outs[i].ArgVT.getSizeInBits();
+      unsigned LocSizeInBits = VA.getLocVT().getSizeInBits();
+      Arg = DAG.getNode(
+          ISD::SHL, DL, VA.getLocVT(), Arg,
+          DAG.getConstant(LocSizeInBits - ValSizeInBits, DL, VA.getLocVT()));
+    }
+
+    // Arguments that can be passed in a register must be kept in the
+    // RegsToPass vector.
+    if (VA.isRegLoc()) {
+      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+      continue;
+    }
+
+    // Register can't get to this point...
+    assert(VA.isMemLoc());
+
+    // Emit an ISD::STORE which stores the
+    // parameter value to a stack location.
+    MemOpChains.push_back(passArgOnStack(StackPtr, VA.getLocMemOffset(),
+                                         Chain, Arg, DL, IsTailCall, DAG));
+  }
+
+  // Transform all store nodes into one single node because all store
+  // nodes are independent of each other.
+  if (!MemOpChains.empty())
+    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
+
+  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+  // node so that legalize doesn't hack it.
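+  // Indirect calls (through a function pointer) take neither branch below and
+  // leave Callee untouched in a register.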
+
+  bool GlobalOrExternal = false, IsCallReloc = false;
+
+  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL,
+                                        getPointerTy(DAG.getDataLayout()), 0,
+                                        LoongArchII::MO_NO_FLAG);
+    GlobalOrExternal = true;
+  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+    const char *Sym = S->getSymbol();
+    Callee = DAG.getTargetExternalSymbol(
+        Sym, getPointerTy(DAG.getDataLayout()), LoongArchII::MO_NO_FLAG);
+
+    GlobalOrExternal = true;
+  }
+
+  SmallVector<SDValue, 8> Ops(1, Chain);
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+  getOpndList(Ops, RegsToPass, IsPIC, GlobalOrExternal, IsCallReloc, CLI,
+              Callee, Chain, IsTailCall);
+
+  if (IsTailCall) {
+    MF.getFrameInfo().setHasTailCall();
+    return DAG.getNode(LoongArchISD::TailCall, DL, MVT::Other, Ops);
+  }
+
+  Chain = DAG.getNode(LoongArchISD::JmpLink, DL, NodeTys, Ops);
+  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
+  SDValue InFlag = Chain.getValue(1);
+
+  // Create the CALLSEQ_END node in the case where it is not a call to
+  // memcpy.
+  if (!(MemcpyInByVal)) {
+    Chain = DAG.getCALLSEQ_END(Chain, NextStackOffsetVal,
+                               DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
+    InFlag = Chain.getValue(1);
+  }
+
+  // Handle result values, copying them out of physregs into vregs that we
+  // return.
+  return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
+                         InVals, CLI);
+}
+
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+SDValue LoongArchTargetLowering::LowerCallResult(
+    SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool IsVarArg,
+    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
+    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
+    TargetLowering::CallLoweringInfo &CLI) const {
+  // Assign locations to each value returned by this call.
+  SmallVector<CCValAssign, 16> RVLocs;
+  LoongArchCCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
+                          *DAG.getContext());
+
+  const ExternalSymbolSDNode *ES =
+      dyn_cast_or_null<ExternalSymbolSDNode>(CLI.Callee.getNode());
+  CCInfo.AnalyzeCallResult(Ins, RetCC_LoongArch, CLI.RetTy,
+                           ES ? ES->getSymbol() : nullptr);
+
+  // Copy all of the result registers out of their specified physreg.
+  for (unsigned i = 0; i != RVLocs.size(); ++i) {
+    CCValAssign &VA = RVLocs[i];
+    assert(VA.isRegLoc() && "Can only return in registers!");
+
+    SDValue Val = DAG.getCopyFromReg(Chain, DL, RVLocs[i].getLocReg(),
+                                     RVLocs[i].getLocVT(), InFlag);
+    Chain = Val.getValue(1);
+    InFlag = Val.getValue(2);
+
+    if (VA.isUpperBitsInLoc()) {
+      unsigned ValSizeInBits = Ins[i].ArgVT.getSizeInBits();
+      unsigned LocSizeInBits = VA.getLocVT().getSizeInBits();
+      unsigned Shift =
+          VA.getLocInfo() == CCValAssign::ZExtUpper ? ISD::SRL : ISD::SRA;
+      Val = DAG.getNode(
+          Shift, DL, VA.getLocVT(), Val,
+          DAG.getConstant(LocSizeInBits - ValSizeInBits, DL, VA.getLocVT()));
+    }
+
+    switch (VA.getLocInfo()) {
+    default:
+      llvm_unreachable("Unknown loc info!");
+    case CCValAssign::Full:
+      break;
+    case CCValAssign::BCvt:
+      Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
+      break;
+    case CCValAssign::AExt:
+    case CCValAssign::AExtUpper:
+      Val = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Val);
+      break;
+    case CCValAssign::ZExt:
+    case CCValAssign::ZExtUpper:
+      Val = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Val,
+                        DAG.getValueType(VA.getValVT()));
+      Val = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Val);
+      break;
+    case CCValAssign::SExt:
+    case CCValAssign::SExtUpper:
+      Val = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Val,
+                        DAG.getValueType(VA.getValVT()));
+      Val = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Val);
+      break;
+    }
+
+    InVals.push_back(Val);
+  }
+
+  return Chain;
+}
+
+static SDValue UnpackFromArgumentSlot(SDValue Val, const CCValAssign &VA,
+                                      EVT ArgVT, const SDLoc &DL,
+                                      SelectionDAG &DAG) {
+  MVT LocVT = VA.getLocVT();
+  EVT ValVT = VA.getValVT();
+
+  // Shift into the upper bits if necessary.
+  switch (VA.getLocInfo()) {
+  default:
+    break;
+  case CCValAssign::AExtUpper:
+  case CCValAssign::SExtUpper:
+  case CCValAssign::ZExtUpper: {
+    unsigned ValSizeInBits = ArgVT.getSizeInBits();
+    unsigned LocSizeInBits = VA.getLocVT().getSizeInBits();
+    unsigned Opcode =
+        VA.getLocInfo() == CCValAssign::ZExtUpper ? ISD::SRL : ISD::SRA;
+    Val = DAG.getNode(
+        Opcode, DL, VA.getLocVT(), Val,
+        DAG.getConstant(LocSizeInBits - ValSizeInBits, DL, VA.getLocVT()));
+    break;
+  }
+  }
+
+  // If this is a value smaller than the argument slot size (32-bit for
+  // ILP32D/ILP32F/ILP32S, 64-bit for LP64D/LP64S/LP64F), it has been promoted
+  // in some way to the argument slot size. Extract the value and insert any
+  // appropriate assertions regarding sign/zero extension.
+  switch (VA.getLocInfo()) {
+  default:
+    llvm_unreachable("Unknown loc info!");
+  case CCValAssign::Full:
+    break;
+  case CCValAssign::AExtUpper:
+  case CCValAssign::AExt:
+    Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val);
+    break;
+  case CCValAssign::SExtUpper:
+  case CCValAssign::SExt: {
+    if ((ArgVT == MVT::i1) || (ArgVT == MVT::i8) || (ArgVT == MVT::i16)) {
+      SDValue SubReg = DAG.getTargetConstant(LoongArch::sub_32, DL, MVT::i32);
+      Val = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, ValVT,
+                                       Val, SubReg),
+                    0);
+    } else {
+      Val =
+          DAG.getNode(ISD::AssertSext, DL, LocVT, Val, DAG.getValueType(ValVT));
+      Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val);
+    }
+    break;
+  }
+  case CCValAssign::ZExtUpper:
+  case CCValAssign::ZExt:
+    Val = DAG.getNode(ISD::AssertZext, DL, LocVT, Val, DAG.getValueType(ValVT));
+    Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val);
+    break;
+  case CCValAssign::BCvt:
+    Val = DAG.getNode(ISD::BITCAST, DL, ValVT, Val);
+    break;
+  }
+
+  return Val;
+}
+
+//===----------------------------------------------------------------------===//
+// Formal Arguments Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+/// LowerFormalArguments - transform physical registers into virtual registers
+/// and generate load operations for arguments placed on the stack.
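+/// Register arguments are attached with CopyFromReg and adjusted via
+/// UnpackFromArgumentSlot; stack arguments get fixed frame objects plus loads.
+/// For vararg functions the unused argument registers are additionally
+/// spilled by writeVarArgRegs.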
+SDValue LoongArchTargetLowering::LowerFormalArguments(
+    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
+    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
+    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  LoongArchFunctionInfo *LoongArchFI = MF.getInfo<LoongArchFunctionInfo>();
+
+  LoongArchFI->setVarArgsFrameIndex(0);
+
+  // Used with vargs to accumulate store chains.
+  std::vector<SDValue> OutChains;
+
+  // Assign locations to all of the incoming arguments.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  LoongArchCCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
+                          *DAG.getContext());
+  const Function &Func = DAG.getMachineFunction().getFunction();
+  Function::const_arg_iterator FuncArg = Func.arg_begin();
+
+  CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_FixedArg);
+  LoongArchFI->setFormalArgInfo(CCInfo.getNextStackOffset(),
+                                CCInfo.getInRegsParamsCount() > 0);
+
+  unsigned CurArgIdx = 0;
+  CCInfo.rewindByValRegsInfo();
+
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    CCValAssign &VA = ArgLocs[i];
+    if (Ins[i].isOrigArg()) {
+      std::advance(FuncArg, Ins[i].getOrigArgIndex() - CurArgIdx);
+      CurArgIdx = Ins[i].getOrigArgIndex();
+    }
+    EVT ValVT = VA.getValVT();
+    ISD::ArgFlagsTy Flags = Ins[i].Flags;
+    bool IsRegLoc = VA.isRegLoc();
+
+    if (Flags.isByVal()) {
+      assert(Ins[i].isOrigArg() && "Byval arguments cannot be implicit");
+      unsigned FirstByValReg, LastByValReg;
+      unsigned ByValIdx = CCInfo.getInRegsParamsProcessed();
+      CCInfo.getInRegsParamInfo(ByValIdx, FirstByValReg, LastByValReg);
+
+      assert(Flags.getByValSize() &&
+             "ByVal args of size 0 should have been ignored by front-end.");
+      assert(ByValIdx < CCInfo.getInRegsParamsCount());
+      copyByValRegs(Chain, DL, OutChains, DAG, Flags, InVals, &*FuncArg,
+                    FirstByValReg, LastByValReg, VA, CCInfo);
+      CCInfo.nextInRegsParam();
+      continue;
+    }
+
+    // Arguments stored in registers.
+    if (IsRegLoc) {
+      MVT RegVT = VA.getLocVT();
+      unsigned ArgReg = VA.getLocReg();
+      const TargetRegisterClass *RC = getRegClassFor(RegVT);
+
+      // Transform the arguments stored in
+      // physical registers into virtual ones.
+      unsigned Reg = addLiveIn(DAG.getMachineFunction(), ArgReg, RC);
+      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
+
+      ArgValue = UnpackFromArgumentSlot(ArgValue, VA, Ins[i].ArgVT, DL, DAG);
+
+      // Handle floating point arguments passed in integer registers and
+      // long double arguments passed in floating point registers.
+      if ((RegVT == MVT::i32 && ValVT == MVT::f32) ||
+          (RegVT == MVT::i64 && ValVT == MVT::f64) ||
+          (RegVT == MVT::f64 && ValVT == MVT::i64))
+        ArgValue = DAG.getNode(ISD::BITCAST, DL, ValVT, ArgValue);
+      else if ((ABI.IsILP32D() || ABI.IsILP32F() || ABI.IsILP32S()) &&
+               RegVT == MVT::i32 && ValVT == MVT::f64) {
+        // TODO
+        llvm_unreachable("Unimplemented ABI");
+      }
+
+      InVals.push_back(ArgValue);
+    } else { // VA.isRegLoc()
+      MVT LocVT = VA.getLocVT();
+
+      if (ABI.IsILP32D() || ABI.IsILP32F() || ABI.IsILP32S()) {
+        // TODO
+        llvm_unreachable("Unimplemented ABI");
+      }
+
+      // sanity check
+      assert(VA.isMemLoc());
+
+      // The stack pointer offset is relative to the caller stack frame.
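+      // A fixed frame object is created over the incoming argument slot so
+      // the load below reads exactly the bytes the caller stored there.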
+      int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / 8,
+                                     VA.getLocMemOffset(), true);
+
+      // Create load nodes to retrieve arguments from the stack.
+      SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
+      SDValue ArgValue = DAG.getLoad(
+          LocVT, DL, Chain, FIN,
+          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
+      OutChains.push_back(ArgValue.getValue(1));
+
+      ArgValue = UnpackFromArgumentSlot(ArgValue, VA, Ins[i].ArgVT, DL, DAG);
+
+      InVals.push_back(ArgValue);
+    }
+  }
+
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    // The loongarch ABIs for returning structs by value require that we copy
+    // the sret argument into $v0 for the return. Save the argument into
+    // a virtual register so that we can access it from the return points.
+    if (Ins[i].Flags.isSRet()) {
+      unsigned Reg = LoongArchFI->getSRetReturnReg();
+      if (!Reg) {
+        Reg = MF.getRegInfo().createVirtualRegister(
+            getRegClassFor(Subtarget.is64Bit() ? MVT::i64 : MVT::i32));
+        LoongArchFI->setSRetReturnReg(Reg);
+      }
+      SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[i]);
+      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain);
+      break;
+    }
+  }
+
+  if (IsVarArg)
+    writeVarArgRegs(OutChains, Chain, DL, DAG, CCInfo);
+
+  // All stores are grouped in one node to allow the matching between
+  // the size of Ins and InVals. This only happens for vararg functions.
+  if (!OutChains.empty()) {
+    OutChains.push_back(Chain);
+    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
+  }
+
+  return Chain;
+}
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+bool
+LoongArchTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
+                                        MachineFunction &MF, bool IsVarArg,
+                                        const SmallVectorImpl<ISD::OutputArg> &Outs,
+                                        LLVMContext &Context) const {
+  SmallVector<CCValAssign, 16> RVLocs;
+  LoongArchCCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
+  return CCInfo.CheckReturn(Outs, RetCC_LoongArch);
+}
+
+bool
+LoongArchTargetLowering::shouldSignExtendTypeInLibCall(EVT Type,
+                                                       bool IsSigned) const {
+  if (Subtarget.is64Bit() && Type == MVT::i32)
+    return true;
+
+  return IsSigned;
+}
+
+SDValue
+LoongArchTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
+                                     bool IsVarArg,
+                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
+                                     const SmallVectorImpl<SDValue> &OutVals,
+                                     const SDLoc &DL, SelectionDAG &DAG) const {
+  // CCValAssign - represent the assignment of
+  // the return value to a location.
+  SmallVector<CCValAssign, 16> RVLocs;
+  MachineFunction &MF = DAG.getMachineFunction();
+
+  // CCState - Info about the registers and stack slot.
+  LoongArchCCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
+
+  // Analyze return values.
+  CCInfo.AnalyzeReturn(Outs, RetCC_LoongArch);
+
+  SDValue Flag;
+  SmallVector<SDValue, 4> RetOps(1, Chain);
+
+  // Copy the result values into the output registers.
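+  // Each value is first promoted/extended to its location type; the glue
+  // (Flag) threaded through the CopyToReg nodes keeps them together in front
+  // of the final Ret.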
+  for (unsigned i = 0; i != RVLocs.size(); ++i) {
+    SDValue Val = OutVals[i];
+    CCValAssign &VA = RVLocs[i];
+    assert(VA.isRegLoc() && "Can only return in registers!");
+    bool UseUpperBits = false;
+
+    switch (VA.getLocInfo()) {
+    default:
+      llvm_unreachable("Unknown loc info!");
+    case CCValAssign::Full:
+      break;
+    case CCValAssign::BCvt:
+      Val = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Val);
+      break;
+    case CCValAssign::AExtUpper:
+      UseUpperBits = true;
+      LLVM_FALLTHROUGH;
+    case CCValAssign::AExt:
+      Val = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Val);
+      break;
+    case CCValAssign::ZExtUpper:
+      UseUpperBits = true;
+      LLVM_FALLTHROUGH;
+    case CCValAssign::ZExt:
+      Val = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Val);
+      break;
+    case CCValAssign::SExtUpper:
+      UseUpperBits = true;
+      LLVM_FALLTHROUGH;
+    case CCValAssign::SExt:
+      Val = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Val);
+      break;
+    }
+
+    if (UseUpperBits) {
+      unsigned ValSizeInBits = Outs[i].ArgVT.getSizeInBits();
+      unsigned LocSizeInBits = VA.getLocVT().getSizeInBits();
+      Val = DAG.getNode(
+          ISD::SHL, DL, VA.getLocVT(), Val,
+          DAG.getConstant(LocSizeInBits - ValSizeInBits, DL, VA.getLocVT()));
+    }
+
+    Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Flag);
+
+    // Guarantee that all emitted copies are stuck together with flags.
+    Flag = Chain.getValue(1);
+    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+  }
+
+  // The loongarch ABIs for returning structs by value require that we copy
+  // the sret argument into $v0 for the return. We saved the argument into
+  // a virtual register in the entry block, so now we copy the value out
+  // and into $v0.
+  if (MF.getFunction().hasStructRetAttr()) {
+    LoongArchFunctionInfo *LoongArchFI = MF.getInfo<LoongArchFunctionInfo>();
+    unsigned Reg = LoongArchFI->getSRetReturnReg();
+
+    if (!Reg)
+      llvm_unreachable("sret virtual register not created in the entry block");
+    SDValue Val =
+        DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy(DAG.getDataLayout()));
+    unsigned A0 = Subtarget.is64Bit() ? LoongArch::A0_64 : LoongArch::A0;
+
+    Chain = DAG.getCopyToReg(Chain, DL, A0, Val, Flag);
+    Flag = Chain.getValue(1);
+    RetOps.push_back(DAG.getRegister(A0, getPointerTy(DAG.getDataLayout())));
+  }
+
+  RetOps[0] = Chain; // Update chain.
+
+  // Add the flag if we have it.
+  if (Flag.getNode())
+    RetOps.push_back(Flag);
+
+  // Standard return on LoongArch is a "jr $ra".
+  return DAG.getNode(LoongArchISD::Ret, DL, MVT::Other, RetOps);
+}
+
+//===----------------------------------------------------------------------===//
+// LoongArch Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
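+/// For example (illustrative), asm("ld.w %0, %1" : "=r"(v) : "ZC"(*p))
+/// classifies "ZC" as C_Memory here, while 'r' falls through to the generic
+/// TargetLowering handler.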
+LoongArchTargetLowering::ConstraintType
+LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
+  // LoongArch specific constraints
+  // GCC config/loongarch/constraints.md
+  //
+  // 'f':  Floating-point register
+  // 'G':  Floating-point 0
+  // 'l':  Signed 16-bit constant
+  // 'R':  Memory address that can be used in a non-macro load or store
+  // "ZC": Memory address with a 16-bit, 4-byte aligned offset
+  // "ZB": Memory address with 0 offset
+
+  if (Constraint.size() == 1) {
+    switch (Constraint[0]) {
+    default:
+      break;
+    case 'f':
+      return C_RegisterClass;
+    case 'l':
+    case 'G':
+      return C_Other;
+    case 'R':
+      return C_Memory;
+    }
+  }
+
+  if (Constraint == "ZC" || Constraint == "ZB")
+    return C_Memory;
+
+  return TargetLowering::getConstraintType(Constraint);
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+LoongArchTargetLowering::getSingleConstraintMatchWeight(
+    AsmOperandInfo &info, const char *constraint) const {
+  ConstraintWeight weight = CW_Invalid;
+  Value *CallOperandVal = info.CallOperandVal;
+  // If we don't have a value, we can't do a match,
+  // but allow it at the lowest weight.
+  if (!CallOperandVal)
+    return CW_Default;
+  Type *type = CallOperandVal->getType();
+  // Look at the constraint type.
+  switch (*constraint) {
+  default:
+    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+    break;
+  case 'f': // FPU
+    if (Subtarget.hasLSX() && type->isVectorTy() &&
+        type->getPrimitiveSizeInBits() == 128)
+      weight = CW_Register;
+    else if (Subtarget.hasLASX() && type->isVectorTy() &&
+             type->getPrimitiveSizeInBits() == 256)
+      weight = CW_Register;
+    else if (type->isFloatTy())
+      weight = CW_Register;
+    break;
+  case 'l': // signed 16 bit immediate
+  case 'I': // signed 12 bit immediate
+  case 'J': // integer zero
+  case 'G': // floating-point zero
+  case 'K': // unsigned 12 bit immediate
+    if (isa<ConstantInt>(CallOperandVal))
+      weight = CW_Constant;
+    break;
+  case 'm':
+  case 'R':
+    weight = CW_Memory;
+    break;
+  }
+  return weight;
+}
+
+/// This is a helper function to parse a physical register string and split it
+/// into non-numeric and numeric parts (Prefix and Reg). The first boolean flag
+/// that is returned indicates whether parsing was successful. The second flag
+/// is true if the numeric part exists.
+static std::pair<bool, bool> parsePhysicalReg(StringRef C, StringRef &Prefix,
+                                              unsigned long long &Reg) {
+  if (C.empty() || C.front() != '{' || C.back() != '}')
+    return std::make_pair(false, false);
+
+  // Search for the first numeric character.
+  StringRef::const_iterator I, B = C.begin() + 1, E = C.end() - 1;
+  I = std::find_if(B, E, isdigit);
+
+  Prefix = StringRef(B, I - B);
+
+  // The second flag is set to false if no numeric characters were found.
+  if (I == E)
+    return std::make_pair(true, false);
+
+  // Parse the numeric characters.
+  return std::make_pair(!getAsUnsignedInteger(StringRef(I, E - I), 10, Reg),
+                        true);
+}
+
+EVT LoongArchTargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
+                                                 ISD::NodeType) const {
+  bool Cond = Subtarget.is64Bit() && VT.getSizeInBits() == 32;
+  EVT MinVT = getRegisterType(Context, Cond ? MVT::i64 : MVT::i32);
+  return VT.bitsLT(MinVT) ? MinVT : VT;
+}
+
+static const TargetRegisterClass *getRegisterClassForVT(MVT VT, bool Is64Bit) {
+  // Newer llvm versions (>= 12) do not require simple VTs for constraints and
+  // they use MVT::Other for constraints with complex VTs. For more details,
+  // please see https://reviews.llvm.org/D91710.
+  if (VT == MVT::Other || VT.getSizeInBits() <= 32)
+    return &LoongArch::GPR32RegClass;
+  if (VT.getSizeInBits() <= 64)
+    return Is64Bit ? &LoongArch::GPR64RegClass : &LoongArch::GPR32RegClass;
+  return nullptr;
+}
+
+std::pair<unsigned, const TargetRegisterClass *>
+LoongArchTargetLowering::parseRegForInlineAsmConstraint(StringRef C,
+                                                        MVT VT) const {
+  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+  const TargetRegisterClass *RC;
+  StringRef Prefix;
+  unsigned long long Reg;
+
+  std::pair<bool, bool> R = parsePhysicalReg(C, Prefix, Reg);
+
+  if (!R.first)
+    return std::make_pair(0U, nullptr);
+
+  if (!R.second)
+    return std::make_pair(0U, nullptr);
+
+  if (Prefix == "$f") { // Parse $f0-$f31.
+    // If the size of FP registers is 64-bit, select the 64-bit register class.
+    // Otherwise, select the 32-bit register class.
+    if (VT == MVT::Other)
+      VT = Subtarget.hasBasicD() ? MVT::f64 : MVT::f32;
+
+    RC = getRegClassFor(VT);
+  } else if (Prefix == "$vr") { // Parse $vr0-$vr31.
+    RC = getRegClassFor((VT == MVT::Other) ? MVT::v16i8 : VT);
+  } else if (Prefix == "$xr") { // Parse $xr0-$xr31.
+    RC = getRegClassFor((VT == MVT::Other) ? MVT::v16i8 : VT);
+  } else if (Prefix == "$fcc") // Parse $fcc0-$fcc7.
+    RC = TRI->getRegClass(LoongArch::FCFRRegClassID);
+  else { // Parse $r0-$r31.
+    assert(Prefix == "$r");
+    if ((RC = getRegisterClassForVT(VT, Subtarget.is64Bit())) == nullptr) {
+      // This will generate an error message.
+      return std::make_pair(0U, nullptr);
+    }
+  }
+
+  assert(Reg < RC->getNumRegs());
+
+  if (RC == &LoongArch::GPR64RegClass || RC == &LoongArch::GPR32RegClass) {
+    // Sync with the GPR32/GPR64 RegisterClass in LoongArchRegisterInfo.td,
+    // just like LoongArchAsmParser.cpp does.
+    switch (Reg) {
+    case 0:  return std::make_pair(*(RC->begin() + 0), RC);  // r0
+    case 1:  return std::make_pair(*(RC->begin() + 27), RC); // r1
+    case 2:  return std::make_pair(*(RC->begin() + 28), RC); // r2
+    case 3:  return std::make_pair(*(RC->begin() + 29), RC); // r3
+    case 4:  return std::make_pair(*(RC->begin() + 1), RC);  // r4
+    case 5:  return std::make_pair(*(RC->begin() + 2), RC);  // r5
+    case 6:  return std::make_pair(*(RC->begin() + 3), RC);  // r6
+    case 7:  return std::make_pair(*(RC->begin() + 4), RC);  // r7
+    case 8:  return std::make_pair(*(RC->begin() + 5), RC);  // r8
+    case 9:  return std::make_pair(*(RC->begin() + 6), RC);  // r9
+    case 10: return std::make_pair(*(RC->begin() + 7), RC);  // r10
+    case 11: return std::make_pair(*(RC->begin() + 8), RC);  // r11
+    case 12: return std::make_pair(*(RC->begin() + 9), RC);  // r12
+    case 13: return std::make_pair(*(RC->begin() + 10), RC); // r13
+    case 14: return std::make_pair(*(RC->begin() + 11), RC); // r14
+    case 15: return std::make_pair(*(RC->begin() + 12), RC); // r15
+    case 16: return std::make_pair(*(RC->begin() + 13), RC); // r16
+    case 17: return std::make_pair(*(RC->begin() + 14), RC); // r17
+    case 18: return std::make_pair(*(RC->begin() + 15), RC); // r18
+    case 19: return std::make_pair(*(RC->begin() + 16), RC); // r19
+    case 20: return std::make_pair(*(RC->begin() + 17), RC); // r20
+    case 21: return std::make_pair(*(RC->begin() + 30), RC); // r21
+    case 22: return std::make_pair(*(RC->begin() + 31), RC); // r22
+    case 23: return std::make_pair(*(RC->begin() + 18), RC); // r23
+    case 24: return std::make_pair(*(RC->begin() + 19), RC); // r24
+    case 25: return std::make_pair(*(RC->begin() + 20), RC); // r25
+    case 26: return std::make_pair(*(RC->begin() + 21), RC); // r26
+    case 27: return std::make_pair(*(RC->begin() + 22), RC); // r27
+    case 28: return std::make_pair(*(RC->begin() + 23), RC); // r28
+    case 29: return std::make_pair(*(RC->begin() + 24), RC); // r29
+    case 30: return std::make_pair(*(RC->begin() + 25), RC); // r30
+    case 31: return std::make_pair(*(RC->begin() + 26), RC); // r31
+    }
+  }
+  return std::make_pair(*(RC->begin() + Reg), RC);
+}
+
+/// Given a register class constraint, like 'r', if this corresponds directly
+/// to an LLVM register class, return a register of 0 and the register class
+/// pointer.
+std::pair<unsigned, const TargetRegisterClass *>
+LoongArchTargetLowering::getRegForInlineAsmConstraint(
+    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
+  if (Constraint.size() == 1) {
+    switch (Constraint[0]) {
+    case 'r':
+      return std::make_pair(0U, getRegisterClassForVT(VT, Subtarget.is64Bit()));
+    case 'f': // FPU or LSX register
+      if (VT == MVT::v16i8)
+        return std::make_pair(0U, &LoongArch::LSX128BRegClass);
+      else if (VT == MVT::v8i16)
+        return std::make_pair(0U, &LoongArch::LSX128HRegClass);
+      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
+        return std::make_pair(0U, &LoongArch::LSX128WRegClass);
+      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
+        return std::make_pair(0U, &LoongArch::LSX128DRegClass);
+      else if (VT == MVT::v32i8)
+        return std::make_pair(0U, &LoongArch::LASX256BRegClass);
+      else if (VT == MVT::v16i16)
+        return std::make_pair(0U, &LoongArch::LASX256HRegClass);
+      else if (VT == MVT::v8i32 || VT == MVT::v8f32)
+        return std::make_pair(0U, &LoongArch::LASX256WRegClass);
+      else if (VT == MVT::v4i64 || VT == MVT::v4f64)
+        return std::make_pair(0U, &LoongArch::LASX256DRegClass);
+      else if (VT == MVT::f32)
+        return std::make_pair(0U, &LoongArch::FGR32RegClass);
+      else if (VT == MVT::f64)
+        return std::make_pair(0U, &LoongArch::FGR64RegClass);
+      break;
+    }
+  }
+
+  std::pair<unsigned, const TargetRegisterClass *> R;
+  R = parseRegForInlineAsmConstraint(Constraint, VT);
+
+  if (R.second)
+    return R;
+
+  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
+}
+
+/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+/// vector. If it is invalid, don't add anything to Ops.
+void LoongArchTargetLowering::LowerAsmOperandForConstraint(
+    SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
+    SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  SDValue Result;
+
+  // Only support length 1 constraints for now.
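+  // For instance (illustrative), "I"(12) on an addi.w operand becomes a
+  // TargetConstant(12) below, while an out-of-range value falls through so
+  // the parent routine can report the error.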
+  if (Constraint.length() > 1)
+    return;
+
+  char ConstraintLetter = Constraint[0];
+  switch (ConstraintLetter) {
+  default:
+    break; // This will fall through to the generic implementation.
+  case 'l': // Signed 16 bit constant
+    // If this fails, the parent routine will give an error.
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+      EVT Type = Op.getValueType();
+      int64_t Val = C->getSExtValue();
+      if (isInt<16>(Val)) {
+        Result = DAG.getTargetConstant(Val, DL, Type);
+        break;
+      }
+    }
+    return;
+  case 'I': // Signed 12 bit constant
+    // If this fails, the parent routine will give an error.
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+      EVT Type = Op.getValueType();
+      int64_t Val = C->getSExtValue();
+      if (isInt<12>(Val)) {
+        Result = DAG.getTargetConstant(Val, DL, Type);
+        break;
+      }
+    }
+    return;
+  case 'J': // integer zero
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+      EVT Type = Op.getValueType();
+      int64_t Val = C->getZExtValue();
+      if (Val == 0) {
+        Result = DAG.getTargetConstant(0, DL, Type);
+        break;
+      }
+    }
+    return;
+  case 'G': // floating-point zero
+    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) {
+      if (C->isZero()) {
+        EVT Type = Op.getValueType();
+        Result = DAG.getTargetConstantFP(0, DL, Type);
+        break;
+      }
+    }
+    return;
+  case 'K': // unsigned 12 bit immediate
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+      EVT Type = Op.getValueType();
+      uint64_t Val = (uint64_t)C->getZExtValue();
+      if (isUInt<12>(Val)) {
+        Result = DAG.getTargetConstant(Val, DL, Type);
+        break;
+      }
+    }
+    return;
+  }
+
+  if (Result.getNode()) {
+    Ops.push_back(Result);
+    return;
+  }
+
+  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+}
+
+bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
+                                                    const AddrMode &AM,
+                                                    Type *Ty, unsigned AS,
+                                                    Instruction *I) const {
+  // No global is ever allowed as a base.
+  if (AM.BaseGV)
+    return false;
+
+  switch (AM.Scale) {
+  case 0: // "r+i" or just "i", depending on HasBaseReg.
+    break;
+  case 1:
+    if (!AM.HasBaseReg) // allow "r+i".
+      break;
+    return false; // disallow "r+r" or "r+r+i".
+  default:
+    return false;
+  }
+
+  return true;
+}
+
+bool LoongArchTargetLowering::isOffsetFoldingLegal(
+    const GlobalAddressSDNode *GA) const {
+  // The LoongArch target isn't yet aware of offsets.
+  return false;
+}
+
+EVT LoongArchTargetLowering::getOptimalMemOpType(
+    const MemOp &Op, const AttributeList &FuncAttributes) const {
+  if (!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
+    if (Op.size() >= 16) {
+      if (Op.size() >= 32 && Subtarget.hasLASX()) {
+        return MVT::v32i8;
+      }
+      if (Subtarget.hasLSX())
+        return MVT::v16i8;
+    }
+  }
+
+  if (Subtarget.is64Bit())
+    return MVT::i64;
+
+  return MVT::i32;
+}
+
+/// isFPImmLegal - Returns true if the target can instruction select the
+/// specified FP immediate natively. If false, the legalizer will
+/// materialize the FP immediate as a load from a constant pool.
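+/// For LoongArch that means an f32/f64 immediate equal to +0.0 or +1.0;
+/// everything else (including -0.0) goes to the constant pool.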
+bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
+                                           bool ForCodeSize) const {
+  if (VT != MVT::f32 && VT != MVT::f64)
+    return false;
+  if (Imm.isNegZero())
+    return false;
+  return (Imm.isZero() || Imm.isExactlyValue(+1.0));
+}
+
+bool LoongArchTargetLowering::useSoftFloat() const {
+  return Subtarget.useSoftFloat();
+}
+
+void LoongArchTargetLowering::copyByValRegs(
+    SDValue Chain, const SDLoc &DL, std::vector<SDValue> &OutChains,
+    SelectionDAG &DAG, const ISD::ArgFlagsTy &Flags,
+    SmallVectorImpl<SDValue> &InVals, const Argument *FuncArg,
+    unsigned FirstReg, unsigned LastReg, const CCValAssign &VA,
+    LoongArchCCState &State) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  unsigned GPRSizeInBytes = Subtarget.getGPRSizeInBytes();
+  unsigned NumRegs = LastReg - FirstReg;
+  unsigned RegAreaSize = NumRegs * GPRSizeInBytes;
+  unsigned FrameObjSize = std::max(Flags.getByValSize(), RegAreaSize);
+  int FrameObjOffset;
+  ArrayRef<MCPhysReg> ByValArgRegs = ABI.GetByValArgRegs();
+
+  if (RegAreaSize)
+    FrameObjOffset = -(int)((ByValArgRegs.size() - FirstReg) * GPRSizeInBytes);
+  else
+    FrameObjOffset = VA.getLocMemOffset();
+
+  // Create frame object.
+  EVT PtrTy = getPointerTy(DAG.getDataLayout());
+  // Make the fixed object stored to mutable so that the load instructions
+  // referencing it have their memory dependencies added.
+  // Set the frame object as isAliased which clears the underlying objects
+  // vector in ScheduleDAGInstrs::buildSchedGraph() resulting in addition of all
+  // stores as dependencies for loads referencing this fixed object.
+  int FI = MFI.CreateFixedObject(FrameObjSize, FrameObjOffset, false, true);
+  SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
+  InVals.push_back(FIN);
+
+  if (!NumRegs)
+    return;
+
+  // Copy arg registers.
+  MVT RegTy = MVT::getIntegerVT(GPRSizeInBytes * 8);
+  const TargetRegisterClass *RC = getRegClassFor(RegTy);
+
+  for (unsigned I = 0; I < NumRegs; ++I) {
+    unsigned ArgReg = ByValArgRegs[FirstReg + I];
+    unsigned VReg = addLiveIn(MF, ArgReg, RC);
+    unsigned Offset = I * GPRSizeInBytes;
+    SDValue StorePtr = DAG.getNode(ISD::ADD, DL, PtrTy, FIN,
+                                   DAG.getConstant(Offset, DL, PtrTy));
+    SDValue Store = DAG.getStore(Chain, DL, DAG.getRegister(VReg, RegTy),
+                                 StorePtr, MachinePointerInfo(FuncArg, Offset));
+    OutChains.push_back(Store);
+  }
+}
+
+// Copy byVal arg to registers and stack.
+void LoongArchTargetLowering::passByValArg(
+    SDValue Chain, const SDLoc &DL,
+    std::deque<std::pair<unsigned, SDValue>> &RegsToPass,
+    SmallVectorImpl<SDValue> &MemOpChains, SDValue StackPtr,
+    MachineFrameInfo &MFI, SelectionDAG &DAG, SDValue Arg, unsigned FirstReg,
+    unsigned LastReg, const ISD::ArgFlagsTy &Flags,
+    const CCValAssign &VA) const {
+  unsigned ByValSizeInBytes = Flags.getByValSize();
+  unsigned OffsetInBytes = 0; // From beginning of struct
+  unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes();
+  Align Alignment =
+      std::min(Flags.getNonZeroByValAlign(), Align(RegSizeInBytes));
+  EVT PtrTy = getPointerTy(DAG.getDataLayout()),
+      RegTy = MVT::getIntegerVT(RegSizeInBytes * 8);
+  unsigned NumRegs = LastReg - FirstReg;
+
+  if (NumRegs) {
+    ArrayRef<MCPhysReg> ArgRegs = ABI.GetByValArgRegs();
+    bool LeftoverBytes = (NumRegs * RegSizeInBytes > ByValSizeInBytes);
+    unsigned I = 0;
+
+    // Copy words to registers.
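+    // Each full register-sized word of the struct is loaded and passed in
+    // the next available argument register; when the byval size is not a
+    // multiple of the register size, the final partial word is assembled
+    // from narrower loads and shifts below.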
+    for (; I < NumRegs - LeftoverBytes; ++I, OffsetInBytes += RegSizeInBytes) {
+      SDValue LoadPtr = DAG.getNode(ISD::ADD, DL, PtrTy, Arg,
+                                    DAG.getConstant(OffsetInBytes, DL, PtrTy));
+      SDValue LoadVal = DAG.getLoad(RegTy, DL, Chain, LoadPtr,
+                                    MachinePointerInfo(), Alignment);
+      MemOpChains.push_back(LoadVal.getValue(1));
+      unsigned ArgReg = ArgRegs[FirstReg + I];
+      RegsToPass.push_back(std::make_pair(ArgReg, LoadVal));
+    }
+
+    // Return if the struct has been fully copied.
+    if (ByValSizeInBytes == OffsetInBytes)
+      return;
+
+    // Copy the remainder of the byval argument with sub-word loads and shifts.
+    if (LeftoverBytes) {
+      SDValue Val;
+
+      for (unsigned LoadSizeInBytes = RegSizeInBytes / 2, TotalBytesLoaded = 0;
+           OffsetInBytes < ByValSizeInBytes; LoadSizeInBytes /= 2) {
+        unsigned RemainingSizeInBytes = ByValSizeInBytes - OffsetInBytes;
+
+        if (RemainingSizeInBytes < LoadSizeInBytes)
+          continue;
+
+        // Load subword.
+        SDValue LoadPtr = DAG.getNode(ISD::ADD, DL, PtrTy, Arg,
+                                      DAG.getConstant(OffsetInBytes, DL,
+                                                      PtrTy));
+        SDValue LoadVal = DAG.getExtLoad(
+            ISD::ZEXTLOAD, DL, RegTy, Chain, LoadPtr, MachinePointerInfo(),
+            MVT::getIntegerVT(LoadSizeInBytes * 8), Alignment);
+        MemOpChains.push_back(LoadVal.getValue(1));
+
+        // Shift the loaded value into its position in the final word.
+        unsigned Shamt = TotalBytesLoaded * 8;
+
+        SDValue Shift = DAG.getNode(ISD::SHL, DL, RegTy, LoadVal,
+                                    DAG.getConstant(Shamt, DL, MVT::i32));
+
+        if (Val.getNode())
+          Val = DAG.getNode(ISD::OR, DL, RegTy, Val, Shift);
+        else
+          Val = Shift;
+
+        OffsetInBytes += LoadSizeInBytes;
+        TotalBytesLoaded += LoadSizeInBytes;
+        Alignment = std::min(Alignment, Align(LoadSizeInBytes));
+      }
+
+      unsigned ArgReg = ArgRegs[FirstReg + I];
+      RegsToPass.push_back(std::make_pair(ArgReg, Val));
+      return;
+    }
+  }
+
+  // Copy the remainder of the byval arg to the stack with memcpy.
+  unsigned MemCpySize = ByValSizeInBytes - OffsetInBytes;
+  SDValue Src = DAG.getNode(ISD::ADD, DL, PtrTy, Arg,
+                            DAG.getConstant(OffsetInBytes, DL, PtrTy));
+  SDValue Dst = DAG.getNode(ISD::ADD, DL, PtrTy, StackPtr,
+                            DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
+  Chain = DAG.getMemcpy(
+      Chain, DL, Dst, Src, DAG.getConstant(MemCpySize, DL, PtrTy),
+      Align(Alignment), /*isVolatile=*/false, /*AlwaysInline=*/false,
+      /*isTailCall=*/false, MachinePointerInfo(), MachinePointerInfo());
+  MemOpChains.push_back(Chain);
+}
+
+void LoongArchTargetLowering::writeVarArgRegs(std::vector<SDValue> &OutChains,
+                                              SDValue Chain, const SDLoc &DL,
+                                              SelectionDAG &DAG,
+                                              CCState &State) const {
+  ArrayRef<MCPhysReg> ArgRegs = ABI.GetVarArgRegs();
+  unsigned Idx = State.getFirstUnallocated(ArgRegs);
+  unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes();
+  MVT RegTy = MVT::getIntegerVT(RegSizeInBytes * 8);
+  const TargetRegisterClass *RC = getRegClassFor(RegTy);
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  LoongArchFunctionInfo *LoongArchFI = MF.getInfo<LoongArchFunctionInfo>();
+
+  // Offset of the first variable argument from stack pointer.
+  int VaArgOffset, VarArgsSaveSize;
+
+  if (ArgRegs.size() == Idx) {
+    VaArgOffset = alignTo(State.getNextStackOffset(), RegSizeInBytes);
+    VarArgsSaveSize = 0;
+  } else {
+    VarArgsSaveSize = (int)(RegSizeInBytes * (ArgRegs.size() - Idx));
+    VaArgOffset = -VarArgsSaveSize;
+  }
+
+  // Record the frame index of the first variable argument
+  // which is a value necessary to VASTART.
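+  // e.g. for a variadic function whose fixed arguments only consume $a0,
+  // Idx is 1; assuming the usual eight GAR argument registers, $a1-$a7 are
+  // spilled below and VaArgOffset starts at -7 * RegSizeInBytes.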
+  int FI = MFI.CreateFixedObject(RegSizeInBytes, VaArgOffset, true);
+  LoongArchFI->setVarArgsFrameIndex(FI);
+
+  // If saving an odd number of registers then create an extra stack slot to
+  // ensure that the frame pointer is 2*GRLEN-aligned, which in turn ensures
+  // offsets to even-numbered registers remain 2*GRLEN-aligned.
+  if (Idx % 2) {
+    MFI.CreateFixedObject(RegSizeInBytes, VaArgOffset - (int)RegSizeInBytes,
+                          true);
+    VarArgsSaveSize += RegSizeInBytes;
+  }
+
+  // Copy the integer registers that have not been used for argument passing
+  // to the argument register save area. For ILP32D/ILP32F/ILP32S, the save area
+  // is allocated in the caller's stack frame, while for LP64D/LP64S/LP64F, it
+  // is allocated in the callee's stack frame.
+  for (unsigned I = Idx; I < ArgRegs.size();
+       ++I, VaArgOffset += RegSizeInBytes) {
+    unsigned Reg = addLiveIn(MF, ArgRegs[I], RC);
+    SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegTy);
+    FI = MFI.CreateFixedObject(RegSizeInBytes, VaArgOffset, true);
+    SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
+    SDValue Store =
+        DAG.getStore(Chain, DL, ArgValue, PtrOff, MachinePointerInfo());
+    cast<StoreSDNode>(Store.getNode())->getMemOperand()->setValue(
+        (Value *)nullptr);
+    OutChains.push_back(Store);
+  }
+  LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
+}
+
+void LoongArchTargetLowering::HandleByVal(CCState *State, unsigned &Size,
+                                          Align Alignment) const {
+  const TargetFrameLowering *TFL = Subtarget.getFrameLowering();
+
+  assert(Size && "Byval argument's size shouldn't be 0.");
+
+  Alignment = std::min(Alignment, TFL->getStackAlign());
+
+  unsigned FirstReg = 0;
+  unsigned NumRegs = 0;
+  unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes();
+  ArrayRef<MCPhysReg> IntArgRegs = ABI.GetByValArgRegs();
+  // FIXME: The ILP32D/ILP32F/ILP32S case actually describes no shadow
+  // registers.
+  const MCPhysReg *ShadowRegs =
+      (ABI.IsILP32D() || ABI.IsILP32F() || ABI.IsILP32S())
+          ? IntArgRegs.data()
+          : LoongArch64DPRegs;
+
+  // We used to check the size as well but we can't do that anymore since
+  // CCState::HandleByVal() rounds up the size after calling this function.
+  assert(Alignment >= Align(RegSizeInBytes) &&
+         "Byval argument's alignment should be a multiple of RegSizeInBytes.");
+
+  FirstReg = State->getFirstUnallocated(IntArgRegs);
+
+  // If Alignment > RegSizeInBytes, the first arg register must be even.
+  // FIXME: This condition happens to do the right thing but it's not the
+  // right way to test it. We want to check that the stack frame offset
+  // of the register is aligned.
+  if ((Alignment > RegSizeInBytes) && (FirstReg % 2)) {
+    State->AllocateReg(IntArgRegs[FirstReg], ShadowRegs[FirstReg]);
+    ++FirstReg;
+  }
+
+  // Mark the registers allocated. (Currently disabled, so NumRegs stays 0
+  // and the byval contents are passed on the stack.)
+  // Size = alignTo(Size, RegSizeInBytes);
+  // for (unsigned I = FirstReg; Size > 0 && (I < IntArgRegs.size());
+  //      Size -= RegSizeInBytes, ++I, ++NumRegs)
+  //   State->AllocateReg(IntArgRegs[I], ShadowRegs[I]);
+
+  State->addInRegsParamInfo(FirstReg, FirstReg + NumRegs);
+}
+
+MachineBasicBlock *LoongArchTargetLowering::emitPseudoSELECT(MachineInstr &MI,
+                                                             MachineBasicBlock *BB,
+                                                             bool isFPCmp,
+                                                             unsigned Opc) const {
+  const TargetInstrInfo *TII =
+    Subtarget.getInstrInfo();
+  DebugLoc DL = MI.getDebugLoc();
+
+  // To "insert" a SELECT instruction, we actually have to insert the
+  // diamond control-flow pattern.
The incoming instruction knows the + // destination vreg to set, the condition code register to branch on, the + // true/false values to select between, and a branch opcode to use. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = ++BB->getIterator(); + + // thisMBB: + // ... + // TrueVal = ... + // setcc r1, r2, r3 + // bNE r1, r0, copy1MBB + // fallthrough --> copy0MBB + MachineBasicBlock *thisMBB = BB; + MachineFunction *F = BB->getParent(); + MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, copy0MBB); + F->insert(It, sinkMBB); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + std::next(MachineBasicBlock::iterator(MI)), BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + + // Next, add the true and fallthrough blocks as its successors. + BB->addSuccessor(copy0MBB); + BB->addSuccessor(sinkMBB); + + if (isFPCmp) { + // bc1[tf] cc, sinkMBB + BuildMI(BB, DL, TII->get(Opc)) + .addReg(MI.getOperand(1).getReg()) + .addMBB(sinkMBB); + } else { + BuildMI(BB, DL, TII->get(Opc)) + .addReg(MI.getOperand(1).getReg()) + .addReg(LoongArch::ZERO) + .addMBB(sinkMBB); + } + + // copy0MBB: + // %FalseValue = ... + // # fallthrough to sinkMBB + BB = copy0MBB; + + // Update machine-CFG edges + BB->addSuccessor(sinkMBB); + + // sinkMBB: + // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] + // ... + BB = sinkMBB; + + BuildMI(*BB, BB->begin(), DL, TII->get(LoongArch::PHI), MI.getOperand(0).getReg()) + .addReg(MI.getOperand(2).getReg()) + .addMBB(thisMBB) + .addReg(MI.getOperand(3).getReg()) + .addMBB(copy0MBB); + + MI.eraseFromParent(); // The pseudo instruction is gone now. + + return BB; +} + +MachineBasicBlock *LoongArchTargetLowering::emitLSXCBranchPseudo( + MachineInstr &MI, MachineBasicBlock *BB, unsigned BranchOp) const { + + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + const TargetRegisterClass *RC = &LoongArch::GPR32RegClass; + DebugLoc DL = MI.getDebugLoc(); + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = std::next(MachineFunction::iterator(BB)); + MachineFunction *F = BB->getParent(); + MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, FBB); + F->insert(It, TBB); + F->insert(It, Sink); + + // Transfer the remainder of BB and its successor edges to Sink. + Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), + BB->end()); + Sink->transferSuccessorsAndUpdatePHIs(BB); + + // Add successors. + BB->addSuccessor(FBB); + BB->addSuccessor(TBB); + FBB->addSuccessor(Sink); + TBB->addSuccessor(Sink); + // Insert the real bnz.b instruction to $BB. + BuildMI(BB, DL, TII->get(BranchOp)) + .addReg(LoongArch::FCC0) + .addReg(MI.getOperand(1).getReg()); + + BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)) + .addReg(LoongArch::FCC0) + .addMBB(TBB); + + // Fill $FBB. + unsigned RD1 = RegInfo.createVirtualRegister(RC); + BuildMI(*FBB, FBB->end(), DL, TII->get(LoongArch::ADDI_W), RD1) + .addReg(LoongArch::ZERO) + .addImm(0); + BuildMI(*FBB, FBB->end(), DL, TII->get(LoongArch::B32)).addMBB(Sink); + + // Fill $TBB. 
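+  // Materialize the "true" result (1) here, mirroring the 0 materialized in
+  // $FBB above.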
+ unsigned RD2 = RegInfo.createVirtualRegister(RC); + BuildMI(*TBB, TBB->end(), DL, TII->get(LoongArch::ADDI_W), RD2) + .addReg(LoongArch::ZERO) + .addImm(1); + + // Insert phi function to $Sink. + BuildMI(*Sink, Sink->begin(), DL, TII->get(LoongArch::PHI), + MI.getOperand(0).getReg()) + .addReg(RD1) + .addMBB(FBB) + .addReg(RD2) + .addMBB(TBB); + + MI.eraseFromParent(); // The pseudo instruction is gone now. + return Sink; +} + +// Emit the COPY_FW pseudo instruction. +// +// copy_fw_pseudo $fd, $vk, n +// => +// vreplvei.w $rt, $vk, $n +// copy $rt, $fd +// +// When n is zero, the equivalent operation can be performed with (potentially) +// zero instructions due to register overlaps. +MachineBasicBlock * +LoongArchTargetLowering::emitCOPY_FW(MachineInstr &MI, + MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + DebugLoc DL = MI.getDebugLoc(); + unsigned Fd = MI.getOperand(0).getReg(); + unsigned Vk = MI.getOperand(1).getReg(); + unsigned Lane = MI.getOperand(2).getImm(); + + if (Lane == 0) { + unsigned Vj = Vk; + BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Fd) + .addReg(Vj, 0, LoongArch::sub_lo); + } else { + unsigned Vj = RegInfo.createVirtualRegister(&LoongArch::LSX128WRegClass); + BuildMI(*BB, MI, DL, TII->get(LoongArch::VREPLVEI_W), Vj) + .addReg(Vk) + .addImm(Lane); + BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Fd) + .addReg(Vj, 0, LoongArch::sub_lo); + } + + MI.eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +// Emit the COPY_FD pseudo instruction. +// +// copy_fd_pseudo $fd, $vj, n +// => +// vreplvei.d $vd, $vj, $n +// copy $fd, $vd:sub_64 +// +// When n is zero, the equivalent operation can be performed with (potentially) +// zero instructions due to register overlaps. +MachineBasicBlock * +LoongArchTargetLowering::emitCOPY_FD(MachineInstr &MI, + MachineBasicBlock *BB) const { + assert(Subtarget.hasBasicD()); + + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + unsigned Fd = MI.getOperand(0).getReg(); + unsigned Vk = MI.getOperand(1).getReg(); + unsigned Lane = MI.getOperand(2).getImm(); + DebugLoc DL = MI.getDebugLoc(); + + if (Lane == 0) + BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Fd) + .addReg(Vk, 0, LoongArch::sub_64); + else { + unsigned Vj = RegInfo.createVirtualRegister(&LoongArch::LSX128DRegClass); + assert(Lane == 1); + + BuildMI(*BB, MI, DL, TII->get(LoongArch::VREPLVEI_D), Vj) + .addReg(Vk) + .addImm(Lane); + BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Fd) + .addReg(Vj, 0, LoongArch::sub_64); + } + + MI.eraseFromParent(); // The pseudo instruction is gone now. 
+ return BB; +} + +MachineBasicBlock * +LoongArchTargetLowering::emitXCOPY_FW(MachineInstr &MI, + MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + DebugLoc DL = MI.getDebugLoc(); + unsigned Fd = MI.getOperand(0).getReg(); + unsigned Xk = MI.getOperand(1).getReg(); + unsigned Lane = MI.getOperand(2).getImm(); + unsigned Rj = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); + unsigned Xj = Xk; + + if (Lane == 0) { + BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Fd) + .addReg(Xj, 0, LoongArch::sub_lo); + } else { + BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Rj) + .addReg(Xk) + .addImm(Lane); + BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Fd).addReg(Rj); + } + + MI.eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +MachineBasicBlock * +LoongArchTargetLowering::emitXCOPY_FD(MachineInstr &MI, + MachineBasicBlock *BB) const { + assert(Subtarget.hasBasicD()); + + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + unsigned Fd = MI.getOperand(0).getReg(); + unsigned Xk = MI.getOperand(1).getReg(); + unsigned Lane = MI.getOperand(2).getImm(); + DebugLoc DL = MI.getDebugLoc(); + + unsigned Rj = RegInfo.createVirtualRegister(&LoongArch::GPR64RegClass); + if (Lane == 0) { + BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Fd) + .addReg(Xk, 0, LoongArch::sub_64); + } else { + BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_DU), Rj) + .addReg(Xk) + .addImm(Lane); + BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Fd).addReg(Rj); + } + + MI.eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +MachineBasicBlock *LoongArchTargetLowering::emitCONCAT_VECTORS( + MachineInstr &MI, MachineBasicBlock *BB, unsigned Bytes) const { + + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + unsigned Xd = MI.getOperand(0).getReg(); + unsigned SubReg1 = MI.getOperand(1).getReg(); + unsigned SubReg2 = MI.getOperand(2).getReg(); + const TargetRegisterClass *RC = nullptr; + + switch (Bytes) { + default: + llvm_unreachable("Unexpected size"); + case 1: + RC = &LoongArch::LASX256BRegClass; + break; + case 2: + RC = &LoongArch::LASX256HRegClass; + break; + case 4: + RC = &LoongArch::LASX256WRegClass; + break; + case 8: + RC = &LoongArch::LASX256DRegClass; + break; + } + + unsigned X0 = RegInfo.createVirtualRegister(RC); + BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), X0) + .addImm(0) + .addReg(SubReg1) + .addImm(LoongArch::sub_128); + unsigned X1 = RegInfo.createVirtualRegister(RC); + BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), X1) + .addImm(0) + .addReg(SubReg2) + .addImm(LoongArch::sub_128); + BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), Xd) + .addReg(X0) + .addReg(X1) + .addImm(2); + + MI.eraseFromParent(); // The pseudo instruction is gone now. 
+ return BB; +} + +// xcopy_fw_gpr_pseudo $fd, $xs, $rk +// => +// bb: addi.d $rt1, zero, 4 +// bge $lane, $rt1 hbb +// lbb:xvreplve.w $xt1, $xs, $lane +// copy $rf0, $xt1 +// b sink +// hbb: addi.d $rt2, $lane, -4 +// xvpermi.q $xt2 $xs, 1 +// xvreplve.w $xt3, $xt2, $rt2 +// copy $rf1, $xt3 +// sink:phi +MachineBasicBlock * +LoongArchTargetLowering::emitXCOPY_FW_GPR(MachineInstr &MI, + MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + DebugLoc DL = MI.getDebugLoc(); + unsigned Xs = MI.getOperand(1).getReg(); + unsigned Lane = MI.getOperand(2).getReg(); + + const TargetRegisterClass *RC = &LoongArch::GPR64RegClass; + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = std::next(MachineFunction::iterator(BB)); + MachineFunction *F = BB->getParent(); + MachineBasicBlock *HBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *LBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, LBB); + F->insert(It, HBB); + F->insert(It, Sink); + + Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), + BB->end()); + Sink->transferSuccessorsAndUpdatePHIs(BB); + + BB->addSuccessor(LBB); + BB->addSuccessor(HBB); + HBB->addSuccessor(Sink); + LBB->addSuccessor(Sink); + + unsigned Rt1 = RegInfo.createVirtualRegister(RC); + BuildMI(BB, DL, TII->get(LoongArch::ADDI_D), Rt1) + .addReg(LoongArch::ZERO_64) + .addImm(4); + BuildMI(BB, DL, TII->get(LoongArch::BGE)) + .addReg(Lane) + .addReg(Rt1) + .addMBB(HBB); + + unsigned Xt1 = RegInfo.createVirtualRegister(&LoongArch::LASX256WRegClass); + unsigned Rf0 = RegInfo.createVirtualRegister(&LoongArch::FGR32RegClass); + BuildMI(*LBB, LBB->end(), DL, TII->get(LoongArch::XVREPLVE_W_N), Xt1) + .addReg(Xs) + .addReg(Lane); + BuildMI(*LBB, LBB->end(), DL, TII->get(LoongArch::COPY), Rf0) + .addReg(Xt1, 0, LoongArch::sub_lo); + BuildMI(*LBB, LBB->end(), DL, TII->get(LoongArch::B)).addMBB(Sink); + + unsigned Xt2 = RegInfo.createVirtualRegister(&LoongArch::LASX256WRegClass); + unsigned Xt3 = RegInfo.createVirtualRegister(&LoongArch::LASX256WRegClass); + unsigned Rt2 = RegInfo.createVirtualRegister(RC); + unsigned Rf1 = RegInfo.createVirtualRegister(&LoongArch::FGR32RegClass); + BuildMI(*HBB, HBB->end(), DL, TII->get(LoongArch::ADDI_D), Rt2) + .addReg(Lane) + .addImm(-4); + BuildMI(*HBB, HBB->end(), DL, TII->get(LoongArch::XVPERMI_Q), Xt2) + .addReg(Xs) + .addReg(Xs) + .addImm(1); + BuildMI(*HBB, HBB->end(), DL, TII->get(LoongArch::XVREPLVE_W_N), Xt3) + .addReg(Xt2) + .addReg(Rt2); + BuildMI(*HBB, HBB->end(), DL, TII->get(LoongArch::COPY), Rf1) + .addReg(Xt3, 0, LoongArch::sub_lo); + + BuildMI(*Sink, Sink->begin(), DL, TII->get(LoongArch::PHI), + MI.getOperand(0).getReg()) + .addReg(Rf0) + .addMBB(LBB) + .addReg(Rf1) + .addMBB(HBB); + + MI.eraseFromParent(); // The pseudo instruction is gone now. 
+ return Sink; +} + +MachineBasicBlock * +LoongArchTargetLowering::emitXINSERT_BH(MachineInstr &MI, MachineBasicBlock *BB, + unsigned Size) const { + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + DebugLoc DL = MI.getDebugLoc(); + unsigned Xd = MI.getOperand(0).getReg(); + unsigned Xd_in = MI.getOperand(1).getReg(); + unsigned Lane = MI.getOperand(2).getImm(); + unsigned Fs = MI.getOperand(3).getReg(); + const TargetRegisterClass *VecRC = nullptr; + const TargetRegisterClass *SubVecRC = nullptr; + unsigned HalfSize = 0; + unsigned InsertOp = 0; + + if (Size == 1) { + VecRC = &LoongArch::LASX256BRegClass; + SubVecRC = &LoongArch::LSX128BRegClass; + HalfSize = 16; + InsertOp = LoongArch::VINSGR2VR_B; + } else if (Size == 2) { + VecRC = &LoongArch::LASX256HRegClass; + SubVecRC = &LoongArch::LSX128HRegClass; + HalfSize = 8; + InsertOp = LoongArch::VINSGR2VR_H; + } else { + llvm_unreachable("Unexpected type"); + } + + unsigned Xk = Xd_in; + unsigned Imm = Lane; + if (Lane >= HalfSize) { + Xk = RegInfo.createVirtualRegister(VecRC); + BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), Xk) + .addReg(Xd_in) + .addReg(Xd_in) + .addImm(1); + Imm = Lane - HalfSize; + } + + unsigned Xk128 = RegInfo.createVirtualRegister(SubVecRC); + unsigned Xd128 = RegInfo.createVirtualRegister(SubVecRC); + BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Xk128) + .addReg(Xk, 0, LoongArch::sub_128); + BuildMI(*BB, MI, DL, TII->get(InsertOp), Xd128) + .addReg(Xk128) + .addReg(Fs) + .addImm(Imm); + + unsigned Xd256 = Xd; + if (Lane >= HalfSize) { + Xd256 = RegInfo.createVirtualRegister(VecRC); + } + + BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), Xd256) + .addImm(0) + .addReg(Xd128) + .addImm(LoongArch::sub_128); + + if (Lane >= HalfSize) { + BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), Xd) + .addReg(Xd_in) + .addReg(Xd256) + .addImm(2); + } + + MI.eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +MachineBasicBlock * +LoongArchTargetLowering::emitXINSERT_FW(MachineInstr &MI, + MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + DebugLoc DL = MI.getDebugLoc(); + unsigned Xd = MI.getOperand(0).getReg(); + unsigned Xd_in = MI.getOperand(1).getReg(); + unsigned Lane = MI.getOperand(2).getImm(); + unsigned Fs = MI.getOperand(3).getReg(); + unsigned Xj = RegInfo.createVirtualRegister(&LoongArch::LASX256WRegClass); + unsigned Rj = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); + + BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), Xj) + .addImm(0) + .addReg(Fs) + .addImm(LoongArch::sub_lo); + BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Rj) + .addReg(Xj) + .addImm(0); + BuildMI(*BB, MI, DL, TII->get(LoongArch::XVINSGR2VR_W), Xd) + .addReg(Xd_in) + .addReg(Rj) + .addImm(Lane); + + MI.eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +// Emit the INSERT_FW pseudo instruction. 
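+// (LSX single-precision lane insert: the scalar is routed through a GPR with
+// vpickve2gr.w and vinsgr2vr.w, as the expansion below shows.)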
+// +// insert_fw_pseudo $vd, $vd_in, $n, $fs +// => +// subreg_to_reg $vj:sub_lo, $fs +// vpickve2gr.w rj, vj, 0 +// vinsgr2vr.w, vd, rj, lane +MachineBasicBlock * +LoongArchTargetLowering::emitINSERT_FW(MachineInstr &MI, + MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + DebugLoc DL = MI.getDebugLoc(); + unsigned Vd = MI.getOperand(0).getReg(); + unsigned Vd_in = MI.getOperand(1).getReg(); + unsigned Lane = MI.getOperand(2).getImm(); + unsigned Fs = MI.getOperand(3).getReg(); + unsigned Rj = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); + unsigned Vj = RegInfo.createVirtualRegister(&LoongArch::LSX128WRegClass); + + BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), Vj) + .addImm(0) + .addReg(Fs) + .addImm(LoongArch::sub_lo); + BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_W), Rj) + .addReg(Vj) + .addImm(0); + BuildMI(*BB, MI, DL, TII->get(LoongArch::VINSGR2VR_W), Vd) + .addReg(Vd_in) + .addReg(Rj) + .addImm(Lane); + + MI.eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +// Emit the INSERT_FD pseudo instruction. +// insert_fd_pseudo $vd, $fs, n +// => +// subreg_to_reg $vk:sub_64, $fs +// vpickve2gr.d rj, vk, 0 +// vinsgr2vr.d vd, rj, lane +MachineBasicBlock * +LoongArchTargetLowering::emitINSERT_FD(MachineInstr &MI, + MachineBasicBlock *BB) const { + assert(Subtarget.hasBasicD()); + + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + DebugLoc DL = MI.getDebugLoc(); + unsigned Vd = MI.getOperand(0).getReg(); + unsigned Vd_in = MI.getOperand(1).getReg(); + unsigned Lane = MI.getOperand(2).getImm(); + unsigned Fs = MI.getOperand(3).getReg(); + unsigned Vj = RegInfo.createVirtualRegister(&LoongArch::LSX128DRegClass); + unsigned Rj = RegInfo.createVirtualRegister(&LoongArch::GPR64RegClass); + + BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), Vj) + .addImm(0) + .addReg(Fs) + .addImm(LoongArch::sub_64); + BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_D), Rj) + .addReg(Vj) + .addImm(0); + BuildMI(*BB, MI, DL, TII->get(LoongArch::VINSGR2VR_D), Vd) + .addReg(Vd_in) + .addReg(Rj) + .addImm(Lane); + + MI.eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +MachineBasicBlock * +LoongArchTargetLowering::emitXINSERT_FD(MachineInstr &MI, + MachineBasicBlock *BB) const { + assert(Subtarget.hasBasicD()); + + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + DebugLoc DL = MI.getDebugLoc(); + unsigned Xd = MI.getOperand(0).getReg(); + unsigned Xd_in = MI.getOperand(1).getReg(); + unsigned Lane = MI.getOperand(2).getImm(); + unsigned Fs = MI.getOperand(3).getReg(); + unsigned Xj = RegInfo.createVirtualRegister(&LoongArch::LASX256DRegClass); + + BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), Xj) + .addImm(0) + .addReg(Fs) + .addImm(LoongArch::sub_64); + BuildMI(*BB, MI, DL, TII->get(LoongArch::XVINSVE0_D), Xd) + .addReg(Xd_in) + .addReg(Xj) + .addImm(Lane); + + MI.eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +// Emit the FILL_FW pseudo instruction. 
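+// (Splats a single-precision scalar into every lane of an LSX register.)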
+// +// fill_fw_pseudo $vd, $fs +// => +// implicit_def $vt1 +// insert_subreg $vt2:subreg_lo, $vt1, $fs +// vreplvei.w vd, vt2, 0 +MachineBasicBlock * +LoongArchTargetLowering::emitFILL_FW(MachineInstr &MI, + MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + DebugLoc DL = MI.getDebugLoc(); + unsigned Vd = MI.getOperand(0).getReg(); + unsigned Fs = MI.getOperand(1).getReg(); + unsigned Vj1 = RegInfo.createVirtualRegister(&LoongArch::LSX128WRegClass); + unsigned Vj2 = RegInfo.createVirtualRegister(&LoongArch::LSX128WRegClass); + + BuildMI(*BB, MI, DL, TII->get(LoongArch::IMPLICIT_DEF), Vj1); + BuildMI(*BB, MI, DL, TII->get(LoongArch::INSERT_SUBREG), Vj2) + .addReg(Vj1) + .addReg(Fs) + .addImm(LoongArch::sub_lo); + BuildMI(*BB, MI, DL, TII->get(LoongArch::VREPLVEI_W), Vd) + .addReg(Vj2) + .addImm(0); + + MI.eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +// Emit the FILL_FD pseudo instruction. +// +// fill_fd_pseudo $vd, $fs +// => +// implicit_def $vt1 +// insert_subreg $vt2:subreg_64, $vt1, $fs +// vreplvei.d vd, vt2, 0 +MachineBasicBlock * +LoongArchTargetLowering::emitFILL_FD(MachineInstr &MI, + MachineBasicBlock *BB) const { + assert(Subtarget.hasBasicD()); + + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + DebugLoc DL = MI.getDebugLoc(); + unsigned Vd = MI.getOperand(0).getReg(); + unsigned Fs = MI.getOperand(1).getReg(); + unsigned Vj1 = RegInfo.createVirtualRegister(&LoongArch::LSX128DRegClass); + unsigned Vj2 = RegInfo.createVirtualRegister(&LoongArch::LSX128DRegClass); + + BuildMI(*BB, MI, DL, TII->get(LoongArch::IMPLICIT_DEF), Vj1); + BuildMI(*BB, MI, DL, TII->get(LoongArch::INSERT_SUBREG), Vj2) + .addReg(Vj1) + .addReg(Fs) + .addImm(LoongArch::sub_64); + BuildMI(*BB, MI, DL, TII->get(LoongArch::VREPLVEI_D), Vd) + .addReg(Vj2) + .addImm(0); + + MI.eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +// Emit the XFILL_FW pseudo instruction. +// +// xfill_fw_pseudo $xd, $fs +// => +// implicit_def $xt1 +// insert_subreg $xt2:subreg_lo, $xt1, $fs +// xvreplve0.w xd, xt2, 0 +MachineBasicBlock * +LoongArchTargetLowering::emitXFILL_FW(MachineInstr &MI, + MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + DebugLoc DL = MI.getDebugLoc(); + unsigned Xd = MI.getOperand(0).getReg(); + unsigned Fs = MI.getOperand(1).getReg(); + unsigned Xj1 = RegInfo.createVirtualRegister(&LoongArch::LASX256WRegClass); + unsigned Xj2 = RegInfo.createVirtualRegister(&LoongArch::LASX256WRegClass); + + BuildMI(*BB, MI, DL, TII->get(LoongArch::IMPLICIT_DEF), Xj1); + BuildMI(*BB, MI, DL, TII->get(LoongArch::INSERT_SUBREG), Xj2) + .addReg(Xj1) + .addReg(Fs) + .addImm(LoongArch::sub_lo); + BuildMI(*BB, MI, DL, TII->get(LoongArch::XVREPLVE0_W), Xd).addReg(Xj2); + + MI.eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +// Emit the XFILL_FD pseudo instruction. 
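+// (The LASX double-precision counterpart of XFILL_FW above.)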
+// +// xfill_fd_pseudo $xd, $fs +// => +// implicit_def $xt1 +// insert_subreg $xt2:subreg_64, $xt1, $fs +// xvreplve0.d xd, xt2, 0 +MachineBasicBlock * +LoongArchTargetLowering::emitXFILL_FD(MachineInstr &MI, + MachineBasicBlock *BB) const { + assert(Subtarget.hasBasicD()); + + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + DebugLoc DL = MI.getDebugLoc(); + unsigned Xd = MI.getOperand(0).getReg(); + unsigned Fs = MI.getOperand(1).getReg(); + unsigned Xj1 = RegInfo.createVirtualRegister(&LoongArch::LASX256DRegClass); + unsigned Xj2 = RegInfo.createVirtualRegister(&LoongArch::LASX256DRegClass); + + BuildMI(*BB, MI, DL, TII->get(LoongArch::IMPLICIT_DEF), Xj1); + BuildMI(*BB, MI, DL, TII->get(LoongArch::INSERT_SUBREG), Xj2) + .addReg(Xj1) + .addReg(Fs) + .addImm(LoongArch::sub_64); + BuildMI(*BB, MI, DL, TII->get(LoongArch::XVREPLVE0_D), Xd).addReg(Xj2); + + MI.eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const { + bool IsLegal = false; + if (Subtarget.hasLSX() || Subtarget.hasLASX()) { + return isUInt<5>(Imm); + } + return IsLegal; +} + +bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd( + const MachineFunction &MF, EVT VT) const { + + VT = VT.getScalarType(); + + if (!VT.isSimple()) + return false; + + switch (VT.getSimpleVT().SimpleTy) { + case MVT::f32: + case MVT::f64: + return true; + default: + break; + } + + return false; +} + +bool LoongArchTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, + unsigned Index) const { + if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT)) + return false; + + return ( + (ResVT != MVT::v16i8) && (ResVT != MVT::v8i16) && + (Index == 0 || (Index == ResVT.getVectorNumElements() && + (ResVT.getSizeInBits() == SrcVT.getSizeInBits() / 2)))); +} + +Register +LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT, + const MachineFunction &MF) const { + // Named registers is expected to be fairly rare. For now, just support $r2 + // and $r21 since the linux kernel uses them. + if (Subtarget.is64Bit()) { + Register Reg = StringSwitch(RegName) + .Case("$r2", LoongArch::TP_64) + .Case("$r21", LoongArch::T9_64) + .Default(Register()); + if (Reg) + return Reg; + } else { + Register Reg = StringSwitch(RegName) + .Case("$r2", LoongArch::TP) + .Case("$r21", LoongArch::T9) + .Default(Register()); + if (Reg) + return Reg; + } + report_fatal_error("Invalid register name global variable"); +} diff --git a/lib/Target/LoongArch/LoongArchISelLowering.h b/lib/Target/LoongArch/LoongArchISelLowering.h new file mode 100644 index 00000000..0f94e9f4 --- /dev/null +++ b/lib/Target/LoongArch/LoongArchISelLowering.h @@ -0,0 +1,557 @@ +//===- LoongArchISelLowering.h - LoongArch DAG Lowering Interface ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that LoongArch uses to lower LLVM code into a +// selection DAG. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHISELLOWERING_H
+#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHISELLOWERING_H
+
+#include "MCTargetDesc/LoongArchABIInfo.h"
+#include "MCTargetDesc/LoongArchBaseInfo.h"
+#include "MCTargetDesc/LoongArchMCTargetDesc.h"
+#include "LoongArch.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/MachineValueType.h"
+#include "llvm/Target/TargetMachine.h"
+#include <algorithm>
+#include <cctype>
+#include <deque>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace llvm {
+
+class Argument;
+class CCState;
+class CCValAssign;
+class FastISel;
+class FunctionLoweringInfo;
+class MachineBasicBlock;
+class MachineFrameInfo;
+class MachineInstr;
+class LoongArchCCState;
+class LoongArchFunctionInfo;
+class LoongArchSubtarget;
+class LoongArchTargetMachine;
+class SelectionDAG;
+class TargetLibraryInfo;
+class TargetRegisterClass;
+
+  namespace LoongArchISD {
+
+    enum NodeType : unsigned {
+      // Start the numbering from where ISD NodeType finishes.
+      FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+      // Jump and link (call)
+      JmpLink,
+
+      // Tail call
+      TailCall,
+
+      // global address
+      GlobalAddress,
+
+      // Floating Point Branch Conditional
+      FPBrcond,
+
+      // Floating Point Compare
+      FPCmp,
+
+      // Floating Point Conditional Moves
+      CMovFP_T,
+      CMovFP_F,
+      FSEL,
+
+      // FP-to-int truncation node.
+      TruncIntFP,
+
+      // Return
+      Ret,
+
+      // error trap Return
+      ERet,
+
+      // Software Exception Return.
+      EH_RETURN,
+
+      DBAR,
+
+      BSTRPICK,
+      BSTRINS,
+
+      // Vector comparisons.
+      // These take a vector and return a boolean.
+      VALL_ZERO,
+      VANY_ZERO,
+      VALL_NONZERO,
+      VANY_NONZERO,
+
+      // Vector Shuffle with mask as an operand
+      VSHF,    // Generic shuffle
+      SHF,     // 4-element set shuffle.
+      VPACKEV, // Interleave even elements
+      VPACKOD, // Interleave odd elements
+      VILVH,   // Interleave left elements
+      VILVL,   // Interleave right elements
+      VPICKEV, // Pack even elements
+      VPICKOD, // Pack odd elements
+
+      // Vector Lane Copy
+      INSVE, // Copy element from one vector to another
+
+      // Combined (XOR (OR $a, $b), -1)
+      VNOR,
+
+      VROR,
+      VRORI,
+      XVPICKVE,
+      XVPERMI,
+      XVSHUF4I,
+      REVBD,
+
+      // Extended vector element extraction
+      VEXTRACT_SEXT_ELT,
+      VEXTRACT_ZEXT_ELT,
+
+      XVBROADCAST,
+      VBROADCAST,
+      VABSD,
+      UVABSD,
+    };
+
+  } // end namespace LoongArchISD
+
+  //===--------------------------------------------------------------------===//
+  // TargetLowering Implementation
+  //===--------------------------------------------------------------------===//
+
+  class LoongArchTargetLowering : public TargetLowering {
+  public:
+    explicit LoongArchTargetLowering(const LoongArchTargetMachine &TM,
+                                     const LoongArchSubtarget &STI);
+
+    bool allowsMisalignedMemoryAccesses(
+        EVT VT, unsigned AS = 0, unsigned Alignment = 1,
+        MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
+        bool *Fast = nullptr) const override;
+
+    /// Enable LSX support for the given integer type and Register
+    /// class.
+    void addLSXIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC);
+
+    /// Enable LSX support for the given floating-point type and
+    /// Register class.
+    void addLSXFloatType(MVT::SimpleValueType Ty,
+                         const TargetRegisterClass *RC);
+
+    /// Enable LASX support for the given integer type and Register
+    /// class.
+    void addLASXIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC);
+
+    /// Enable LASX support for the given floating-point type and
+    /// Register class.
+    void addLASXFloatType(MVT::SimpleValueType Ty,
+                          const TargetRegisterClass *RC);
+
+    MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
+      return MVT::i32;
+    }
+
+    EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
+                            ISD::NodeType) const override;
+
+    bool isCheapToSpeculateCttz() const override;
+    bool isCheapToSpeculateCtlz() const override;
+
+    bool isLegalAddImmediate(int64_t) const override;
+
+    /// Return the correct alignment for the current calling convention.
+    Align getABIAlignmentForCallingConv(Type *ArgTy,
+                                        DataLayout DL) const override {
+      Align ABIAlign = DL.getABITypeAlign(ArgTy);
+      if (ArgTy->isVectorTy())
+        return std::min(ABIAlign, Align(8));
+      return ABIAlign;
+    }
+
+    ISD::NodeType getExtendForAtomicOps() const override {
+      return ISD::SIGN_EXTEND;
+    }
+
+    bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
+                                 unsigned Index) const override;
+
+    void LowerOperationWrapper(SDNode *N,
+                               SmallVectorImpl<SDValue> &Results,
+                               SelectionDAG &DAG) const override;
+
+    /// LowerOperation - Provide custom lowering hooks for some operations.
+    SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+
+    bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+                                    EVT VT) const override;
+
+    /// ReplaceNodeResults - Replace the results of node with an illegal result
+    /// type with new values built out of custom code.
+    ///
+    void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
+                            SelectionDAG &DAG) const override;
+
+    /// getTargetNodeName - This method returns the name of a target specific
+    /// DAG node.
+    const char *getTargetNodeName(unsigned Opcode) const override;
+
+    /// getSetCCResultType - get the ISD::SETCC result ValueType
+    EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
+                           EVT VT) const override;
+
+    SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
+
+    MachineBasicBlock *
+    EmitInstrWithCustomInserter(MachineInstr &MI,
+                                MachineBasicBlock *MBB) const override;
+
+    bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override {
+      return false;
+    }
+
+    const TargetRegisterClass *getRepRegClassFor(MVT VT) const override;
+
+    void AdjustInstrPostInstrSelection(MachineInstr &MI,
+                                       SDNode *Node) const override;
+
+    void HandleByVal(CCState *, unsigned &, Align) const override;
+
+    Register getRegisterByName(const char *RegName, LLT VT,
+                               const MachineFunction &MF) const override;
+
+    /// If a physical register, this returns the register that receives the
+    /// exception address on entry to an EH pad.
+    Register
+    getExceptionPointerRegister(const Constant *PersonalityFn) const override {
+      return ABI.IsLP64() ? LoongArch::A0_64 : LoongArch::A0;
+    }
+
+    /// If a physical register, this returns the register that receives the
+    /// exception typeid on entry to a landing pad.
+    Register
+    getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
+      return ABI.IsLP64() ? LoongArch::A1_64 : LoongArch::A1;
+    }
+
+    bool isJumpTableRelative() const override {
+      return getTargetMachine().isPositionIndependent();
+    }
+
+    CCAssignFn *CCAssignFnForCall() const;
+
+    CCAssignFn *CCAssignFnForReturn() const;
+
+  private:
+    template <class NodeTy>
+    SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true) const;
+
+    /// This function fills Ops, which is the list of operands that will later
+    /// be used when a function call node is created. It also generates
+    /// copyToReg nodes to set up argument registers.
+    void getOpndList(SmallVectorImpl<SDValue> &Ops,
+                     std::deque<std::pair<unsigned, SDValue>> &RegsToPass,
+                     bool IsPICCall, bool GlobalOrExternal, bool IsCallReloc,
+                     CallLoweringInfo &CLI, SDValue Callee, SDValue Chain,
+                     bool IsTailCall) const;
+
+    SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+
+    // Subtarget Info
+    const LoongArchSubtarget &Subtarget;
+    // Cache the ABI from the TargetMachine, we use it everywhere.
+    const LoongArchABIInfo &ABI;
+
+    // Create a TargetGlobalAddress node.
+    SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
+                          unsigned Flag) const;
+
+    // Create a TargetExternalSymbol node.
+    SDValue getTargetNode(ExternalSymbolSDNode *N, EVT Ty, SelectionDAG &DAG,
+                          unsigned Flag) const;
+
+    // Create a TargetBlockAddress node.
+    SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
+                          unsigned Flag) const;
+
+    // Create a TargetJumpTable node.
+    SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
+                          unsigned Flag) const;
+
+    // Create a TargetConstantPool node.
+    SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
+                          unsigned Flag) const;
+
+    // Lower Operand helpers
+    SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+                            CallingConv::ID CallConv, bool isVarArg,
+                            const SmallVectorImpl<ISD::InputArg> &Ins,
+                            const SDLoc &dl, SelectionDAG &DAG,
+                            SmallVectorImpl<SDValue> &InVals,
+                            TargetLowering::CallLoweringInfo &CLI) const;
+
+    // Lower Operand specifics
+    SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
+    /// Lower VECTOR_SHUFFLE into one of a number of instructions
+    /// depending on the indices in the shuffle.
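+    /// The result is typically one of the LoongArchISD shuffle nodes declared
+    /// above (VSHF, SHF, VPACKEV/VPACKOD, VILVH/VILVL, VPICKEV/VPICKOD).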
+    SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerVAARG(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerFABS(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
+                                 bool IsSRA) const;
+    SDValue lowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const;
+
+    /// isEligibleForTailCallOptimization - Check whether the call is eligible
+    /// for tail call optimization.
+    bool
+    isEligibleForTailCallOptimization(const CCState &CCInfo,
+                                      CallLoweringInfo &CLI, MachineFunction &MF,
+                                      unsigned NextStackOffset,
+                                      const LoongArchFunctionInfo &FI) const;
+
+    /// copyByValRegs - Copy argument registers which were used to pass a byval
+    /// argument to the stack. Create a stack frame object for the byval
+    /// argument.
+    void copyByValRegs(SDValue Chain, const SDLoc &DL,
+                       std::vector<SDValue> &OutChains, SelectionDAG &DAG,
+                       const ISD::ArgFlagsTy &Flags,
+                       SmallVectorImpl<SDValue> &InVals,
+                       const Argument *FuncArg, unsigned FirstReg,
+                       unsigned LastReg, const CCValAssign &VA,
+                       LoongArchCCState &State) const;
+
+    /// passByValArg - Pass a byval argument in registers or on stack.
+    void passByValArg(SDValue Chain, const SDLoc &DL,
+                      std::deque<std::pair<unsigned, SDValue>> &RegsToPass,
+                      SmallVectorImpl<SDValue> &MemOpChains, SDValue StackPtr,
+                      MachineFrameInfo &MFI, SelectionDAG &DAG, SDValue Arg,
+                      unsigned FirstReg, unsigned LastReg,
+                      const ISD::ArgFlagsTy &Flags,
+                      const CCValAssign &VA) const;
+
+    /// writeVarArgRegs - Write variable function arguments passed in registers
+    /// to the stack. Also create a stack frame object for the first variable
+    /// argument.
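+    /// (Called while lowering the formal arguments of a variadic function;
+    /// see the definition in LoongArchISelLowering.cpp for the save-area
+    /// layout.)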
+    void writeVarArgRegs(std::vector<SDValue> &OutChains, SDValue Chain,
+                         const SDLoc &DL, SelectionDAG &DAG,
+                         CCState &State) const;
+
+    SDValue
+    LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+                         const SmallVectorImpl<ISD::InputArg> &Ins,
+                         const SDLoc &dl, SelectionDAG &DAG,
+                         SmallVectorImpl<SDValue> &InVals) const override;
+
+    SDValue passArgOnStack(SDValue StackPtr, unsigned Offset, SDValue Chain,
+                           SDValue Arg, const SDLoc &DL, bool IsTailCall,
+                           SelectionDAG &DAG) const;
+
+    SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
+                      SmallVectorImpl<SDValue> &InVals) const override;
+
+    bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+                        bool isVarArg,
+                        const SmallVectorImpl<ISD::OutputArg> &Outs,
+                        LLVMContext &Context) const override;
+
+    SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+                        const SmallVectorImpl<ISD::OutputArg> &Outs,
+                        const SmallVectorImpl<SDValue> &OutVals,
+                        const SDLoc &dl, SelectionDAG &DAG) const override;
+
+    bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override;
+
+    // Inline asm support
+    ConstraintType getConstraintType(StringRef Constraint) const override;
+
+    /// Examine constraint string and operand type and determine a weight value.
+    /// The operand object must already have been set up with the operand type.
+    ConstraintWeight getSingleConstraintMatchWeight(
+        AsmOperandInfo &info, const char *constraint) const override;
+
+    /// This function parses registers that appear in inline-asm constraints.
+    /// It returns pair (0, 0) on failure.
+    std::pair<unsigned, const TargetRegisterClass *>
+    parseRegForInlineAsmConstraint(StringRef C, MVT VT) const;
+
+    std::pair<unsigned, const TargetRegisterClass *>
+    getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+                                 StringRef Constraint, MVT VT) const override;
+
+    /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+    /// vector. If it is invalid, don't add anything to Ops. If hasMemory is
+    /// true it means one of the asm constraint of the inline asm instruction
+    /// being processed is 'm'.
+    void LowerAsmOperandForConstraint(SDValue Op,
+                                      std::string &Constraint,
+                                      std::vector<SDValue> &Ops,
+                                      SelectionDAG &DAG) const override;
+
+    unsigned
+    getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
+      if (ConstraintCode == "R")
+        return InlineAsm::Constraint_R;
+      else if (ConstraintCode == "ZC")
+        return InlineAsm::Constraint_ZC;
+      else if (ConstraintCode == "ZB")
+        return InlineAsm::Constraint_ZB;
+      return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
+    }
+
+    bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
+                               Type *Ty, unsigned AS,
+                               Instruction *I = nullptr) const override;
+
+    bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
+
+    EVT getOptimalMemOpType(const MemOp &Op,
+                            const AttributeList &FuncAttributes) const override;
+
+    /// isFPImmLegal - Returns true if the target can instruction select the
+    /// specified FP immediate natively. If false, the legalizer will
+    /// materialize the FP immediate as a load from a constant pool.
+    bool isFPImmLegal(const APFloat &Imm, EVT VT,
+                      bool ForCodeSize) const override;
+
+    bool useSoftFloat() const override;
+
+    bool shouldInsertFencesForAtomic(const Instruction *I) const override {
+      return isa<LoadInst>(I) || isa<StoreInst>(I);
+    }
+
+    /// Emit a sign-extension using sll/sra, ext.w.b, or ext.w.h appropriately.
+ MachineBasicBlock *emitSignExtendToI32InReg(MachineInstr &MI, + MachineBasicBlock *BB, + unsigned Size, unsigned DstReg, + unsigned SrcRec) const; + + MachineBasicBlock *emitLoadAddress(MachineInstr &MI, + MachineBasicBlock *BB) const; + MachineBasicBlock *emitAtomicBinary(MachineInstr &MI, + MachineBasicBlock *BB) const; + MachineBasicBlock *emitAtomicBinaryPartword(MachineInstr &MI, + MachineBasicBlock *BB, + unsigned Size) const; + + MachineBasicBlock *emitXINSERT_B(MachineInstr &MI, + MachineBasicBlock *BB) const; + MachineBasicBlock *emitINSERT_H_VIDX(MachineInstr &MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *emitAtomicCmpSwap(MachineInstr &MI, + MachineBasicBlock *BB) const; + MachineBasicBlock *emitAtomicCmpSwapPartword(MachineInstr &MI, + MachineBasicBlock *BB, + unsigned Size) const; + MachineBasicBlock *emitSEL_D(MachineInstr &MI, MachineBasicBlock *BB) const; + + MachineBasicBlock *emitPseudoSELECT(MachineInstr &MI, MachineBasicBlock *BB, + bool isFPCmp, unsigned Opc) const; + + /// SE + MachineBasicBlock *emitLSXCBranchPseudo(MachineInstr &MI, + MachineBasicBlock *BB, + unsigned BranchOp) const; + /// Emit the COPY_FW pseudo instruction + MachineBasicBlock *emitCOPY_FW(MachineInstr &MI, + MachineBasicBlock *BB) const; + /// Emit the COPY_FD pseudo instruction + MachineBasicBlock *emitCOPY_FD(MachineInstr &MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *emitXCOPY_FW(MachineInstr &MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *emitXCOPY_FD(MachineInstr &MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *emitCONCAT_VECTORS(MachineInstr &MI, + MachineBasicBlock *BB, + unsigned Bytes) const; + + MachineBasicBlock *emitXCOPY_FW_GPR(MachineInstr &MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *emitXINSERT_BH(MachineInstr &MI, MachineBasicBlock *BB, + unsigned EltSizeInBytes) const; + + MachineBasicBlock *emitXINSERT_FW(MachineInstr &MI, + MachineBasicBlock *BB) const; + + /// Emit the INSERT_FW pseudo instruction + MachineBasicBlock *emitINSERT_FW(MachineInstr &MI, + MachineBasicBlock *BB) const; + /// Emit the INSERT_FD pseudo instruction + MachineBasicBlock *emitINSERT_FD(MachineInstr &MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *emitXINSERT_FD(MachineInstr &MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *emitXINSERT_DF_VIDX(MachineInstr &MI, + MachineBasicBlock *BB, + bool IsGPR64) const; + /// Emit the FILL_FW pseudo instruction + MachineBasicBlock *emitFILL_FW(MachineInstr &MI, + MachineBasicBlock *BB) const; + /// Emit the FILL_FD pseudo instruction + MachineBasicBlock *emitFILL_FD(MachineInstr &MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *emitXFILL_FW(MachineInstr &MI, + MachineBasicBlock *BB) const; + MachineBasicBlock *emitXFILL_FD(MachineInstr &MI, + MachineBasicBlock *BB) const; + }; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHISELLOWERING_H diff --git a/lib/Target/LoongArch/LoongArchInstrFormats.td b/lib/Target/LoongArch/LoongArchInstrFormats.td new file mode 100644 index 00000000..d75d5198 --- /dev/null +++ b/lib/Target/LoongArch/LoongArchInstrFormats.td @@ -0,0 +1,790 @@ +//===-- LoongArchInstrFormats.td - LoongArch Instruction Formats -----*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Describe LoongArch instructions format +// +// CPU INSTRUCTION FORMATS +// +// opcode - operation code. +// rs - src reg. +// rt - dst reg (on a 2 regs instr) or src reg (on a 3 reg instr). +// rd - dst reg, only used on 3 regs instr. +// shamt - only used on shift instructions, contains the shift amount. +// funct - combined with opcode field give us an operation code. +// +//===----------------------------------------------------------------------===// + +class StdArch { + + bits<32> Inst; +} + +// Format specifies the encoding used by the instruction. This is part of the +// ad-hoc solution used to emit machine instruction encodings by our machine +// code emitter. +class Format val> { + bits<4> Value = val; +} + +def Pseudo : Format<0>; +def FrmR : Format<1>; +def FrmI : Format<2>; +def FrmJ : Format<3>; +def FrmFR : Format<4>; +def FrmFI : Format<5>; +def FrmOther : Format<6>; + +// Generic LoongArch Format +class InstLA pattern, Format f> + : Instruction +{ + field bits<32> Inst; + Format Form = f; + + let Namespace = "LoongArch"; + + let Size = 4; + + let OutOperandList = outs; + let InOperandList = ins; + let AsmString = asmstr; + let Pattern = pattern; + + // + // Attributes specific to LoongArch instructions... + // + bits<4> FormBits = Form.Value; + bit isCTI = 0; // Any form of Control Transfer Instruction. + // Required for LoongArch + bit hasForbiddenSlot = 0; // Instruction has a forbidden slot. + bit IsPCRelativeLoad = 0; // Load instruction with implicit source register + // ($pc) and with explicit offset and destination + // register + bit hasFCCRegOperand = 0; // Instruction uses $fcc register + + // TSFlags layout should be kept in sync with MCTargetDesc/LoongArchBaseInfo.h. 
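+  // Bits 3-0 hold the Format value; bits 4-7 hold the single-bit flags
+  // declared above (isCTI, hasForbiddenSlot, IsPCRelativeLoad,
+  // hasFCCRegOperand).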
+  let TSFlags{3-0} = FormBits;
+  let TSFlags{4}   = isCTI;
+  let TSFlags{5}   = hasForbiddenSlot;
+  let TSFlags{6}   = IsPCRelativeLoad;
+  let TSFlags{7}   = hasFCCRegOperand;
+
+  let DecoderNamespace = "LoongArch";
+
+  field bits<32> SoftFail = 0;
+}
+
+class InstForm<dag outs, dag ins, string asmstr, list<dag> pattern,
+               Format f, string opstr = ""> :
+  InstLA<outs, ins, asmstr, pattern, f> {
+  string BaseOpcode = opstr;
+  string Arch;
+}
+
+class LoongArch_str<string opstr> {
+  string Arch;
+  string BaseOpcode = opstr;
+}
+
+//===-----------------------------------------------------------===//
+// Format instruction classes in the LoongArch
+//===-----------------------------------------------------------===//
+
+// R2 classes: 2 registers
+//
+class R2 : StdArch {
+  bits<5> rj;
+  bits<5> rd;
+
+  let Inst{9-5} = rj;
+  let Inst{4-0} = rd;
+}
+
+class R2I<bits<5> op>
+  : R2 {
+  let Inst{31-15} = 0x0;
+  let Inst{14-10} = op;
+}
+
+class R2F<bits<10> op>
+  : R2 {
+  bits<5> fj;
+  bits<5> fd;
+
+  let Inst{31-20} = 0x11;
+  let Inst{19-10} = op;
+  let Inst{9-5} = fj;
+  let Inst{4-0} = fd;
+}
+
+class MOVFI<bits<10> op>
+  : R2 {
+  bits<5> rj;
+  bits<5> fd;
+
+  let Inst{31-20} = 0x11;
+  let Inst{19-10} = op;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = fd;
+}
+
+class MOVIF<bits<10> op>
+  : R2 {
+  bits<5> fj;
+  bits<5> rd;
+
+  let Inst{31-20} = 0x11;
+  let Inst{19-10} = op;
+  let Inst{9-5} = fj;
+  let Inst{4-0} = rd;
+}
+
+class R2P<bits<3> op>
+  : R2 {
+  let Inst{31-13} = 0x3240;
+  let Inst{12-10} = op;
+}
+
+class R2_CSR<bits<8> op>
+  : StdArch {
+  bits<5> rj;
+  bits<5> rd;
+  bits<14> csr;
+
+  let Inst{31-24} = op;
+  let Inst{23-10} = csr;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = rd;
+}
+
+class R2_SI16<bits<6> op>
+  : StdArch {
+  bits<5> rd;
+  bits<5> rj;
+  bits<16> si16;
+
+  let Inst{31-26} = op;
+  let Inst{25-10} = si16;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = rd;
+}
+
+class R2_COND<bits<2> op, bits<5> cond>
+  : StdArch {
+  bits<5> fj;
+  bits<5> fk;
+  bits<3> cd;
+
+  let Inst{31-22} = 0x30;
+  let Inst{21-20} = op;
+  let Inst{19-15} = cond;
+  let Inst{14-10} = fk;
+  let Inst{9-5} = fj;
+  let Inst{4-3} = 0b00;
+  let Inst{2-0} = cd;
+}
+
+class R2_LEVEL<bits<14> op>
+  : StdArch {
+  bits<5> rj;
+  bits<5> rd;
+  bits<8> level;
+
+  let Inst{31-18} = op;
+  let Inst{17-10} = level;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = rd;
+}
+
+class IMM32<bits<6> op>
+  : StdArch {
+  let Inst{31-16} = 0x0648;
+  let Inst{15-10} = op;
+  let Inst{9-0} = 0;
+}
+
+class WAIT_FM : StdArch {
+  bits<15> hint;
+
+  let Inst{31-15} = 0xc91;
+  let Inst{14-0} = hint;
+}
+
+class R2_INVTLB : StdArch {
+  bits<5> rj;
+  bits<5> op;
+  bits<5> rk;
+
+  let Inst{31-15} = 0xc93;
+  let Inst{14-10} = rk;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = op;
+}
+
+class BAR_FM<bits<1> op>
+  : StdArch {
+  bits<15> hint;
+
+  let Inst{31-16} = 0x3872;
+  let Inst{15} = op;
+  let Inst{14-0} = hint;
+}
+
+class PRELD_FM : StdArch {
+  bits<5> rj;
+  bits<5> hint;
+  bits<12> imm12;
+
+  let Inst{31-22} = 0xab;
+  let Inst{21-10} = imm12;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = hint;
+}
+
+// R3 classes: 3 registers
+//
+class R3 : StdArch {
+  bits<5> rk;
+  bits<5> rj;
+  bits<5> rd;
+
+  let Inst{14-10} = rk;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = rd;
+}
+
+class R3I<bits<7> op>
+  : R3 {
+  let Inst{31-22} = 0x0;
+  let Inst{21-15} = op;
+}
+
+class R3F<bits<6> op>
+  : R3 {
+  bits<5> fk;
+  bits<5> fj;
+  bits<5> fd;
+
+  let Inst{31-21} = 0x8;
+  let Inst{20-15} = op;
+  let Inst{14-10} = fk;
+  let Inst{9-5} = fj;
+  let Inst{4-0} = fd;
+}
+
+class R3MI<bits<8> op>
+  : R3 {
+  let Inst{31-23} = 0x70;
+  let Inst{22-15} = op;
+}
+
+class AM<bits<6> op> : StdArch {
+  bits<5> rk;
+  bits<17> addr; // rj + 12 bits offset 0
+  bits<5> rd;
+
+  let Inst{31-21} = 0x1c3;
+  let Inst{20-15} = op;
+  let Inst{14-10} = rk;
+  let Inst{9-5} = addr{16-12};
+  let Inst{4-0} = rd;
+}
+
+class R3MF<bits<8> op>
+  : R3 {
+  bits<5> fd;
+
+  let Inst{31-23} = 0x70;
+  let Inst{22-15} = op;
+  let Inst{4-0} = fd;
+}
+
+class R3_SA2<bits<5> op>
+  : StdArch {
+  bits<5> rk;
+  bits<5> rj;
+  bits<5> rd;
+  bits<2> sa;
+
+  let Inst{31-22} = 0x0;
+  let Inst{21-17} = op;
+  let Inst{16-15} = sa;
+  let Inst{14-10} = rk;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = rd;
+}
+
+class R3_SA3 : StdArch {
+  bits<5> rk;
+  bits<5> rj;
+  bits<5> rd;
+  bits<3> sa;
+
+  let Inst{31-18} = 3;
+  let Inst{17-15} = sa;
+  let Inst{14-10} = rk;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = rd;
+}
+
+// R4 classes: 4 registers
+//
+class R4MUL<bits<4> op>
+  : StdArch {
+  bits<5> fa;
+  bits<5> fk;
+  bits<5> fj;
+  bits<5> fd;
+
+  let Inst{31-24} = 0x8;
+  let Inst{23-20} = op;
+  let Inst{19-15} = fa;
+  let Inst{14-10} = fk;
+  let Inst{9-5} = fj;
+  let Inst{4-0} = fd;
+}
+
+class R4CMP<bits<2> op>
+  : StdArch {
+  bits<5> cond;
+  bits<5> fk;
+  bits<5> fj;
+  bits<3> cd;
+
+  let Inst{31-22} = 0x30;
+  let Inst{21-20} = op;
+  let Inst{19-15} = cond;
+  let Inst{14-10} = fk;
+  let Inst{9-5} = fj;
+  let Inst{4-3} = 0;
+  let Inst{2-0} = cd;
+}
+
+class R4SEL : StdArch {
+  bits<3> ca;
+  bits<5> fk;
+  bits<5> fj;
+  bits<5> fd;
+
+  let Inst{31-18} = 0x340;
+  let Inst{17-15} = ca;
+  let Inst{14-10} = fk;
+  let Inst{9-5} = fj;
+  let Inst{4-0} = fd;
+}
+
+// R2_IMM5 classes: 2 registers and 1 5-bit immediate
+//
+class R2_IMM5<bits<2> op>
+  : StdArch {
+  bits<5> rj;
+  bits<5> rd;
+  bits<5> imm5;
+
+  let Inst{31-20} = 0x4;
+  let Inst{19-18} = op;
+  let Inst{17-15} = 0x1;
+  let Inst{14-10} = imm5;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = rd;
+}
+
+// R2_IMM6 classes: 2 registers and 1 6-bit immediate
+//
+class R2_IMM6<bits<2> op>
+  : StdArch {
+  bits<5> rj;
+  bits<5> rd;
+  bits<6> imm6;
+
+  let Inst{31-20} = 0x4;
+  let Inst{19-18} = op;
+  let Inst{17-16} = 0x1;
+  let Inst{15-10} = imm6;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = rd;
+}
+
+// R2_IMM12 classes: 2 registers and 1 12-bit immediate
+//
+class LOAD_STORE<bits<4> op>
+  : StdArch {
+  bits<5> rd;
+  bits<17> addr;
+
+  let Inst{31-26} = 0xa;
+  let Inst{25-22} = op;
+  let Inst{21-10} = addr{11-0};
+  let Inst{9-5} = addr{16-12};
+  let Inst{4-0} = rd;
+}
+
+// for reloc
+class LOAD_STORE_RRI<bits<4> op>
+  : StdArch {
+  bits<5> rj;
+  bits<5> rd;
+  bits<12> imm12;
+
+  let Inst{31-26} = 0xa;
+  let Inst{25-22} = op;
+  let Inst{21-10} = imm12;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = rd;
+}
+
+class R2_IMM12<bits<3> op>
+  : StdArch {
+  bits<5> rj;
+  bits<5> rd;
+  bits<12> imm12;
+
+  let Inst{31-25} = 0x1;
+  let Inst{24-22} = op;
+  let Inst{21-10} = imm12;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = rd;
+}
+
+class LEA_ADDI_FM<bits<3> op>
+  : StdArch {
+  bits<5> rd;
+  bits<17> addr;
+
+  let Inst{31-25} = 0x1;
+  let Inst{24-22} = op;
+  let Inst{21-10} = addr{11-0};
+  let Inst{9-5} = addr{16-12};
+  let Inst{4-0} = rd;
+}
+
+// R2_IMM14 classes: 2 registers and 1 14-bit immediate
+//
+class LL_SC<bits<3> op>
+  : StdArch {
+  bits<5> rd;
+  bits<19> addr;
+
+  let Inst{31-27} = 4;
+  let Inst{26-24} = op;
+  let Inst{23-10} = addr{13-0};
+  let Inst{9-5} = addr{18-14};
+  let Inst{4-0} = rd;
+}
+
+// R2_IMM16 classes: 2 registers and 1 16-bit immediate
+//
+class R2_IMM16BEQ<bits<6> op>
+  : StdArch {
+  bits<5> rj;
+  bits<5> rd;
+  bits<16> offs16;
+
+  let Inst{31-26} = op;
+  let Inst{25-10} = offs16;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = rd;
+}
+
+class R2_IMM16JIRL : StdArch {
+  bits<5> rj;
+  bits<5> rd;
+  bits<16> offs16;
+
+  let Inst{31-26} = 0x13;
+  let Inst{25-10} = offs16;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = rd;
+}
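The branch formats above pin every operand to a fixed bit field. As a quick cross-check of the R2_IMM16BEQ layout, here is a minimal standalone C++ sketch (illustrative only, not part of the patch; the opcode value is made up):

#include <cassert>
#include <cstdint>

// Inst{31-26}=op, Inst{25-10}=offs16, Inst{9-5}=rj, Inst{4-0}=rd.
static uint32_t encodeR2Imm16(uint32_t op, uint32_t offs16, uint32_t rj,
                              uint32_t rd) {
  assert(op < 64 && offs16 < 65536 && rj < 32 && rd < 32);
  return (op << 26) | (offs16 << 10) | (rj << 5) | rd;
}

int main() {
  // 0x16 is a hypothetical opcode; a BEQ-style branch comparing r5 and r6.
  assert(encodeR2Imm16(0x16, 0x8, 5, 6) == 0x580020a6u);
}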
+// R1_IMM21 classes: 1 register and 1 21-bit immediate
+//
+class R1_IMM21BEQZ<bits<6> op>
+  : StdArch {
+  bits<5> rj;
+  bits<21> offs21;
+
+  let Inst{31-26} = op;
+  let Inst{25-10} = offs21{15-0};
+  let Inst{9-5} = rj;
+  let Inst{4-0} = offs21{20-16};
+}
+
+class R1_CSR<bits<13> op>
+  : StdArch {
+  bits<5> rd;
+  bits<14> csr;
+
+  let Inst{31-24} = op{7-0};
+  let Inst{23-10} = csr;
+  let Inst{9-5} = op{12-8};
+  let Inst{4-0} = rd;
+}
+
+class R1_SI20<bits<7> op>
+  : StdArch {
+  bits<5> rd;
+  bits<20> si20;
+
+  let Inst{31-25} = op;
+  let Inst{24-5} = si20;
+  let Inst{4-0} = rd;
+}
+
+class R1_CACHE : StdArch {
+  bits<5> rj;
+  bits<5> op;
+  bits<12> si12;
+
+  let Inst{31-22} = 0x18;
+  let Inst{21-10} = si12;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = op;
+}
+
+class R1_SEQ<bits<14> op>
+  : StdArch {
+  bits<5> rj;
+  bits<5> offset;
+  bits<8> seq;
+
+  let Inst{31-18} = op;
+  let Inst{17-10} = seq;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = 0b00000;
+}
+
+class R1_BCEQZ<bits<2> op>
+  : StdArch {
+  bits<21> offset;
+  bits<3> cj;
+
+  let Inst{31-26} = 0x12;
+  let Inst{25-10} = offset{15-0};
+  let Inst{9-8} = op;
+  let Inst{7-5} = cj;
+  let Inst{4-0} = offset{20-16};
+}
+
+// IMM26 classes: 1 26-bit immediate
+//
+class IMM26B<bits<6> op>
+  : StdArch {
+  bits<26> offs26;
+
+  let Inst{31-26} = op;
+  let Inst{25-10} = offs26{15-0};
+  let Inst{9-0} = offs26{25-16};
+}
+
+// LoongArch Pseudo Instructions Format
+class LoongArchPseudo<dag outs, dag ins, list<dag> pattern> :
+  InstLA<outs, ins, "", pattern, Pseudo> {
+  let isCodeGenOnly = 1;
+  let isPseudo = 1;
+}
+
+// Pseudo-instructions for alternate assembly syntax (never used by codegen).
+// These are aliases that require C++ handling to convert to the target
+// instruction, while InstAliases can be handled directly by tblgen.
+class LoongArchAsmPseudoInst<dag outs, dag ins, string asmstr> :
+  InstLA<outs, ins, asmstr, [], Pseudo> {
+  let isPseudo = 1;
+  let Pattern = [];
+}
+
+//
+// Misc instruction classes
+class ASSERT<bits<2> op>
+  : StdArch {
+  bits<5> rk;
+  bits<5> rj;
+
+  let Inst{31-17} = 0x0;
+  let Inst{16-15} = op;
+  let Inst{14-10} = rk;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = 0x0;
+}
+
+class CODE15<bits<7> op>
+  : StdArch {
+  bits<15> Code;
+
+  let Inst{31-22} = 0x0;
+  let Inst{21-15} = op;
+  let Inst{14-0} = Code;
+}
+
+class INSERT_BIT32<bits<1> op>
+  : StdArch {
+  bits<5> msbw;
+  bits<5> lsbw;
+  bits<5> rj;
+  bits<5> rd;
+
+  let Inst{31-21} = 0x3;
+  let Inst{20-16} = msbw;
+  let Inst{15} = op;
+  let Inst{14-10} = lsbw;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = rd;
+}
+
+class INSERT_BIT64<bits<1> op>
+  : StdArch {
+  bits<6> msbd;
+  bits<6> lsbd;
+  bits<5> rj;
+  bits<5> rd;
+
+  let Inst{31-23} = 0x1;
+  let Inst{22} = op;
+  let Inst{21-16} = msbd;
+  let Inst{15-10} = lsbd;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = rd;
+}
+
+class MOVGPR2FCSR : StdArch {
+  bits<5> fcsr;
+  bits<5> rj;
+
+  let Inst{31-10} = 0x4530;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = fcsr;
+}
+
+class MOVFCSR2GPR : StdArch {
+  bits<5> fcsr;
+  bits<5> rd;
+
+  let Inst{31-10} = 0x4532;
+  let Inst{9-5} = fcsr;
+  let Inst{4-0} = rd;
+}
+
+class MOVFGR2FCFR : StdArch {
+  bits<3> cd;
+  bits<5> fj;
+
+  let Inst{31-10} = 0x4534;
+  let Inst{9-5} = fj;
+  let Inst{4-3} = 0;
+  let Inst{2-0} = cd;
+}
+
+class MOVFCFR2FGR : StdArch {
+  bits<3> cj;
+  bits<5> fd;
+
+  let Inst{31-10} = 0x4535;
+  let Inst{9-8} = 0;
+  let Inst{7-5} = cj;
+  let Inst{4-0} = fd;
+}
+
+class MOVGPR2FCFR : StdArch {
+  bits<3> cd;
+  bits<5> rj;
+
+  let Inst{31-10} = 0x4536;
+  let Inst{9-5} = rj;
+  let Inst{4-3} = 0;
+  let Inst{2-0} = cd;
+}
+
+class MOVFCFR2GPR : StdArch {
+  bits<3> cj;
+  bits<5> rd;
+
+  let Inst{31-10} = 0x4537;
+  let Inst{9-8} = 0;
+  let Inst{7-5} = cj;
+  let Inst{4-0} = rd;
+}
+
+class LoongArchInst : InstLA<(outs), (ins), "", [], FrmOther> {
+}
+
+class JMP_OFFS_2R<bits<6> op> : LoongArchInst {
+  bits<5> rs;
+  bits<5> rd;
+  bits<16> offset;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = op;
+  let Inst{25-10} = offset;
+  let Inst{9-5} = rs;
+  let Inst{4-0} = rd;
+}
+
+class FJ<bits<6> op> : StdArch
+{
+  bits<26> target;
+
+  let Inst{31-26} = op;
+  let Inst{25-10} = target{15-0};
+  let Inst{9-0} = target{25-16};
+}
+
+class LUI_FM : StdArch {
+  bits<5> rt;
+  bits<16> imm16;
+
+  let Inst{31-26} = 0xf;
+  let Inst{25-21} = 0;
+  let Inst{20-16} = rt;
+  let Inst{15-0} = imm16;
+}
+
+class R2_IMM12M_STD<bits<4> op> : StdArch {
+  bits<5> rj;
+  bits<5> rd;
+  bits<12> imm12;
+
+  let Inst{31-26} = 0xa;
+  let Inst{25-22} = op;
+  let Inst{21-10} = imm12;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = rd;
+}
+
+class LLD_2R<bits<3> Code> : LoongArchInst {
+  bits<5> rd;
+  bits<19> addr;
+  bits<5> rj = addr{18-14};
+  bits<14> offset = addr{13-0};
+
+  bits<32> Inst;
+
+  let Inst{31-27} = 0x4;
+  let Inst{26-24} = Code;
+  let Inst{23-10} = offset;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = rd;
+}
+
+class CEQS_FM<bits<2> op> {
+  bits<5> fj;
+  bits<5> fk;
+  bits<3> cd;
+  bits<5> cond;
+
+  bits<32> Inst;
+
+  let Inst{31-22} = 0x30;
+  let Inst{21-20} = op;
+  let Inst{19-15} = cond;
+  let Inst{14-10} = fk;
+  let Inst{9-5} = fj;
+  let Inst{4-3} = 0b00;
+  let Inst{2-0} = cd;
+}
+
diff --git a/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/lib/Target/LoongArch/LoongArchInstrInfo.cpp
new file mode 100644
index 00000000..877827db
--- /dev/null
+++ b/lib/Target/LoongArch/LoongArchInstrInfo.cpp
@@ -0,0 +1,1041 @@
+//===- LoongArchInstrInfo.cpp - LoongArch Instruction Information --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the LoongArch implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchInstrInfo.h"
+#include "LoongArchSubtarget.h"
+#include "MCTargetDesc/LoongArchAnalyzeImmediate.h"
+#include "MCTargetDesc/LoongArchBaseInfo.h"
+#include "MCTargetDesc/LoongArchMCTargetDesc.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cstdint>
+
+using namespace llvm;
+
+#define GET_INSTRINFO_CTOR_DTOR
+#include "LoongArchGenInstrInfo.inc"
+
+// Pin the vtable to this file.
+void LoongArchInstrInfo::anchor() {}
+
+LoongArchInstrInfo::LoongArchInstrInfo(const LoongArchSubtarget &STI)
+    : LoongArchGenInstrInfo(LoongArch::ADJCALLSTACKDOWN,
+                            LoongArch::ADJCALLSTACKUP),
+      RI(), Subtarget(STI) {}
+
+const LoongArchRegisterInfo &LoongArchInstrInfo::getRegisterInfo() const {
+  return RI;
+}
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned LoongArchInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
+                                                 int &FrameIndex) const {
+  unsigned Opc = MI.getOpcode();
+  if ((Opc == LoongArch::LD_W) || (Opc == LoongArch::LD_D) ||
+      (Opc == LoongArch::FLD_S) || (Opc == LoongArch::FLD_D)) {
+    if ((MI.getOperand(1).isFI()) &&  // is a stack slot
+        (MI.getOperand(2).isImm()) && // the imm is zero
+        (isZeroImm(MI.getOperand(2)))) {
+      FrameIndex = MI.getOperand(1).getIndex();
+      return MI.getOperand(0).getReg();
+    }
+  }
+  return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the stored stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned LoongArchInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
+                                                int &FrameIndex) const {
+  unsigned Opc = MI.getOpcode();
+  if ((Opc == LoongArch::ST_D) || (Opc == LoongArch::ST_W) ||
+      (Opc == LoongArch::FST_S) || (Opc == LoongArch::FST_D)) {
+    if ((MI.getOperand(1).isFI()) &&  // is a stack slot
+        (MI.getOperand(2).isImm()) && // the imm is zero
+        (isZeroImm(MI.getOperand(2)))) {
+      FrameIndex = MI.getOperand(1).getIndex();
+      return MI.getOperand(0).getReg();
+    }
+  }
+  return 0;
+}
+
+void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator I,
+                                     const DebugLoc &DL, MCRegister DestReg,
+                                     MCRegister SrcReg, bool KillSrc) const {
+  unsigned Opc = 0, ZeroReg = 0;
+  unsigned ZeroImm = 1;
+
+  if (LoongArch::GPR32RegClass.contains(DestReg)) { // Copy to CPU Reg.
+    if (LoongArch::GPR32RegClass.contains(SrcReg))
+      Opc = LoongArch::OR32, ZeroReg = LoongArch::ZERO;
+    else if (LoongArch::FGR32RegClass.contains(SrcReg))
+      Opc = LoongArch::MOVFR2GR_S;
+    else if (LoongArch::FCFRRegClass.contains(SrcReg))
+      Opc = LoongArch::MOVCF2GR;
+  } else if (LoongArch::GPR32RegClass.contains(SrcReg)) { // Copy from CPU Reg.
+    if (LoongArch::FGR32RegClass.contains(DestReg))
+      Opc = LoongArch::MOVGR2FR_W;
+    else if (LoongArch::FCFRRegClass.contains(DestReg))
+      Opc = LoongArch::MOVGR2CF;
+  } else if (LoongArch::FGR32RegClass.contains(DestReg, SrcReg))
+    Opc = LoongArch::FMOV_S;
+  else if (LoongArch::FGR64RegClass.contains(DestReg, SrcReg))
+    Opc = LoongArch::FMOV_D;
+  else if (LoongArch::GPR64RegClass.contains(DestReg)) { // Copy to CPU64 Reg.
+    if (LoongArch::GPR64RegClass.contains(SrcReg))
+      Opc = LoongArch::OR, ZeroReg = LoongArch::ZERO_64;
+    else if (LoongArch::FGR64RegClass.contains(SrcReg))
+      Opc = LoongArch::MOVFR2GR_D;
+    else if (LoongArch::FCFRRegClass.contains(SrcReg))
+      Opc = LoongArch::MOVCF2GR;
+  } else if (LoongArch::GPR64RegClass.contains(SrcReg)) { // Copy from CPU64 Reg.
+    if (LoongArch::FGR64RegClass.contains(DestReg))
+      Opc = LoongArch::MOVGR2FR_D;
+    else if (LoongArch::FCFRRegClass.contains(DestReg))
+      Opc = LoongArch::MOVGR2CF;
+  } else if (LoongArch::FGR32RegClass.contains(DestReg)) // Copy to FGR32 Reg
+    Opc = LoongArch::MOVCF2FR;
+  else if (LoongArch::FGR32RegClass.contains(SrcReg))    // Copy from FGR32 Reg
+    Opc = LoongArch::MOVFR2CF;
+  else if (LoongArch::FGR64RegClass.contains(DestReg))   // Copy to FGR64 Reg
+    Opc = LoongArch::MOVCF2FR;
+  else if (LoongArch::FGR64RegClass.contains(SrcReg))    // Copy from FGR64 Reg
+    Opc = LoongArch::MOVFR2CF;
+  else if (LoongArch::LSX128BRegClass.contains(DestReg)) { // Copy to LSX reg
+    if (LoongArch::LSX128BRegClass.contains(SrcReg))
+      Opc = LoongArch::VORI_B, ZeroImm = 0;
+  } else if (LoongArch::LASX256BRegClass.contains(DestReg)) { // Copy to LASX reg
+    if (LoongArch::LASX256BRegClass.contains(SrcReg))
+      Opc = LoongArch::XVORI_B, ZeroImm = 0;
+  }
+
+  assert(Opc && "Cannot copy registers");
+
+  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc));
+
+  if (DestReg)
+    MIB.addReg(DestReg, RegState::Define);
+
+  if (SrcReg)
+    MIB.addReg(SrcReg, getKillRegState(KillSrc));
+
+  if (ZeroReg)
+    MIB.addReg(ZeroReg);
+
+  if (!ZeroImm)
+    MIB.addImm(0);
+}
+
+static bool isORCopyInst(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  default:
+    break;
+  case LoongArch::OR:
+    if (MI.getOperand(2).getReg() == LoongArch::ZERO_64)
+      return true;
+    break;
+  case LoongArch::OR32:
+    if (MI.getOperand(2).getReg() == LoongArch::ZERO)
+      return true;
+    break;
+  }
+  return false;
+}
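isORCopyInst works because OR-ing with the zero register is the identity; a trivial standalone check of that invariant (illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  // `or rd, rj, $zero` (or the 32-bit or32) copies rj unchanged: x | 0 == x.
  for (uint64_t X : {UINT64_C(0), UINT64_C(1), UINT64_C(0x1234abcd), ~UINT64_C(0)})
    assert((X | 0) == X);
}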
+/// We check for the common case of 'or', as it's LoongArch's preferred
+/// instruction for GPRs, but we have to check the operands to ensure that is
+/// the case. Other move instructions for LoongArch are directly identifiable.
+Optional<DestSourcePair>
+LoongArchInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
+  if (MI.isMoveReg() || isORCopyInst(MI)) {
+    return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
+  }
+  return None;
+}
+
+void LoongArchInstrInfo::
+storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+                Register SrcReg, bool isKill, int FI,
+                const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
+                int64_t Offset) const {
+  DebugLoc DL;
+  MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore);
+
+  unsigned Opc = 0;
+  if (LoongArch::GPR32RegClass.hasSubClassEq(RC))
+    Opc = LoongArch::ST_W;
+  else if (LoongArch::GPR64RegClass.hasSubClassEq(RC))
+    Opc = LoongArch::ST_D;
+  else if (LoongArch::FGR64RegClass.hasSubClassEq(RC))
+    Opc = LoongArch::FST_D;
+  else if (LoongArch::FGR32RegClass.hasSubClassEq(RC))
+    Opc = LoongArch::FST_S;
+  else if (TRI->isTypeLegalForClass(*RC, MVT::v16i8))
+    Opc = LoongArch::VST;
+  else if (TRI->isTypeLegalForClass(*RC, MVT::v8i16))
+    Opc = LoongArch::VST_H;
+  else if (TRI->isTypeLegalForClass(*RC, MVT::v4i32) ||
+           TRI->isTypeLegalForClass(*RC, MVT::v4f32))
+    Opc = LoongArch::VST_W;
+  else if (TRI->isTypeLegalForClass(*RC, MVT::v2i64) ||
+           TRI->isTypeLegalForClass(*RC, MVT::v2f64))
+    Opc = LoongArch::VST_D;
+  else if (TRI->isTypeLegalForClass(*RC, MVT::v32i8))
+    Opc = LoongArch::XVST;
+  else if (TRI->isTypeLegalForClass(*RC, MVT::v16i16))
+    Opc = LoongArch::XVST_H;
+  else if (TRI->isTypeLegalForClass(*RC, MVT::v8i32) ||
+           TRI->isTypeLegalForClass(*RC, MVT::v8f32))
+    Opc = LoongArch::XVST_W;
+  else if (TRI->isTypeLegalForClass(*RC, MVT::v4i64) ||
+           TRI->isTypeLegalForClass(*RC, MVT::v4f64))
+    Opc = LoongArch::XVST_D;
+
+  assert(Opc && "Register class not handled!");
+  BuildMI(MBB, I, DL, get(Opc))
+      .addReg(SrcReg, getKillRegState(isKill))
+      .addFrameIndex(FI)
+      .addImm(Offset)
+      .addMemOperand(MMO);
+}
+
+void LoongArchInstrInfo::
+loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+                 Register DestReg, int FI, const TargetRegisterClass *RC,
+                 const TargetRegisterInfo *TRI, int64_t Offset) const {
+  DebugLoc DL;
+  if (I != MBB.end())
+    DL = I->getDebugLoc();
+  MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad);
+  unsigned Opc = 0;
+
+  if (LoongArch::GPR32RegClass.hasSubClassEq(RC))
+    Opc = LoongArch::LD_W;
+  else if (LoongArch::GPR64RegClass.hasSubClassEq(RC))
+    Opc = LoongArch::LD_D;
+  else if (LoongArch::FGR32RegClass.hasSubClassEq(RC))
+    Opc = LoongArch::FLD_S;
+  else if (LoongArch::FGR64RegClass.hasSubClassEq(RC))
+    Opc = LoongArch::FLD_D;
+  else if (TRI->isTypeLegalForClass(*RC, MVT::v16i8))
+    Opc = LoongArch::VLD;
+  else if (TRI->isTypeLegalForClass(*RC, MVT::v8i16))
+    Opc = LoongArch::VLD_H;
+  else if (TRI->isTypeLegalForClass(*RC, MVT::v4i32) ||
+           TRI->isTypeLegalForClass(*RC, MVT::v4f32))
+    Opc = LoongArch::VLD_W;
+  else if (TRI->isTypeLegalForClass(*RC, MVT::v2i64) ||
+           TRI->isTypeLegalForClass(*RC, MVT::v2f64))
+    Opc = LoongArch::VLD_D;
+  else if (TRI->isTypeLegalForClass(*RC, MVT::v32i8))
+    Opc = LoongArch::XVLD;
+  else if (TRI->isTypeLegalForClass(*RC, MVT::v16i16))
+    Opc = LoongArch::XVLD_H;
+  else if (TRI->isTypeLegalForClass(*RC, MVT::v8i32) ||
+           TRI->isTypeLegalForClass(*RC, MVT::v8f32))
+    Opc = LoongArch::XVLD_W;
+  else if (TRI->isTypeLegalForClass(*RC, MVT::v4i64) ||
+           TRI->isTypeLegalForClass(*RC, MVT::v4f64))
+    Opc = LoongArch::XVLD_D;
+
+  assert(Opc && "Register class not handled!");
+
+  BuildMI(MBB, I, DL, get(Opc), DestReg)
+      .addFrameIndex(FI)
+      .addImm(Offset)
+      .addMemOperand(MMO);
+}
+
+bool LoongArchInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
+  MachineBasicBlock &MBB = *MI.getParent();
+
+  switch (MI.getDesc().getOpcode()) {
+  default:
+    return false;
+  case LoongArch::RetRA:
+    expandRetRA(MBB, MI);
+    break;
+  case LoongArch::ERet:
+    expandERet(MBB, MI);
+    break;
+  case LoongArch::PseudoFFINT_S_W:
+    expandCvtFPInt(MBB, MI, LoongArch::FFINT_S_W, LoongArch::MOVGR2FR_W, false);
+    break;
+  case LoongArch::PseudoFFINT_S_L:
+    expandCvtFPInt(MBB, MI, LoongArch::FFINT_S_L, LoongArch::MOVGR2FR_D, true);
+    break;
+  case LoongArch::PseudoFFINT_D_W:
+    expandCvtFPInt(MBB, MI, LoongArch::FFINT_D_W, LoongArch::MOVGR2FR_W, true);
+    break;
+  case LoongArch::PseudoFFINT_D_L:
+    expandCvtFPInt(MBB, MI, LoongArch::FFINT_D_L, LoongArch::MOVGR2FR_D, true);
+    break;
+  case LoongArch::LoongArcheh_return32:
+  case LoongArch::LoongArcheh_return64:
+    expandEhReturn(MBB, MI);
+    break;
+  }
+
+  MBB.erase(MI);
+  return true;
+}
+
+/// getOppositeBranchOpc - Return the inverse of the specified
+/// opcode, e.g. turning BEQ to BNE.
+unsigned LoongArchInstrInfo::getOppositeBranchOpc(unsigned Opc) const {
+  switch (Opc) {
+  default: llvm_unreachable("Illegal opcode!");
+  case LoongArch::BEQ32:  return LoongArch::BNE32;
+  case LoongArch::BEQ:    return LoongArch::BNE;
+  case LoongArch::BNE32:  return LoongArch::BEQ32;
+  case LoongArch::BNE:    return LoongArch::BEQ;
+  case LoongArch::BEQZ32: return LoongArch::BNEZ32;
+  case LoongArch::BEQZ:   return LoongArch::BNEZ;
+  case LoongArch::BNEZ32: return LoongArch::BEQZ32;
+  case LoongArch::BNEZ:   return LoongArch::BEQZ;
+  case LoongArch::BCEQZ:  return LoongArch::BCNEZ;
+  case LoongArch::BCNEZ:  return LoongArch::BCEQZ;
+  case LoongArch::BLT32:  return LoongArch::BGE32;
+  case LoongArch::BLT:    return LoongArch::BGE;
+  case LoongArch::BGE32:  return LoongArch::BLT32;
+  case LoongArch::BGE:    return LoongArch::BLT;
+  case LoongArch::BLTU32: return LoongArch::BGEU32;
+  case LoongArch::BLTU:   return LoongArch::BGEU;
+  case LoongArch::BGEU32: return LoongArch::BLTU32;
+  case LoongArch::BGEU:   return LoongArch::BLTU;
+  }
+}
+
+void LoongArchInstrInfo::adjustReg(unsigned DestReg, unsigned SrcReg,
+                                   int64_t Amount, MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator I,
+                                   MachineInstr::MIFlag Flag) const {
+  LoongArchABIInfo ABI = Subtarget.getABI();
+  DebugLoc DL;
+  unsigned ADDI = ABI.GetPtrAddiOp();
+
+  if (Amount == 0)
+    return;
+
+  if (isInt<12>(Amount)) {
+    // addi $DestReg, $SrcReg, amount
+    BuildMI(MBB, I, DL, get(ADDI), DestReg)
+        .addReg(SrcReg)
+        .addImm(Amount)
+        .setMIFlag(Flag);
+  } else {
+    // For numbers which are not 12-bit integers we synthesize Amount inline,
+    // then add or subtract it from $SrcReg.
+    unsigned Opc = ABI.GetPtrAddOp();
+    if (Amount < 0) {
+      Opc = ABI.GetPtrSubOp();
+      Amount = -Amount;
+    }
+    unsigned Reg = loadImmediate(Amount, MBB, I, DL);
+    BuildMI(MBB, I, DL, get(Opc), DestReg)
+        .addReg(SrcReg)
+        .addReg(Reg, RegState::Kill)
+        .setMIFlag(Flag);
+  }
+}
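A standalone sketch (illustrative, not part of the patch) of the size test adjustReg performs: LoongArch's addi takes a signed 12-bit immediate, so anything outside that range is materialized into a scratch register first:

#include <cstdint>
#include <cstdio>

// Mirrors llvm::isInt<12>: true iff V fits a signed 12-bit immediate.
static bool fitsSImm12(int64_t V) { return V >= -2048 && V < 2048; }

int main() {
  for (int64_t Amount : {16LL, -2048LL, 2048LL, 1LL << 20})
    std::printf("%+lld -> %s\n", (long long)Amount,
                fitsSImm12(Amount) ? "single addi" : "loadImmediate + add/sub");
}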
&LoongArch::GPR64RegClass + : &LoongArch::GPR32RegClass; + LoongArchAnalyzeImmediate::InstSeq Seq = + LoongArchAnalyzeImmediate::generateInstSeq(Imm, Subtarget.is64Bit()); + unsigned DstReg = MBB.getParent()->getRegInfo().createVirtualRegister(RC); + unsigned SrcReg = + Subtarget.isABI_LP64() ? LoongArch::ZERO_64 : LoongArch::ZERO; + + // Build the instructions in Seq. + for (auto &Inst : Seq) { + if (Inst.Opc == LoongArch::LU12I_W || Inst.Opc == LoongArch::LU12I_W32) + BuildMI(MBB, II, DL, get(Inst.Opc), DstReg).addImm(Inst.Imm); + else + BuildMI(MBB, II, DL, get(Inst.Opc), DstReg) + .addReg(SrcReg, RegState::Kill) + .addImm(Inst.Imm); + SrcReg = DstReg; + } + return DstReg; +} + +unsigned LoongArchInstrInfo::getAnalyzableBrOpc(unsigned Opc) const { + return (Opc == LoongArch::B || Opc == LoongArch::B32 || + Opc == LoongArch::BEQZ || Opc == LoongArch::BEQZ32 || + Opc == LoongArch::BNEZ || Opc == LoongArch::BNEZ32 || + Opc == LoongArch::BCEQZ || + Opc == LoongArch::BCNEZ || + Opc == LoongArch::BEQ || Opc == LoongArch::BEQ32 || + Opc == LoongArch::BNE || Opc == LoongArch::BNE32 || + Opc == LoongArch::BLT || Opc == LoongArch::BLT32 || + Opc == LoongArch::BGE || Opc == LoongArch::BGE32 || + Opc == LoongArch::BLTU || Opc == LoongArch::BLTU32 || + Opc == LoongArch::BGEU || Opc == LoongArch::BGEU32) ? Opc : 0; +} + +void LoongArchInstrInfo::expandRetRA(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + + MachineInstrBuilder MIB; + + if (Subtarget.is64Bit()) + MIB = BuildMI(MBB, I, I->getDebugLoc(), get(LoongArch::PseudoReturn64)) + .addReg(LoongArch::RA_64, RegState::Undef); + else + MIB = BuildMI(MBB, I, I->getDebugLoc(), get(LoongArch::PseudoReturn)) + .addReg(LoongArch::RA, RegState::Undef); + + // Retain any imp-use flags. + for (auto & MO : I->operands()) { + if (MO.isImplicit()) + MIB.add(MO); + } +} + +void LoongArchInstrInfo::expandERet(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + BuildMI(MBB, I, I->getDebugLoc(), get(LoongArch::ERTN)); +} + +std::pair +LoongArchInstrInfo::compareOpndSize(unsigned Opc, + const MachineFunction &MF) const { + const MCInstrDesc &Desc = get(Opc); + assert(Desc.NumOperands == 2 && "Unary instruction expected."); + const LoongArchRegisterInfo *RI = &getRegisterInfo(); + unsigned DstRegSize = RI->getRegSizeInBits(*getRegClass(Desc, 0, RI, MF)); + unsigned SrcRegSize = RI->getRegSizeInBits(*getRegClass(Desc, 1, RI, MF)); + + return std::make_pair(DstRegSize > SrcRegSize, DstRegSize < SrcRegSize); +} + +void LoongArchInstrInfo::expandCvtFPInt(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned CvtOpc, unsigned MovOpc, + bool IsI64) const { + const MCInstrDesc &CvtDesc = get(CvtOpc), &MovDesc = get(MovOpc); + const MachineOperand &Dst = I->getOperand(0), &Src = I->getOperand(1); + unsigned DstReg = Dst.getReg(), SrcReg = Src.getReg(), TmpReg = DstReg; + unsigned KillSrc = getKillRegState(Src.isKill()); + DebugLoc DL = I->getDebugLoc(); + bool DstIsLarger, SrcIsLarger; + + std::tie(DstIsLarger, SrcIsLarger) = + compareOpndSize(CvtOpc, *MBB.getParent()); + + if (DstIsLarger) + TmpReg = getRegisterInfo().getSubReg(DstReg, LoongArch::sub_lo); + + if (SrcIsLarger) + DstReg = getRegisterInfo().getSubReg(DstReg, LoongArch::sub_lo); + + BuildMI(MBB, I, DL, MovDesc, TmpReg).addReg(SrcReg, KillSrc); + BuildMI(MBB, I, DL, CvtDesc, DstReg).addReg(TmpReg, RegState::Kill); +} + +void LoongArchInstrInfo::expandEhReturn(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + // This pseudo instruction is generated 
as part of the lowering of + // ISD::EH_RETURN. We convert it to a stack increment by OffsetReg, and + // indirect jump to TargetReg + LoongArchABIInfo ABI = Subtarget.getABI(); + unsigned ADD = ABI.GetPtrAddOp(); + unsigned SP = Subtarget.is64Bit() ? LoongArch::SP_64 : LoongArch::SP; + unsigned RA = Subtarget.is64Bit() ? LoongArch::RA_64 : LoongArch::RA; + unsigned T8 = Subtarget.is64Bit() ? LoongArch::T8_64 : LoongArch::T8; + unsigned ZERO = Subtarget.is64Bit() ? LoongArch::ZERO_64 : LoongArch::ZERO; + unsigned OffsetReg = I->getOperand(0).getReg(); + unsigned TargetReg = I->getOperand(1).getReg(); + + // add $ra, $v0, $zero + // add $sp, $sp, $v1 + // jr $ra (via RetRA) + const TargetMachine &TM = MBB.getParent()->getTarget(); + if (TM.isPositionIndependent()) + BuildMI(MBB, I, I->getDebugLoc(), get(ADD), T8) + .addReg(TargetReg) + .addReg(ZERO); + BuildMI(MBB, I, I->getDebugLoc(), get(ADD), RA) + .addReg(TargetReg) + .addReg(ZERO); + BuildMI(MBB, I, I->getDebugLoc(), get(ADD), SP).addReg(SP).addReg(OffsetReg); + expandRetRA(MBB, I); +} + + +bool LoongArchInstrInfo::isZeroImm(const MachineOperand &op) const { + return op.isImm() && op.getImm() == 0; +} + +/// insertNoop - If data hazard condition is found insert the target nop +/// instruction. +// FIXME: This appears to be dead code. +void LoongArchInstrInfo:: +insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const +{ + DebugLoc DL; + BuildMI(MBB, MI, DL, get(LoongArch::NOP)); +} + +MachineMemOperand * +LoongArchInstrInfo::GetMemOperand(MachineBasicBlock &MBB, int FI, + MachineMemOperand::Flags Flags) const { + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + + return MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI), + Flags, MFI.getObjectSize(FI), + MFI.getObjectAlign(FI)); +} + +//===----------------------------------------------------------------------===// +// Branch Analysis +//===----------------------------------------------------------------------===// + +void LoongArchInstrInfo::AnalyzeCondBr(const MachineInstr *Inst, unsigned Opc, + MachineBasicBlock *&BB, + SmallVectorImpl &Cond) const { + assert(getAnalyzableBrOpc(Opc) && "Not an analyzable branch"); + int NumOp = Inst->getNumExplicitOperands(); + + // for both int and fp branches, the last explicit operand is the + // MBB. 
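For a plain 32-bit constant, the sequence produced by a generateInstSeq-style helper is typically lu12i.w (bits 31..12) followed by ori (bits 11..0). A standalone check of that split, under the assumption that lu12i.w shifts its 20-bit operand left by 12 and ori ORs in a zero-extended 12-bit operand:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t Imm = 0x12345678;
  uint32_t Hi20 = Imm >> 12;   // operand of lu12i.w
  uint32_t Lo12 = Imm & 0xfff; // operand of ori
  // lu12i.w $r, Hi20  ;  ori $r, $r, Lo12
  assert(((Hi20 << 12) | Lo12) == Imm);
}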
+unsigned LoongArchInstrInfo::getAnalyzableBrOpc(unsigned Opc) const {
+  return (Opc == LoongArch::B || Opc == LoongArch::B32 ||
+          Opc == LoongArch::BEQZ || Opc == LoongArch::BEQZ32 ||
+          Opc == LoongArch::BNEZ || Opc == LoongArch::BNEZ32 ||
+          Opc == LoongArch::BCEQZ ||
+          Opc == LoongArch::BCNEZ ||
+          Opc == LoongArch::BEQ || Opc == LoongArch::BEQ32 ||
+          Opc == LoongArch::BNE || Opc == LoongArch::BNE32 ||
+          Opc == LoongArch::BLT || Opc == LoongArch::BLT32 ||
+          Opc == LoongArch::BGE || Opc == LoongArch::BGE32 ||
+          Opc == LoongArch::BLTU || Opc == LoongArch::BLTU32 ||
+          Opc == LoongArch::BGEU || Opc == LoongArch::BGEU32) ? Opc : 0;
+}
+
+void LoongArchInstrInfo::expandRetRA(MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator I) const {
+  MachineInstrBuilder MIB;
+
+  if (Subtarget.is64Bit())
+    MIB = BuildMI(MBB, I, I->getDebugLoc(), get(LoongArch::PseudoReturn64))
+              .addReg(LoongArch::RA_64, RegState::Undef);
+  else
+    MIB = BuildMI(MBB, I, I->getDebugLoc(), get(LoongArch::PseudoReturn))
+              .addReg(LoongArch::RA, RegState::Undef);
+
+  // Retain any imp-use flags.
+  for (auto &MO : I->operands()) {
+    if (MO.isImplicit())
+      MIB.add(MO);
+  }
+}
+
+void LoongArchInstrInfo::expandERet(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator I) const {
+  BuildMI(MBB, I, I->getDebugLoc(), get(LoongArch::ERTN));
+}
+
+std::pair<bool, bool>
+LoongArchInstrInfo::compareOpndSize(unsigned Opc,
+                                    const MachineFunction &MF) const {
+  const MCInstrDesc &Desc = get(Opc);
+  assert(Desc.NumOperands == 2 && "Unary instruction expected.");
+  const LoongArchRegisterInfo *RI = &getRegisterInfo();
+  unsigned DstRegSize = RI->getRegSizeInBits(*getRegClass(Desc, 0, RI, MF));
+  unsigned SrcRegSize = RI->getRegSizeInBits(*getRegClass(Desc, 1, RI, MF));
+
+  return std::make_pair(DstRegSize > SrcRegSize, DstRegSize < SrcRegSize);
+}
+
+void LoongArchInstrInfo::expandCvtFPInt(MachineBasicBlock &MBB,
+                                        MachineBasicBlock::iterator I,
+                                        unsigned CvtOpc, unsigned MovOpc,
+                                        bool IsI64) const {
+  const MCInstrDesc &CvtDesc = get(CvtOpc), &MovDesc = get(MovOpc);
+  const MachineOperand &Dst = I->getOperand(0), &Src = I->getOperand(1);
+  unsigned DstReg = Dst.getReg(), SrcReg = Src.getReg(), TmpReg = DstReg;
+  unsigned KillSrc = getKillRegState(Src.isKill());
+  DebugLoc DL = I->getDebugLoc();
+  bool DstIsLarger, SrcIsLarger;
+
+  std::tie(DstIsLarger, SrcIsLarger) =
+      compareOpndSize(CvtOpc, *MBB.getParent());
+
+  if (DstIsLarger)
+    TmpReg = getRegisterInfo().getSubReg(DstReg, LoongArch::sub_lo);
+
+  if (SrcIsLarger)
+    DstReg = getRegisterInfo().getSubReg(DstReg, LoongArch::sub_lo);
+
+  BuildMI(MBB, I, DL, MovDesc, TmpReg).addReg(SrcReg, KillSrc);
+  BuildMI(MBB, I, DL, CvtDesc, DstReg).addReg(TmpReg, RegState::Kill);
+}
+
+void LoongArchInstrInfo::expandEhReturn(MachineBasicBlock &MBB,
+                                        MachineBasicBlock::iterator I) const {
+  // This pseudo instruction is generated as part of the lowering of
+  // ISD::EH_RETURN. We convert it to a stack increment by OffsetReg and an
+  // indirect jump to TargetReg.
+  LoongArchABIInfo ABI = Subtarget.getABI();
+  unsigned ADD = ABI.GetPtrAddOp();
+  unsigned SP = Subtarget.is64Bit() ? LoongArch::SP_64 : LoongArch::SP;
+  unsigned RA = Subtarget.is64Bit() ? LoongArch::RA_64 : LoongArch::RA;
+  unsigned T8 = Subtarget.is64Bit() ? LoongArch::T8_64 : LoongArch::T8;
+  unsigned ZERO = Subtarget.is64Bit() ? LoongArch::ZERO_64 : LoongArch::ZERO;
+  unsigned OffsetReg = I->getOperand(0).getReg();
+  unsigned TargetReg = I->getOperand(1).getReg();
+
+  // add $ra, $v0, $zero
+  // add $sp, $sp, $v1
+  // jr $ra (via RetRA)
+  const TargetMachine &TM = MBB.getParent()->getTarget();
+  if (TM.isPositionIndependent())
+    BuildMI(MBB, I, I->getDebugLoc(), get(ADD), T8)
+        .addReg(TargetReg)
+        .addReg(ZERO);
+  BuildMI(MBB, I, I->getDebugLoc(), get(ADD), RA)
+      .addReg(TargetReg)
+      .addReg(ZERO);
+  BuildMI(MBB, I, I->getDebugLoc(), get(ADD), SP).addReg(SP).addReg(OffsetReg);
+  expandRetRA(MBB, I);
+}
+
+bool LoongArchInstrInfo::isZeroImm(const MachineOperand &op) const {
+  return op.isImm() && op.getImm() == 0;
+}
+
+/// insertNoop - If data hazard condition is found insert the target nop
+/// instruction.
+// FIXME: This appears to be dead code.
+void LoongArchInstrInfo::
+insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const {
+  DebugLoc DL;
+  BuildMI(MBB, MI, DL, get(LoongArch::NOP));
+}
+
+MachineMemOperand *
+LoongArchInstrInfo::GetMemOperand(MachineBasicBlock &MBB, int FI,
+                                  MachineMemOperand::Flags Flags) const {
+  MachineFunction &MF = *MBB.getParent();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+
+  return MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
+                                 Flags, MFI.getObjectSize(FI),
+                                 MFI.getObjectAlign(FI));
+}
+
+//===----------------------------------------------------------------------===//
+// Branch Analysis
+//===----------------------------------------------------------------------===//
+
+void LoongArchInstrInfo::AnalyzeCondBr(const MachineInstr *Inst, unsigned Opc,
+                                       MachineBasicBlock *&BB,
+                                       SmallVectorImpl<MachineOperand> &Cond) const {
+  assert(getAnalyzableBrOpc(Opc) && "Not an analyzable branch");
+  int NumOp = Inst->getNumExplicitOperands();
+
+  // For both int and fp branches, the last explicit operand is the MBB.
+  BB = Inst->getOperand(NumOp - 1).getMBB();
+  Cond.push_back(MachineOperand::CreateImm(Opc));
+
+  for (int i = 0; i < NumOp - 1; i++)
+    Cond.push_back(Inst->getOperand(i));
+}
+
+bool LoongArchInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
+                                       MachineBasicBlock *&TBB,
+                                       MachineBasicBlock *&FBB,
+                                       SmallVectorImpl<MachineOperand> &Cond,
+                                       bool AllowModify) const {
+  SmallVector<MachineInstr *, 2> BranchInstrs;
+  BranchType BT = analyzeBranch(MBB, TBB, FBB, Cond, AllowModify, BranchInstrs);
+
+  return (BT == BT_None) || (BT == BT_Indirect);
+}
+
+MachineInstr *
+LoongArchInstrInfo::BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+                                const DebugLoc &DL,
+                                ArrayRef<MachineOperand> Cond) const {
+  unsigned Opc = Cond[0].getImm();
+  const MCInstrDesc &MCID = get(Opc);
+  MachineInstrBuilder MIB = BuildMI(&MBB, DL, MCID);
+
+  for (unsigned i = 1; i < Cond.size(); ++i) {
+    assert((Cond[i].isImm() || Cond[i].isReg()) &&
+           "Cannot copy operand for conditional branch!");
+    MIB.add(Cond[i]);
+  }
+  MIB.addMBB(TBB);
+  return MIB.getInstr();
+}
+
+unsigned LoongArchInstrInfo::insertBranch(MachineBasicBlock &MBB,
+                                          MachineBasicBlock *TBB,
+                                          MachineBasicBlock *FBB,
+                                          ArrayRef<MachineOperand> Cond,
+                                          const DebugLoc &DL,
+                                          int *BytesAdded) const {
+  unsigned UncondBrOpc = LoongArch::B;
+  // Shouldn't be a fall through.
+  assert(TBB && "insertBranch must not be told to insert a fallthrough");
+  if (BytesAdded)
+    *BytesAdded = 0;
+
+  // # of condition operands:
+  //  Unconditional branches: 0
+  //  Floating point branches: 1 (opc)
+  //  Int BranchZero: 2 (opc, reg)
+  //  Int Branch: 3 (opc, reg0, reg1)
+  assert((Cond.size() <= 3) &&
+         "# of LoongArch branch conditions must be <= 3!");
+
+  // Two-way conditional branch.
+  if (FBB) {
+    MachineInstr &MI1 = *BuildCondBr(MBB, TBB, DL, Cond);
+    if (BytesAdded)
+      *BytesAdded += getInstSizeInBytes(MI1);
+    MachineInstr &MI2 = *BuildMI(&MBB, DL, get(UncondBrOpc)).addMBB(FBB);
+    if (BytesAdded)
+      *BytesAdded += getInstSizeInBytes(MI2);
+    return 2;
+  }
+
+  // One-way branch.
+  if (Cond.empty()) {
+    // Unconditional branch.
+    MachineInstr &MI = *BuildMI(&MBB, DL, get(UncondBrOpc)).addMBB(TBB);
+    if (BytesAdded)
+      *BytesAdded += getInstSizeInBytes(MI);
+  } else {
+    // Conditional branch.
+    MachineInstr &MI = *BuildCondBr(MBB, TBB, DL, Cond);
+    if (BytesAdded)
+      *BytesAdded += getInstSizeInBytes(MI);
+  }
+  return 1;
+}
+
+unsigned LoongArchInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
+                                                  MachineBasicBlock &DestBB,
+                                                  const DebugLoc &DL,
+                                                  int64_t BrOffset,
+                                                  RegScavenger *RS) const {
+  assert(RS && "RegScavenger required for long branching");
+  assert(MBB.empty() &&
+         "new block should be inserted for expanding unconditional branch");
+  assert(MBB.pred_size() == 1);
+
+  MachineFunction *MF = MBB.getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  const LoongArchSubtarget &Subtarget = MF->getSubtarget<LoongArchSubtarget>();
+  bool is64 = Subtarget.isABI_LP64();
+  const TargetRegisterClass *RC =
+      is64 ? &LoongArch::GPR64RegClass : &LoongArch::GPR32RegClass;
+
+  if (!is64 && !isInt<32>(BrOffset))
+    report_fatal_error(
+        "Branch offsets outside of the signed 32-bit range not supported");
+
+  unsigned ScratchReg = MRI.createVirtualRegister(RC);
+  unsigned ZeroReg = is64 ? LoongArch::ZERO_64 : LoongArch::ZERO;
+  auto II = MBB.end();
+
+  MachineInstr &Pcaddu12iMI =
+      *BuildMI(MBB, II, DL, get(LoongArch::LONG_BRANCH_PCADDU12I), ScratchReg)
+           .addMBB(&DestBB, LoongArchII::MO_PCREL_HI);
+  BuildMI(MBB, II, DL, get(LoongArch::LONG_BRANCH_ADDID2Op), ScratchReg)
+      .addReg(ScratchReg)
+      .addMBB(&DestBB, LoongArchII::MO_PCREL_LO);
+  BuildMI(MBB, II, DL, get(LoongArch::JIRL))
+      .addReg(ZeroReg)
+      .addReg(ScratchReg, RegState::Kill)
+      .addImm(0);
+  RS->enterBasicBlockEnd(MBB);
+  unsigned Scav = RS->scavengeRegisterBackwards(
+      *RC, MachineBasicBlock::iterator(Pcaddu12iMI), false, 0);
+  MRI.replaceRegWith(ScratchReg, Scav);
+  MRI.clearVirtRegs();
+  RS->setRegUsed(Scav);
+
+  return 12;
+}
+
+unsigned LoongArchInstrInfo::removeBranch(MachineBasicBlock &MBB,
+                                          int *BytesRemoved) const {
+  if (BytesRemoved)
+    *BytesRemoved = 0;
+
+  MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend();
+  unsigned removed = 0;
+
+  // Up to 2 branches are removed.
+  // Note that indirect branches are not removed.
+  while (I != REnd && removed < 2) {
+    // Skip past debug instructions.
+    if (I->isDebugInstr()) {
+      ++I;
+      continue;
+    }
+    if (!getAnalyzableBrOpc(I->getOpcode()))
+      break;
+    // Account for the branch's size before erasing it; the instruction may
+    // not be touched once it has been removed from its parent.
+    if (BytesRemoved)
+      *BytesRemoved += getInstSizeInBytes(*I);
+    // Remove the branch.
+    I->eraseFromParent();
+    I = MBB.rbegin();
+    ++removed;
+  }
+
+  return removed;
+}
+
+/// reverseBranchCondition - Return the inverse opcode of the
+/// specified Branch instruction.
+bool LoongArchInstrInfo::reverseBranchCondition(
+    SmallVectorImpl<MachineOperand> &Cond) const {
+  assert((Cond.size() && Cond.size() <= 3) &&
+         "Invalid LoongArch branch condition!");
+  Cond[0].setImm(getOppositeBranchOpc(Cond[0].getImm()));
+  return false;
+}
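insertIndirectBranch reaches a distant block with a pcaddu12i/addi pair carrying MO_PCREL_HI/MO_PCREL_LO. A standalone sketch of the hi/lo split those relocations imply, assuming pcaddu12i adds a simm20 shifted left by 12 to the pc and the following addi adds a simm12:

#include <cassert>
#include <cstdint>

int main() {
  int64_t Off = 0x12345678;         // byte distance from pcaddu12i to the target
  int64_t Hi = (Off + 0x800) >> 12; // round so the remainder fits 12 signed bits
  int64_t Lo = Off - (Hi << 12);
  assert(Lo >= -2048 && Lo < 2048);
  assert((Hi << 12) + Lo == Off);
}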
+LoongArchInstrInfo::BranchType LoongArchInstrInfo::analyzeBranch(
+    MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
+    SmallVectorImpl<MachineOperand> &Cond, bool AllowModify,
+    SmallVectorImpl<MachineInstr *> &BranchInstrs) const {
+  MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend();
+
+  // Skip all the debug instructions.
+  while (I != REnd && I->isDebugInstr())
+    ++I;
+
+  if (I == REnd || !isUnpredicatedTerminator(*I)) {
+    // This block ends with no branches (it just falls through to its succ).
+    // Leave TBB/FBB null.
+    TBB = FBB = nullptr;
+    return BT_NoBranch;
+  }
+
+  MachineInstr *LastInst = &*I;
+  unsigned LastOpc = LastInst->getOpcode();
+  BranchInstrs.push_back(LastInst);
+
+  // Not an analyzable branch (e.g., indirect jump).
+  if (!getAnalyzableBrOpc(LastOpc))
+    return LastInst->isIndirectBranch() ? BT_Indirect : BT_None;
+
+  // Get the second to last instruction in the block.
+  unsigned SecondLastOpc = 0;
+  MachineInstr *SecondLastInst = nullptr;
+
+  // Skip past any debug instruction to see if the second last actual
+  // instruction is a branch.
+  ++I;
+  while (I != REnd && I->isDebugInstr())
+    ++I;
+
+  if (I != REnd) {
+    SecondLastInst = &*I;
+    SecondLastOpc = getAnalyzableBrOpc(SecondLastInst->getOpcode());
+
+    // Not an analyzable branch (must be an indirect jump).
+    if (isUnpredicatedTerminator(*SecondLastInst) && !SecondLastOpc)
+      return BT_None;
+  }
+
+  // If there is only one terminator instruction, process it.
+  if (!SecondLastOpc) {
+    // Unconditional branch.
+    if (LastInst->isUnconditionalBranch()) {
+      TBB = LastInst->getOperand(0).getMBB();
+      return BT_Uncond;
+    }
+
+    // Conditional branch.
+    AnalyzeCondBr(LastInst, LastOpc, TBB, Cond);
+    return BT_Cond;
+  }
+
+  // If we reached here, there are two branches.
+  // If there are three terminators, we don't know what sort of block this is.
+  if (++I != REnd && isUnpredicatedTerminator(*I))
+    return BT_None;
+
+  BranchInstrs.insert(BranchInstrs.begin(), SecondLastInst);
+
+  // If the second to last instruction is an unconditional branch,
+  // analyze it and remove the last instruction.
+  if (SecondLastInst->isUnconditionalBranch()) {
+    // Return if the last instruction cannot be removed.
+    if (!AllowModify)
+      return BT_None;
+
+    TBB = SecondLastInst->getOperand(0).getMBB();
+    LastInst->eraseFromParent();
+    BranchInstrs.pop_back();
+    return BT_Uncond;
+  }
+
+  // Conditional branch followed by an unconditional branch.
+  // The last one must be unconditional.
+  if (!LastInst->isUnconditionalBranch())
+    return BT_None;
+
+  AnalyzeCondBr(SecondLastInst, SecondLastOpc, TBB, Cond);
+  FBB = LastInst->getOperand(0).getMBB();
+
+  return BT_CondUncond;
+}
+
+MachineBasicBlock *
+LoongArchInstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
+  assert(MI.getDesc().isBranch() && "Unexpected opcode!");
+  // The branch target is always the last operand.
+  int NumOp = MI.getNumExplicitOperands();
+  return MI.getOperand(NumOp - 1).getMBB();
+}
+
+bool LoongArchInstrInfo::isBranchOffsetInRange(unsigned BranchOpc,
+                                               int64_t BrOffset) const {
+  switch (BranchOpc) {
+  case LoongArch::B: case LoongArch::B32:
+    return isInt<28>(BrOffset);
+
+  case LoongArch::BEQZ: case LoongArch::BEQZ32:
+  case LoongArch::BNEZ: case LoongArch::BNEZ32:
+  case LoongArch::BCEQZ:
+  case LoongArch::BCNEZ:
+    return isInt<23>(BrOffset);
+
+  case LoongArch::BEQ: case LoongArch::BEQ32:
+  case LoongArch::BNE: case LoongArch::BNE32:
+  case LoongArch::BLT: case LoongArch::BLT32:
+  case LoongArch::BGE: case LoongArch::BGE32:
+  case LoongArch::BLTU: case LoongArch::BLTU32:
+  case LoongArch::BGEU: case LoongArch::BGEU32:
+    return isInt<18>(BrOffset);
+  }
+
+  llvm_unreachable("Unknown branch instruction!");
+}
+
+/// Predicate for distinguishing between control transfer instructions and all
+/// other instructions for handling forbidden slots. Consider inline assembly
+/// as unsafe as well.
+bool LoongArchInstrInfo::SafeInForbiddenSlot(const MachineInstr &MI) const {
+  if (MI.isInlineAsm())
+    return false;
+
+  return (MI.getDesc().TSFlags & LoongArchII::IsCTI) == 0;
+}
+
+/// Predicate for distinguishing instructions that have forbidden slots.
+bool LoongArchInstrInfo::HasForbiddenSlot(const MachineInstr &MI) const {
+  return (MI.getDesc().TSFlags & LoongArchII::HasForbiddenSlot) != 0;
+}
+
+/// Return the number of bytes of code the specified instruction may be.
+unsigned LoongArchInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
+  switch (MI.getOpcode()) {
+  default:
+    return MI.getDesc().getSize();
+  case TargetOpcode::INLINEASM: { // Inline Asm: Variable size.
+    const MachineFunction *MF = MI.getParent()->getParent();
+    const char *AsmStr = MI.getOperand(0).getSymbolName();
+    return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
+  }
+  }
+}
+
+MachineInstrBuilder
+LoongArchInstrInfo::genInstrWithNewOpc(unsigned NewOpc,
+                                       MachineBasicBlock::iterator I) const {
+  MachineInstrBuilder MIB;
+
+  int ZeroOperandPosition = -1;
+  bool BranchWithZeroOperand = false;
+  if (I->isBranch() && !I->isPseudo()) {
+    auto TRI = I->getParent()->getParent()->getSubtarget().getRegisterInfo();
+    ZeroOperandPosition =
+        I->findRegisterUseOperandIdx(LoongArch::ZERO, false, TRI);
+    BranchWithZeroOperand = ZeroOperandPosition != -1;
+  }
+
+  MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), get(NewOpc));
+
+  if (NewOpc == LoongArch::JIRL) {
+    MIB->RemoveOperand(0);
+    for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) {
+      MIB.add(I->getOperand(J));
+    }
+    MIB.addImm(0);
+  } else {
+    for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) {
+      if (BranchWithZeroOperand && (unsigned)ZeroOperandPosition == J)
+        continue;
+
+      MIB.add(I->getOperand(J));
+    }
+  }
+
+  MIB.copyImplicitOps(*I);
+  MIB.cloneMemRefs(*I);
+  return MIB;
+}
+
+bool LoongArchInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
+                                               unsigned &SrcOpIdx1,
+                                               unsigned &SrcOpIdx2) const {
+  assert(!MI.isBundle() &&
+         "TargetInstrInfo::findCommutedOpIndices() can't handle bundles");
+
+  const MCInstrDesc &MCID = MI.getDesc();
+  if (!MCID.isCommutable())
+    return false;
+
+  return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
+}
+
+// bstrins, bstrpick have the following constraints:
+// 0 <= lsb <= msb <= High
+static bool verifyBstrInstruction(const MachineInstr &MI, StringRef &ErrInfo,
+                                  const int64_t High) {
+  MachineOperand MOMsb = MI.getOperand(2);
+  if (!MOMsb.isImm()) {
+    ErrInfo = "Msb operand is not an immediate!";
+    return false;
+  }
+  MachineOperand MOLsb = MI.getOperand(3);
+  if (!MOLsb.isImm()) {
+    ErrInfo = "Lsb operand is not an immediate!";
+    return false;
+  }
+
+  int64_t Lsb = MOLsb.getImm();
+  if (!((0 <= Lsb) && (Lsb <= High))) {
+    ErrInfo = "Lsb operand is out of range!";
+    return false;
+  }
+
+  int64_t Msb = MOMsb.getImm();
+  if (!((0 <= Msb) && (Msb <= High))) {
+    ErrInfo = "Msb operand is out of range!";
+    return false;
+  }
+
+  if (!(Lsb <= Msb)) {
+    ErrInfo = "Lsb operand is not less than or equal to msb operand!";
+    return false;
+  }
+
+  return true;
+}
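verifyBstrInstruction enforces 0 <= lsb <= msb <= 31 (or 63 for the doubleword forms). A standalone model of what a well-formed bstrpick.w computes, to make the constraint concrete (illustrative, not part of the patch):

#include <cassert>
#include <cstdint>

// bstrpick.w rd, rj, msb, lsb extracts bits [msb:lsb] of rj, zero-extended.
static uint32_t bstrpick_w(uint32_t Rj, unsigned Msb, unsigned Lsb) {
  assert(Lsb <= Msb && Msb <= 31); // the verifier's constraint
  return (Rj >> Lsb) & (0xffffffffu >> (31 - (Msb - Lsb)));
}

int main() {
  assert(bstrpick_w(0xabcd1234, 15, 8) == 0x12); // byte 1 of the value
}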
+
+// Perform target specific instruction verification.
+bool LoongArchInstrInfo::verifyInstruction(const MachineInstr &MI,
+                                           StringRef &ErrInfo) const {
+  // Verify that bstrins and bstrpick instructions are well formed.
+  switch (MI.getOpcode()) {
+  case LoongArch::BSTRINS_W:
+  case LoongArch::BSTRPICK_W:
+    return verifyBstrInstruction(MI, ErrInfo, 31);
+  case LoongArch::BSTRINS_D:
+  case LoongArch::BSTRPICK_D:
+    return verifyBstrInstruction(MI, ErrInfo, 63);
+  default:
+    return true;
+  }
+}
+
+std::pair<unsigned, unsigned>
+LoongArchInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
+  return std::make_pair(TF, 0u);
+}
+
+ArrayRef<std::pair<unsigned, const char *>>
+LoongArchInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
+  using namespace LoongArchII;
+
+  static const std::pair<unsigned, const char *> Flags[] = {
+      {MO_PCREL_HI, "larch-pcrel-hi"},
+      {MO_PCREL_LO, "larch-pcrel-lo"},
+      {MO_TLSGD_HI, "larch-tlsgd-hi"},
+      {MO_TLSGD_LO, "larch-tlsgd-lo"},
+      {MO_TLSIE_HI, "larch-tlsie-hi"},
+      {MO_TLSIE_LO, "larch-tlsie-lo"},
+      {MO_TLSLE_HI, "larch-tlsle-hi"},
+      {MO_TLSLE_LO, "larch-tlsle-lo"},
+      {MO_ABS_HI, "larch-abs-hi"},
+      {MO_ABS_LO, "larch-abs-lo"},
+      {MO_ABS_HIGHER, "larch-abs-higher"},
+      {MO_ABS_HIGHEST, "larch-abs-highest"},
+      {MO_GOT_HI, "larch-got-hi"},
+      {MO_GOT_LO, "larch-got-lo"},
+      {MO_CALL_HI, "larch-call-hi"},
+      {MO_CALL_LO, "larch-call-lo"}};
+  return makeArrayRef(Flags);
+}
diff --git a/lib/Target/LoongArch/LoongArchInstrInfo.h b/lib/Target/LoongArch/LoongArchInstrInfo.h
new file mode 100644
index 00000000..a48eea72
--- /dev/null
+++ b/lib/Target/LoongArch/LoongArchInstrInfo.h
@@ -0,0 +1,245 @@
+//===- LoongArchInstrInfo.h - LoongArch Instruction Information -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the LoongArch implementation of the TargetInstrInfo class.
+//
+// FIXME: We need to override the TargetInstrInfo::getInlineAsmLength method in
+// order for the LoongArchLongBranch pass to work correctly when the code has
+// inline assembly. The returned value doesn't have to be the asm instruction's
+// exact size in bytes; LoongArchLongBranch only expects it to be the correct
+// upper bound.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHINSTRINFO_H
+#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHINSTRINFO_H
+
+#define DBAR_HINT 0x700
+
+#include "MCTargetDesc/LoongArchMCTargetDesc.h"
+#include "LoongArch.h"
+#include "LoongArchRegisterInfo.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include <cstdint>
+
+#define GET_INSTRINFO_HEADER
+#include "LoongArchGenInstrInfo.inc"
+
+namespace llvm {
+
+class MachineInstr;
+class MachineOperand;
+class LoongArchSubtarget;
+class TargetRegisterClass;
+class TargetRegisterInfo;
+
+class LoongArchInstrInfo : public LoongArchGenInstrInfo {
+  virtual void anchor();
+  const LoongArchRegisterInfo RI;
+  const LoongArchSubtarget &Subtarget;
+
+public:
+  enum BranchType {
+    BT_None,       // Couldn't analyze branch.
+    BT_NoBranch,   // No branches found.
+    BT_Uncond,     // One unconditional branch.
+    BT_Cond,       // One conditional branch.
+    BT_CondUncond, // A conditional branch followed by an unconditional branch.
+    BT_Indirect    // One indirect branch.
+  };
+
+  explicit LoongArchInstrInfo(const LoongArchSubtarget &STI);
+
+  /// isLoadFromStackSlot - If the specified machine instruction is a direct
+  /// load from a stack slot, return the virtual or physical register number of
+  /// the destination along with the FrameIndex of the loaded stack slot. If
+  /// not, return 0. This predicate must return 0 if the instruction has
+  /// any side effects other than loading from the stack slot.
+  unsigned isLoadFromStackSlot(const MachineInstr &MI,
+                               int &FrameIndex) const override;
+
+  /// isStoreToStackSlot - If the specified machine instruction is a direct
+  /// store to a stack slot, return the virtual or physical register number of
+  /// the source reg along with the FrameIndex of the stored stack slot. If
+  /// not, return 0. This predicate must return 0 if the instruction has
+  /// any side effects other than storing to the stack slot.
+  unsigned isStoreToStackSlot(const MachineInstr &MI,
+                              int &FrameIndex) const override;
+
+  void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+                   const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
+                   bool KillSrc) const override;
+
+  /// Branch Analysis
+  bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+                     MachineBasicBlock *&FBB,
+                     SmallVectorImpl<MachineOperand> &Cond,
+                     bool AllowModify) const override;
+
+  unsigned removeBranch(MachineBasicBlock &MBB,
+                        int *BytesRemoved = nullptr) const override;
+
+  unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+                        MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
+                        const DebugLoc &DL,
+                        int *BytesAdded = nullptr) const override;
+
+  unsigned insertIndirectBranch(MachineBasicBlock &MBB,
+                                MachineBasicBlock &NewDestBB,
+                                const DebugLoc &DL, int64_t BrOffset,
+                                RegScavenger *RS = nullptr) const override;
+
+  bool
+  reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
+
+  BranchType analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+                           MachineBasicBlock *&FBB,
+                           SmallVectorImpl<MachineOperand> &Cond,
+                           bool AllowModify,
+                           SmallVectorImpl<MachineInstr *> &BranchInstrs) const;
+
+  /// Get the block that branch instruction jumps to.
+  MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;
+
+  /// Determine if the branch target is in range.
+  bool isBranchOffsetInRange(unsigned BranchOpc,
+                             int64_t BrOffset) const override;
+
+  /// Predicate to determine if an instruction can go in a forbidden slot.
+  bool SafeInForbiddenSlot(const MachineInstr &MI) const;
+
+  /// Predicate to determine if an instruction has a forbidden slot.
+  bool HasForbiddenSlot(const MachineInstr &MI) const;
+
+  /// Insert nop instruction when hazard condition is found.
+  void insertNoop(MachineBasicBlock &MBB,
+                  MachineBasicBlock::iterator MI) const override;
+
+  /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+  /// such, whenever a client has an instance of instruction info, it should
+  /// always be able to get register info as well (through this method).
+  const LoongArchRegisterInfo &getRegisterInfo() const;
+
+  bool expandPostRAPseudo(MachineInstr &MI) const override;
+
+  unsigned getOppositeBranchOpc(unsigned Opc) const;
+
+  /// Emit a series of instructions to load an immediate.
+  unsigned loadImmediate(int64_t Imm, MachineBasicBlock &MBB,
+                         MachineBasicBlock::iterator II,
+                         const DebugLoc &DL) const;
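The ranges behind isBranchOffsetInRange follow from the encodings: a signed N-bit offset field counts 4-byte instructions, so the reachable byte displacement is isInt<N+2> — hence isInt<28> for the 26-bit b, isInt<23> for the 21-bit beqz/bceqz forms, and isInt<18> for the 16-bit beq family. A standalone model (the alignment test mirrors what the scaled field implies):

#include <cassert>
#include <cstdint>

// A signed N-bit branch field scaled by 4 covers 4-aligned byte offsets in
// [-2^(N+1), 2^(N+1)), i.e. isInt<N+2>.
static bool fitsBranchField(int64_t Off, unsigned N) {
  int64_t Lim = int64_t(1) << (N + 1);
  return Off % 4 == 0 && Off >= -Lim && Off < Lim;
}

int main() {
  assert(fitsBranchField((1LL << 27) - 4, 26)); // b/b32: isInt<28>
  assert(!fitsBranchField(1LL << 27, 26));
  assert(fitsBranchField(-(1LL << 22), 21));    // beqz/bceqz: isInt<23>
  assert(!fitsBranchField(1LL << 17, 16));      // beq/bne/blt...: isInt<18>
}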
+
+  /// Return the number of bytes of code the specified instruction may be.
+  unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
+
+  void storeRegToStackSlot(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator MBBI,
+                           Register SrcReg, bool isKill, int FrameIndex,
+                           const TargetRegisterClass *RC,
+                           const TargetRegisterInfo *TRI) const override {
+    storeRegToStack(MBB, MBBI, SrcReg, isKill, FrameIndex, RC, TRI, 0);
+  }
+
+  void loadRegFromStackSlot(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator MBBI,
+                            Register DestReg, int FrameIndex,
+                            const TargetRegisterClass *RC,
+                            const TargetRegisterInfo *TRI) const override {
+    loadRegFromStack(MBB, MBBI, DestReg, FrameIndex, RC, TRI, 0);
+  }
+
+  void storeRegToStack(MachineBasicBlock &MBB,
+                       MachineBasicBlock::iterator MI,
+                       Register SrcReg, bool isKill, int FrameIndex,
+                       const TargetRegisterClass *RC,
+                       const TargetRegisterInfo *TRI,
+                       int64_t Offset) const;
+
+  void loadRegFromStack(MachineBasicBlock &MBB,
+                        MachineBasicBlock::iterator MI,
+                        Register DestReg, int FrameIndex,
+                        const TargetRegisterClass *RC,
+                        const TargetRegisterInfo *TRI,
+                        int64_t Offset) const;
+
+  /// Adjust register value (DestReg = SrcReg + Amount).
+  void
+  adjustReg(unsigned DestReg, unsigned SrcReg, int64_t Amount,
+            MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+            MachineInstr::MIFlag Flag = MachineInstr::MIFlag::NoFlags) const;
+
+  /// Create an instruction which has the same operands and memory operands
+  /// as MI but has a new opcode.
+  MachineInstrBuilder genInstrWithNewOpc(unsigned NewOpc,
+                                         MachineBasicBlock::iterator I) const;
+
+  bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1,
+                             unsigned &SrcOpIdx2) const override;
+
+  /// Perform target specific instruction verification.
+  bool verifyInstruction(const MachineInstr &MI,
+                         StringRef &ErrInfo) const override;
+
+  std::pair<unsigned, unsigned>
+  decomposeMachineOperandsTargetFlags(unsigned TF) const override;
+
+  ArrayRef<std::pair<unsigned, const char *>>
+  getSerializableDirectMachineOperandTargetFlags() const override;
+
+protected:
+  /// If the specified machine instruction is an instruction that moves/copies
+  /// value from one register to another register, return true along with
+  /// @Source machine operand and @Destination machine operand.
+  Optional<DestSourcePair>
+  isCopyInstrImpl(const MachineInstr &MI) const override;
+
+private:
+  bool isZeroImm(const MachineOperand &op) const;
+
+  MachineMemOperand *GetMemOperand(MachineBasicBlock &MBB, int FI,
+                                   MachineMemOperand::Flags Flags) const;
+
+  unsigned getAnalyzableBrOpc(unsigned Opc) const;
+
+  void AnalyzeCondBr(const MachineInstr *Inst, unsigned Opc,
+                     MachineBasicBlock *&BB,
+                     SmallVectorImpl<MachineOperand> &Cond) const;
+
+  MachineInstr *
+  BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+              const DebugLoc &DL, ArrayRef<MachineOperand> Cond) const;
+
+  void expandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const;
+
+  void expandERet(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const;
+
+  std::pair<bool, bool> compareOpndSize(unsigned Opc,
+                                        const MachineFunction &MF) const;
+
+  /// Expand pseudo Int-to-FP conversion instructions.
+  ///
+  /// For example, the following pseudo instruction
+  ///  PseudoCVT_D32_W D2, A5
+  /// gets expanded into these two instructions:
+  ///  MTC1 F4, A5
+  ///  CVT_D32_W D2, F4
+  ///
+  /// We do this expansion post-RA to avoid inserting a floating point copy
+  /// instruction between MTC1 and CVT_D32_W.
+ void expandCvtFPInt(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned CvtOpc, unsigned MovOpc, bool IsI64) const; + + void expandEhReturn(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHINSTRINFO_H diff --git a/lib/Target/LoongArch/LoongArchInstrInfo.td b/lib/Target/LoongArch/LoongArchInstrInfo.td new file mode 100644 index 00000000..4d5241ac --- /dev/null +++ b/lib/Target/LoongArch/LoongArchInstrInfo.td @@ -0,0 +1,1882 @@ +//===- LoongArchInstrInfo.td - Target Description for LoongArch Target -*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the LoongArch implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// +include "LoongArchInstrFormats.td" + +def SDT_Bstrpick : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>, + SDTCisVT<2, i32>, SDTCisSameAs<2, 3>]>; +def SDT_Bstrins : SDTypeProfile<1, 4, [SDTCisInt<0>, SDTCisSameAs<0, 1>, + SDTCisVT<2, i32>, SDTCisSameAs<2, 3>, + SDTCisSameAs<0, 4>]>; + +def LoongArchBstrpick : SDNode<"LoongArchISD::BSTRPICK", SDT_Bstrpick>; + +def LoongArchBstrins : SDNode<"LoongArchISD::BSTRINS", SDT_Bstrins>; + +def SDT_DBAR : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; +def LoongArchDBAR : SDNode<"LoongArchISD::DBAR", SDT_DBAR, [SDNPHasChain,SDNPSideEffect]>; + +def SDT_LoongArchEHRET : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisPtrTy<1>]>; + +def LoongArchehret : SDNode<"LoongArchISD::EH_RETURN", SDT_LoongArchEHRET, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; + +//===---------------------------------------------------------------------===/ +// Operand, Complex Patterns and Transformations Definitions. 
+//===---------------------------------------------------------------------===/
+
+def assertzext_lt_i32 : PatFrag<(ops node:$src), (assertzext node:$src), [{
+  return cast<VTSDNode>(N->getOperand(1))->getVT().bitsLT(MVT::i32);
+}]>;
+
+def immz : PatLeaf<(imm), [{ return N->getSExtValue() == 0; }]>;
+def immZExt12 : PatLeaf<(imm), [{ return isUInt<12>(N->getZExtValue()); }]>;
+def immSExt12 : PatLeaf<(imm), [{ return isInt<12>(N->getSExtValue()); }]>;
+def immSExt13 : PatLeaf<(imm), [{ return isInt<13>(N->getSExtValue()); }]>;
+
+def immZExt2Alsl : ImmLeaf<i64, [{ return isUInt<2>(Imm - 1); }]>;
+
+//class ImmAsmOperand<int Low, int High> : AsmOperandClass {
+//  let RenderMethod = "addImmOperands";
+//  let PredicateMethod = "isImmediate<" # Low # "," # High # ">";
+//  let DiagnosticString = "operand must be an immediate in the range [" # Low # "," # High # "]";
+//}
+//
+//def Imm8AsmOperand: ImmAsmOperand<8,8> { let Name = "Imm8"; }
+//def imm8 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 8; }]> {
+//  let ParserMatchClass = Imm8AsmOperand;
+//}
+
+def HasLSX : Predicate<"Subtarget->hasLSX()">,
+             AssemblerPredicate<(all_of FeatureLSX)>;
+def HasLASX : Predicate<"Subtarget->hasLASX()">,
+              AssemblerPredicate<(all_of FeatureLASX)>;
+
+class EXT_LSX {
+  list<Predicate> ExtPredicate = [HasLSX];
+}
+
+class EXT_LASX {
+  list<Predicate> ExtPredicate = [HasLASX];
+}
+
+class SImmOperand<int width> : AsmOperandClass {
+  let Name = "SImm" # width;
+  let DiagnosticType = "InvalidSImm" # width;
+  let RenderMethod = "addImmOperands";
+  let PredicateMethod = "isSImm<" # width # ">";
+}
+
+def SImm2Operand : SImmOperand<2>;
+def simm2 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= -2 && Imm < 2; }]> {
+  let ParserMatchClass = SImm2Operand;
+  let DecoderMethod = "DecodeSImmWithOffsetAndScale<2>";
+}
+def SImm3Operand : SImmOperand<3>;
+def simm3 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= -4 && Imm < 4; }]> {
+  let ParserMatchClass = SImm3Operand;
+  let DecoderMethod = "DecodeSImmWithOffsetAndScale<3>";
+}
+
+def SImm5Operand : SImmOperand<5>;
+def simm5 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= -16 && Imm < 16; }]> {
+  let ParserMatchClass = SImm5Operand;
+  let DecoderMethod = "DecodeSImmWithOffsetAndScale<5>";
+}
+
+def simm5_32 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= -16 && Imm < 16; }]> {
+  let ParserMatchClass = SImm5Operand;
+  let DecoderMethod = "DecodeSImmWithOffsetAndScale<5>";
+}
+
+def SImm8Operand : SImmOperand<8>;
+def simm8 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= -128 && Imm < 128; }]> {
+  let ParserMatchClass = SImm8Operand;
+  let DecoderMethod = "DecodeSImmWithOffsetAndScale<8>";
+}
+def simm8_32 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= -128 && Imm < 128; }]> {
+  let ParserMatchClass = SImm8Operand;
+  let DecoderMethod = "DecodeSImmWithOffsetAndScale<8>";
+}
+
+def SImm12Operand : SImmOperand<12>;
+def simm12 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= -2048 && Imm < 2048; }]> {
+  let ParserMatchClass = SImm12Operand;
+  let DecoderMethod = "DecodeSImmWithOffsetAndScale<12>";
+}
+def simm12_32 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= -2048 && Imm < 2048; }]> {
+  let ParserMatchClass = SImm12Operand;
+  let DecoderMethod = "DecodeSImmWithOffsetAndScale<12>";
+}
+
+def SImm14Operand : SImmOperand<14>;
+def simm14 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= -8192 && Imm < 8192; }]> {
+  let ParserMatchClass = SImm14Operand;
+  let DecoderMethod = "DecodeSImmWithOffsetAndScale<14>";
+}
+
+def SImm15Operand : SImmOperand<15>;
+def simm15 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= -16384 && Imm < 16384; }]> {
+  let ParserMatchClass = SImm15Operand;
+  let DecoderMethod = "DecodeSImmWithOffsetAndScale<15>";
+}
+
+def SImm16Operand : SImmOperand<16>;
+def simm16 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= -32768 && Imm < 32768; }]> {
+  let ParserMatchClass = SImm16Operand;
+  let DecoderMethod = "DecodeSImmWithOffsetAndScale<16>";
+}
+
+def SImm20Operand : SImmOperand<20>;
+def simm20 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= -524288 && Imm < 524288; }]> {
+  let ParserMatchClass = SImm20Operand;
+  let DecoderMethod = "DecodeSImmWithOffsetAndScale<20>";
+}
+def simm20_32 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= -524288 && Imm < 524288; }]> {
+  let ParserMatchClass = SImm20Operand;
+  let DecoderMethod = "DecodeSImmWithOffsetAndScale<20>";
+}
+
+def SImm21Operand : SImmOperand<21>;
+def simm21 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= -1048576 && Imm < 1048576; }]> {
+  let ParserMatchClass = SImm21Operand;
+  let DecoderMethod = "DecodeSImmWithOffsetAndScale<21>";
+}
+
+def SImm26Operand : SImmOperand<26>;
+def simm26 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= -33554432 && Imm < 33554432; }]> {
+  let ParserMatchClass = SImm26Operand;
+  let DecoderMethod = "DecodeSImmWithOffsetAndScale<26>";
+}
+
+def UImm1Operand : AsmOperandClass {
+  let Name = "UImm1";
+  let RenderMethod = "addUImmOperands<1>";
+  let PredicateMethod = "isUImm<1>";
+  let DiagnosticType = "InvalidImm0_1";
+}
+
+def UImm2Operand : AsmOperandClass {
+  let Name = "UImm2";
+  let RenderMethod = "addUImmOperands<2>";
+  let PredicateMethod = "isUImm<2>";
+  let DiagnosticType = "InvalidImm0_3";
+}
+
+def UImm3Operand : AsmOperandClass {
+  let Name = "UImm3";
+  let RenderMethod = "addUImmOperands<3>";
+  let PredicateMethod = "isUImm<3>";
+  let DiagnosticType = "InvalidImm0_7";
+}
+
+def UImm4Operand : AsmOperandClass {
+  let Name = "UImm4";
+  let RenderMethod = "addUImmOperands<4>";
+  let PredicateMethod = "isUImm<4>";
+  let DiagnosticType = "InvalidImm0_15";
+}
+
+def UImm5Operand : AsmOperandClass {
+  let Name = "UImm5";
+  let RenderMethod = "addUImmOperands<5>";
+  let PredicateMethod = "isUImm<5>";
+  let DiagnosticType = "InvalidImm0_31";
+}
+
+def uimm1i : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm < 2; }]> {
+  let PrintMethod = "printUImm<1>";
+  let ParserMatchClass = UImm1Operand;
+}
+
+def uimm2 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 4; }]> {
+  let PrintMethod = "printUImm<2>";
+  let ParserMatchClass = UImm2Operand;
+}
+
+def uimm3 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 8; }]> {
+  let PrintMethod = "printUImm<3>";
+  let ParserMatchClass = UImm3Operand;
+}
+
+def uimm4i : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm < 16; }]> {
+  let PrintMethod = "printUImm<4>";
+  let ParserMatchClass = UImm4Operand;
+}
+
+def uimm5 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 32; }]> {
+  let PrintMethod = "printUImm<5>";
+  let ParserMatchClass = UImm5Operand;
+}
+
+def UImm6Operand : AsmOperandClass {
+  let Name = "UImm6";
+  let RenderMethod = "addUImmOperands<6>";
+  let PredicateMethod = "isUImm<6>";
+  let DiagnosticType = "InvalidImm0_63";
+}
+def uimm6 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 64; }]> {
+  let PrintMethod = "printUImm<6>";
+  let ParserMatchClass = UImm6Operand;
+}
+
+def UImm7Operand : AsmOperandClass {
+  let Name = "UImm7";
+  let RenderMethod = "addUImmOperands<7>";
+  let PredicateMethod = "isUImm<7>";
+  let DiagnosticType = "InvalidImm0_127";
+}
+
+def uimm7i : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm < 128; }]> {
+  let PrintMethod = "printUImm<7>";
+  let ParserMatchClass = UImm7Operand;
+}
+
+def UImm12Operand : AsmOperandClass {
+  let Name = "UImm12";
+  let RenderMethod = "addUImmOperands<12>";
+  let PredicateMethod = "isUImm<12>";
+  let DiagnosticType = "InvalidImm0_4095";
+}
+def uimm12 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm < 4096; }]> {
+  let PrintMethod = "printUImm<12>";
+  let ParserMatchClass = UImm12Operand;
+}
+def uimm12_32 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 4096; }]> {
+  let PrintMethod = "printUImm<12>";
+  let ParserMatchClass = UImm12Operand;
+}
+
+def UImm15Operand : AsmOperandClass {
+  let Name = "UImm15";
+  let RenderMethod = "addUImmOperands<15>";
+  let PredicateMethod = "isUImm<15>";
+  let DiagnosticType = "InvalidImm0_32767";
+}
+def uimm15 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 32768; }]> {
+  let PrintMethod = "printUImm<15>";
+  let ParserMatchClass = UImm15Operand;
+}
+
+def UImm14Operand : AsmOperandClass {
+  let Name = "UImm14";
+  let RenderMethod = "addUImmOperands<14>";
+  let PredicateMethod = "isUImm<14>";
+  let DiagnosticType = "InvalidImm0_16383";
+}
+def uimm14 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm < 16384; }]> {
+  let PrintMethod = "printUImm<14>";
+  let ParserMatchClass = UImm14Operand;
+}
+def uimm14_32 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 16384; }]> {
+  let PrintMethod = "printUImm<14>";
+  let ParserMatchClass = UImm14Operand;
+}
+
+def UImm8Operand : AsmOperandClass {
+  let Name = "UImm8";
+  let RenderMethod = "addUImmOperands<8>";
+  let PredicateMethod = "isUImm<8>";
+  let DiagnosticType = "InvalidImm0_255";
+}
+def uimm8_64 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm < 256; }]> {
+  let PrintMethod = "printUImm<8>";
+  let ParserMatchClass = UImm8Operand;
+}
+
+def uimm8_32 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> {
+  let PrintMethod = "printUImm<8>";
+  let ParserMatchClass = UImm8Operand;
+}
+
+def addr : ComplexPattern<iPTR, 2, "selectIntAddr", [frameindex]>;
+
+def addrDefault : ComplexPattern<iPTR, 2, "selectAddrDefault", [frameindex]>;
+
+def addrRegImm : ComplexPattern<iPTR, 2, "selectAddrRegImm", [frameindex]>;
+
+def addrimm14lsl2 : ComplexPattern<iPTR, 2, "selectIntAddrSImm14Lsl2", [frameindex]>;
+
+class ConstantUImmAsmOperandClass<int Bits, list<AsmOperandClass> Supers = [],
+                                  int Offset = 0> : AsmOperandClass {
+  let Name = "ConstantUImm" # Bits # "_" # Offset;
+  let RenderMethod = "addConstantUImmOperands<" # Bits # ", " # Offset # ">";
+  let PredicateMethod = "isConstantUImm<" # Bits # ", " # Offset # ">";
+  let SuperClasses = Supers;
+  let DiagnosticType = "UImm" # Bits # "_" # Offset;
+}
+class SImmAsmOperandClass<int Bits, list<AsmOperandClass> Supers = []>
+    : AsmOperandClass {
+  let Name = "SImm" # Bits;
+  let RenderMethod = "addSImmOperands<" # Bits # ">";
+  let PredicateMethod = "isSImm<" # Bits # ">";
+  let SuperClasses = Supers;
+  let DiagnosticType = "SImm" # Bits;
+}
+class UImmAnyAsmOperandClass<int Bits, list<AsmOperandClass> Supers = []>
+    : AsmOperandClass {
+  let Name = "ImmAny";
+  let RenderMethod = "addConstantUImmOperands<32>";
+  let PredicateMethod = "isSImm<" # Bits # ">";
+  let SuperClasses = Supers;
+  let DiagnosticType = "ImmAny";
+}
+
+def UImm32CoercedAsmOperandClass : UImmAnyAsmOperandClass<33, []> {
+  let Name = "UImm32_Coerced";
+  let DiagnosticType = "UImm32_Coerced";
+}
+def SImm32RelaxedAsmOperandClass
+    : SImmAsmOperandClass<32, [UImm32CoercedAsmOperandClass]> {
+  let Name = "SImm32_Relaxed";
+  let PredicateMethod = "isAnyImm<33>";
+  let DiagnosticType = "SImm32_Relaxed";
+}
+def SImm32AsmOperandClass
+    : SImmAsmOperandClass<32, [SImm32RelaxedAsmOperandClass]>;
+def ConstantUImm26AsmOperandClass
+    : ConstantUImmAsmOperandClass<26, [SImm32AsmOperandClass]>;
+
+def ConstantUImm20AsmOperandClass
+    : ConstantUImmAsmOperandClass<20, [ConstantUImm26AsmOperandClass]>;
+
+def ConstantUImm2Plus1AsmOperandClass
+    : ConstantUImmAsmOperandClass<2, [ConstantUImm20AsmOperandClass], 1>;
+
+class UImmAsmOperandClass<int Bits, list<AsmOperandClass> Supers = []>
+    : AsmOperandClass {
+  let Name = "UImm" # Bits;
+  let RenderMethod = "addUImmOperands<" # Bits # ">";
+  let PredicateMethod = "isUImm<" # Bits # ">";
+  let SuperClasses = Supers;
+  let DiagnosticType = "UImm" # Bits;
+}
+
+def UImm16RelaxedAsmOperandClass
+    : UImmAsmOperandClass<16, [ConstantUImm20AsmOperandClass]> {
+  let Name = "UImm16_Relaxed";
+  let PredicateMethod = "isAnyImm<16>";
+  let DiagnosticType = "UImm16_Relaxed";
+}
+
+def ConstantSImm14Lsl2AsmOperandClass : AsmOperandClass {
+  let Name = "SImm14Lsl2";
+  let RenderMethod =
"addImmOperands"; + let PredicateMethod = "isScaledSImm<14, 2>"; + let SuperClasses = [UImm16RelaxedAsmOperandClass]; + let DiagnosticType = "SImm14_Lsl2"; +} + +foreach I = {2} in + def simm14_lsl # I : Operand { + let DecoderMethod = "DecodeSImmWithOffsetAndScale<14, " # I # ">"; + let ParserMatchClass = + !cast("ConstantSImm14Lsl" # I # "AsmOperandClass"); + } + +def uimm16_64_relaxed : Operand { + let PrintMethod = "printUImm<16>"; + let ParserMatchClass = + !cast("UImm16RelaxedAsmOperandClass"); +} + +def uimm2_plus1 : Operand { + let PrintMethod = "printUImm<2, 1>"; + let EncoderMethod = "getUImmWithOffsetEncoding<2, 1>"; + let DecoderMethod = "DecodeUImmWithOffset<2, 1>"; + let ParserMatchClass = ConstantUImm2Plus1AsmOperandClass; +} + +// like simm32 but coerces simm32 to uimm32. +def uimm32_coerced : Operand { + let ParserMatchClass = !cast("UImm32CoercedAsmOperandClass"); +} + +def imm64: Operand; + +def LoongArchMemAsmOperand : AsmOperandClass { + let Name = "Mem"; + let ParserMethod = "parseMemOperand"; +} + +def LoongArchAMemAsmOperand : AsmOperandClass { + let Name = "AMem"; + let ParserMethod = "parseAMemOperand"; + let RenderMethod = "addMemOperands"; + let PredicateMethod = "isZeroMemOff"; + let DiagnosticType = "MemZeroOff"; +} + +def LoongArchMemSimm14AsmOperand : AsmOperandClass { + let Name = "MemOffsetSimm14"; + let SuperClasses = [LoongArchMemAsmOperand]; + let RenderMethod = "addMemOperands"; + let ParserMethod = "parseMemOperand"; + let PredicateMethod = "isMemWithSimmOffset<14>"; + let DiagnosticType = "MemSImm14"; +} + +foreach I = {2} in + def LoongArchMemSimm14Lsl # I # AsmOperand : AsmOperandClass { + let Name = "MemOffsetSimm14_" # I; + let SuperClasses = [LoongArchMemAsmOperand]; + let RenderMethod = "addMemOperands"; + let ParserMethod = "parseMemOperand"; + let PredicateMethod = "isMemWithSimmOffset<14, " # I # ">"; + let DiagnosticType = "MemSImm14Lsl" # I; + } + +def LoongArchMemSimmPtrAsmOperand : AsmOperandClass { + let Name = "MemOffsetSimmPtr"; + let SuperClasses = [LoongArchMemAsmOperand]; + let RenderMethod = "addMemOperands"; + let ParserMethod = "parseMemOperand"; + let PredicateMethod = "isMemWithPtrSizeOffset"; + let DiagnosticType = "MemSImmPtr"; +} + +class mem_generic : Operand { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops ptr_rc, simm12); + let EncoderMethod = "getMemEncoding"; + let ParserMatchClass = LoongArchMemAsmOperand; + let OperandType = "OPERAND_MEMORY"; +} + +// Address operand +def mem : mem_generic; + +def amem : mem_generic { + let PrintMethod = "printAMemOperand"; + let EncoderMethod = "getAMemEncoding"; + let ParserMatchClass = LoongArchAMemAsmOperand; +} + +def mem_simmptr : mem_generic { + let ParserMatchClass = LoongArchMemSimmPtrAsmOperand; +} + +foreach I = {2} in + def mem_simm14_lsl # I : mem_generic { + let MIOperandInfo = (ops ptr_rc, !cast("simm14_lsl" # I)); + let EncoderMethod = "getSimm14MemEncoding<" # I # ">"; + let ParserMatchClass = + !cast("LoongArchMemSimm14Lsl" # I # "AsmOperand"); + } + +def mem_ea : Operand { + let PrintMethod = "printMemOperandEA"; + let MIOperandInfo = (ops ptr_rc, simm12); + let EncoderMethod = "getMemEncoding"; + let OperandType = "OPERAND_MEMORY"; +} + +def LoongArchJumpTargetAsmOperand : AsmOperandClass { + let Name = "JumpTarget"; + let ParserMethod = "parseJumpTarget"; + let PredicateMethod = "isImm"; + let RenderMethod = "addImmOperands"; +} + +def jmptarget : Operand { + let EncoderMethod = "getJumpTargetOpValue"; + let ParserMatchClass = 
LoongArchJumpTargetAsmOperand; +} + +def brtarget : Operand { + let EncoderMethod = "getBranchTargetOpValue"; + let OperandType = "OPERAND_PCREL"; + let DecoderMethod = "DecodeBranchTarget"; + let ParserMatchClass = LoongArchJumpTargetAsmOperand; +} + +def calltarget : Operand { + let EncoderMethod = "getJumpTargetOpValue"; + let ParserMatchClass = LoongArchJumpTargetAsmOperand; +} + +// +//SDNode +// +def IsGP64bit : Predicate<"Subtarget->is64Bit()">, + AssemblerPredicate<(all_of Feature64Bit)>; +def IsGP32bit : Predicate<"!Subtarget->is64Bit()">, + AssemblerPredicate<(all_of (not Feature64Bit))>; +def SDT_LoongArchCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; +def SDT_LoongArchCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; + +def LoongArchRet : SDNode<"LoongArchISD::Ret", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +def LoongArchERet : SDNode<"LoongArchISD::ERet", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPSideEffect]>; + +def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_LoongArchCallSeqStart, + [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>; +def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_LoongArchCallSeqEnd, + [SDNPHasChain, SDNPSideEffect, + SDNPOptInGlue, SDNPOutGlue]>; +def LoongArchAddress : SDNode<"LoongArchISD::GlobalAddress", SDTIntUnaryOp>; + +// Return RA. +let isReturn=1, isTerminator=1, isBarrier=1, hasCtrlDep=1, isCTI=1 in { + def RetRA : LoongArchPseudo<(outs), (ins), [(LoongArchRet)]>; + + let hasSideEffects=1 in + def ERet : LoongArchPseudo<(outs), (ins), [(LoongArchERet)]>; +} + +let Defs = [SP], Uses = [SP], hasSideEffects = 1 in { +def ADJCALLSTACKDOWN : LoongArchPseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), + [(callseq_start timm:$amt1, timm:$amt2)]>; +def ADJCALLSTACKUP : LoongArchPseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), + [(callseq_end timm:$amt1, timm:$amt2)]>; +} + +class LoongArchPat : Pat, PredicateControl; + +def SDT_LoongArchJmpLink : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>; + +def LoongArchJmpLink : SDNode<"LoongArchISD::JmpLink",SDT_LoongArchJmpLink, + [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, + SDNPVariadic]>; + +def LoongArchTailCall : SDNode<"LoongArchISD::TailCall", SDT_LoongArchJmpLink, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; + +class GPR_32 { list GPRPredicates = [IsGP32bit]; } +class GPR_64 { list GPRPredicates = [IsGP64bit]; } + +//===---------------------------------------------------------------------===/ +// Instruction Class Templates +//===---------------------------------------------------------------------===/ +///R2 +class Int_Reg2 + : InstForm<(outs RO:$rd), (ins RO:$rj), + !strconcat(opstr, "\t$rd, $rj"), + [(set RO:$rd, (OpNode RO:$rj))], + FrmR, opstr>; + +class Int_Reg2_Iocsrrd + : InstForm<(outs RD:$rd), (ins RS:$rj), + !strconcat(opstr, "\t$rd, $rj"), + [(set RD:$rd, (OpNode RS:$rj))], + FrmR, opstr>; + +class Int_Reg2_Rdtime + : InstForm<(outs RO:$rd, RO:$rj), (ins), + !strconcat(opstr, "\t$rd, $rj"), + [(set (OpNode RO:$rd, RO:$rj))], + FrmR, opstr>; + +class Int_Reg2_Iocsrwr + : InstForm<(outs), (ins RD:$rd, RS:$rj), + !strconcat(opstr, "\t$rd, $rj"), + [(set (OpNode RD:$rd, RS:$rj))], + FrmR, opstr>; + +class Float_Reg2 + : InstForm<(outs RO:$fd), (ins RO:$fj), + !strconcat(opstr, "\t$fd, $fj"), + [(set RO:$fd, (OpNode RO:$fj))], + FrmFR, opstr>; + +class Count1 + : InstForm<(outs RO:$rd), (ins RO:$rj), + !strconcat(opstr, "\t$rd, $rj"), + [(set RO:$rd, (OpNode (not RO:$rj)))], + FrmR, opstr>; + +class SignExtInReg + : InstForm<(outs RO:$rd), (ins 
RO:$rj), !strconcat(opstr, "\t$rd, $rj"), + [(set RO:$rd, (sext_inreg RO:$rj, vt))], FrmR, opstr>; + +///R3 +class Int_Reg3 + : InstForm<(outs RO:$rd), (ins RO:$rj, RO:$rk), + !strconcat(opstr, "\t$rd, $rj, $rk"), + [(set RO:$rd, (OpNode RO:$rj, RO:$rk))], + FrmR, opstr>; + +class Int_Reg3_Crc + : InstForm<(outs RS:$rd), (ins RD:$rj, RS:$rk), + !strconcat(opstr, "\t$rd, $rj, $rk"), + [(set RS:$rd, (OpNode RD:$rj, RS:$rk))], + FrmR, opstr>; + +class SetCC_R + : InstForm<(outs GPR32Opnd:$rd), (ins RO:$rj, RO:$rk), + !strconcat(opstr, "\t$rd, $rj, $rk"), + [(set GPR32Opnd:$rd, (OpNode RO:$rj, RO:$rk))], + FrmR, opstr>; + +class SetCC_I + : InstForm<(outs GPR32Opnd:$rd), (ins RO:$rj, ImmOpnd:$imm12), + !strconcat(opstr, "\t$rd, $rj, $imm12"), + [(set GPR32Opnd:$rd, (OpNode RO:$rj, ImmOpnd:$imm12))], + FrmR, opstr>; + +class ATOMIC + : InstForm<(outs RD:$rd), (ins RD:$rk, MO:$addr), + !strconcat(opstr, "\t$rd, $rk, $addr"), + [(set RD:$rd, (OpNode RD:$rk, Addr:$addr))], + FrmR, opstr> { + let DecoderMethod = "DecodeAMem"; + let canFoldAsLoad = 1; + string BaseOpcode = opstr; + let mayLoad = 1; + let mayStore = 1; + let Constraints = "@earlyclobber $rd"; +} + +class Nor + : InstForm<(outs RO:$rd), (ins RO:$rj, RO:$rk), + !strconcat(opstr, "\t$rd, $rj, $rk"), + [(set RO:$rd, (not (or RO:$rj, RO:$rk)))], + FrmR, opstr>; + +class Shift_Var + : InstForm<(outs RO:$rd), (ins RO:$rj, GPR32Opnd:$rk), + !strconcat(opstr, "\t$rd, $rj, $rk"), + [(set RO:$rd, (OpNode RO:$rj, GPR32Opnd:$rk))], + FrmR, opstr>; + +class Float_Reg3 + : InstForm<(outs RO:$fd), (ins RO:$fj, RO:$fk), + !strconcat(opstr, "\t$fd, $fj, $fk"), + [(set RO:$fd, (OpNode RO:$fj, RO:$fk))], + FrmR, opstr>; + +class Float_Reg3_MA + : InstForm<(outs RO:$fd), (ins RO:$fj, RO:$fk), + !strconcat(opstr, "\t$fd, $fj, $fk"), + [(set RO:$fd, (OpNode (fabs RO:$fj), (fabs RO:$fk)))], + FrmR, opstr>; + +class Float_Int_Reg3 + : InstForm<(outs RD:$fd), (ins RS:$rj, RS:$rk), + !strconcat(opstr, "\t$fd, $rj, $rk"), + [(set RS:$fd, (OpNode RS:$rj, RS:$rk))], + FrmR, opstr>; + +///R4 +class Mul_Reg4 + : InstForm<(outs RO:$fd), (ins RO:$fj, RO:$fk, RO:$fa), + !strconcat(opstr, "\t$fd, $fj, $fk, $fa"), + [], + FrmFR, opstr>; + +class NMul_Reg4 + : InstForm<(outs RO:$fd), (ins RO:$fj, RO:$fk, RO:$fa), + !strconcat(opstr, "\t$fd, $fj, $fk, $fa"), + [], + FrmFR, opstr>; + +///R2_IMM5 +class Shift_Imm32 + : InstForm<(outs RO:$rd), (ins RO:$rj, uimm5:$imm5), + !strconcat(opstr, "\t$rd, $rj, $imm5"), + [(set RO:$rd, (OpNode RO:$rj, uimm5:$imm5))], + FrmR, opstr>; + +///R2_IMM6 +class Shift_Imm64 + : InstForm<(outs RO:$rd), (ins RO:$rj, uimm6:$imm6), + !strconcat(opstr, "\t$rd, $rj, $imm6"), + [(set RO:$rd, (OpNode RO:$rj, uimm6:$imm6))], + FrmR, opstr>; + +///LOAD_STORE +class FLd + : InstForm<(outs RD:$rd), (ins MO:$addr), + !strconcat(opstr, "\t$rd, $addr"), + [(set RD:$rd, (OpNode addrDefault:$addr))], + FrmR, opstr> { + let DecoderMethod = "DecodeFMem"; + let mayLoad = 1; +} + +class Ld + : InstForm<(outs RD:$rd), (ins MO:$addr), + !strconcat(opstr, "\t$rd, $addr"), + [(set RD:$rd, (OpNode Addr:$addr))], + FrmR, opstr> { + let DecoderMethod = "DecodeMem"; + let canFoldAsLoad = 1; + string BaseOpcode = opstr; + let mayLoad = 1; +} + +class FSt + : InstForm<(outs), (ins RD:$rd, MO:$addr), + !strconcat(opstr, "\t$rd, $addr"), + [(OpNode RD:$rd, addrDefault:$addr)], + FrmR, opstr> { + let DecoderMethod = "DecodeFMem"; + let mayStore = 1; +} + +class St + : InstForm<(outs), (ins RS:$rd, MO:$addr), + !strconcat(opstr, "\t$rd, $addr"), + [(OpNode RS:$rd, 
addr:$addr)], + FrmR, opstr> { + let DecoderMethod = "DecodeMem"; + string BaseOpcode = opstr; + let mayStore = 1; +} + +/// R2_IMM12 +class Int_Reg2_Imm12 + : InstForm<(outs RO:$rd), (ins RO:$rj, ImmOpnd:$imm12), + !strconcat(opstr, "\t$rd, $rj, $imm12"), + [(set RO:$rd, (OpNode RO:$rj, ImmOpnd:$imm12))], + FrmR, opstr>; +class RELOC_rrii + : InstForm<(outs RO:$rd), (ins RO:$rj, ImmOpnd:$imm12, ImmOpnd:$i12), + !strconcat(opstr, "\t$rd, $rj, $imm12"), + [(set RO:$rd, (OpNode RO:$rj, ImmOpnd:$imm12, ImmOpnd:$i12))], + FrmR, opstr>; + +///R2_IMM14 +class LdPtr + : InstForm<(outs RO:$rd), (ins mem_simm14_lsl2:$addr), + !strconcat(opstr, "\t$rd, $addr"), + [], FrmI, opstr>{ + let DecoderMethod = "DecodeMemSimm14"; + let canFoldAsLoad = 1; + string BaseOpcode = opstr; + let mayLoad = 1; +} + +class StPtr + : InstForm<(outs), (ins RO:$rd, mem_simm14_lsl2:$addr), + !strconcat(opstr, "\t$rd, $addr"), + [], FrmI, opstr> { + let DecoderMethod = "DecodeMemSimm14"; + string BaseOpcode = opstr; + let mayStore = 1; +} + +///R2_IMM16 +class FJirl + : InstForm<(outs RO:$rd), (ins RO:$rj, opnd:$offs16), + !strconcat(opstr, "\t$rd, $rj, $offs16"), + [], FrmJ, opstr>; + +class Beq + : InstForm<(outs), (ins RO:$rj, RO:$rd, opnd:$offs16), + !strconcat(opstr, "\t$rj, $rd, $offs16"), + [(brcond (i32 (cond_op RO:$rj, RO:$rd)), bb:$offs16)], + FrmI, opstr> { + let isBranch = 1; + let isTerminator = 1; + bit isCTI = 1; +} + +///R1_IMM21 +class Beqz + : InstForm<(outs), (ins RO:$rj, opnd:$offs21), + !strconcat(opstr, "\t$rj, $offs21"), + [(brcond (i32 (cond_op RO:$rj, 0)), bb:$offs21)], + FrmI, opstr> { + let isBranch = 1; + let isTerminator = 1; + bit isCTI = 1; +} + +///IMM26 +class JumpFB : + InstForm<(outs), (ins opnd:$offset26), !strconcat(opstr, "\t$offset26"), + [(operator targetoperator:$offset26)], FrmJ, opstr> { + let isBranch = 1; + let isTerminator=1; + let isBarrier=1; + let DecoderMethod = "DecodeJumpTarget"; + bit isCTI = 1; +} + +/// R3_SA +class Reg3_Sa + : InstForm<(outs RO:$rd), (ins RO:$rj, RO:$rk, ImmOpnd:$sa), + !strconcat(opstr, "\t$rd, $rj, $rk, $sa"), + [(set RO:$rd, (OpNode RO:$rj, RO:$rk, ImmOpnd:$sa))], + FrmR, opstr>; + +class Reg3_SaU + : InstForm<(outs RD:$rd), (ins RS:$rj, RS:$rk, ImmOpnd:$sa), + !strconcat(opstr, "\t$rd, $rj, $rk, $sa"), + [(set RD:$rd, (OpNode RS:$rj, RS:$rk, ImmOpnd:$sa))], + FrmR, opstr>; + +/// Assert +class Assert + : InstForm<(outs), (ins RO:$rj, RO:$rk), + !strconcat(opstr, "\t$rj, $rk"), + [(set (OpNode RO:$rj, RO:$rk))], + FrmR, opstr>; + +class Code15 + : InstForm<(outs), (ins uimm15:$Code), + !strconcat(opstr, "\t$Code"), + [(set (OpNode uimm15:$Code))], + FrmOther, opstr>; + +class TrapBase + : LoongArchPseudo<(outs), (ins), [(trap)]>, + PseudoInstExpansion<(RealInst 0)> { + let isBarrier = 1; + let isTerminator = 1; + let isCodeGenOnly = 1; + let isCTI = 1; +} + +class CSR + : InstForm<(outs RO:$rd), (ins ImmOpnd:$csr), + !strconcat(opstr, "\t$rd, $csr"), + [(set RO:$rd, (OpNode ImmOpnd:$csr))], + FrmOther, opstr>; + +class CSRW + : InstForm<(outs RO:$dst), (ins RO:$rd, ImmOpnd:$csr), + !strconcat(opstr, "\t$rd, $csr"), + [(set RO:$dst, (OpNode RO:$rd, ImmOpnd:$csr))], + FrmOther, opstr>{ + let Constraints = "$rd = $dst"; +} + +class CSRX + : InstForm<(outs RO:$dst), (ins RO:$rd, RO:$rj, ImmOpnd:$csr), + !strconcat(opstr, "\t$rd, $rj, $csr"), + [(set RO:$dst, (OpNode RO:$rd, RO:$rj, ImmOpnd:$csr))], + FrmOther, opstr>{ + let Constraints = "$rd = $dst"; +} + +class CAC + : InstForm<(outs), (ins uimm5:$op, RO:$rj, ImmOpnd:$si12), + !strconcat(opstr, 
"\t$op, $rj, $si12"), + [(set (OpNode uimm5:$op, RO:$rj, ImmOpnd:$si12))], + FrmOther, opstr>; + +class LEVEL + : InstForm<(outs RO:$rd), (ins RO:$rj, uimm8_64:$level), + !strconcat(opstr, "\t$rd, $rj, $level"), + [(set RO:$rd, (OpNode RO:$rj, uimm8_64:$level))], + FrmOther, opstr>; + +class SEQ + : InstForm<(outs), (ins RO:$rj, uimm8_64:$seq), + !strconcat(opstr, "\t$rj, $seq"), + [(set (OpNode RO:$rj, uimm8_64:$seq))], + FrmOther, opstr>; + +class Wait + : InstForm<(outs), (ins uimm15:$hint), + !strconcat(opstr, "\t$hint"), + [(set (OpNode uimm15:$hint))], + FrmOther, opstr>; + +class Invtlb + : InstForm<(outs), (ins uimm5:$op, RO:$rj, RO:$rk), + !strconcat(opstr, "\t$op, $rj, $rk"), + [(set (OpNode uimm5:$op, RO:$rj, RO:$rk))], + FrmOther, opstr>; + +class OP32 + : InstForm<(outs), (ins), + !strconcat(opstr, ""), + [(set (OpNode))], + FrmOther, opstr>; + +class Bar + : InstForm<(outs), (ins uimm15:$hint), + !strconcat(opstr, "\t$hint"), + [(set (OpNode uimm15:$hint))], + FrmOther, opstr>; + +//class CA op, string opstr> +// : R3_CA; + +class SI16_R2 + : InstForm<(outs RO:$rd), (ins RO:$rj, simm16:$si16), + !strconcat(opstr, "\t$rd, $rj, $si16"), + [(set RO:$rd, (OpNode RO:$rj, simm16:$si16))], + FrmR, opstr>; + +class SI20 + : InstForm<(outs RO:$rd), (ins ImmOpnd:$si20), + !strconcat(opstr, "\t$rd, $si20"), + [(set RO:$rd, (OpNode ImmOpnd:$si20))], + FrmR, opstr>; +let isCodeGenOnly = 1, Constraints = "$dst = $rd" in +class SI20_R2 + : InstForm<(outs RO:$dst), (ins RO:$rd, ImmOpnd:$si20), + !strconcat(opstr, "\t$rd, $si20"), + [(set RO:$dst, (OpNode RO:$rd, ImmOpnd:$si20))], + FrmR, opstr>; +class RELOC_rii + : InstForm<(outs RO:$rd), (ins ImmOpnd:$si20, ImmOpnd:$i20), + !strconcat(opstr, "\t$rd, $si20"), + [(set RO:$rd, (OpNode ImmOpnd:$si20, ImmOpnd:$i20))], + FrmR, opstr>; + +// preld +class Preld + : InstForm<(outs), (ins RO:$rj, MemOpnd:$addr, uimm5:$hint), + !strconcat(opstr, "\t$hint, $rj, $addr"), + [(set (OpNode RO:$rj, MemOpnd:$addr, uimm5:$hint))], + FrmR, opstr>; +class Preld_Raw + : InstForm<(outs), (ins RO:$rj, simm12:$imm12, uimm5:$hint), + !strconcat(opstr, "\t$hint, $rj, $imm12"), + [], + FrmR, opstr>; +class IsCall { + bit isCall = 1; + bit isCTI = 1; +} + +class EffectiveAddress + : InstForm<(outs RO:$rd), (ins mem_ea:$addr), + !strconcat(opstr, "\t$rd, $addr"), + [(set RO:$rd, addr:$addr)], FrmI, + !strconcat(opstr, "_lea")> { + let isCodeGenOnly = 1; + let hasNoSchedulingInfo = 1; + let DecoderMethod = "DecodeMem"; +} + +def PtrRC : Operand { + let MIOperandInfo = (ops ptr_rc); + let DecoderMethod = "DecodePtrRegisterClass"; + let ParserMatchClass = GPR32AsmOperand; +} + +class Atomic2Ops : + LoongArchPseudo<(outs DRC:$dst), (ins PtrRC:$ptr, DRC:$incr), + [(set DRC:$dst, (Op iPTR:$ptr, DRC:$incr))]>; + +class Atomic2OpsPostRA : + LoongArchPseudo<(outs RC:$dst), (ins PtrRC:$ptr, RC:$incr), []> { + let mayLoad = 1; + let mayStore = 1; +} + +class Atomic2OpsSubwordPostRA : + LoongArchPseudo<(outs RC:$dst), (ins PtrRC:$ptr, RC:$incr, RC:$mask, RC:$mask2, + RC:$shiftamnt), []>; +class AtomicCmpSwap : + LoongArchPseudo<(outs DRC:$dst), (ins PtrRC:$ptr, DRC:$cmp, DRC:$swap), + [(set DRC:$dst, (Op iPTR:$ptr, DRC:$cmp, DRC:$swap))]>; + +class AtomicCmpSwapPostRA : + LoongArchPseudo<(outs RC:$dst), (ins PtrRC:$ptr, RC:$cmp, RC:$swap), []> { + let mayLoad = 1; + let mayStore = 1; +} + +class AtomicCmpSwapSubwordPostRA : + LoongArchPseudo<(outs RC:$dst), (ins PtrRC:$ptr, RC:$mask, RC:$ShiftCmpVal, + RC:$mask2, RC:$ShiftNewVal, RC:$ShiftAmt), []> { + let mayLoad = 1; + let 
mayStore = 1; +} + +class LoongArchInstAlias : + InstAlias, PredicateControl; + +//===---------------------------------------------------------------------===/ +// Instruction Definitions. +//===---------------------------------------------------------------------===/ +/// +/// R2 +/// + +def CLO_D : Count1<"clo.d", GPR64Opnd, ctlz>, R2I<0b01000>; +def CLZ_D : Int_Reg2<"clz.d", GPR64Opnd, ctlz>, R2I<0b01001>; +def CTO_D : Count1<"cto.d", GPR64Opnd, cttz>, R2I<0b01010>; +def CTZ_D : Int_Reg2<"ctz.d", GPR64Opnd, cttz>, R2I<0b01011>; + +def REVB_4H : Int_Reg2<"revb.4h", GPR64Opnd>, R2I<0b01101>; //[] +def REVB_2W : Int_Reg2<"revb.2w", GPR64Opnd>, R2I<0b01110>; +def REVB_D : Int_Reg2<"revb.d", GPR64Opnd>, R2I<0b01111>; +def REVH_2W : Int_Reg2<"revh.2w", GPR64Opnd>, R2I<0b10000>; +def REVH_D : Int_Reg2<"revh.d", GPR64Opnd>, R2I<0b10001>; //[] + +def BITREV_8B : Int_Reg2<"bitrev.8b", GPR64Opnd>, R2I<0b10011>; //[] +def BITREV_D : Int_Reg2<"bitrev.d", GPR64Opnd, bitreverse>, R2I<0b10101>; + +def EXT_W_H : SignExtInReg<"ext.w.h", GPR64Opnd, i16>, R2I<0b10110>; +def EXT_W_B : SignExtInReg<"ext.w.b", GPR64Opnd, i8>, R2I<0b10111>; + +def RDTIME_D : Int_Reg2_Rdtime<"rdtime.d", GPR64Opnd>, R2I<0b11010>; +def RDTIMEL_W : Int_Reg2_Rdtime<"rdtimel.w", GPR64Opnd>, R2I<0b11000>; +def RDTIMEH_W : Int_Reg2_Rdtime<"rdtimeh.w", GPR64Opnd>, R2I<0b11001>; +/// +/// R3 +/// +def ADD_D : Int_Reg3<"add.d", GPR64Opnd, add>, R3I<0b0100001>; +def SUB_D : Int_Reg3<"sub.d", GPR64Opnd, sub>, R3I<0b0100011>; + +def SLT : SetCC_R<"slt", GPR64Opnd, setlt>, R3I<0b0100100>; +def SLTU : SetCC_R<"sltu", GPR64Opnd, setult>, R3I<0b0100101>; +def MASKEQZ : Int_Reg3<"maskeqz", GPR64Opnd>, R3I<0b0100110>; //[] +def MASKNEZ : Int_Reg3<"masknez", GPR64Opnd>, R3I<0b0100111>; //[] + +def NOR : Nor<"nor", GPR64Opnd>, R3I<0b0101000>; +def AND : Int_Reg3<"and", GPR64Opnd, and>, R3I<0b0101001>; +def OR : Int_Reg3<"or", GPR64Opnd, or>, R3I<0b0101010>; +def XOR : Int_Reg3<"xor", GPR64Opnd, xor>, R3I<0b0101011>; +def ORN : Int_Reg3<"orn", GPR64Opnd>, R3I<0b0101100>; +def ANDN : Int_Reg3<"andn", GPR64Opnd>, R3I<0b0101101>; + +def SLL_D : Shift_Var<"sll.d", GPR64Opnd, shl>, R3I<0b0110001>; +def SRL_D : Shift_Var<"srl.d", GPR64Opnd, srl>, R3I<0b0110010>; +def SRA_D : Shift_Var<"sra.d", GPR64Opnd, sra>, R3I<0b0110011>; +def ROTR_D: Shift_Var<"rotr.d", GPR64Opnd, rotr>, R3I<0b0110111>; + +def MUL_D : Int_Reg3<"mul.d", GPR64Opnd, mul>, R3I<0b0111011>; +def MULH_D : Int_Reg3<"mulh.d", GPR64Opnd, mulhs>, R3I<0b0111100>; +def MULH_DU : Int_Reg3<"mulh.du", GPR64Opnd, mulhu>, R3I<0b0111101>; +def MULW_D_W : Int_Reg3<"mulw.d.w", GPR64Opnd>, R3I<0b0111110>; +def MULW_D_WU : Int_Reg3<"mulw.d.wu", GPR64Opnd>, R3I<0b0111111>; + +let usesCustomInserter = 1 in { +def DIV_D : Int_Reg3<"div.d", GPR64Opnd, sdiv>, R3I<0b1000100>; +def MOD_D : Int_Reg3<"mod.d", GPR64Opnd, srem>, R3I<0b1000101>; +def DIV_DU : Int_Reg3<"div.du", GPR64Opnd, udiv>, R3I<0b1000110>; +def MOD_DU : Int_Reg3<"mod.du", GPR64Opnd, urem>, R3I<0b1000111>; +} + +def CRC_W_D_W : Int_Reg3_Crc<"crc.w.d.w", GPR64Opnd, GPR32Opnd, int_loongarch_crc_w_d_w>, R3I<0b1001011>; +def CRCC_W_D_W : Int_Reg3_Crc<"crcc.w.d.w", GPR64Opnd, GPR32Opnd, int_loongarch_crcc_w_d_w>, R3I<0b1001111>; +/// +/// SLLI +/// +def SLLI_D : Shift_Imm64<"slli.d", GPR64Opnd, shl>, R2_IMM6<0b00>; +def SRLI_D : Shift_Imm64<"srli.d", GPR64Opnd, srl>, R2_IMM6<0b01>; +def SRAI_D : Shift_Imm64<"srai.d", GPR64Opnd, sra>, R2_IMM6<0b10>; +def ROTRI_D : Shift_Imm64<"rotri.d", GPR64Opnd, rotr>, R2_IMM6<0b11>; +/// +/// Misc +/// +def ALSL_WU : 
Reg3_SaU<"alsl.wu", GPR64Opnd, GPR32Opnd, uimm2_plus1>, R3_SA2<0b00011> { + let Pattern = [(set GPR64Opnd:$rd, + (i64 (zext (add GPR32Opnd:$rk, (shl GPR32Opnd:$rj, immZExt2Alsl:$sa)))))]; +} + +def ALSL_D : Reg3_Sa<"alsl.d", GPR64Opnd, uimm2_plus1>, R3_SA2<0b10110> { + let Pattern = [(set GPR64Opnd:$rd, + (add GPR64Opnd:$rk, (shl GPR64Opnd:$rj, immZExt2Alsl:$sa)))]; +} +def BYTEPICK_D : Reg3_Sa<"bytepick.d", GPR64Opnd, uimm3>, R3_SA3; //[] + +def ASRTLE_D : Assert<"asrtle.d", GPR64Opnd, int_loongarch_asrtle_d>, ASSERT<0b10>; +def ASRTGT_D : Assert<"asrtgt.d", GPR64Opnd, int_loongarch_asrtgt_d>, ASSERT<0b11>; + +def DBCL : Code15<"dbcl">, CODE15<0b1010101>; +def HYPCALL : Code15<"hypcall">, CODE15<0b1010111>; + +/// +/// R2_IMM12 +/// +def SLTI : SetCC_I<"slti", GPR64Opnd, simm12, setlt>, R2_IMM12<0b000>; +def SLTUI : SetCC_I<"sltui", GPR64Opnd, simm12, setult>, R2_IMM12<0b001>; +def ADDI_W64 : Int_Reg2_Imm12<"addi.w", GPR64Opnd, simm12>, R2_IMM12<0b010>; +def ADDI_D : Int_Reg2_Imm12<"addi.d", GPR64Opnd, simm12, add>, R2_IMM12<0b011>; +def LU52I_D : Int_Reg2_Imm12<"lu52i.d", GPR64Opnd, simm12>, R2_IMM12<0b100>; +def ANDI : Int_Reg2_Imm12<"andi", GPR64Opnd, uimm12, and>, R2_IMM12<0b101>; +def ORI : Int_Reg2_Imm12<"ori", GPR64Opnd, uimm12, or>, R2_IMM12<0b110>; +def XORI : Int_Reg2_Imm12<"xori", GPR64Opnd, uimm12, xor>, R2_IMM12<0b111>; + +/// +/// Privilege Instructions +/// +def CSRRD : CSR<"csrrd", GPR64Opnd, uimm14, int_loongarch_dcsrrd>, R1_CSR<0b0000000000100>; +def CSRWR : CSRW<"csrwr", GPR64Opnd, uimm14, int_loongarch_dcsrwr>, R1_CSR<0b0000100000100>; +def CSRXCHG : CSRX<"csrxchg", GPR64Opnd, uimm14, int_loongarch_dcsrxchg>, R2_CSR<0b00000100>; +def IOCSRRD_D : Int_Reg2_Iocsrrd<"iocsrrd.d", GPR64Opnd, GPR32Opnd, int_loongarch_iocsrrd_d>, R2P<0b011>; +def IOCSRWR_D : Int_Reg2_Iocsrwr<"iocsrwr.d", GPR64Opnd, GPR32Opnd, int_loongarch_iocsrwr_d>, R2P<0b111>; +def CACOP : CAC<"cacop", GPR64Opnd, simm12, int_loongarch_dcacop>, R1_CACHE; +def LDDIR : LEVEL<"lddir", GPR64Opnd>, R2_LEVEL<0b00000110010000>; +def LDPTE : SEQ<"ldpte", GPR64Opnd>, R1_SEQ<0b00000110010001>; + +def IDLE : Wait<"idle">, WAIT_FM; +def INVTLB : Invtlb<"invtlb", GPR64Opnd>, R2_INVTLB; +// +def IOCSRRD_B : Int_Reg2<"iocsrrd.b", GPR64Opnd>, R2P<0b000>; +def IOCSRRD_H : Int_Reg2<"iocsrrd.h", GPR64Opnd>, R2P<0b001>; +def IOCSRRD_W : Int_Reg2<"iocsrrd.w", GPR64Opnd>, R2P<0b010>; +// +def TLBCLR : OP32<"tlbclr", int_loongarch_tlbclr>, IMM32<0b001000>; +def TLBFLUSH : OP32<"tlbflush", int_loongarch_tlbflush>, IMM32<0b001001>; +def TLBSRCH : OP32<"tlbsrch", int_loongarch_tlbsrch>, IMM32<0b001010>; +def TLBRD : OP32<"tlbrd", int_loongarch_tlbrd>, IMM32<0b001011>; +def TLBWR : OP32<"tlbwr", int_loongarch_tlbwr>, IMM32<0b001100>; +def TLBFILL : OP32<"tlbfill", int_loongarch_tlbfill>, IMM32<0b001101>; +def ERTN : OP32<"ertn">, IMM32<0b001110>; + +/// +/// R1_IMM20 +/// +def ADDU16I_D : SI16_R2<"addu16i.d", GPR64Opnd>, R2_SI16<0b000100>; +def LU12I_W : SI20<"lu12i.w", GPR64Opnd, simm20>, R1_SI20<0b0001010>; +def LU32I_D : SI20<"lu32i.d", GPR64Opnd, simm20>, R1_SI20<0b0001011>; +def LU32I_D_R2 : SI20_R2<"lu32i.d", GPR64Opnd, simm20>, R1_SI20<0b0001011>; +def PCADDI : SI20<"pcaddi", GPR64Opnd, simm20>, R1_SI20<0b0001100>; +def PCALAU12I : SI20<"pcalau12i", GPR64Opnd, simm20>, R1_SI20<0b0001101>; +def PCADDU12I : SI20<"pcaddu12i", GPR64Opnd, simm20>, R1_SI20<0b0001110>; +def PCADDU18I : SI20<"pcaddu18i", GPR64Opnd, simm20>, R1_SI20<0b0001111>; + + +def BEQZ : Beqz<"beqz", brtarget, seteq, GPR64Opnd>, R1_IMM21BEQZ<0b010000>; +def BNEZ : 
Beqz<"bnez", brtarget, setne, GPR64Opnd>, R1_IMM21BEQZ<0b010001>; + +def JIRL : FJirl<"jirl", simm16, GPR64Opnd>, R2_IMM16JIRL; +let isCall = 1, isCTI=1, isCodeGenOnly = 1 in { +def JIRL_CALL : FJirl<"jirl", simm16, GPR64Opnd>, R2_IMM16JIRL; +} + +def B : JumpFB, IMM26B<0b010100>; + +def BEQ : Beq<"beq", brtarget, seteq, GPR64Opnd>, R2_IMM16BEQ<0b010110>; +def BNE : Beq<"bne", brtarget, setne, GPR64Opnd>, R2_IMM16BEQ<0b010111>; +def BLT : Beq<"blt", brtarget, setlt, GPR64Opnd>, R2_IMM16BEQ<0b011000>; +def BGE : Beq<"bge", brtarget, setge, GPR64Opnd>, R2_IMM16BEQ<0b011001>; +def BLTU : Beq<"bltu", brtarget, setult, GPR64Opnd>, R2_IMM16BEQ<0b011010>; +def BGEU : Beq<"bgeu", brtarget, setuge, GPR64Opnd>, R2_IMM16BEQ<0b011011>; + +/// +/// Mem access +/// +class LLBase : + InstForm<(outs RO:$rd), (ins MO:$addr), !strconcat(opstr, "\t$rd, $addr"), + [], FrmI, opstr> { + let DecoderMethod = "DecodeMemSimm14"; + let mayLoad = 1; +} + +class SCBase : + InstForm<(outs RO:$dst), (ins RO:$rd, MO:$addr), + !strconcat(opstr, "\t$rd, $addr"), [], FrmI> { + let DecoderMethod = "DecodeMemSimm14"; + let mayStore = 1; + let Constraints = "$rd = $dst"; +} + +class STGT_LE : + InstForm<(outs), (ins RO:$rd, RO:$rj, RO:$rk), + !strconcat(opstr, "\t$rd, $rj, $rk"), + [], FrmI, opstr>; + +class Float_STGT_LE + : InstForm<(outs), (ins RD:$fd, RS:$rj, RS:$rk), + !strconcat(opstr, "\t$fd, $rj, $rk"), + [], FrmR, opstr>; + +def LL_D : LLBase<"ll.d", GPR64Opnd, mem_simm14_lsl2>, LL_SC<0b010>; +def SC_D : SCBase<"sc.d", GPR64Opnd, mem_simm14_lsl2>, LL_SC<0b011>; + +def LDPTR_W : LdPtr<"ldptr.w", GPR64Opnd>, LL_SC<0b100>; +def STPTR_W : StPtr<"stptr.w", GPR64Opnd>, LL_SC<0b101>; +def LDPTR_D : LdPtr<"ldptr.d", GPR64Opnd>, LL_SC<0b110>; +def STPTR_D : StPtr<"stptr.d", GPR64Opnd>, LL_SC<0b111>; + +def LD_B : Ld<"ld.b", GPR64Opnd, mem, sextloadi8>, LOAD_STORE<0b0000>; +def LD_H : Ld<"ld.h", GPR64Opnd, mem, sextloadi16>, LOAD_STORE<0b0001>; +def LD_W : Ld<"ld.w", GPR64Opnd, mem, sextloadi32>, LOAD_STORE<0b0010>; +def LD_D : Ld<"ld.d", GPR64Opnd, mem_simmptr, load>, LOAD_STORE<0b0011>; +def ST_B : St<"st.b", GPR64Opnd, mem, truncstorei8>, LOAD_STORE<0b0100>; +def ST_H : St<"st.h", GPR64Opnd, mem, truncstorei16>, LOAD_STORE<0b0101>; +def ST_W : St<"st.w", GPR64Opnd, mem, truncstorei32>, LOAD_STORE<0b0110>; +def ST_D : St<"st.d", GPR64Opnd, mem_simmptr, store>, LOAD_STORE<0b0111>; +def LD_BU : Ld<"ld.bu", GPR64Opnd, mem, zextloadi8>, LOAD_STORE<0b1000>; +def LD_HU : Ld<"ld.hu", GPR64Opnd, mem, zextloadi16>, LOAD_STORE<0b1001>; +def LD_WU : Ld<"ld.wu", GPR64Opnd, mem, zextloadi32>, LOAD_STORE<0b1010>; + +def AMSWAP_W : ATOMIC<"amswap.w", GPR32Opnd, amem>, AM<0b000000>; +def AMSWAP_D : ATOMIC<"amswap.d", GPR64Opnd, amem>, AM<0b000001>; +def AMADD_W : ATOMIC<"amadd.w", GPR32Opnd, amem>, AM<0b000010>; +def AMADD_D : ATOMIC<"amadd.d", GPR64Opnd, amem>, AM<0b000011>; +def AMAND_W : ATOMIC<"amand.w", GPR32Opnd, amem>, AM<0b000100>; +def AMAND_D : ATOMIC<"amand.d", GPR64Opnd, amem>, AM<0b000101>; +def AMOR_W : ATOMIC<"amor.w", GPR32Opnd, amem>, AM<0b000110>; +def AMOR_D : ATOMIC<"amor.d", GPR64Opnd, amem>, AM<0b000111>; +def AMXOR_W : ATOMIC<"amxor.w", GPR32Opnd, amem>, AM<0b001000>; +def AMXOR_D : ATOMIC<"amxor.d", GPR64Opnd, amem>, AM<0b001001>; +def AMMAX_W : ATOMIC<"ammax.w", GPR32Opnd, amem>, AM<0b001010>; +def AMMAX_D : ATOMIC<"ammax.d", GPR64Opnd, amem>, AM<0b001011>; +def AMMIN_W : ATOMIC<"ammin.w", GPR32Opnd, amem>, AM<0b001100>; +def AMMIN_D : ATOMIC<"ammin.d", GPR64Opnd, amem>, AM<0b001101>; +def AMMAX_WU : 
ATOMIC<"ammax.wu", GPR32Opnd, amem>, AM<0b001110>; +def AMMAX_DU : ATOMIC<"ammax.du", GPR64Opnd, amem>, AM<0b001111>; +def AMMIN_WU : ATOMIC<"ammin.wu", GPR32Opnd, amem>, AM<0b010000>; +def AMMIN_DU : ATOMIC<"ammin.du", GPR64Opnd, amem>, AM<0b010001>; + + +def AMSWAP_DB_W : ATOMIC<"amswap_db.w", GPR32Opnd, amem>, AM<0b010010>; +def AMSWAP_DB_D : ATOMIC<"amswap_db.d", GPR64Opnd, amem>, AM<0b010011>; +def AMADD_DB_W : ATOMIC<"amadd_db.w", GPR32Opnd, amem>, AM<0b010100>; +def AMADD_DB_D : ATOMIC<"amadd_db.d", GPR64Opnd, amem>, AM<0b010101>; +def AMAND_DB_W : ATOMIC<"amand_db.w", GPR32Opnd, amem>, AM<0b010110>; +def AMAND_DB_D : ATOMIC<"amand_db.d", GPR64Opnd, amem>, AM<0b010111>; +def AMOR_DB_W : ATOMIC<"amor_db.w", GPR32Opnd, amem>, AM<0b011000>; +def AMOR_DB_D : ATOMIC<"amor_db.d", GPR64Opnd, amem>, AM<0b011001>; +def AMXOR_DB_W : ATOMIC<"amxor_db.w", GPR32Opnd, amem>, AM<0b011010>; +def AMXOR_DB_D : ATOMIC<"amxor_db.d", GPR64Opnd, amem>, AM<0b011011>; +def AMMAX_DB_W : ATOMIC<"ammax_db.w", GPR32Opnd, amem>, AM<0b011100>; +def AMMAX_DB_D : ATOMIC<"ammax_db.d", GPR64Opnd, amem>, AM<0b011101>; +def AMMIN_DB_W : ATOMIC<"ammin_db.w", GPR32Opnd, amem>, AM<0b011110>; +def AMMIN_DB_D : ATOMIC<"ammin_db.d", GPR64Opnd, amem>, AM<0b011111>; +def AMMAX_DB_WU : ATOMIC<"ammax_db.wu", GPR32Opnd, amem>, AM<0b100000>; +def AMMAX_DB_DU : ATOMIC<"ammax_db.du", GPR64Opnd, amem>, AM<0b100001>; +def AMMIN_DB_WU : ATOMIC<"ammin_db.wu", GPR32Opnd, amem>, AM<0b100010>; +def AMMIN_DB_DU : ATOMIC<"ammin_db.du", GPR64Opnd, amem>, AM<0b100011>; + +def LDGT_B : Int_Reg3<"ldgt.b", GPR64Opnd>, R3MI<0b11110000>; +def LDGT_H : Int_Reg3<"ldgt.h", GPR64Opnd>, R3MI<0b11110001>; +def LDGT_W : Int_Reg3<"ldgt.w", GPR64Opnd>, R3MI<0b11110010>; +def LDGT_D : Int_Reg3<"ldgt.d", GPR64Opnd>, R3MI<0b11110011>; +def LDLE_B : Int_Reg3<"ldle.b", GPR64Opnd>, R3MI<0b11110100>; +def LDLE_H : Int_Reg3<"ldle.h", GPR64Opnd>, R3MI<0b11110101>; +def LDLE_W : Int_Reg3<"ldle.w", GPR64Opnd>, R3MI<0b11110110>; +def LDLE_D : Int_Reg3<"ldle.d", GPR64Opnd>, R3MI<0b11110111>; +def STGT_B : STGT_LE<"stgt.b", GPR64Opnd>, R3MI<0b11111000>; +def STGT_H : STGT_LE<"stgt.h", GPR64Opnd>, R3MI<0b11111001>; +def STGT_W : STGT_LE<"stgt.w", GPR64Opnd>, R3MI<0b11111010>; +def STGT_D : STGT_LE<"stgt.d", GPR64Opnd>, R3MI<0b11111011>; +def STLE_B : STGT_LE<"stle.b", GPR64Opnd>, R3MI<0b11111100>; +def STLE_H : STGT_LE<"stle.h", GPR64Opnd>, R3MI<0b11111101>; +def STLE_W : STGT_LE<"stle.w", GPR64Opnd>, R3MI<0b11111110>; +def STLE_D : STGT_LE<"stle.d", GPR64Opnd>, R3MI<0b11111111>; + +let isCodeGenOnly = 1 in { +def PRELD : Preld<"preld", mem, GPR64Opnd>, PRELD_FM; +} + +def PRELD_Raw : Preld_Raw<"preld", GPR64Opnd>, PRELD_FM; + +let isCall=1, isCTI=1, Defs = [RA] in { + class JumpLink : + InstForm<(outs), (ins opnd:$target), !strconcat(opstr, "\t$target"), + [(LoongArchJmpLink tglobaladdr:$target)], FrmJ, opstr> { + let DecoderMethod = "DecodeJumpTarget"; + } +} +def LONG_BRANCH_PCADDU12I : LoongArchPseudo<(outs GPR64Opnd:$dst), + (ins brtarget:$tgt), []>, GPR_64; + +def LONG_BRANCH_ADDID2Op : LoongArchPseudo<(outs GPR64Opnd:$dst), + (ins GPR64Opnd:$src, brtarget:$tgt), []>, GPR_64; + +def LONG_BRANCH_ADDID : LoongArchPseudo<(outs GPR64Opnd:$dst), + (ins GPR64Opnd:$src, brtarget:$tgt, brtarget:$baltgt), []>, GPR_64; + +def LEA_ADDI_D: EffectiveAddress<"addi.d", GPR64Opnd>, LEA_ADDI_FM<0b011>, GPR_64; + +class PseudoReturnBase : LoongArchPseudo<(outs), (ins RO:$rs), + []> { + let isTerminator = 1; + let isBarrier = 1; + let isReturn = 1; + let isCodeGenOnly = 1; + let 
hasCtrlDep = 1;
+  let hasExtraSrcRegAllocReq = 1;
+  bit isCTI = 1;
+}
+
+def PseudoReturn64 : PseudoReturnBase<GPR64Opnd>;
+//def PseudoReturn : PseudoReturnBase<GPR32Opnd>;
+
+let isCall=1, isCTI=1, Defs=[RA], isCodeGenOnly=1 in {
+def PseudoCall : LoongArchPseudo<(outs), (ins calltarget:$target),
+                                 []>;
+}
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in
+def PseudoTailCall : LoongArchPseudo<(outs), (ins calltarget:$target),
+                                     []>;
+
+class PseudoTailBase<DAGOperand opnd> : LoongArchPseudo<(outs), (ins opnd:$offset26),
+                                                        []> {
+  let isTerminator = 1;
+  let isBarrier = 1;
+  let isReturn = 1;
+  let isCodeGenOnly = 1;
+}
+def PseudoTailReturn : PseudoTailBase<jmptarget>;
+
+def : LoongArchPat<(LoongArchTailCall tglobaladdr:$dst),
+                   (PseudoTailCall tglobaladdr:$dst)>;
+
+def : LoongArchPat<(LoongArchTailCall texternalsym:$dst),
+                   (PseudoTailCall texternalsym:$dst)>;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, isIndirectBranch = 1, Uses = [SP] in
+def PseudoTAILIndirect : LoongArchPseudo<(outs), (ins GPRTC64Opnd:$rj),
+                                         [(LoongArchTailCall GPRTC64Opnd:$rj)]>,
+                         PseudoInstExpansion<(JIRL ZERO_64, GPR64Opnd:$rj, 0)>;
+
+def : LoongArchPat<(LoongArchJmpLink tglobaladdr:$dst),
+                   (PseudoCall tglobaladdr:$dst)>;
+
+def : LoongArchPat<(LoongArchJmpLink (i32 texternalsym:$dst)),
+                   (PseudoCall texternalsym:$dst)>;
+def : LoongArchPat<(LoongArchJmpLink (i64 texternalsym:$dst)),
+                   (PseudoCall texternalsym:$dst)>;
+
+def BL : JumpLink<"bl", calltarget>, FJ<0b010101>;
+
+class IsAsCheapAsAMove {
+  bit isAsCheapAsAMove = 1;
+}
+
+class LoadUpper<string opstr, RegisterOperand RO, Operand Imm>:
+  InstForm<(outs RO:$rt), (ins Imm:$imm16), !strconcat(opstr, "\t$rt, $imm16"),
+           [], FrmI, opstr>, IsAsCheapAsAMove {
+  let hasSideEffects = 0;
+  let isReMaterializable = 1;
+  let mayLoad = 1;
+}
+
+let isCodeGenOnly = 1 in {
+def LAPCREL : LoadUpper<"la.pcrel", GPR64Opnd, uimm16_64_relaxed>, LUI_FM, GPR_64;
+}
+
+def NOP : LoongArchPseudo<(outs), (ins), []>,
+          PseudoInstExpansion<(ANDI ZERO_64, ZERO_64, 0)>;
+
+def : LoongArchInstAlias<"nop", (ANDI ZERO_64, ZERO_64, 0), 1>;
+def : LoongArchInstAlias<"jr $rd", (JIRL ZERO_64, GPR64Opnd:$rd, 0), 1>;
+def : LoongArchInstAlias<"move $dst, $src",
+                         (OR GPR64Opnd:$dst, GPR64Opnd:$src, ZERO_64), 1>, GPR_64;
+
+def UImm12RelaxedAsmOperandClass
+: UImmAsmOperandClass<12, [ConstantUImm20AsmOperandClass]> {
+  let Name = "UImm12_Relaxed";
+  let PredicateMethod = "isAnyImm<12>";
+  let DiagnosticType = "UImm12_Relaxed";
+}
+
+def SImm12RelaxedAsmOperandClass
+: SImmAsmOperandClass<12, [UImm12RelaxedAsmOperandClass]> {
+  let Name = "SImm12_Relaxed";
+  let PredicateMethod = "isAnyImm<12>";
+  let DiagnosticType = "SImm12_Relaxed";
+}
+
+def simm12_relaxed : Operand<i32> {
+  let DecoderMethod = "DecodeSImmWithOffsetAndScale<12>";
+  let ParserMatchClass = !cast<AsmOperandClass>("SImm12RelaxedAsmOperandClass");
+}
+
+def : LoongArchPat<(i64 (anyext GPR32:$src)),
+                   (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>, GPR_64;
+
+let usesCustomInserter = 1 in {
+  def ATOMIC_LOAD_ADD_I64 : Atomic2Ops<atomic_load_add_64, GPR64>;
+  def ATOMIC_LOAD_SUB_I64 : Atomic2Ops<atomic_load_sub_64, GPR64>;
+  def ATOMIC_LOAD_AND_I64 : Atomic2Ops<atomic_load_and_64, GPR64>;
+  def ATOMIC_LOAD_OR_I64 : Atomic2Ops<atomic_load_or_64, GPR64>;
+  def ATOMIC_LOAD_XOR_I64 : Atomic2Ops<atomic_load_xor_64, GPR64>;
+  def ATOMIC_LOAD_NAND_I64 : Atomic2Ops<atomic_load_nand_64, GPR64>;
+  def ATOMIC_SWAP_I64 : Atomic2Ops<atomic_swap_64, GPR64>;
+  def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap<atomic_cmp_swap_64, GPR64>;
+
+  def ATOMIC_LOAD_MAX_I64 : Atomic2Ops<atomic_load_max_64, GPR64>;
+  def ATOMIC_LOAD_MIN_I64 : Atomic2Ops<atomic_load_min_64, GPR64>;
+  def ATOMIC_LOAD_UMAX_I64 : Atomic2Ops<atomic_load_umax_64, GPR64>;
+  def ATOMIC_LOAD_UMIN_I64 : Atomic2Ops<atomic_load_umin_64, GPR64>;
+}
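+
+// Illustrative sketch only (not part of the original patch): the
+// usesCustomInserter pseudos above are rewritten by
+// EmitInstrWithCustomInserter into the *_POSTRA pseudos below, which
+// LoongArchExpandPseudo then expands into an ll.d/sc.d retry loop after
+// register allocation. For ATOMIC_LOAD_ADD_I64 one would expect machine
+// code roughly like:
+//
+//   .retry:
+//     ll.d   $dst, $ptr, 0        # load-linked old value
+//     add.d  $tmp, $dst, $incr    # apply the read-modify-write operation
+//     sc.d   $tmp, $ptr, 0        # conditional store; writes success flag
+//     beqz   $tmp, .retry         # loop until the store succeeds
+//
+// The register names and label are placeholders, not output of this backend.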
+
+def ATOMIC_LOAD_ADD_I64_POSTRA : Atomic2OpsPostRA<GPR64>;
+def ATOMIC_LOAD_SUB_I64_POSTRA : Atomic2OpsPostRA<GPR64>;
+def ATOMIC_LOAD_AND_I64_POSTRA : Atomic2OpsPostRA<GPR64>;
+def ATOMIC_LOAD_OR_I64_POSTRA : Atomic2OpsPostRA<GPR64>;
+def ATOMIC_LOAD_XOR_I64_POSTRA : Atomic2OpsPostRA<GPR64>;
+def ATOMIC_LOAD_NAND_I64_POSTRA : Atomic2OpsPostRA<GPR64>;
+
+def ATOMIC_SWAP_I64_POSTRA : Atomic2OpsPostRA<GPR64>;
+
+def ATOMIC_CMP_SWAP_I64_POSTRA : AtomicCmpSwapPostRA<GPR64>;
+
+def ATOMIC_LOAD_MAX_I64_POSTRA : Atomic2OpsPostRA<GPR64>;
+
+def ATOMIC_LOAD_MIN_I64_POSTRA : Atomic2OpsPostRA<GPR64>;
+
+def ATOMIC_LOAD_UMAX_I64_POSTRA : Atomic2OpsPostRA<GPR64>;
+
+def ATOMIC_LOAD_UMIN_I64_POSTRA : Atomic2OpsPostRA<GPR64>;
+
+def : LoongArchPat<(atomic_load_8 addr:$a), (LD_B addr:$a)>, GPR_64;
+def : LoongArchPat<(atomic_load_16 addr:$a), (LD_H addr:$a)>, GPR_64;
+def : LoongArchPat<(atomic_load_32 addrimm14lsl2:$a), (LDPTR_W addrimm14lsl2:$a)>, GPR_64;
+def : LoongArchPat<(atomic_load_32 addr:$a), (LD_W addr:$a)>, GPR_64;
+def : LoongArchPat<(atomic_load_64 addrimm14lsl2:$a), (LDPTR_D addrimm14lsl2:$a)>, GPR_64;
+def : LoongArchPat<(atomic_load_64 addr:$a), (LD_D addr:$a)>, GPR_64;
+
+def : LoongArchPat<(atomic_store_8 addr:$a, GPR64:$v),
+                   (ST_B GPR64:$v, addr:$a)>, GPR_64;
+def : LoongArchPat<(atomic_store_16 addr:$a, GPR64:$v),
+                   (ST_H GPR64:$v, addr:$a)>, GPR_64;
+def : LoongArchPat<(atomic_store_32 addrimm14lsl2:$a, GPR64:$v),
+                   (STPTR_W GPR64:$v, addrimm14lsl2:$a)>, GPR_64;
+def : LoongArchPat<(atomic_store_32 addr:$a, GPR64:$v),
+                   (ST_W GPR64:$v, addr:$a)>, GPR_64;
+def : LoongArchPat<(atomic_store_64 addrimm14lsl2:$a, GPR64:$v),
+                   (STPTR_D GPR64:$v, addrimm14lsl2:$a)>, GPR_64;
+def : LoongArchPat<(atomic_store_64 addr:$a, GPR64:$v),
+                   (ST_D GPR64:$v, addr:$a)>, GPR_64;
+
+def : LoongArchPat<(bswap GPR64:$rt), (REVH_D (REVB_4H GPR64:$rt))>;
+
+def immZExt5 : ImmLeaf<i32, [{ return Imm == (Imm & 0x1f); }]>;
+
+def immZExtRange2To64 : PatLeaf<(imm), [{
+  return isUInt<7>(N->getZExtValue()) && (N->getZExtValue() >= 2) &&
+         (N->getZExtValue() <= 64);
+}]>;
+
+// bstrins and bstrpick
+class InsBase<string opstr, RegisterOperand RO, Operand ImmOpnd,
+              SDPatternOperator OpNode = null_frag>
+  : InstForm<(outs RO:$rd), (ins RO:$rj, ImmOpnd:$msbd, ImmOpnd:$lsbd, RO:$src),
+             !strconcat(opstr, "\t$rd, $rj, $msbd, $lsbd"),
+             [(set RO:$rd, (OpNode RO:$rj, ImmOpnd:$msbd, ImmOpnd:$lsbd, RO:$src))],
+             FrmR, opstr> {
+  let Constraints = "$src = $rd";
+}
+
+class InsBase_32<string opstr, RegisterOperand RO, Operand ImmOpnd,
+                 SDPatternOperator OpNode = null_frag>
+  : InstForm<(outs RO:$rd), (ins RO:$rj, ImmOpnd:$msbw, ImmOpnd:$lsbw, RO:$src),
+             !strconcat(opstr, "\t$rd, $rj, $msbw, $lsbw"),
+             [(set RO:$rd, (OpNode RO:$rj, ImmOpnd:$msbw, ImmOpnd:$lsbw, RO:$src))],
+             FrmR, opstr> {
+  let Constraints = "$src = $rd";
+}
+
+class PickBase<string opstr, RegisterOperand RO, Operand ImmOpnd,
+               SDPatternOperator Op = null_frag>
+  : InstForm<(outs RO:$rd), (ins RO:$rj, ImmOpnd:$msbd, ImmOpnd:$lsbd),
+             !strconcat(opstr, "\t$rd, $rj, $msbd, $lsbd"),
+             [(set RO:$rd, (Op RO:$rj, ImmOpnd:$msbd, ImmOpnd:$lsbd))],
+             FrmR, opstr>;
+
+class PickBase_32<string opstr, RegisterOperand RO, Operand ImmOpnd,
+                  SDPatternOperator Op = null_frag>
+  : InstForm<(outs RO:$rd), (ins RO:$rj, ImmOpnd:$msbw, ImmOpnd:$lsbw),
+             !strconcat(opstr, "\t$rd, $rj, $msbw, $lsbw"),
+             [(set RO:$rd, (Op RO:$rj, ImmOpnd:$msbw, ImmOpnd:$lsbw))],
+             FrmR, opstr>;
+
+def BSTRINS_D : InsBase<"bstrins.d", GPR64Opnd, uimm6, LoongArchBstrins>,
+                INSERT_BIT64<0>;
+def BSTRPICK_D : PickBase<"bstrpick.d", GPR64Opnd, uimm6, LoongArchBstrpick>,
+                 INSERT_BIT64<1>;
+
+let isCodeGenOnly = 1 in {
+  def ZEXT64_32 : InstForm<(outs GPR64Opnd:$rd),
+                           (ins GPR32Opnd:$rj, uimm6:$msbd,
+                                uimm6:$lsbd),
+                           "bstrpick.d $rd, $rj, $msbd, $lsbd", [], FrmR, "bstrpick.d">,
+                  INSERT_BIT64<1>;
+}
+
+//32-to-64-bit extension
+def : LoongArchPat<(i64 (zext GPR32:$src)), (ZEXT64_32 GPR32:$src, 31, 0)>;
+def : LoongArchPat<(i64 (extloadi1 addr:$src)), (LD_B addr:$src)>,
+ GPR_64; +def : LoongArchPat<(i64 (extloadi8 addr:$src)), (LD_B addr:$src)>, + GPR_64; +def : LoongArchPat<(i64 (extloadi16 addr:$src)), (LD_H addr:$src)>, + GPR_64; +def : LoongArchPat<(i64 (extloadi32 addr:$src)), (LD_W addr:$src)>, + GPR_64; + +class LDX_FT_LA : + InstForm<(outs DRC:$rd), (ins PtrRC:$rj, PtrRC:$rk), + !strconcat(opstr, "\t$rd, $rj, $rk"), + [(set DRC:$rd, (OpNode (add iPTR:$rj, iPTR:$rk)))], + FrmR, opstr> { + let AddedComplexity = 20; + let canFoldAsLoad = 1; + string BaseOpcode = opstr; + let mayLoad = 1; +} + +class STX_FT_LA : + InstForm<(outs), (ins DRC:$rd, PtrRC:$rj, PtrRC:$rk), + !strconcat(opstr, "\t$rd, $rj, $rk"), + [(OpNode DRC:$rd, (add iPTR:$rj, iPTR:$rk))], + FrmI, opstr> { + string BaseOpcode = opstr; + let mayStore = 1; + let AddedComplexity = 20; +} + + +def LDX_B : LDX_FT_LA<"ldx.b", GPR64Opnd, sextloadi8>, + R3MI<0b00000000>; +def LDX_H : LDX_FT_LA<"ldx.h", GPR64Opnd, sextloadi16>, + R3MI<0b00001000>; +def LDX_W : LDX_FT_LA<"ldx.w", GPR64Opnd, sextloadi32>, + R3MI<0b00010000>; +def LDX_D : LDX_FT_LA<"ldx.d", GPR64Opnd, load>, + R3MI<0b00011000>; +def STX_B : STX_FT_LA<"stx.b", GPR64Opnd, truncstorei8>, + R3MI<0b00100000>; +def STX_H : STX_FT_LA<"stx.h", GPR64Opnd, truncstorei16>, + R3MI<0b00101000>; +def STX_W : STX_FT_LA<"stx.w", GPR64Opnd, truncstorei32>, + R3MI<0b00110000>; +def STX_D : STX_FT_LA<"stx.d", GPR64Opnd, store>, + R3MI<0b00111000>; +def LDX_BU : LDX_FT_LA<"ldx.bu", GPR64Opnd, extloadi8>, + R3MI<0b01000000>; +def LDX_HU : LDX_FT_LA<"ldx.hu", GPR64Opnd, extloadi16>, + R3MI<0b01001000>; +def LDX_WU : LDX_FT_LA<"ldx.wu", GPR64Opnd, zextloadi32>, + R3MI<0b01010000>; + +//def : LoongArchPat<(bswap GPR64:$rj), (REVH_D (REVB_4H GPR64:$rj))>; +//def : LoongArchPat<(bswap GPR64:$rj), (ROTRI_D (REVB_2W GPR64:$rj), 32)>; +def : LoongArchPat<(bswap GPR64:$rj), (REVB_D GPR64:$rj)>; + +let isCodeGenOnly = 1 in { + def SLLI_D_64_32 : Shift_Imm64<"", GPR64Opnd>, R2_IMM6<0b00>, GPR_64 { + let imm6 = 0; + let AsmString = "slli.d\t$rd, $rj, 32"; + let InOperandList = (ins GPR32:$rj); + let OutOperandList = (outs GPR64:$rd); + } + + let isMoveReg = 1, imm5 = 0, + AsmString = "slli.w\t$rd, $rj, 0", + OutOperandList = (outs GPR64:$rd) in { + let InOperandList = (ins GPR32:$rj) in + def SLLI_W_64_32 : Shift_Imm32<"", GPR32Opnd>, R2_IMM5<0b00>, GPR_64; + let InOperandList = (ins GPR64:$rj) in + def SLLI_W_64_64 : Shift_Imm32<"", GPR32Opnd>, R2_IMM5<0b00>, GPR_64; + } + + let AsmString = "sltui\t$rd, $rj, $imm12", + OutOperandList = (outs GPR64:$rd) in { + let InOperandList = (ins GPR64:$rj, simm12:$imm12) in + def SLTUI_64 : SetCC_I<"", GPR64Opnd, simm12>, R2_IMM12<0b001>, GPR_64; + } +} + +// 32-to-64-bit extension +//def : LoongArchPat<(i64 (zext GPR32:$src)), (SRLI_D (SLLI_D_64_32 GPR32:$src), 32)>, GPR_64; +def : LoongArchPat<(i64 (sext GPR32:$src)), (SLLI_W_64_32 GPR32:$src)>, GPR_64; +def : LoongArchPat<(i64 (sext_inreg GPR64:$src, i32)), (SLLI_W_64_64 GPR64:$src)>, GPR_64; + +let Uses = [A0, A1], isTerminator = 1, isReturn = 1, isBarrier = 1, isCTI = 1 in { + def LoongArcheh_return32 : LoongArchPseudo<(outs), (ins GPR32:$spoff, GPR32:$dst), + [(LoongArchehret GPR32:$spoff, GPR32:$dst)]>; + def LoongArcheh_return64 : LoongArchPseudo<(outs), (ins GPR64:$spoff,GPR64:$dst), + [(LoongArchehret GPR64:$spoff, GPR64:$dst)]>; +} + +def : LoongArchPat<(select i32:$cond, i64:$t, i64:$f), + (OR (MASKEQZ i64:$t, (SLLI_W_64_32 i32:$cond)), + (MASKNEZ i64:$f, (SLLI_W_64_32 i32:$cond)))>; +// setcc patterns +multiclass SeteqPats { + def : LoongArchPat<(seteq RC:$lhs, 
0), + (SLTiuOp RC:$lhs, 1)>; + def : LoongArchPat<(setne RC:$lhs, 0), + (SLTuOp ZEROReg, RC:$lhs)>; + def : LoongArchPat<(seteq RC:$lhs, RC:$rhs), + (SLTiuOp (XOROp RC:$lhs, RC:$rhs), 1)>; + def : LoongArchPat<(setne RC:$lhs, RC:$rhs), + (SLTuOp ZEROReg, (XOROp RC:$lhs, RC:$rhs))>; +} + +multiclass SetlePats { + def : LoongArchPat<(setle RC:$lhs, RC:$rhs), + (XORiOp (SLTOp RC:$rhs, RC:$lhs), 1)>; + def : LoongArchPat<(setule RC:$lhs, RC:$rhs), + (XORiOp (SLTuOp RC:$rhs, RC:$lhs), 1)>; +} + +multiclass SetgtPats { + def : LoongArchPat<(setgt RC:$lhs, RC:$rhs), + (SLTOp RC:$rhs, RC:$lhs)>; + def : LoongArchPat<(setugt RC:$lhs, RC:$rhs), + (SLTuOp RC:$rhs, RC:$lhs)>; +} + +multiclass SetgePats { + def : LoongArchPat<(setge RC:$lhs, RC:$rhs), + (XORiOp (SLTOp RC:$lhs, RC:$rhs), 1)>; + def : LoongArchPat<(setuge RC:$lhs, RC:$rhs), + (XORiOp (SLTuOp RC:$lhs, RC:$rhs), 1)>; +} + +multiclass SetgeImmPats { + def : LoongArchPat<(setge RC:$lhs, immSExt12:$rhs), + (XORiOp (SLTiOp RC:$lhs, immSExt12:$rhs), 1)>; + def : LoongArchPat<(setuge RC:$lhs, immSExt12:$rhs), + (XORiOp (SLTiuOp RC:$lhs, immSExt12:$rhs), 1)>; +} + +class LoadRegImmPat : + LoongArchPat<(ValTy (Node addrRegImm:$a)), (LoadInst addrRegImm:$a)>; + +class StoreRegImmPat : + LoongArchPat<(Node ValTy:$v, addrRegImm:$a), (StoreInst ValTy:$v, addrRegImm:$a)>; + +class LoadRegImm14Lsl2Pat : + LoongArchPat<(ValTy (Node addrimm14lsl2:$a)), (LoadInst addrimm14lsl2:$a)>; + +class StoreRegImm14Lsl2Pat : + LoongArchPat<(Node ValTy:$v, addrimm14lsl2:$a), (StoreInst ValTy:$v, addrimm14lsl2:$a)>; + +// Patterns for loads/stores with a reg+imm operand. +// let AddedComplexity = 40 so that these instructions are selected instead of +// LDX/STX which needs one more register and an ANDI instruction. +let AddedComplexity = 40 in { + def : LoadRegImmPat; + def : LoadRegImmPat; + def : LoadRegImmPat; + def : LoadRegImmPat; + def : LoadRegImmPat; + def : LoadRegImmPat; + def : LoadRegImmPat; + def : StoreRegImmPat; + def : StoreRegImmPat; + def : StoreRegImmPat; + def : StoreRegImmPat; + + def : LoadRegImm14Lsl2Pat; + def : LoadRegImm14Lsl2Pat; + def : StoreRegImm14Lsl2Pat; + def : StoreRegImm14Lsl2Pat; +} + +//===----------------------------------------------------------------------===// +// Base Extension Support +//===----------------------------------------------------------------------===// + +include "LoongArch32InstrInfo.td" +include "LoongArchInstrInfoF.td" +include "LoongArchLSXInstrFormats.td" +include "LoongArchLSXInstrInfo.td" +include "LoongArchLASXInstrFormats.td" +include "LoongArchLASXInstrInfo.td" + +defm : SeteqPats, GPR_64; +defm : SetlePats, GPR_64; +defm : SetgtPats, GPR_64; +defm : SetgePats, GPR_64; +defm : SetgeImmPats, GPR_64; + +/// +/// for relocation +/// +let isCodeGenOnly = 1 in { +def PCALAU12I_ri : SI20<"pcalau12i", GPR64Opnd, simm20>, R1_SI20<0b0001101>; +def ORI_rri : Int_Reg2_Imm12<"ori", GPR64Opnd, uimm12, or>, R2_IMM12<0b110>; +def LU12I_W_ri : SI20<"lu12i.w", GPR64Opnd, simm20>, R1_SI20<0b0001010>; +def LU32I_D_ri : SI20<"lu32i.d", GPR64Opnd, simm20>, R1_SI20<0b0001011>; +def LU52I_D_rri : Int_Reg2_Imm12<"lu52i.d", GPR64Opnd, simm12>, R2_IMM12<0b100>; +def ADDI_D_rri : Int_Reg2_Imm12<"addi.d", GPR64Opnd, simm12, add>, R2_IMM12<0b011>; +def LD_D_rri : Ld<"ld.d", GPR64Opnd, mem_simmptr, load>, LOAD_STORE<0b0011>; +def ADD_D_rrr : Int_Reg3<"add.d", GPR64Opnd, add>, R3I<0b0100001>; +def LDX_D_rrr : LDX_FT_LA<"ldx.d", GPR64Opnd, load>, + R3MI<0b00011000>; +} + 
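+
+// Illustrative sketch only (not part of the original patch): the
+// isCodeGenOnly *_ri/_rri variants above give codegen fixed-shape
+// instruction sequences to attach relocations to, and the la.* assembler
+// macros defined in the next section are expected to expand to the same
+// shapes. Assuming conventional PC-relative sequences, roughly:
+//
+//   la.local  $a0, sym   =>   pcalau12i $a0, %hi20(sym)   # page-aligned hi bits
+//                             addi.d    $a0, $a0, %lo12(sym)
+//   la.global $a0, sym   =>   pcalau12i $a0, %got_hi20(sym)
+//                             ld.d      $a0, $a0, %got_lo12(sym)
+//
+// The %-specifier spellings here are placeholders; the authoritative names
+// are defined by LoongArchMCExpr and the fixups in LoongArchFixupKinds.h.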
+//===----------------------------------------------------------------------===// +// Assembler Pseudo Instructions +//===----------------------------------------------------------------------===// +def LoadImm32 : LoongArchAsmPseudoInst<(outs GPR32Opnd:$rd), + (ins uimm32_coerced:$imm32), + "li.w\t$rd, $imm32">; +def LoadImm64 : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), + (ins imm64:$imm64), + "li.d\t$rd, $imm64">; +// load address +def LoadAddrLocal : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), + (ins imm64:$imm64), + "la.local\t$rd, $imm64">; +def : InstAlias<"la.pcrel $rd, $imm", + (LoadAddrLocal GPR64Opnd:$rd, imm64:$imm), 1>; +def LoadAddrGlobal : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), + (ins imm64:$imm64), + "la.global\t$rd, $imm64">; +def LoadAddrGlobal_Alias : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), + (ins imm64:$imm64), + "la\t$rd, $imm64">; +def : InstAlias<"la.got $rd, $imm", + (LoadAddrGlobal GPR64Opnd:$rd, imm64:$imm), 1>; + +def LoadAddrTLS_LE : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), + (ins imm64:$imm64), + "la.tls.le\t$rd, $imm64">; +def LoadAddrTLS_IE : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), + (ins imm64:$imm64), + "la.tls.ie\t$rd, $imm64">; +def LoadAddrTLS_GD : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), + (ins imm64:$imm64), + "la.tls.gd\t$rd, $imm64">; +def LoadAddrTLS_LD : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), + (ins imm64:$imm64), + "la.tls.ld\t$rd, $imm64">; + +// load address with a temp reg +def LoadAddrLocalRR : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), + (ins GPR64Opnd:$rt, imm64:$imm64), + "la.local\t$rd, $rt, $imm64">; +def LoadAddrGlobalRR : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), + (ins GPR64Opnd:$rt, imm64:$imm64), + "la.global\t$rd, $rt, $imm64">; +def LoadAddrTLS_IE_RR : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), + (ins GPR64Opnd:$rt, imm64:$imm64), + "la.tls.ie\t$rd, $rt, $imm64">; +def LoadAddrTLS_GD_RR : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), + (ins GPR64Opnd:$rt, imm64:$imm64), + "la.tls.gd\t$rd, $rt, $imm64">; +def LoadAddrTLS_LD_RR : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), + (ins GPR64Opnd:$rt, imm64:$imm64), + "la.tls.ld\t$rd, $rt, $imm64">; + +// trap when div zero +def PseudoTEQ : LoongArchPseudo<(outs), (ins GPR64Opnd:$rt), []>; + + +def : LoongArchPat<(i64 (sext (i32 (add GPR32:$src, immSExt12:$imm12)))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (ADDI_W GPR32:$src, immSExt12:$imm12), sub_32)>; + +def : LoongArchPat<(i64 (sext (i32 (add GPR32:$src, GPR32:$src2)))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (ADD_W GPR32:$src, GPR32:$src2), sub_32)>; + +def : LoongArchPat<(i64 (sext (i32 (sub GPR32:$src, GPR32:$src2)))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (SUB_W GPR32:$src, GPR32:$src2), sub_32)>; + +def : LoongArchPat<(i64 (sext (i32 (mul GPR32:$src, GPR32:$src2)))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (MUL_W GPR32:$src, GPR32:$src2), sub_32)>; + +def : LoongArchPat<(i64 (sext (i32 (shl GPR32:$src, immZExt5:$imm5)))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (SLLI_W GPR32:$src, immZExt5:$imm5), sub_32)>; + +def : LoongArchPat<(i64 (sext (i32 (shl GPR32:$src, GPR32:$src2)))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (SLL_W GPR32:$src, GPR32:$src2), sub_32)>; + +def : LoongArchPat<(i64 (sext (i32 (srl GPR32:$src, immZExt5:$imm5)))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (SRLI_W GPR32:$src, immZExt5:$imm5), sub_32)>; + +def : LoongArchPat<(i64 (sext (i32 (srl GPR32:$src, GPR32:$src2)))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (SRL_W GPR32:$src, GPR32:$src2), sub_32)>; + +def : 
LoongArchPat<(i64 (sext (i32 (sra GPR32:$src, immZExt5:$imm5)))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (SRAI_W GPR32:$src, immZExt5:$imm5), sub_32)>; + +def : LoongArchPat<(i64 (sext (i32 (sra GPR32:$src, GPR32:$src2)))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (SRA_W GPR32:$src, GPR32:$src2), sub_32)>; + + +def : LoongArchPat<(i64 (xor GPR64:$rj, (i64 -1))), + (NOR ZERO_64, GPR64:$rj)>; + +def : LoongArchPat<(and GPR64:$rj, (i64 (xor GPR64:$rk, (i64 -1)))), + (ANDN GPR64:$rj, GPR64:$rk)>; + +def : LoongArchPat<(i64 (or GPR64:$rj, (xor GPR64:$rk, (i64 -1)))), + (ORN GPR64:$rj, GPR64:$rk)>; + +def : LoongArchPat<(i64 (zext (i32 (seteq GPR64:$rj, (i64 0))))), + (SLTUI_64 GPR64:$rj, (i64 1))>; + + +def : LoongArchPat<(i64 (zext (i32 (srl GPR32:$src, immZExt5:$imm5)))), + (BSTRPICK_D (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), + (i32 31), immZExt5:$imm5)>; diff --git a/lib/Target/LoongArch/LoongArchInstrInfoF.td b/lib/Target/LoongArch/LoongArchInstrInfoF.td new file mode 100644 index 00000000..73711ff7 --- /dev/null +++ b/lib/Target/LoongArch/LoongArchInstrInfoF.td @@ -0,0 +1,629 @@ +//===- LoongArchInstrInfoF.td - Target Description for LoongArch Target -*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the LoongArch implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// +// FP immediate patterns. +def fpimm0 : PatLeaf<(fpimm), [{ + return N->isExactlyValue(+0.0); +}]>; + +def fpimm0neg : PatLeaf<(fpimm), [{ + return N->isExactlyValue(-0.0); +}]>; + +def fpimm1 : PatLeaf<(fpimm), [{ + return N->isExactlyValue(+1.0); +}]>; + +def IsNotSoftFloat : Predicate<"!Subtarget->useSoftFloat()">; + +class HARDFLOAT { list HardFloatPredicate = [IsNotSoftFloat]; } + +def SDT_LoongArchTruncIntFP : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>; + +def LoongArchTruncIntFP : SDNode<"LoongArchISD::TruncIntFP", SDT_LoongArchTruncIntFP>; + +def SDT_LoongArchFPBrcond : SDTypeProfile<0, 3, [SDTCisInt<0>, + SDTCisVT<1, i32>, + SDTCisVT<2, OtherVT>]>; + +def LoongArchFPBrcond : SDNode<"LoongArchISD::FPBrcond", SDT_LoongArchFPBrcond, + [SDNPHasChain, SDNPOptInGlue]>; + +def SDT_LoongArchCMovFP : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisVT<2, i32>, + SDTCisSameAs<1, 3>]>; + +def LoongArchCMovFP_T : SDNode<"LoongArchISD::CMovFP_T", SDT_LoongArchCMovFP, [SDNPInGlue]>; + +def LoongArchCMovFP_F : SDNode<"LoongArchISD::CMovFP_F", SDT_LoongArchCMovFP, [SDNPInGlue]>; + +def SDT_LoongArchFPCmp : SDTypeProfile<0, 3, [SDTCisSameAs<0, 1>, SDTCisFP<1>, + SDTCisVT<2, i32>]>; + +def LoongArchFPCmp : SDNode<"LoongArchISD::FPCmp", SDT_LoongArchFPCmp, [SDNPOutGlue]>; + +def SDT_LoongArchFSEL : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, + SDTCisVT<2, i32>, + SDTCisSameAs<1, 3>]>; + +def LoongArchFSEL : SDNode<"LoongArchISD::FSEL", SDT_LoongArchFSEL, + [SDNPInGlue]>; + +//===---------------------------------------------------------------------===/ +//Instruction Class Templates +//===---------------------------------------------------------------------===/ + +class Float_MOVF + : InstForm<(outs RO:$rd), (ins RC:$fj), + !strconcat(opstr, "\t$rd, $fj"), + [(set RO:$rd, (OpNode RC:$fj))], + FrmFR, opstr>, HARDFLOAT { + let isMoveReg = 1; +} + +class 
Float_MOVT<string opstr, RegisterOperand RO, RegisterOperand RC,
+                 SDPatternOperator OpNode = null_frag>
+  : InstForm<(outs RO:$fd), (ins RC:$rj),
+             !strconcat(opstr, "\t$fd, $rj"),
+             [(set RO:$fd, (OpNode RC:$rj))],
+             FrmFR, opstr>, HARDFLOAT {
+  let isMoveReg = 1;
+}
+
+class Float_CVT<string opstr, RegisterOperand RO, RegisterOperand RS,
+                SDPatternOperator OpNode = null_frag>
+  : InstForm<(outs RO:$fd), (ins RS:$fj),
+             !strconcat(opstr, "\t$fd, $fj"),
+             [(set RO:$fd, (OpNode RS:$fj))],
+             FrmFR, opstr>,
+    HARDFLOAT {
+  let hasSideEffects = 0;
+}
+
+/// float mov
+class Gpr_2_Fcsr<string opstr, RegisterOperand RO,
+                 SDPatternOperator OpNode = null_frag>
+  : InstForm<(outs FCSROpnd:$fcsr), (ins RO:$rj),
+             !strconcat(opstr, "\t$fcsr, $rj"),
+             [(set FCSROpnd:$fcsr, (OpNode RO:$rj))],
+             FrmR, opstr>;
+class Fcsr_2_Gpr<string opstr, RegisterOperand RO,
+                 SDPatternOperator OpNode = null_frag>
+  : InstForm<(outs RO:$rd), (ins FCSROpnd:$fcsr),
+             !strconcat(opstr, "\t$rd, $fcsr"),
+             [(set RO:$rd, (OpNode FCSROpnd:$fcsr))],
+             FrmR, opstr>;
+class Fgr_2_Fcfr<string opstr, RegisterOperand RO,
+                 SDPatternOperator OpNode = null_frag>
+  : InstForm<(outs FCFROpnd:$cd), (ins RO:$fj),
+             !strconcat(opstr, "\t$cd, $fj"),
+             [(set FCFROpnd:$cd, (OpNode RO:$fj))],
+             FrmR, opstr>;
+class Fcfr_2_Fgr<string opstr, RegisterOperand RO,
+                 SDPatternOperator OpNode = null_frag>
+  : InstForm<(outs RO:$fd), (ins FCFROpnd:$cj),
+             !strconcat(opstr, "\t$fd, $cj"),
+             [(set RO:$fd, (OpNode FCFROpnd:$cj))],
+             FrmR, opstr>;
+class Gpr_2_Fcfr<string opstr, RegisterOperand RO,
+                 SDPatternOperator OpNode = null_frag>
+  : InstForm<(outs FCFROpnd:$cd), (ins RO:$rj),
+             !strconcat(opstr, "\t$cd, $rj"),
+             [(set FCFROpnd:$cd, (OpNode RO:$rj))],
+             FrmR, opstr>;
+class Fcfr_2_Gpr<string opstr, RegisterOperand RO,
+                 SDPatternOperator OpNode = null_frag>
+  : InstForm<(outs RO:$rd), (ins FCFROpnd:$cj),
+             !strconcat(opstr, "\t$rd, $cj"),
+             [(set RO:$rd, (OpNode FCFROpnd:$cj))],
+             FrmR, opstr>;
+
+class FLDX<string opstr, RegisterOperand DRC, SDPatternOperator OpNode> :
+  InstForm<(outs DRC:$fd), (ins PtrRC:$rj, PtrRC:$rk),
+           !strconcat(opstr, "\t$fd, $rj, $rk"),
+           [(set DRC:$fd, (OpNode (add iPTR:$rj, iPTR:$rk)))],
+           FrmR, opstr> {
+  let AddedComplexity = 20;
+}
+
+class FSTX<string opstr, RegisterOperand DRC, SDPatternOperator OpNode> :
+  InstForm<(outs), (ins DRC:$fd, PtrRC:$rj, PtrRC:$rk),
+           !strconcat(opstr, "\t$fd, $rj, $rk"),
+           [(OpNode DRC:$fd, (add iPTR:$rj, iPTR:$rk))],
+           FrmR, opstr> {
+  let AddedComplexity = 20;
+}
+
+/// f{maxa/mina}.{s/d}
+class Float_Reg3_Fmaxa<string opstr, RegisterOperand RO>
+  : InstForm<(outs RO:$fd), (ins RO:$fj, RO:$fk),
+             !strconcat(opstr, "\t$fd, $fj, $fk"),
+             [], FrmR, opstr>;
+/// frecip
+class Float_Reg2_Frecip<string opstr, RegisterOperand RO,
+                        SDPatternOperator OpNode>
+  : InstForm<(outs RO:$fd), (ins RO:$fj),
+             !strconcat(opstr, "\t$fd, $fj"),
+             [(set RO:$fd, (OpNode fpimm1, RO:$fj))],
+             FrmR, opstr>;
+/// frsqrt
+class Float_Reg2_Frsqrt<string opstr, RegisterOperand RO,
+                        SDPatternOperator OpNode>
+  : InstForm<(outs RO:$fd), (ins RO:$fj),
+             !strconcat(opstr, "\t$fd, $fj"),
+             [(set RO:$fd, (OpNode fpimm1, (fsqrt RO:$fj)))],
+             FrmR, opstr>;
+
+class BceqzBr<string opstr, DAGOperand opnd, PatLeaf Op> :
+  InstForm<(outs), (ins FCFROpnd:$cj, opnd:$offset),
+           !strconcat(opstr, "\t$cj, $offset"),
+           [(LoongArchFPBrcond Op, FCFROpnd:$cj, bb:$offset)],
+           FrmFI, opstr>, HARDFLOAT {
+  let isBranch = 1;
+  let isTerminator = 1;
+  let hasFCCRegOperand = 1;
+}
+
+class FCMP_COND<string CondStr, string TypeStr, RegisterOperand RO,
+                SDPatternOperator OpNode = null_frag>
+  : InstForm<(outs FCFROpnd:$cd), (ins RO:$fj, RO:$fk),
+             !strconcat("fcmp.", CondStr, ".", TypeStr, "\t$cd, $fj, $fk"),
+             [(set FCFROpnd:$cd, (OpNode RO:$fj, RO:$fk))],
+             FrmOther,
+             !strconcat("fcmp.", CondStr, ".", TypeStr)> {
+  bit isCTI = 1; // TODO: check whether fcmp really needs isCTI; carried over
+                 // from Mips32r6InstrInfo.td line 219.
+}
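+
+// A note on the constants below (a sketch inferred from the values
+// themselves, not stated elsewhere in this patch): within the 5-bit cond
+// field, bit 0 turns a quiet (C*) condition into its signaling (S*) twin,
+// bit 1 encodes "less than", bit 2 "equal", bit 3 "unordered", and bit 4
+// selects the second dispatch group (NE/OR/UNE). For example:
+//   FIELD_CMP_COND_CLT  = 0x2               (LT)
+//   FIELD_CMP_COND_CLE  = 0x6 = 0x2 | 0x4   (LT or EQ)
+//   FIELD_CMP_COND_CULT = 0xA = 0x2 | 0x8   (LT or unordered)
+//   FIELD_CMP_COND_SLT  = 0x3 = 0x2 | 0x1   (signaling LT)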
+
+class FIELD_CMP_COND<bits<5> Val> {
+  bits<5> Value = Val;
+}
+def FIELD_CMP_COND_CAF  : FIELD_CMP_COND<0x0>;
+def FIELD_CMP_COND_CUN  : FIELD_CMP_COND<0x8>;
+def FIELD_CMP_COND_CEQ  : FIELD_CMP_COND<0x4>;
+def FIELD_CMP_COND_CUEQ : FIELD_CMP_COND<0xC>;
+def FIELD_CMP_COND_CLT  : FIELD_CMP_COND<0x2>;
+def FIELD_CMP_COND_CULT : FIELD_CMP_COND<0xA>;
+def FIELD_CMP_COND_CLE  : FIELD_CMP_COND<0x6>;
+def FIELD_CMP_COND_CULE : FIELD_CMP_COND<0xE>;
+def FIELD_CMP_COND_CNE  : FIELD_CMP_COND<0x10>;
+def FIELD_CMP_COND_COR  : FIELD_CMP_COND<0x14>;
+def FIELD_CMP_COND_CUNE : FIELD_CMP_COND<0x18>;
+def FIELD_CMP_COND_SAF  : FIELD_CMP_COND<0x1>;
+def FIELD_CMP_COND_SUN  : FIELD_CMP_COND<0x9>;
+def FIELD_CMP_COND_SEQ  : FIELD_CMP_COND<0x5>;
+def FIELD_CMP_COND_SUEQ : FIELD_CMP_COND<0xD>;
+def FIELD_CMP_COND_SLT  : FIELD_CMP_COND<0x3>;
+def FIELD_CMP_COND_SULT : FIELD_CMP_COND<0xB>;
+def FIELD_CMP_COND_SLE  : FIELD_CMP_COND<0x7>;
+def FIELD_CMP_COND_SULE : FIELD_CMP_COND<0xF>;
+def FIELD_CMP_COND_SNE  : FIELD_CMP_COND<0x11>;
+def FIELD_CMP_COND_SOR  : FIELD_CMP_COND<0x15>;
+def FIELD_CMP_COND_SUNE : FIELD_CMP_COND<0x19>;
+
+multiclass FCMP_COND_M<bits<2> op, string TypeStr,
+                       RegisterOperand RO> {
+  def FCMP_CAF_#NAME  : FCMP_COND<"caf",  TypeStr, RO>,
+                        R2_COND<op, FIELD_CMP_COND_CAF.Value>;
+  def FCMP_CUN_#NAME  : FCMP_COND<"cun",  TypeStr, RO, setuo>,
+                        R2_COND<op, FIELD_CMP_COND_CUN.Value>;
+  def FCMP_CEQ_#NAME  : FCMP_COND<"ceq",  TypeStr, RO, setoeq>,
+                        R2_COND<op, FIELD_CMP_COND_CEQ.Value>;
+  def FCMP_CUEQ_#NAME : FCMP_COND<"cueq", TypeStr, RO, setueq>,
+                        R2_COND<op, FIELD_CMP_COND_CUEQ.Value>;
+  def FCMP_CLT_#NAME  : FCMP_COND<"clt",  TypeStr, RO, setolt>,
+                        R2_COND<op, FIELD_CMP_COND_CLT.Value>;
+  def FCMP_CULT_#NAME : FCMP_COND<"cult", TypeStr, RO, setult>,
+                        R2_COND<op, FIELD_CMP_COND_CULT.Value>;
+  def FCMP_CLE_#NAME  : FCMP_COND<"cle",  TypeStr, RO, setole>,
+                        R2_COND<op, FIELD_CMP_COND_CLE.Value>;
+  def FCMP_CULE_#NAME : FCMP_COND<"cule", TypeStr, RO, setule>,
+                        R2_COND<op, FIELD_CMP_COND_CULE.Value>;
+  def FCMP_CNE_#NAME  : FCMP_COND<"cne",  TypeStr, RO, setone>,
+                        R2_COND<op, FIELD_CMP_COND_CNE.Value>;
+  def FCMP_COR_#NAME  : FCMP_COND<"cor",  TypeStr, RO, seto>,
+                        R2_COND<op, FIELD_CMP_COND_COR.Value>;
+  def FCMP_CUNE_#NAME : FCMP_COND<"cune", TypeStr, RO, setune>,
+                        R2_COND<op, FIELD_CMP_COND_CUNE.Value>;
+
+  def FCMP_SAF_#NAME  : FCMP_COND<"saf",  TypeStr, RO>,
+                        R2_COND<op, FIELD_CMP_COND_SAF.Value>;
+  def FCMP_SUN_#NAME  : FCMP_COND<"sun",  TypeStr, RO>,
+                        R2_COND<op, FIELD_CMP_COND_SUN.Value>;
+  def FCMP_SEQ_#NAME  : FCMP_COND<"seq",  TypeStr, RO>,
+                        R2_COND<op, FIELD_CMP_COND_SEQ.Value>;
+  def FCMP_SUEQ_#NAME : FCMP_COND<"sueq", TypeStr, RO>,
+                        R2_COND<op, FIELD_CMP_COND_SUEQ.Value>;
+  def FCMP_SLT_#NAME  : FCMP_COND<"slt",  TypeStr, RO>,
+                        R2_COND<op, FIELD_CMP_COND_SLT.Value>;
+  def FCMP_SULT_#NAME : FCMP_COND<"sult", TypeStr, RO>,
+                        R2_COND<op, FIELD_CMP_COND_SULT.Value>;
+  def FCMP_SLE_#NAME  : FCMP_COND<"sle",  TypeStr, RO>,
+                        R2_COND<op, FIELD_CMP_COND_SLE.Value>;
+  def FCMP_SULE_#NAME : FCMP_COND<"sule", TypeStr, RO>,
+                        R2_COND<op, FIELD_CMP_COND_SULE.Value>;
+  def FCMP_SNE_#NAME  : FCMP_COND<"sne",  TypeStr, RO>,
+                        R2_COND<op, FIELD_CMP_COND_SNE.Value>;
+  def FCMP_SOR_#NAME  : FCMP_COND<"sor",  TypeStr, RO>,
+                        R2_COND<op, FIELD_CMP_COND_SOR.Value>;
+  def FCMP_SUNE_#NAME : FCMP_COND<"sune", TypeStr, RO>,
+                        R2_COND<op, FIELD_CMP_COND_SUNE.Value>;
+}
+
+//// comparisons supported via another comparison
+//multiclass FCmp_Pats<ValueType VT, Instruction NOROp, Register ZEROReg> {
+//  def : LoongArchPat<(seteq VT:$lhs, VT:$rhs),
+//                     (!cast<Instruction>("FCMP_CEQ_"#NAME) VT:$lhs, VT:$rhs)>;
+//  def : LoongArchPat<(setgt VT:$lhs, VT:$rhs),
+//                     (!cast<Instruction>("FCMP_CLE_"#NAME) VT:$rhs, VT:$lhs)>;
+//  def : LoongArchPat<(setge VT:$lhs, VT:$rhs),
+//                     (!cast<Instruction>("FCMP_CLT_"#NAME) VT:$rhs, VT:$lhs)>;
+//  def : LoongArchPat<(setlt VT:$lhs, VT:$rhs),
+//                     (!cast<Instruction>("FCMP_CLT_"#NAME) VT:$lhs, VT:$rhs)>;
+//  def : LoongArchPat<(setle VT:$lhs, VT:$rhs),
+//                     (!cast<Instruction>("FCMP_CLE_"#NAME) VT:$lhs, VT:$rhs)>;
+//  def : LoongArchPat<(setne VT:$lhs, VT:$rhs),
+//                     (NOROp
+//                        (!cast<Instruction>("FCMP_CEQ_"#NAME) VT:$lhs, VT:$rhs),
+//                        ZEROReg)>;
+//}
+
+
+///
+/// R2
+///
+def FABS_S : Float_Reg2<"fabs.s", FGR32Opnd, fabs>, R2F<0b0100000001>;
+def FABS_D : 
Float_Reg2<"fabs.d", FGR64Opnd, fabs>, R2F<0b0100000010>; +def FNEG_S : Float_Reg2<"fneg.s", FGR32Opnd, fneg>, R2F<0b0100000101>; +def FNEG_D : Float_Reg2<"fneg.d", FGR64Opnd, fneg>, R2F<0b0100000110>; +def FLOGB_S : Float_Reg2<"flogb.s", FGR32Opnd>, R2F<0b0100001001>; +def FLOGB_D : Float_Reg2<"flogb.d", FGR64Opnd>, R2F<0b0100001010>; +def FCLASS_S : Float_Reg2<"fclass.s", FGR32Opnd>, R2F<0b0100001101>; +def FCLASS_D : Float_Reg2<"fclass.d", FGR64Opnd>, R2F<0b0100001110>; +def FSQRT_S : Float_Reg2<"fsqrt.s", FGR32Opnd, fsqrt>, R2F<0b0100010001>; +def FSQRT_D : Float_Reg2<"fsqrt.d", FGR64Opnd, fsqrt>, R2F<0b0100010010>; +def FRECIP_S : Float_Reg2_Frecip<"frecip.s", FGR32Opnd, fdiv>, R2F<0b0100010101>; +def FRECIP_D : Float_Reg2_Frecip<"frecip.d", FGR64Opnd, fdiv>, R2F<0b0100010110>; +def FRSQRT_S : Float_Reg2_Frsqrt<"frsqrt.s", FGR32Opnd, fdiv>, R2F<0b0100011001>; +def FRSQRT_D : Float_Reg2_Frsqrt<"frsqrt.d", FGR64Opnd, fdiv>, R2F<0b0100011010>; +def FMOV_S : Float_Reg2<"fmov.s", FGR32Opnd>, R2F<0b0100100101>; +def FMOV_D : Float_Reg2<"fmov.d", FGR64Opnd>, R2F<0b0100100110>; + +def MOVGR2FR_W : Float_MOVT<"movgr2fr.w", FGR32Opnd, GPR32Opnd, bitconvert>, MOVFI<0b0100101001>; +def MOVGR2FR_D : Float_MOVT<"movgr2fr.d", FGR64Opnd, GPR64Opnd, bitconvert>, MOVFI<0b0100101010>; +def MOVGR2FRH_W : Float_MOVT<"movgr2frh.w", FGR64Opnd, GPR32Opnd>, MOVFI<0b0100101011>; //not realize +def MOVFR2GR_S : Float_MOVF<"movfr2gr.s", GPR32Opnd, FGR32Opnd, bitconvert>, MOVIF<0b0100101101>; +def MOVFR2GR_D : Float_MOVF<"movfr2gr.d", GPR64Opnd, FGR64Opnd, bitconvert>, MOVIF<0b0100101110>; +def MOVFRH2GR_S : Float_MOVF<"movfrh2gr.s", GPR32Opnd, FGR32Opnd>, MOVIF<0b0100101111>; //not realize + +let isCodeGenOnly = 1 in { + def MOVFR2GR_DS : Float_MOVF<"movfr2gr.s", GPR64Opnd, FGR32Opnd>, MOVIF<0b0100101101>; +} + +def FCVT_S_D : Float_CVT<"fcvt.s.d", FGR32Opnd, FGR64Opnd>, R2F<0b1001000110>; +def FCVT_D_S : Float_CVT<"fcvt.d.s", FGR64Opnd, FGR32Opnd>, R2F<0b1001001001>; + +def FTINTRM_W_S : Float_Reg2<"ftintrm.w.s", FGR32Opnd>, R2F<0b1010000001>; +def FTINTRM_W_D : Float_Reg2<"ftintrm.w.d", FGR64Opnd>, R2F<0b1010000010>; +def FTINTRM_L_S : Float_Reg2<"ftintrm.l.s", FGR32Opnd>, R2F<0b1010001001>; +def FTINTRM_L_D : Float_Reg2<"ftintrm.l.d", FGR64Opnd>, R2F<0b1010001010>; +def FTINTRP_W_S : Float_Reg2<"ftintrp.w.s", FGR32Opnd>, R2F<0b1010010001>; +def FTINTRP_W_D : Float_Reg2<"ftintrp.w.d", FGR64Opnd>, R2F<0b1010010010>; +def FTINTRP_L_S : Float_Reg2<"ftintrp.l.s", FGR32Opnd>, R2F<0b1010011001>; +def FTINTRP_L_D : Float_Reg2<"ftintrp.l.d", FGR64Opnd>, R2F<0b1010011010>; +def FTINTRZ_W_S : Float_Reg2<"ftintrz.w.s", FGR32Opnd>, R2F<0b1010100001>; +def FTINTRZ_L_D : Float_Reg2<"ftintrz.l.d", FGR64Opnd>, R2F<0b1010101010>; +def FTINTRNE_W_S : Float_Reg2<"ftintrne.w.s", FGR32Opnd>, R2F<0b1010110001>; +def FTINTRNE_W_D : Float_Reg2<"ftintrne.w.d", FGR64Opnd>, R2F<0b1010110010>; +def FTINTRNE_L_S : Float_Reg2<"ftintrne.l.s", FGR32Opnd>, R2F<0b1010111001>; +def FTINTRNE_L_D : Float_Reg2<"ftintrne.l.d", FGR64Opnd>, R2F<0b1010111010>; + +def FTINT_W_S : Float_CVT<"ftint.w.s", FGR32Opnd, FGR32Opnd>, R2F<0b1011000001>; +def FTINT_W_D : Float_CVT<"ftint.w.d", FGR32Opnd, FGR64Opnd>, R2F<0b1011000010>; +def FTINT_L_S : Float_CVT<"ftint.l.s", FGR64Opnd, FGR32Opnd>, R2F<0b1011001001>; +def FTINT_L_D : Float_CVT<"ftint.l.d", FGR64Opnd, FGR64Opnd>, R2F<0b1011001010>; +def FFINT_S_W : Float_CVT<"ffint.s.w", FGR32Opnd, FGR32Opnd>, R2F<0b1101000100>; +def FFINT_S_L : Float_CVT<"ffint.s.l", FGR32Opnd, FGR64Opnd>, R2F<0b1101000110>; +def 
FFINT_D_W : Float_CVT<"ffint.d.w", FGR64Opnd, FGR32Opnd>, R2F<0b1101001000>; +def FFINT_D_L : Float_CVT<"ffint.d.l", FGR64Opnd, FGR64Opnd>, R2F<0b1101001010>; + +def FRINT_S : Float_Reg2<"frint.s", FGR32Opnd, frint>, R2F<0b1110010001>; +def FRINT_D : Float_Reg2<"frint.d", FGR64Opnd, frint>, R2F<0b1110010010>; + +/// +/// R3 +/// +def FADD_S : Float_Reg3<"fadd.s", FGR32Opnd, fadd>, R3F<0b000001>; +def FADD_D : Float_Reg3<"fadd.d", FGR64Opnd, fadd>, R3F<0b000010>; +def FSUB_S : Float_Reg3<"fsub.s", FGR32Opnd, fsub>, R3F<0b000101>; +def FSUB_D : Float_Reg3<"fsub.d", FGR64Opnd, fsub>, R3F<0b000110>; +def FMUL_S : Float_Reg3<"fmul.s", FGR32Opnd, fmul>, R3F<0b001001>; +def FMUL_D : Float_Reg3<"fmul.d", FGR64Opnd, fmul>, R3F<0b001010>; +def FDIV_S : Float_Reg3<"fdiv.s", FGR32Opnd, fdiv>, R3F<0b001101>; +def FDIV_D : Float_Reg3<"fdiv.d", FGR64Opnd, fdiv>, R3F<0b001110>; +def FMAX_S : Float_Reg3<"fmax.s", FGR32Opnd, fmaxnum_ieee>, R3F<0b010001>; +def FMAX_D : Float_Reg3<"fmax.d", FGR64Opnd, fmaxnum_ieee>, R3F<0b010010>; +def FMIN_S : Float_Reg3<"fmin.s", FGR32Opnd, fminnum_ieee>, R3F<0b010101>; +def FMIN_D : Float_Reg3<"fmin.d", FGR64Opnd, fminnum_ieee>, R3F<0b010110>; +def FMAXA_S : Float_Reg3_Fmaxa<"fmaxa.s", FGR32Opnd>, R3F<0b011001>; +def FMAXA_D : Float_Reg3_Fmaxa<"fmaxa.d", FGR64Opnd>, R3F<0b011010>; +def FMINA_S : Float_Reg3_Fmaxa<"fmina.s", FGR32Opnd>, R3F<0b011101>; +def FMINA_D : Float_Reg3_Fmaxa<"fmina.d", FGR64Opnd>, R3F<0b011110>; +def FSCALEB_S : Float_Reg3<"fscaleb.s", FGR32Opnd>, R3F<0b100001>; +def FSCALEB_D : Float_Reg3<"fscaleb.d", FGR64Opnd>, R3F<0b100010>; +def FCOPYSIGN_S : Float_Reg3<"fcopysign.s", FGR32Opnd, fcopysign>, R3F<0b100101>; +def FCOPYSIGN_D : Float_Reg3<"fcopysign.d", FGR64Opnd, fcopysign>, R3F<0b100110>; +/// +/// R4_IMM21 +/// +def FMADD_S : Mul_Reg4<"fmadd.s", FGR32Opnd>, R4MUL<0b0001>; +def FMADD_D : Mul_Reg4<"fmadd.d", FGR64Opnd>, R4MUL<0b0010>; +def FMSUB_S : Mul_Reg4<"fmsub.s", FGR32Opnd>, R4MUL<0b0101>; +def FMSUB_D : Mul_Reg4<"fmsub.d", FGR64Opnd>, R4MUL<0b0110>; +def FNMADD_S : NMul_Reg4<"fnmadd.s", FGR32Opnd>, R4MUL<0b1001>; +def FNMADD_D : NMul_Reg4<"fnmadd.d", FGR64Opnd>, R4MUL<0b1010>; +def FNMSUB_S : NMul_Reg4<"fnmsub.s", FGR32Opnd>, R4MUL<0b1101>; +def FNMSUB_D : NMul_Reg4<"fnmsub.d", FGR64Opnd>, R4MUL<0b1110>; + + +// fmadd: fj * fk + fa +def : LoongArchPat<(fma FGR64Opnd:$fj, FGR64Opnd:$fk, FGR64Opnd:$fa), + (FMADD_D $fj, $fk, $fa)>; + +def : LoongArchPat<(fma FGR32Opnd:$fj, FGR32Opnd:$fk, FGR32Opnd:$fa), + (FMADD_S $fj, $fk, $fa)>; + + +// fmsub: fj * fk - fa +def : LoongArchPat<(fma FGR64Opnd:$fj, FGR64Opnd:$fk, (fneg FGR64Opnd:$fa)), + (FMSUB_D FGR64Opnd:$fj, FGR64Opnd:$fk, FGR64Opnd:$fa)>; + +def : LoongArchPat<(fma FGR32Opnd:$fj, FGR32Opnd:$fk, (fneg FGR32Opnd:$fa)), + (FMSUB_S FGR32Opnd:$fj, FGR32Opnd:$fk, FGR32Opnd:$fa)>; + + +// fnmadd: -(fj * fk + fa) +def : LoongArchPat<(fma (fneg FGR64Opnd:$fj), FGR64Opnd:$fk, (fneg FGR64Opnd:$fa)), + (FNMADD_D FGR64Opnd:$fj, FGR64Opnd:$fk, FGR64Opnd:$fa)>; + +def : LoongArchPat<(fma (fneg FGR32Opnd:$fj), FGR32Opnd:$fk, (fneg FGR32Opnd:$fa)), + (FNMADD_S FGR32Opnd:$fj, FGR32Opnd:$fk, FGR32Opnd:$fa)>; + +// fnmsub: -(fj * fk - fa) +def : LoongArchPat<(fma (fneg FGR64Opnd:$fj), FGR64Opnd:$fk, FGR64Opnd:$fa), + (FNMSUB_D FGR64Opnd:$fj, FGR64Opnd:$fk, FGR64Opnd:$fa)>; + +def : LoongArchPat<(fma (fneg FGR32Opnd:$fj), FGR32Opnd:$fk, FGR32Opnd:$fa), + (FNMSUB_S FGR32Opnd:$fj, FGR32Opnd:$fk, FGR32Opnd:$fa)>; + +let Pattern = [] in { +defm S : FCMP_COND_M<0b01, "s", FGR32Opnd>; +defm D : FCMP_COND_M<0b10, "d", 
FGR64Opnd>; +} +// +//defm S : FCmp_Pats; +//defm D : FCmp_Pats; + +/// +/// Float point branching +/// +def LoongArch_BRANCH_F : PatLeaf<(i32 0)>; +def LoongArch_BRANCH_T : PatLeaf<(i32 1)>; + +def BCEQZ : BceqzBr<"bceqz", brtarget, LoongArch_BRANCH_F>, R1_BCEQZ<0>; +def BCNEZ : BceqzBr<"bcnez", brtarget, LoongArch_BRANCH_T>, R1_BCEQZ<1>; + +/// +/// FMOV +/// +def MOVGR2FCSR : Gpr_2_Fcsr<"movgr2fcsr", GPR64Opnd>, MOVGPR2FCSR; +def MOVFCSR2GR : Fcsr_2_Gpr<"movfcsr2gr", GPR64Opnd>, MOVFCSR2GPR; +def MOVFR2CF : Fgr_2_Fcfr<"movfr2cf", FGR64Opnd>, MOVFGR2FCFR; +def MOVCF2FR : Fcfr_2_Fgr<"movcf2fr", FGR64Opnd>, MOVFCFR2FGR; +def MOVGR2CF : Gpr_2_Fcfr<"movgr2cf", GPR64Opnd>, MOVGPR2FCFR; +def MOVCF2GR : Fcfr_2_Gpr<"movcf2gr", GPR64Opnd>, MOVFCFR2GPR; + +let isCodeGenOnly = 1 in { + def MOVFR2CF32 : Fgr_2_Fcfr<"movfr2cf", FGR32Opnd>, MOVFGR2FCFR; + def MOVCF2FR32 : Fcfr_2_Fgr<"movcf2fr", FGR32Opnd>, MOVFCFR2FGR; + def MOVGR2CF32 : Gpr_2_Fcfr<"movgr2cf", GPR32Opnd>, MOVGPR2FCFR; + def MOVCF2GR32 : Fcfr_2_Gpr<"movcf2gr", GPR32Opnd>, MOVFCFR2GPR; +} + +class Sel_Reg4 + : InstForm<(outs RO:$fd), (ins FCFROpnd:$ca, RO:$fj, RO:$fk), + !strconcat(opstr, "\t$fd, $fj, $fk, $ca"), + [(set RO:$fd, (LoongArchFSEL RO:$fj, FCFROpnd:$ca, RO:$fk))], + FrmR, opstr>{ + let Defs = [FCC0, FCC1, FCC2, FCC3, FCC4, FCC5, FCC6]; + let hasFCCRegOperand = 1; + } + +def FSEL_T_S : Sel_Reg4<"fsel", FGR32Opnd>, R4SEL; +let isCodeGenOnly = 1 in { + def FSEL_T_D : Sel_Reg4<"fsel", FGR64Opnd>, R4SEL; +} + +/// +/// Mem access +/// +def FLD_S : FLd<"fld.s", FGR32Opnd, mem, load>, LOAD_STORE<0b1100>; +def FST_S : FSt<"fst.s", FGR32Opnd, mem, store>, LOAD_STORE<0b1101>; +def FLD_D : FLd<"fld.d", FGR64Opnd, mem, load>, LOAD_STORE<0b1110>; +def FST_D : FSt<"fst.d", FGR64Opnd, mem, store>, LOAD_STORE<0b1111>; + +def FLDX_S : FLDX<"fldx.s", FGR32Opnd, load>, R3MF<0b01100000>; +def FLDX_D : FLDX<"fldx.d", FGR64Opnd, load>, R3MF<0b01101000>; +def FSTX_S : FSTX<"fstx.s", FGR32Opnd, store>, R3MF<0b01110000>; +def FSTX_D : FSTX<"fstx.d", FGR64Opnd, store>, R3MF<0b01111000>; + +def FLDGT_S : Float_Int_Reg3<"fldgt.s", FGR32Opnd, GPR64Opnd>, R3MF<0b11101000>; +def FLDGT_D : Float_Int_Reg3<"fldgt.d", FGR64Opnd, GPR64Opnd>, R3MF<0b11101001>; +def FLDLE_S : Float_Int_Reg3<"fldle.s", FGR32Opnd, GPR64Opnd>, R3MF<0b11101010>; +def FLDLE_D : Float_Int_Reg3<"fldle.d", FGR64Opnd, GPR64Opnd>, R3MF<0b11101011>; +def FSTGT_S : Float_STGT_LE<"fstgt.s", FGR32Opnd, GPR64Opnd>, R3MF<0b11101100>; +def FSTGT_D : Float_STGT_LE<"fstgt.d", FGR64Opnd, GPR64Opnd>, R3MF<0b11101101>; +def FSTLE_S : Float_STGT_LE<"fstle.s", FGR32Opnd, GPR64Opnd>, R3MF<0b11101110>; +def FSTLE_D : Float_STGT_LE<"fstle.d", FGR64Opnd, GPR64Opnd>, R3MF<0b11101111>; + +let isPseudo = 1, isCodeGenOnly = 1 in { + def PseudoFFINT_S_W : Float_CVT<"", FGR32Opnd, GPR32Opnd>; + def PseudoFFINT_D_W : Float_CVT<"", FGR64Opnd, GPR32Opnd>; + def PseudoFFINT_S_L : Float_CVT<"", FGR64Opnd, GPR64Opnd>; + def PseudoFFINT_D_L : Float_CVT<"", FGR64Opnd, GPR64Opnd>; +} + +def : LoongArchPat<(f32 (fpround FGR64Opnd:$src)), + (FCVT_S_D FGR64Opnd:$src)>; +def : LoongArchPat<(f64 (fpextend FGR32Opnd:$src)), + (FCVT_D_S FGR32Opnd:$src)>; + +def : LoongArchPat<(f32 (sint_to_fp GPR32Opnd:$src)), + (PseudoFFINT_S_W GPR32Opnd:$src)>; +def : LoongArchPat<(f64 (sint_to_fp GPR32Opnd:$src)), + (PseudoFFINT_D_W GPR32Opnd:$src)>; +def : LoongArchPat<(f32 (sint_to_fp GPR64Opnd:$src)), + (EXTRACT_SUBREG (PseudoFFINT_S_L GPR64Opnd:$src), sub_lo)>; +def : LoongArchPat<(f64 (sint_to_fp GPR64Opnd:$src)), + (PseudoFFINT_D_L 
GPR64Opnd:$src)>; + +def : LoongArchPat<(f32 fpimm0), (MOVGR2FR_W ZERO)>; +def : LoongArchPat<(f32 fpimm0neg), (FNEG_S (MOVGR2FR_W ZERO))>; +def : LoongArchPat<(f32 fpimm1), (FFINT_S_W (MOVGR2FR_W (ADDI_W ZERO, 1)))>; +def : LoongArchPat<(f64 fpimm1), (FFINT_D_L (MOVGR2FR_D (ADDI_D ZERO_64, 1)))>; + +// Patterns for loads/stores with a reg+imm operand. +let AddedComplexity = 40 in { + def : LoadRegImmPat; + def : StoreRegImmPat; + def : LoadRegImmPat; + def : StoreRegImmPat; +} + +def : LoongArchPat<(LoongArchTruncIntFP FGR32Opnd:$src), + (FTINTRZ_W_S FGR32Opnd:$src)>; + +def : LoongArchPat<(LoongArchTruncIntFP FGR64Opnd:$src), + (FTINTRZ_L_D FGR64Opnd:$src)>; + +def : LoongArchPat<(LoongArchTruncIntFP FGR32Opnd:$src), + (FCVT_D_S (FTINTRZ_W_S FGR32Opnd:$src))>; + +def : LoongArchPat<(f32 (fcopysign FGR32Opnd:$lhs, FGR64Opnd:$rhs)), + (FCOPYSIGN_S FGR32Opnd:$lhs, (FCVT_S_D FGR64Opnd:$rhs))>; +def : LoongArchPat<(f64 (fcopysign FGR64Opnd:$lhs, FGR32Opnd:$rhs)), + (FCOPYSIGN_D FGR64Opnd:$lhs, (FCVT_D_S FGR32Opnd:$rhs))>; + +let PrintMethod = "printFCCOperand",EncoderMethod = "getFCMPEncoding" in + def condcode : Operand; + +class CEQS_FT : + InstForm<(outs), (ins RC:$fj, RC:$fk, condcode:$cond), + !strconcat("fcmp.$cond.", typestr, "\t$$fcc0, $fj, $fk"), + [(OpNode RC:$fj, RC:$fk, imm:$cond)], FrmFR, + !strconcat("fcmp.$cond.", typestr)>, HARDFLOAT { + let Defs = [FCC0, FCC1, FCC2, FCC3, FCC4, FCC5, FCC6, FCC7]; + let isCodeGenOnly = 1; + let hasFCCRegOperand = 1; +} + +def FCMP_S32 : CEQS_FT<"s", FGR32, LoongArchFPCmp>, CEQS_FM<0b01> { + bits<3> cd = 0; +} +def FCMP_D64 : CEQS_FT<"d", FGR64, LoongArchFPCmp>, CEQS_FM<0b10>{ + bits<3> cd = 0; +} + + +//multiclass FCmp_Pats2 { +// def : LoongArchPat<(seteq VT:$lhs, VT:$rhs), +// (!cast("SFCMP_CEQ_"#NAME) VT:$lhs, VT:$rhs)>; +// def : LoongArchPat<(setgt VT:$lhs, VT:$rhs), +// (!cast("SFCMP_CLE_"#NAME) VT:$rhs, VT:$lhs)>; +// def : LoongArchPat<(setge VT:$lhs, VT:$rhs), +// (!cast("SFCMP_CLT_"#NAME) VT:$rhs, VT:$lhs)>; +// def : LoongArchPat<(setlt VT:$lhs, VT:$rhs), +// (!cast("SFCMP_CLT_"#NAME) VT:$lhs, VT:$rhs)>; +// def : LoongArchPat<(setle VT:$lhs, VT:$rhs), +// (!cast("SFCMP_CLE_"#NAME) VT:$lhs, VT:$rhs)>; +// def : LoongArchPat<(setne VT:$lhs, VT:$rhs), +// (NOROp +// (!cast("SFCMP_CEQ_"#NAME) VT:$lhs, VT:$rhs), +// ZEROReg)>; +// +// def : LoongArchPat<(seteq VT:$lhs, VT:$rhs), +// (!cast("DFCMP_CEQ_"#NAME) VT:$lhs, VT:$rhs)>; +// def : LoongArchPat<(setgt VT:$lhs, VT:$rhs), +// (!cast("DFCMP_CLE_"#NAME) VT:$rhs, VT:$lhs)>; +// def : LoongArchPat<(setge VT:$lhs, VT:$rhs), +// (!cast("DFCMP_CLT_"#NAME) VT:$rhs, VT:$lhs)>; +// def : LoongArchPat<(setlt VT:$lhs, VT:$rhs), +// (!cast("DFCMP_CLT_"#NAME) VT:$lhs, VT:$rhs)>; +// def : LoongArchPat<(setle VT:$lhs, VT:$rhs), +// (!cast("DFCMP_CLE_"#NAME) VT:$lhs, VT:$rhs)>; +// def : LoongArchPat<(setne VT:$lhs, VT:$rhs), +// (NOROp +// (!cast("DFCMP_CEQ_"#NAME) VT:$lhs, VT:$rhs), +// ZEROReg)>; +// } +// +//defm S : FCmp_Pats2; +//defm D : FCmp_Pats2; + +let usesCustomInserter = 1 in { + class Select_Pseudo : + LoongArchPseudo<(outs RC:$dst), (ins GPR32Opnd:$cond, RC:$T, RC:$F), + [(set RC:$dst, (select GPR32Opnd:$cond, RC:$T, RC:$F))]>; + + class SelectFP_Pseudo_T : + LoongArchPseudo<(outs RC:$dst), (ins FCFROpnd:$cond, RC:$T, RC:$F), + [(set RC:$dst, (LoongArchCMovFP_T RC:$T, FCFROpnd:$cond, RC:$F))]>; + + class SelectFP_Pseudo_F : + LoongArchPseudo<(outs RC:$dst), (ins FCFROpnd:$cond, RC:$T, RC:$F), + [(set RC:$dst, (LoongArchCMovFP_F RC:$T, FCFROpnd:$cond, RC:$F))]>; +} + +def 
PseudoSELECT_I : Select_Pseudo; +def PseudoSELECT_I64 : Select_Pseudo; +def PseudoSELECT_S : Select_Pseudo; +def PseudoSELECT_D64 : Select_Pseudo; + +def PseudoSELECTFP_T_I : SelectFP_Pseudo_T; +def PseudoSELECTFP_T_I64 : SelectFP_Pseudo_T; + +def PseudoSELECTFP_F_I : SelectFP_Pseudo_F; +def PseudoSELECTFP_F_I64 : SelectFP_Pseudo_F; + +class ABSS_FT : + InstForm<(outs DstRC:$fd), (ins SrcRC:$fj), !strconcat(opstr, "\t$fd, $fj"), + [(set DstRC:$fd, (OpNode SrcRC:$fj))], FrmFR, opstr>; + +def TRUNC_W_D : ABSS_FT<"ftintrz.w.d", FGR32Opnd, FGR64Opnd>, R2F<0b1010100010>; + +def FTINTRZ_L_S : ABSS_FT<"ftintrz.l.s", FGR64Opnd, FGR32Opnd>, R2F<0b1010101001>; + +def : LoongArchPat<(LoongArchTruncIntFP FGR64Opnd:$src), + (TRUNC_W_D FGR64Opnd:$src)>; + +def : LoongArchPat<(LoongArchTruncIntFP FGR32Opnd:$src), + (FTINTRZ_L_S FGR32Opnd:$src)>; + +def : Pat<(fcanonicalize FGR32Opnd:$src), (FMAX_S $src, $src)>; +def : Pat<(fcanonicalize FGR64Opnd:$src), (FMAX_D $src, $src)>; + +def : LoongArchPat<(i64 (sext (i32 (bitconvert FGR32Opnd:$src)))), + (MOVFR2GR_DS FGR32Opnd:$src)>; diff --git a/lib/Target/LoongArch/LoongArchLASXInstrFormats.td b/lib/Target/LoongArch/LoongArchLASXInstrFormats.td new file mode 100644 index 00000000..8e255f85 --- /dev/null +++ b/lib/Target/LoongArch/LoongArchLASXInstrFormats.td @@ -0,0 +1,448 @@ +//===- LoongArchLASXInstrFormats.td - LoongArch LASX Instruction Formats ---*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +class LASXInst : InstLA<(outs), (ins), "", [], FrmOther>, + EXT_LASX { +} + +class LASXCBranch : LASXInst { +} + +class LASXSpecial : LASXInst { +} + +class LASXPseudo pattern>: + LoongArchPseudo { + let Predicates = [HasLASX]; +} + +class LASX_3R op>: LASXInst { + bits<5> xk; + bits<5> xj; + bits<5> xd; + + let Inst{31-15} = op; + let Inst{14-10} = xk; + let Inst{9-5} = xj; + let Inst{4-0} = xd; +} + +class LASX_4R op>: LASXInst { + bits<5> xa; + bits<5> xk; + bits<5> xj; + bits<5> xd; + + let Inst{31-20} = op; + let Inst{19-15} = xa; + let Inst{14-10} = xk; + let Inst{9-5} = xj; + let Inst{4-0} = xd; +} + +class LASX_XVFCMP op>: LASXInst { + bits<5> xd; + bits<5> xj; + bits<5> xk; + bits<5> cond; + + let Inst{31-20} = op; + let Inst{19-15} = cond; + let Inst{14-10} = xk; + let Inst{9-5} = xj; + let Inst{4-0} = xd; +} + +class LASX_I12_S op>: LASXInst { + bits<5> xd; + bits<17> addr; + + let Inst{31-22} = op; + let Inst{21-10} = addr{11-0}; + let Inst{9-5} = addr{16-12}; + let Inst{4-0} = xd; +} + +class LASX_SI12_S op>: LASXInst { + bits<5> xd; + bits<17> addr; + + let Inst{31-22} = op; + let Inst{21-10} = addr{11-0}; + let Inst{9-5} = addr{16-12}; + let Inst{4-0} = xd; +} + +class LASX_SI11_S op>: LASXInst { + bits<5> xd; + bits<16> addr; + + let Inst{31-21} = op; + let Inst{20-10} = addr{10-0}; + let Inst{9-5} = addr{15-11}; + let Inst{4-0} = xd; +} + +class LASX_SI10_S op>: LASXInst { + bits<5> xd; + bits<15> addr; + + let Inst{31-20} = op; + let Inst{19-10} = addr{9-0}; + let Inst{9-5} = addr{14-10}; + let Inst{4-0} = xd; +} + +class LASX_SI9_S op>: LASXInst { + bits<5> xd; + bits<14> addr; + + let Inst{31-19} = op; + let Inst{18-10} = addr{8-0}; + let Inst{9-5} = addr{13-9}; + let Inst{4-0} = xd; +} + +class LASX_SI8_idx5 op>: LASXInst { + bits<5> xd; + bits<5> rj; + bits<8> si8; + bits<5> idx; + + let Inst{31-23} = op; + let Inst{22-18} = 
idx; + let Inst{17-10} = si8; + let Inst{9-5} = rj; + let Inst{4-0} = xd; +} + +class LASX_SI8_idx2 op>: LASXInst { + bits<5> xd; + bits<5> rj; + bits<8> si8; + bits<2> idx; + + let Inst{31-20} = op; + let Inst{19-18} = idx; + let Inst{17-10} = si8; + let Inst{9-5} = rj; + let Inst{4-0} = xd; +} + +class LASX_SI8_idx3 op>: LASXInst { + bits<5> xd; + bits<5> rj; + bits<8> si8; + bits<3> idx; + + let Inst{31-21} = op; + let Inst{20-18} = idx; + let Inst{17-10} = si8; + let Inst{9-5} = rj; + let Inst{4-0} = xd; +} + +class LASX_SI8_idx4 op>: LASXInst { + bits<5> xd; + bits<5> rj; + bits<8> si8; + bits<4> idx; + + let Inst{31-22} = op; + let Inst{21-18} = idx; + let Inst{17-10} = si8; + let Inst{9-5} = rj; + let Inst{4-0} = xd; +} + +class LASX_3R_2GP op>: LASXInst { + bits<5> rk; + bits<5> rj; + bits<5> xd; + + let Inst{31-15} = op; + let Inst{14-10} = rk; + let Inst{9-5} = rj; + let Inst{4-0} = xd; +} + +class LASX_3R_1GP op>: LASXInst { + bits<5> rk; + bits<5> xj; + bits<5> xd; + + let Inst{31-15} = op; + let Inst{14-10} = rk; + let Inst{9-5} = xj; + let Inst{4-0} = xd; +} + +class LASX_I5 op>: LASXInst { + bits<5> xd; + bits<5> xj; + bits<5> si5; + + let Inst{31-15} = op; + let Inst{14-10} = si5; + let Inst{9-5} = xj; + let Inst{4-0} = xd; +} + +class LASX_I5_U op>: LASXInst { + bits<5> xd; + bits<5> xj; + bits<5> ui5; + + let Inst{31-15} = op; + let Inst{14-10} = ui5; + let Inst{9-5} = xj; + let Inst{4-0} = xd; +} + +class LASX_I5_mode_U op>: LASXInst { + bits<5> xd; + bits<5> mode; + bits<5> ui5; + + let Inst{31-15} = op; + let Inst{14-10} = ui5; + let Inst{9-5} = mode; + let Inst{4-0} = xd; +} + +class LASX_2R op>: LASXInst { + bits<5> xj; + bits<5> xd; + + let Inst{31-10} = op; + let Inst{9-5} = xj; + let Inst{4-0} = xd; +} + +class LASX_SET op>: LASXInst { + bits<5> xj; + bits<3> cd; + + let Inst{31-10} = op; + let Inst{9-5} = xj; + let Inst{4-3} = 0b00; + let Inst{2-0} = cd; +} + +class LASX_2R_1GP op>: LASXInst { + bits<5> rj; + bits<5> xd; + + let Inst{31-10} = op; + let Inst{9-5} = rj; + let Inst{4-0} = xd; +} + +class LASX_I3_U op>: LASXInst { + bits<5> xd; + bits<5> xj; + bits<3> ui3; + + let Inst{31-13} = op; + let Inst{12-10} = ui3; + let Inst{9-5} = xj; + let Inst{4-0} = xd; +} + +class LASX_I4_U op>: LASXInst { + bits<5> xd; + bits<5> xj; + bits<4> ui4; + + let Inst{31-14} = op; + let Inst{13-10} = ui4; + let Inst{9-5} = xj; + let Inst{4-0} = xd; +} + +class LASX_I6_U op>: LASXInst { + bits<5> xd; + bits<5> xj; + bits<6> ui6; + + let Inst{31-16} = op; + let Inst{15-10} = ui6; + let Inst{9-5} = xj; + let Inst{4-0} = xd; +} + +class LASX_I2_R_U op>: LASXInst { + bits<5> xd; + bits<5> rj; + bits<2> ui2; + + let Inst{31-12} = op; + let Inst{11-10} = ui2; + let Inst{9-5} = rj; + let Inst{4-0} = xd; +} + +class LASX_I3_R_U op>: LASXInst { + bits<5> xd; + bits<5> rj; + bits<3> ui3; + + let Inst{31-13} = op; + let Inst{12-10} = ui3; + let Inst{9-5} = rj; + let Inst{4-0} = xd; +} + +class LASX_ELM_COPY_U3 op>: LASXInst { + bits<5> rd; + bits<5> xj; + bits<3> ui3; + + let Inst{31-13} = op; + let Inst{12-10} = ui3; + let Inst{9-5} = xj; + let Inst{4-0} = rd; +} + +class LASX_ELM_COPY_U2 op>: LASXInst { + bits<5> rd; + bits<5> xj; + bits<2> ui2; + + let Inst{31-12} = op; + let Inst{11-10} = ui2; + let Inst{9-5} = xj; + let Inst{4-0} = rd; +} + +class LASX_I1_U op>: LASXInst { + bits<5> xd; + bits<5> xj; + bits<1> ui1; + + let Inst{31-11} = op; + let Inst{10} = ui1; + let Inst{9-5} = xj; + let Inst{4-0} = xd; +} + +class LASX_I2_U op>: LASXInst { + bits<5> xd; + bits<5> xj; + bits<2> 
ui2; + + let Inst{31-12} = op; + let Inst{11-10} = ui2; + let Inst{9-5} = xj; + let Inst{4-0} = xd; +} + +class LASX_I7_U op>: LASXInst { + bits<5> xd; + bits<5> xj; + bits<7> ui7; + + let Inst{31-17} = op; + let Inst{16-10} = ui7; + let Inst{9-5} = xj; + let Inst{4-0} = xd; +} + +class LASX_1R_I13 op>: LASXInst { + bits<13> i13; + bits<5> xd; + + let Inst{31-18} = op; + let Inst{17-5} = i13; + let Inst{4-0} = xd; +} + +class LASX_I8_U op>: LASXInst { + bits<5> xd; + bits<5> xj; + bits<8> ui8; + + let Inst{31-18} = op; + let Inst{17-10} = ui8; + let Inst{9-5} = xj; + let Inst{4-0} = xd; +} + + +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +class LASX_I1_R_U op>: LASXInst { + bits<5> xd; + bits<5> rj; + bits<1> ui1; + + let Inst{31-11} = op; + let Inst{10} = ui1; + let Inst{9-5} = rj; + let Inst{4-0} = xd; +} + +class LASX_I4_R_U op>: LASXInst { + bits<5> xd; + bits<5> rj; + bits<4> ui4; + + let Inst{31-14} = op; + let Inst{13-10} = ui4; + let Inst{9-5} = rj; + let Inst{4-0} = xd; +} + +class LASX_ELM_COPY_B op>: LASXInst { + bits<5> rd; + bits<5> xj; + bits<4> ui4; + + let Inst{31-14} = op; + let Inst{13-10} = ui4; + let Inst{9-5} = xj; + let Inst{4-0} = rd; +} + +class LASX_ELM_COPY_D op>: LASXInst { + bits<5> rd; + bits<5> xj; + bits<1> ui1; + + let Inst{31-11} = op; + let Inst{10} = ui1; + let Inst{9-5} = xj; + let Inst{4-0} = rd; +} + +class LASX_Addr_SI8_idx1 op>: LASXInst { + bits<5> xd; + bits<13> addr; + bits<1> idx; + + let Inst{31-19} = op; + let Inst{18-11} = addr{7-0}; + let Inst{10} = idx; + let Inst{9-5} = addr{12-8}; + let Inst{4-0} = xd; +} + +class LASX_1R_I13_I10 op>: LASXInst { + bits<10> i10; + bits<5> xd; + + let Inst{31-15} = op; + let Inst{14-5} = i10; + let Inst{4-0} = xd; +} + + + + + + diff --git a/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/lib/Target/LoongArch/LoongArchLASXInstrInfo.td new file mode 100644 index 00000000..525da15c --- /dev/null +++ b/lib/Target/LoongArch/LoongArchLASXInstrInfo.td @@ -0,0 +1,5644 @@ +//===- LoongArchLASXInstrInfo.td - loongson LASX instructions -*- tablegen ------------*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes loongson ASX instructions. 
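+// (LASX is the 256-bit vector extension: the xv* definitions below operate
+// on 256-bit vectors such as v32i8/v8f32/v4f64; for example, xvfmadd.s is
+// the v8f32 counterpart of the scalar fmadd.s in LoongArchInstrInfoF.td.)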
+// +//===----------------------------------------------------------------------===// +def SDT_XVPERMI : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<0>, + SDTCisSameAs<0, 1>, + SDTCisVT<2, i32>]>; +def SDT_XVSHFI : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisVec<0>, + SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, + SDTCisVT<3, i32>]>; +def SDT_XVBROADCAST : SDTypeProfile<1, 1, [SDTCisVec<0>]>; + +def SDT_INSVE : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, + SDTCisSameAs<1, 2>, + SDTCisVT<3, i32>]>; + +def SDT_XVPICKVE : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, + SDTCisSameAs<1, 2>, + SDTCisVT<3, i32>]>; + +def SDT_XVSHUF4I : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<0>, + SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisVT<3, i32>]>; + +def LoongArchXVSHUFI : SDNode<"LoongArchISD::XVSHFI", SDT_XVSHFI>; + +def LoongArchXVSELI : SDNode<"LoongArchISD::XVSELI", SDT_XVSHFI>; + +def LoongArchXVPERMI : SDNode<"LoongArchISD::XVPERMI", SDT_XVPERMI>; + +def LoongArchXVBROADCAST : SDNode<"LoongArchISD::XVBROADCAST", SDT_XVBROADCAST>; + +def LoongArchINSVE : SDNode<"LoongArchISD::INSVE", SDT_INSVE>; + +def LoongArchXVSHUF4I : SDNode<"LoongArchISD::XVSHUF4I", SDT_XVSHUF4I>; + +def LoongArchXVPICKVE : SDNode<"LoongArchISD::XVPICKVE", SDT_INSVE>; + +def xvbroadcast_v32i8 : PatFrag<(ops node:$v1), + (v32i8 (LoongArchXVBROADCAST node:$v1))>; +def xvbroadcast_v16i16 : PatFrag<(ops node:$v1), + (v16i16 (LoongArchXVBROADCAST node:$v1))>; +def xvbroadcast_v8i32 : PatFrag<(ops node:$v1), + (v8i32 (LoongArchXVBROADCAST node:$v1))>; +def xvbroadcast_v4i64 : PatFrag<(ops node:$v1), + (v4i64 (LoongArchXVBROADCAST node:$v1))>; + + +def vfseteq_v8f32 : vfsetcc_type; +def vfseteq_v4f64 : vfsetcc_type; +def vfsetge_v8f32 : vfsetcc_type; +def vfsetge_v4f64 : vfsetcc_type; +def vfsetgt_v8f32 : vfsetcc_type; +def vfsetgt_v4f64 : vfsetcc_type; +def vfsetle_v8f32 : vfsetcc_type; +def vfsetle_v4f64 : vfsetcc_type; +def vfsetlt_v8f32 : vfsetcc_type; +def vfsetlt_v4f64 : vfsetcc_type; +def vfsetne_v8f32 : vfsetcc_type; +def vfsetne_v4f64 : vfsetcc_type; +def vfsetoeq_v8f32 : vfsetcc_type; +def vfsetoeq_v4f64 : vfsetcc_type; +def vfsetoge_v8f32 : vfsetcc_type; +def vfsetoge_v4f64 : vfsetcc_type; +def vfsetogt_v8f32 : vfsetcc_type; +def vfsetogt_v4f64 : vfsetcc_type; +def vfsetole_v8f32 : vfsetcc_type; +def vfsetole_v4f64 : vfsetcc_type; +def vfsetolt_v8f32 : vfsetcc_type; +def vfsetolt_v4f64 : vfsetcc_type; +def vfsetone_v8f32 : vfsetcc_type; +def vfsetone_v4f64 : vfsetcc_type; +def vfsetord_v8f32 : vfsetcc_type; +def vfsetord_v4f64 : vfsetcc_type; +def vfsetun_v8f32 : vfsetcc_type; +def vfsetun_v4f64 : vfsetcc_type; +def vfsetueq_v8f32 : vfsetcc_type; +def vfsetueq_v4f64 : vfsetcc_type; +def vfsetuge_v8f32 : vfsetcc_type; +def vfsetuge_v4f64 : vfsetcc_type; +def vfsetugt_v8f32 : vfsetcc_type; +def vfsetugt_v4f64 : vfsetcc_type; +def vfsetule_v8f32 : vfsetcc_type; +def vfsetule_v4f64 : vfsetcc_type; +def vfsetult_v8f32 : vfsetcc_type; +def vfsetult_v4f64 : vfsetcc_type; +def vfsetune_v8f32 : vfsetcc_type; +def vfsetune_v4f64 : vfsetcc_type; + +def xvsplati8 : PatFrag<(ops node:$e0), + (v32i8 (build_vector + node:$e0, node:$e0, node:$e0, node:$e0, + node:$e0, node:$e0, node:$e0, node:$e0, + node:$e0, node:$e0, node:$e0, node:$e0, + node:$e0, node:$e0, node:$e0, node:$e0, + node:$e0, node:$e0, node:$e0, node:$e0, + node:$e0, node:$e0, node:$e0, node:$e0, + node:$e0, node:$e0, node:$e0, node:$e0, + node:$e0, node:$e0, node:$e0, node:$e0))>; +def xvsplati16 : PatFrag<(ops node:$e0), + (v16i16 (build_vector + 
node:$e0, node:$e0, node:$e0, node:$e0, + node:$e0, node:$e0, node:$e0, node:$e0, + node:$e0, node:$e0, node:$e0, node:$e0, + node:$e0, node:$e0, node:$e0, node:$e0))>; +def xvsplati32 : PatFrag<(ops node:$e0), + (v8i32 (build_vector + node:$e0, node:$e0, node:$e0, node:$e0, + node:$e0, node:$e0, node:$e0, node:$e0))>; +def xvsplati64 : PatFrag<(ops node:$e0), + (v4i64 (build_vector + node:$e0, node:$e0, node:$e0, node:$e0))>; +def xvsplatf32 : PatFrag<(ops node:$e0), + (v8f32 (build_vector node:$e0, node:$e0, + node:$e0, node:$e0))>; +def xvsplatf64 : PatFrag<(ops node:$e0), + (v4f64 (build_vector node:$e0, node:$e0))>; + +def xvsplati8_uimm3 : SplatComplexPattern; +def xvsplati16_uimm4 : SplatComplexPattern; + +def xvsplati64_uimm6 : SplatComplexPattern; + +def xvsplati8_simm5 : SplatComplexPattern; +def xvsplati16_simm5 : SplatComplexPattern; +def xvsplati32_simm5 : SplatComplexPattern; +def xvsplati64_simm5 : SplatComplexPattern; + +def xvsplat_imm_eq_1 : PatLeaf<(build_vector), [{ + APInt Imm; + EVT EltTy = N->getValueType(0).getVectorElementType(); + return selectVSplat(N, Imm, EltTy.getSizeInBits()) && + Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1; +}]>; + +def xvsplati64_imm_eq_1 : PatLeaf<(bitconvert (v8i32 (build_vector))), [{ + APInt Imm; + SDNode *BV = N->getOperand(0).getNode(); + EVT EltTy = N->getValueType(0).getVectorElementType(); + + return selectVSplat(BV, Imm, EltTy.getSizeInBits()) && + Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1; +}]>; + +def xvbitclr_b : PatFrag<(ops node:$xk, node:$xa), + (and node:$xk, (xor (shl vsplat_imm_eq_1, node:$xa), + immAllOnesV))>; +def xvbitclr_h : PatFrag<(ops node:$xk, node:$xa), + (and node:$xk, (xor (shl vsplat_imm_eq_1, node:$xa), + immAllOnesV))>; +def xvbitclr_w : PatFrag<(ops node:$xk, node:$xa), + (and node:$xk, (xor (shl vsplat_imm_eq_1, node:$xa), + immAllOnesV))>; +def xvbitclr_d : PatFrag<(ops node:$xk, node:$xa), + (and node:$xk, (xor (shl (v4i64 vsplati64_imm_eq_1), + node:$xa), + (bitconvert (v8i32 immAllOnesV))))>; + + + +def xvsplati8_uimm5 : SplatComplexPattern; +def xvsplati16_uimm5 : SplatComplexPattern; +def xvsplati32_uimm5 : SplatComplexPattern; +def xvsplati64_uimm5 : SplatComplexPattern; +def xvsplati8_uimm8 : SplatComplexPattern; +def xvsplati16_uimm8 : SplatComplexPattern; +def xvsplati32_uimm8 : SplatComplexPattern; +def xvsplati64_uimm8 : SplatComplexPattern; + + + +def xvsplati8_uimm4 : SplatComplexPattern; +def xvsplati16_uimm3 : SplatComplexPattern; +def xvsplati32_uimm2 : SplatComplexPattern; +def xvsplati64_uimm1 : SplatComplexPattern; + + +// Patterns. 
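+// As an illustration of the pattern classes that follow (a sketch only;
+// XVADD_B and LASX256B are names assumed by analogy with the rest of this
+// file, not taken from it): LASXPat wraps an ordinary selection-DAG Pat
+// with the HasLASX predicate, so a pattern selecting a 256-bit byte add
+// into xvadd.b would read
+//   def : LASXPat<(add (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)),
+//                 (XVADD_B LASX256B:$xj, LASX256B:$xk)>;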
+class LASXPat pred = [HasLASX]> : + Pat, Requires; + +class LASX_4RF { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, ROXK:$xk, ROXA:$xa); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $xk, $xa"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, ROXK:$xk, ROXA:$xa))]; +} + +class LASX_3RF { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, ROXK:$xk); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $xk"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, ROXK:$xk))]; +} + +class LASX_3R_SETCC_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, ROXK:$xk); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $xk"); + list Pattern = [(set ROXD:$xd, (VT (vsetcc ROXJ:$xj, ROXK:$xk, CC)))]; +} + +class LASX_LD { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins MemOpnd:$addr); + string AsmString = !strconcat(instr_asm, "\t$xd, $addr"); + list Pattern = [(set ROXD:$xd, (TyNode (OpNode Addr:$addr)))]; + string DecoderMethod = "DecodeLASX256Mem"; +} + +class LASX_ST { + dag OutOperandList = (outs); + dag InOperandList = (ins ROXD:$xd, MemOpnd:$addr); + string AsmString = !strconcat(instr_asm, "\t$xd, $addr"); + list Pattern = [(OpNode (TyNode ROXD:$xd), Addr:$addr)]; + string DecoderMethod = "DecodeLASX256Mem"; +} + +class LASX_I8_U5_DESC_BASE { + dag OutOperandList = (outs); + dag InOperandList = (ins ROXD:$xd, PtrRC:$rj, ImmOp:$si8, uimm5:$idx); + string AsmString = !strconcat(instr_asm, "\t$xd, $rj, $si8, $idx"); + list Pattern = [(OpNode ROXD:$xd, iPTR:$rj, Imm:$si8, immZExt5:$idx)]; + string DecoderMethod = "DecodeLASX256memstl"; +} + +class LASX_I8_U2_DESC_BASE { + dag OutOperandList = (outs); + dag InOperandList = (ins ROXD:$xd, PtrRC:$rj, ImmOp:$si8, uimm2:$idx); + string AsmString = !strconcat(instr_asm, "\t$xd, $rj, $si8, $idx"); + list Pattern = [(OpNode ROXD:$xd, iPTR:$rj, Imm:$si8, immZExt2:$idx)]; + string DecoderMethod = "DecodeLASX256memstl"; +} + +class LASX_I8_U3_DESC_BASE { + dag OutOperandList = (outs); + dag InOperandList = (ins ROXD:$xd, PtrRC:$rj, ImmOp:$si8, uimm3:$idx); + string AsmString = !strconcat(instr_asm, "\t$xd, $rj, $si8, $idx"); + list Pattern = [(OpNode ROXD:$xd, iPTR:$rj, Imm:$si8, immZExt3:$idx)]; + string DecoderMethod = "DecodeLASX256memstl"; +} + +class LASX_I8_U4_DESC_BASE { + dag OutOperandList = (outs); + dag InOperandList = (ins ROXD:$xd, PtrRC:$rj, ImmOp:$si8, uimm4:$idx); + string AsmString = !strconcat(instr_asm, "\t$xd, $rj, $si8, $idx"); + list Pattern = [(OpNode ROXD:$xd, iPTR:$rj, Imm:$si8, immZExt4:$idx)]; + string DecoderMethod = "DecodeLASX256memstl"; +} + +class LASX_SDX_LA { + dag OutOperandList = (outs); + dag InOperandList = (ins ROXD:$xd, PtrRC:$rj, RORK:$rk); + string AsmString = !strconcat(instr_asm, "\t$xd, $rj, $rk"); + list Pattern = [(OpNode ROXD:$xd, iPTR:$rj, RORK:$rk)]; +} + +class LASX_3R_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, ROXK:$xk); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $xk"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, ROXK:$xk))]; +} + +class LASX_LDX_LA { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins PtrRC:$rj, RORK:$rk); + string AsmString = !strconcat(instr_asm, "\t$xd, $rj, $rk"); + list Pattern = [(set ROXD:$xd, (OpNode iPTR:$rj, RORK:$rk))]; +} + +class LASX_3R_4R_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, ROXK:$xk); + string 
AsmString = !strconcat(instr_asm, "\t$xd, $xj, $xk"); + list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, + ROXK:$xk))]; + string Constraints = "$xd = $xd_in"; +} + + +class LASX_3R_VREPLVE_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, GPR32Opnd:$rk); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $rk"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, GPR32Opnd:$rk))]; +} + + +class LASX_3R_VREPLVE_DESC_BASE_N { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, GPR64Opnd:$rk); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $rk"); + list Pattern = []; +} + + +class LASX_VEC_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, ROXK:$xk); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $xk"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, ROXK:$xk))]; +} + + + +class LASX_3RF_DESC_BASE : + LASX_3R_DESC_BASE; + + +class LASX_3R_DESC_BASE1 { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, ROXK:$xk); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $xk"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xk, ROXK:$xj))]; +} + +class LASX_3RF_DESC_BASE1 : + LASX_3R_DESC_BASE1; + + + +class LASX_3R_VSHF_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, ROXK:$xk); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $xk"); + list Pattern = [(set ROXD:$xd, (LoongArchVSHF ROXD:$xd_in, ROXJ:$xj, + ROXK:$xk))]; + string Constraints = "$xd = $xd_in"; +} + +class LASX_I5_SETCC_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$si5); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $si5"); + list Pattern = [(set ROXD:$xd, (VT (vsetcc ROXJ:$xj, SplatImm:$si5, CC)))]; +} + +class LASX_I5_SETCC_DESC_BASE_Intrinsic { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, ImmOp:$si5); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $si5"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$si5))]; +} + + +class LASX_I5_U_SETCC_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$ui5); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); + list Pattern = [(set ROXD:$xd, (VT (vsetcc ROXJ:$xj, SplatImm:$ui5, CC)))]; +} + +class LASX_I5_U_SETCC_DESC_BASE_Intrinsic { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui5); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui5))]; +} + +class LASX_VEC_PSEUDO_BASE : + LASXPseudo<(outs ROXD:$xd), (ins ROXJ:$xj, ROXK:$xk), + [(set ROXD:$xd, (OpNode ROXJ:$xj, ROXK:$xk))]>; + + +class LASX_I5_U_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$ui5); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, SplatImm:$ui5))]; +} + + +class LASX_I5_U_DESC_BASE_Intrinsic { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui5); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui5))]; +} + +class LASX_U5_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, uimm5:$ui5); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, 
$ui5"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt5:$ui5))]; +} + +class LASX_U5N_DESC_BASE : + LASX_U5_DESC_BASE; + +class LASX_U5_4R_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm5:$ui5); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); + list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, immZExt5:$ui5))]; + string Constraints = "$xd = $xd_in"; +} + +class LASX_2R_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj))]; +} + +class LASX_SET_DESC_BASE { + dag OutOperandList = (outs FCFROpnd:$cd); + dag InOperandList = (ins ROXD:$xj); + string AsmString = !strconcat(instr_asm, "\t$cd, $xj"); + list Pattern = []; +} + +class LASX_2RF_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj))]; +} + +class LASX_I5_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$si5); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $si5"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, SplatImm:$si5))]; +} + +class LASX_I5_DESC_BASE_Intrinsic { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, ImmOp:$si5); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $si5"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$si5))]; +} + + +class LASX_2R_REPL_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROS:$rj); + string AsmString = !strconcat(instr_asm, "\t$xd, $rj"); + list Pattern = [(set ROXD:$xd, (VT (OpNode ROS:$rj)))]; +} + +class LASX_XVEXTEND_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj"); + list Pattern = [(set ROXD:$xd, (DTy (OpNode (STy ROXJ:$xj))))]; +} + +class LASX_RORI_U3_DESC_BASE_Intrinsic { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui3); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui3"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui3))]; +} + +class LASX_RORI_U4_DESC_BASE_Intrinsic { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui4); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui4"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui4))]; +} + +class LASX_RORI_U5_DESC_BASE_Intrinsic { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui5); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui5))]; +} + +class LASX_RORI_U6_DESC_BASE_Intrinsic { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui6); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui6"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui6))]; +} + +class LASX_BIT_3_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui3); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui3"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui3))]; +} + +class LASX_BIT_4_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui4); + string AsmString = 
!strconcat(instr_asm, "\t$xd, $xj, $ui4"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui4))]; +} + +class LASX_BIT_5_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui5); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui5))]; +} + +class LASX_BIT_6_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui6); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui6"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui6))]; +} + +class LASX_BIT_2_4O_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, ImmOp:$ui2); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui2"); + list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, Imm:$ui2))]; + string Constraints = "$xd = $xd_in"; +} + +class LASX_BIT_2_4ON : + LASX_BIT_2_4O_DESC_BASE; + +class LASX_BIT_3_4O_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, ImmOp:$ui3); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui3"); + list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, Imm:$ui3))]; + string Constraints = "$xd = $xd_in"; +} + +class LASX_BIT_3_4ON : + LASX_BIT_3_4O_DESC_BASE; + +class LASX_INSERT_U3_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXD:$xd_in, ROS:$rj, ImmOp:$ui3); + string AsmString = !strconcat(instr_asm, "\t$xd, $rj, $ui3"); + list Pattern = [(set ROXD:$xd, (VTy (insertelt (VTy ROXD:$xd_in), ROS:$rj, Imm:$ui3)))]; + string Constraints = "$xd = $xd_in"; +} + +class LASX_INSERT_U2_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXD:$xd_in, ROS:$rj, ImmOp:$ui2); + string AsmString = !strconcat(instr_asm, "\t$xd, $rj, $ui2"); + list Pattern = [(set ROXD:$xd, (VTy (insertelt (VTy ROXD:$xd_in), ROS:$rj, Imm:$ui2)))]; + string Constraints = "$xd = $xd_in"; +} + +class LASX_COPY_U2_DESC_BASE { + dag OutOperandList = (outs ROD:$rd); + dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui2); + string AsmString = !strconcat(instr_asm, "\t$rd, $xj, $ui2"); + list Pattern = [(set ROD:$rd, (OpNode (VecTy ROXJ:$xj), Imm:$ui2))]; +} + +class LASX_COPY_U3_DESC_BASE { + dag OutOperandList = (outs ROD:$rd); + dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui3); + string AsmString = !strconcat(instr_asm, "\t$rd, $xj, $ui3"); + list Pattern = [(set ROD:$rd, (OpNode (VecTy ROXJ:$xj), Imm:$ui3))]; +} + +class LASX_ELM_U4_VREPLVE_DESC_BASE_Intrinsic { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, uimm4:$ui4); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui4"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt4:$ui4))]; +} + +class LASX_ELM_U3_VREPLVE_DESC_BASE_Intrinsic { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, uimm3:$ui3); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui3"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt3:$ui3))]; +} + +class LASX_ELM_U2_VREPLVE_DESC_BASE_Intrinsic { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, uimm2:$ui2); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui2"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt2:$ui2))]; +} + +class LASX_ELM_U1_VREPLVE_DESC_BASE_Intrinsic { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, uimm1:$ui1); + string AsmString = 
!strconcat(instr_asm, "\t$xd, $xj, $ui1");
+  list<dag> Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt1:$ui1))];
+}
+
+class LASX_XVBROADCAST_DESC_BASE<string instr_asm, SDPatternOperator OpNode, ValueType TyNode, RegisterOperand ROXD, RegisterOperand ROXJ = ROXD> {
+  dag OutOperandList = (outs ROXD:$xd);
+  dag InOperandList = (ins ROXJ:$xj);
+  string AsmString = !strconcat(instr_asm, "\t$xd, $xj");
+  list<dag> Pattern = [(set ROXD:$xd, (OpNode (TyNode ROXJ:$xj)))];
+}
+
+class LASX_2R_U3_DESC_BASE<string instr_asm, SDPatternOperator OpNode, RegisterOperand ROXD, RegisterOperand ROXJ = ROXD> {
+  dag OutOperandList = (outs ROXD:$xd);
+  dag InOperandList = (ins ROXJ:$xj, uimm3:$ui3);
+  string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui3");
+  list<dag> Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt3:$ui3))];
+}
+
+class LASX_2R_U4_DESC_BASE<string instr_asm, SDPatternOperator OpNode, RegisterOperand ROXD, RegisterOperand ROXJ = ROXD> {
+  dag OutOperandList = (outs ROXD:$xd);
+  dag InOperandList = (ins ROXJ:$xj, uimm4:$ui4);
+  string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui4");
+  list<dag> Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt4:$ui4))];
+}
+
+class LASX_2R_U5_DESC_BASE<string instr_asm, SDPatternOperator OpNode, RegisterOperand ROXD, RegisterOperand ROXJ = ROXD> {
+  dag OutOperandList = (outs ROXD:$xd);
+  dag InOperandList = (ins ROXJ:$xj, uimm5:$ui5);
+  string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5");
+  list<dag> Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt5:$ui5))];
+}
+
+class LASX_2R_U6_DESC_BASE<string instr_asm, SDPatternOperator OpNode, RegisterOperand ROXD, RegisterOperand ROXJ = ROXD> {
+  dag OutOperandList = (outs ROXD:$xd);
+  dag InOperandList = (ins ROXJ:$xj, uimm6:$ui6);
+  string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui6");
+  list<dag> Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt6:$ui6))];
+}
+
+class LASX_BIT_U3_VREPLVE_DESC_BASE<string instr_asm, SDPatternOperator OpNode, SplatComplexPattern SplatImm, RegisterOperand ROXD, RegisterOperand ROXJ = ROXD> {
+  dag OutOperandList = (outs ROXD:$xd);
+  dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$ui3);
+  string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui3");
+  list<dag> Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, SplatImm:$ui3))];
+}
+
+class LASX_BIT_U4_VREPLVE_DESC_BASE<string instr_asm, SDPatternOperator OpNode, SplatComplexPattern SplatImm, RegisterOperand ROXD, RegisterOperand ROXJ = ROXD> {
+  dag OutOperandList = (outs ROXD:$xd);
+  dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$ui4);
+  string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui4");
+  list<dag> Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, SplatImm:$ui4))];
+}
+
+class LASX_BIT_U5_VREPLVE_DESC_BASE<string instr_asm, SDPatternOperator OpNode, SplatComplexPattern SplatImm, RegisterOperand ROXD, RegisterOperand ROXJ = ROXD> {
+  dag OutOperandList = (outs ROXD:$xd);
+  dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$ui5);
+  string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5");
+  list<dag> Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, SplatImm:$ui5))];
+}
+
+class LASX_BIT_U6_VREPLVE_DESC_BASE<string instr_asm, SDPatternOperator OpNode, SplatComplexPattern SplatImm, RegisterOperand ROXD, RegisterOperand ROXJ = ROXD> {
+  dag OutOperandList = (outs ROXD:$xd);
+  dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$ui6);
+  string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui6");
+  list<dag> Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, SplatImm:$ui6))];
+}
+
+class LASX_BIT_U3_VREPLVE_DESC_BASE_Intrinsic<string instr_asm, SDPatternOperator OpNode, Operand ImmOp, ImmLeaf Imm, RegisterOperand ROXD, RegisterOperand ROXJ = ROXD> {
+  dag OutOperandList = (outs ROXD:$xd);
+  dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui3);
+  string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui3");
+  list<dag> Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui3))];
+}
+
+class LASX_BIT_U4_VREPLVE_DESC_BASE_Intrinsic<string instr_asm, SDPatternOperator OpNode, Operand ImmOp, ImmLeaf Imm, RegisterOperand ROXD, RegisterOperand ROXJ = ROXD> {
+  dag OutOperandList = (outs ROXD:$xd);
+  dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui4);
+  string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui4");
+  list<dag> Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui4))];
+}
+
+class LASX_BIT_U5_VREPLVE_DESC_BASE_Intrinsic<string instr_asm, SDPatternOperator OpNode, Operand ImmOp, ImmLeaf Imm, RegisterOperand ROXD, RegisterOperand ROXJ = ROXD> {
+  dag OutOperandList = (outs ROXD:$xd);
+  dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui5);
+  string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5");
+  list<dag> Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui5))];
+}
+
+class LASX_BIT_U6_VREPLVE_DESC_BASE_Intrinsic<string instr_asm, SDPatternOperator OpNode, Operand ImmOp, ImmLeaf Imm, RegisterOperand ROXD, RegisterOperand ROXJ = ROXD> {
+  dag OutOperandList = (outs ROXD:$xd);
+  dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui6);
+  string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui6");
+  list<dag> Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui6))];
+}
+
+class LASX_U4_DESC_BASE<string instr_asm, SDPatternOperator OpNode, Operand ImmOp, ImmLeaf Imm, RegisterOperand ROXD, RegisterOperand ROXJ = ROXD> {
+  dag OutOperandList = (outs ROXD:$xd);
+  dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, ImmOp:$ui4);
+  string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui4");
+  list<dag> Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, Imm:$ui4))];
+  string Constraints = "$xd = $xd_in";
+}
+
+class LASX_N4_U5_DESC_BASE<string instr_asm, SDPatternOperator OpNode, Operand ImmOp, ImmLeaf Imm, RegisterOperand ROXD, RegisterOperand ROXJ = ROXD> {
+  dag OutOperandList = (outs ROXD:$xd);
+  dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, ImmOp:$ui5);
+  string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5");
+  list<dag> Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, Imm:$ui5))];
+  string Constraints = "$xd = $xd_in";
+}
+
+class LASX_U6_DESC_BASE<string instr_asm, SDPatternOperator OpNode, Operand ImmOp, ImmLeaf Imm, RegisterOperand ROXD, RegisterOperand ROXJ = ROXD> {
+  dag OutOperandList = (outs ROXD:$xd);
+  dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, ImmOp:$ui6);
+  string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui6");
+  list<dag> Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, Imm:$ui6))];
+  string Constraints = "$xd = $xd_in";
+}
+
+class LASX_D_DESC_BASE<string instr_asm, SDPatternOperator OpNode, RegisterOperand ROXD, RegisterOperand ROXJ = ROXD> {
+  dag OutOperandList = (outs ROXD:$xd);
+  dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm7:$ui7);
+  string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui7");
+  list<dag> Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, immZExt7:$ui7))];
+  string Constraints = "$xd = $xd_in";
+}
+
+class LASX_2R_3R_U4_DESC_BASE<string instr_asm, SDPatternOperator OpNode, RegisterOperand ROXD, RegisterOperand ROXJ = ROXD> {
+  dag OutOperandList = (outs ROXD:$xd);
+  dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm4:$ui4);
+  string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui4");
+  list<dag> Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, immZExt4:$ui4))];
+  string Constraints = "$xd = $xd_in";
+}
+
+class LASX_2R_3R_U5_DESC_BASE<string instr_asm, SDPatternOperator OpNode, RegisterOperand ROXD, RegisterOperand ROXJ = ROXD> {
+  dag OutOperandList = (outs ROXD:$xd);
+  dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm5:$ui5);
+  string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5");
+  list<dag> Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, immZExt5:$ui5))];
+  string Constraints = "$xd = $xd_in";
+}
+
+class LASX_2R_3R_U6_DESC_BASE<string instr_asm, SDPatternOperator OpNode, RegisterOperand ROXD, RegisterOperand ROXJ = ROXD> {
+  dag OutOperandList = (outs ROXD:$xd);
+  dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm6:$ui6);
+  string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui6");
+  list<dag> Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, immZExt6:$ui6))];
+  string Constraints = "$xd = $xd_in";
+}
+
+class LASX_2R_3R_U7_DESC_BASE<string instr_asm, SDPatternOperator OpNode, RegisterOperand ROXD, RegisterOperand ROXJ = ROXD> {
+  dag OutOperandList = (outs ROXD:$xd);
+  dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm7:$ui7);
+  string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui7");
+  list<dag> Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, immZExt7:$ui7))];
+  string Constraints = "$xd = $xd_in";
+}
+
+class LASX_2R_3R_U8_DESC_BASE<string instr_asm, SDPatternOperator OpNode, RegisterOperand ROXD, RegisterOperand ROXJ = ROXD> {
+  dag OutOperandList = (outs ROXD:$xd);
+  dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm8:$ui8);
+  string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui8");
+  list<dag> Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, immZExt8:$ui8))];
+  string Constraints = "$xd = $xd_in";
+}
+
+class LASX_2R_3R_U8_SELECT<string instr_asm, SDPatternOperator OpNode, RegisterOperand ROXD, RegisterOperand ROXJ = ROXD> {
+  dag OutOperandList = (outs ROXD:$xd);
+  dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, vsplat_uimm8:$ui8);
+  string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui8");
+  list<dag> Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, xvsplati8_uimm8:$ui8, ROXJ:$xj))];
+  string Constraints = "$xd = $xd_in";
+}
+
+class LASX_I8_O4_SHF_DESC_BASE<string instr_asm, SDPatternOperator OpNode, RegisterOperand ROXD, RegisterOperand ROXJ = ROXD> {
+  dag OutOperandList = (outs ROXD:$xd);
+  dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm8:$ui8);
+  string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui8");
+  list<dag> Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, immZExt8:$ui8))];
+  string Constraints = "$xd = $xd_in";
+}
+
+class LASX_I8_SHF_DESC_BASE<string instr_asm, SDPatternOperator OpNode, RegisterOperand ROXD, RegisterOperand ROXJ = ROXD> {
+  dag OutOperandList = (outs ROXD:$xd);
+  dag InOperandList = (ins ROXJ:$xj, uimm8:$ui8);
+  string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui8");
+  list<dag> Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt8:$ui8))];
+}
+
+class LASX_2R_U8_DESC_BASE<string instr_asm, SDPatternOperator OpNode, RegisterOperand ROXD, RegisterOperand ROXJ = ROXD> {
+  dag OutOperandList = (outs ROXD:$xd);
+  dag InOperandList = (ins ROXJ:$xj, uimm8:$ui8);
+  string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui8");
+  list<dag> Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt8:$ui8))];
+}
+
+class LASX_I13_DESC_BASE<string instr_asm, SDPatternOperator OpNode, ValueType Ty, Operand immOp, RegisterOperand ROXD> {
+  dag OutOperandList = (outs ROXD:$xd);
+  dag InOperandList = (ins immOp:$i13);
+  string AsmString = !strconcat(instr_asm, "\t$xd, $i13");
+  list<dag> Pattern = [(set ROXD:$xd, (OpNode (Ty simm13:$i13)))];
+  string DecoderMethod = "DecodeLASX256Mem13";
+}
+
+class LASX_I13_DESC_BASE_tmp<string instr_asm, RegisterOperand ROXD> {
+  dag OutOperandList = (outs ROXD:$xd);
+  dag InOperandList = (ins vsplat_simm10:$i10);
+  string AsmString = !strconcat(instr_asm, "\t$xd, $i10");
+  list<dag> Pattern = [];
+  bit hasSideEffects = 0;
+  string DecoderMethod = "DecodeLASX256Mem10";
+}
+
+class LASX_BIT_U8_DESC_BASE<string instr_asm, SDPatternOperator OpNode, SplatComplexPattern SplatImm, RegisterOperand ROXD, RegisterOperand ROXJ = ROXD> {
+  dag OutOperandList = (outs ROXD:$xd);
+  dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$ui8);
+  string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui8");
+  list<dag> Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, SplatImm:$ui8))];
+}
+
+class LASX_2RN_3R_U8_DESC_BASE<string instr_asm, RegisterOperand ROXD, RegisterOperand ROXJ = ROXD> {
+  dag OutOperandList = (outs ROXD:$xd);
+  dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm8:$ui8);
+  string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui8");
+  list<dag> Pattern = [];
+  string Constraints = "$xd = $xd_in";
+}
+
+
+//encoding
+
+def XVFMADD_S : LASX_4R<0b000010100001>,
+    LASX_4RF<"xvfmadd.s", int_loongarch_lasx_xvfmadd_s, LASX256WOpnd>;
+
+def XVFMADD_D : LASX_4R<0b000010100010>,
+    LASX_4RF<"xvfmadd.d", int_loongarch_lasx_xvfmadd_d, LASX256DOpnd>;
+
+
+def XVFMSUB_S : LASX_4R<0b000010100101>,
+    LASX_4RF<"xvfmsub.s", int_loongarch_lasx_xvfmsub_s, LASX256WOpnd>;
+
+def XVFMSUB_D : LASX_4R<0b000010100110>,
+    LASX_4RF<"xvfmsub.d", int_loongarch_lasx_xvfmsub_d, LASX256DOpnd>;
+
+
+def XVFNMADD_S : LASX_4R<0b000010101001>,
+    LASX_4RF<"xvfnmadd.s", int_loongarch_lasx_xvfnmadd_s, LASX256WOpnd>;
+
+def XVFNMADD_D : LASX_4R<0b000010101010>,
+    LASX_4RF<"xvfnmadd.d", int_loongarch_lasx_xvfnmadd_d, LASX256DOpnd>;
+
+
+def XVFNMSUB_S : LASX_4R<0b000010101101>,
+    LASX_4RF<"xvfnmsub.s", int_loongarch_lasx_xvfnmsub_s, LASX256WOpnd>;
+
+def XVFNMSUB_D : LASX_4R<0b000010101110>,
+    LASX_4RF<"xvfnmsub.d", int_loongarch_lasx_xvfnmsub_d, LASX256DOpnd>;
+
+
+// xvfmadd: xj * xk + xa
+def : LASXPat<(fma v4f64:$xj, v4f64:$xk, v4f64:$xa),
+              (XVFMADD_D $xj, $xk, $xa)>;
+
+def : LASXPat<(fma v8f32:$xj, v8f32:$xk, v8f32:$xa),
+              (XVFMADD_S $xj, $xk, $xa)>;
+
+
+// xvfmsub: xj * xk - xa
+def : LASXPat<(fma v4f64:$xj, v4f64:$xk, (fneg v4f64:$xa)),
+              (XVFMSUB_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>;
+
+def : LASXPat<(fma v8f32:$xj, v8f32:$xk, (fneg v8f32:$xa)),
+              (XVFMSUB_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>;
+
+
+// xvfnmadd: -(xj * xk + xa)
+def : LASXPat<(fma (fneg v4f64:$xj), v4f64:$xk, (fneg v4f64:$xa)),
+              (XVFNMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>;
+
+def : LASXPat<(fma (fneg v8f32:$xj), v8f32:$xk, (fneg v8f32:$xa)),
+              (XVFNMADD_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>;
+
+// xvfnmsub: -(xj * xk - xa)
+def : LASXPat<(fma (fneg v4f64:$xj), v4f64:$xk, v4f64:$xa),
+              (XVFNMSUB_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>;
+
+def : LASXPat<(fma (fneg v8f32:$xj), v8f32:$xk, v8f32:$xa),
+              (XVFNMSUB_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>;
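+
+// Illustrative only: the LASXPat patterns above map the generic ISD::FMA
+// node onto the fused instructions, so IR like the following sketch is
+// expected (not guaranteed by this file alone) to select to xvfmadd.d:
+//
+//   declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>)
+//   define <4 x double> @fma_v4f64(<4 x double> %j, <4 x double> %k, <4 x double> %a) {
+//     %r = call <4 x double> @llvm.fma.v4f64(<4 x double> %j, <4 x double> %k, <4 x double> %a)
+//     ret <4 x double> %r
+//   }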
+
+
+def XVFCMP_CAF_S : LASX_XVFCMP<0b000011001001>,
+    LASX_3RF<"xvfcmp.caf.s", int_loongarch_lasx_xvfcmp_caf_s, LASX256WOpnd> {
+  bits<5> cond = 0x0;
+}
+
+def XVFCMP_CAF_D : LASX_XVFCMP<0b000011001010>,
+    LASX_3RF<"xvfcmp.caf.d", int_loongarch_lasx_xvfcmp_caf_d, LASX256DOpnd> {
+  bits<5> cond = 0x0;
+}
+
+def XVFCMP_COR_S : LASX_XVFCMP<0b000011001001>,
+    LASX_3RF<"xvfcmp.cor.s", vfsetord_v8f32, LASX256WOpnd> {
+  bits<5> cond = 0x14;
+}
+
+def XVFCMP_COR_D : LASX_XVFCMP<0b000011001010>,
+    LASX_3RF<"xvfcmp.cor.d", vfsetord_v4f64, LASX256DOpnd> {
+  bits<5> cond = 0x14;
+}
+
+def XVFCMP_CUN_S : LASX_XVFCMP<0b000011001001>,
+    LASX_3RF<"xvfcmp.cun.s", vfsetun_v8f32, LASX256WOpnd> {
+  bits<5> cond = 0x8;
+}
+
+def XVFCMP_CUN_D : LASX_XVFCMP<0b000011001010>,
+    LASX_3RF<"xvfcmp.cun.d", vfsetun_v4f64, LASX256DOpnd> {
+  bits<5> cond = 0x8;
+}
+
+def XVFCMP_CUNE_S : LASX_XVFCMP<0b000011001001>,
+    LASX_3RF<"xvfcmp.cune.s", vfsetune_v8f32, LASX256WOpnd> {
+  bits<5> cond = 0x18;
+}
+
+def XVFCMP_CUNE_D : LASX_XVFCMP<0b000011001010>,
+    LASX_3RF<"xvfcmp.cune.d", vfsetune_v4f64, LASX256DOpnd> {
+  bits<5> cond = 0x18;
+}
+
+def XVFCMP_CUEQ_S : LASX_XVFCMP<0b000011001001>,
+    LASX_3RF<"xvfcmp.cueq.s", vfsetueq_v8f32, LASX256WOpnd> {
+  bits<5> cond = 0xc;
+}
+
+def XVFCMP_CUEQ_D : LASX_XVFCMP<0b000011001010>,
+    LASX_3RF<"xvfcmp.cueq.d", vfsetueq_v4f64, LASX256DOpnd> {
+  bits<5> cond = 0xc;
+}
+
+def XVFCMP_CEQ_S : LASX_XVFCMP<0b000011001001>,
+    LASX_3RF<"xvfcmp.ceq.s", vfsetoeq_v8f32, LASX256WOpnd> {
+  bits<5> cond = 0x4;
+}
+
+def XVFCMP_CEQ_D : LASX_XVFCMP<0b000011001010>,
+    LASX_3RF<"xvfcmp.ceq.d", vfsetoeq_v4f64, LASX256DOpnd> {
+  bits<5> cond = 0x4;
+}
+
+def XVFCMP_CNE_S : LASX_XVFCMP<0b000011001001>,
+    LASX_3RF<"xvfcmp.cne.s", vfsetone_v8f32, LASX256WOpnd> {
+  bits<5> cond = 0x10;
+}
+
+def XVFCMP_CNE_D : LASX_XVFCMP<0b000011001010>,
+    LASX_3RF<"xvfcmp.cne.d", vfsetone_v4f64, LASX256DOpnd> {
+  bits<5> cond = 0x10;
+}
+
+def XVFCMP_CLT_S : LASX_XVFCMP<0b000011001001>,
+    LASX_3RF<"xvfcmp.clt.s", vfsetolt_v8f32, LASX256WOpnd> {
+  bits<5> cond = 0x2;
+}
+
+def XVFCMP_CLT_D : LASX_XVFCMP<0b000011001010>,
+    LASX_3RF<"xvfcmp.clt.d", vfsetolt_v4f64, LASX256DOpnd> {
+  bits<5> cond = 0x2;
+}
+
+def XVFCMP_CULT_S : LASX_XVFCMP<0b000011001001>,
+    LASX_3RF<"xvfcmp.cult.s", vfsetult_v8f32, LASX256WOpnd> {
+  bits<5> cond = 0xa;
+}
+
+def XVFCMP_CULT_D : LASX_XVFCMP<0b000011001010>,
+    LASX_3RF<"xvfcmp.cult.d", vfsetult_v4f64, LASX256DOpnd> {
+  bits<5> cond = 0xa;
+}
+
+def XVFCMP_CLE_S : LASX_XVFCMP<0b000011001001>,
+    LASX_3RF<"xvfcmp.cle.s", vfsetole_v8f32, LASX256WOpnd> {
+  bits<5> cond = 0x6;
+}
+
+def XVFCMP_CLE_D : LASX_XVFCMP<0b000011001010>,
+    LASX_3RF<"xvfcmp.cle.d", vfsetole_v4f64, LASX256DOpnd> {
+  bits<5> cond = 0x6;
+}
+
+def XVFCMP_CULE_S : LASX_XVFCMP<0b000011001001>,
+    LASX_3RF<"xvfcmp.cule.s", vfsetule_v8f32, LASX256WOpnd> {
+  bits<5> cond = 0xe;
+}
+
+def XVFCMP_CULE_D : LASX_XVFCMP<0b000011001010>,
+    LASX_3RF<"xvfcmp.cule.d", vfsetule_v4f64, LASX256DOpnd> {
+  bits<5> cond = 0xe;
+}
+
+def XVFCMP_SAF_S : LASX_XVFCMP<0b000011001001>,
+    LASX_3RF<"xvfcmp.saf.s", int_loongarch_lasx_xvfcmp_saf_s, LASX256WOpnd> {
+  bits<5> cond = 0x1;
+}
+
+def XVFCMP_SAF_D : LASX_XVFCMP<0b000011001010>,
+    LASX_3RF<"xvfcmp.saf.d", int_loongarch_lasx_xvfcmp_saf_d, LASX256DOpnd> {
+  bits<5> cond = 0x1;
+}
+
+def XVFCMP_SOR_S : LASX_XVFCMP<0b000011001001>,
+    LASX_3RF<"xvfcmp.sor.s", int_loongarch_lasx_xvfcmp_sor_s, LASX256WOpnd> {
+  bits<5> cond = 0x15;
+}
+
+def XVFCMP_SOR_D : LASX_XVFCMP<0b000011001010>,
+    LASX_3RF<"xvfcmp.sor.d", int_loongarch_lasx_xvfcmp_sor_d, LASX256DOpnd> {
+  bits<5> cond = 0x15;
+}
+
+def XVFCMP_SUN_S : LASX_XVFCMP<0b000011001001>,
+    LASX_3RF<"xvfcmp.sun.s", int_loongarch_lasx_xvfcmp_sun_s, LASX256WOpnd> {
+  bits<5> cond = 0x9;
+}
+
+def XVFCMP_SUN_D : LASX_XVFCMP<0b000011001010>,
+    LASX_3RF<"xvfcmp.sun.d", int_loongarch_lasx_xvfcmp_sun_d, LASX256DOpnd> {
+  bits<5> cond = 0x9;
+}
+
+def XVFCMP_SUNE_S : LASX_XVFCMP<0b000011001001>,
+    LASX_3RF<"xvfcmp.sune.s", int_loongarch_lasx_xvfcmp_sune_s, LASX256WOpnd> {
+  bits<5> cond = 0x19;
+}
+
+def XVFCMP_SUNE_D : LASX_XVFCMP<0b000011001010>,
+    LASX_3RF<"xvfcmp.sune.d", int_loongarch_lasx_xvfcmp_sune_d, LASX256DOpnd> {
+  bits<5> cond = 0x19;
+}
+
+def XVFCMP_SUEQ_S : LASX_XVFCMP<0b000011001001>,
+    LASX_3RF<"xvfcmp.sueq.s", int_loongarch_lasx_xvfcmp_sueq_s, LASX256WOpnd> {
+  bits<5> cond = 0xd;
+}
+
+def XVFCMP_SUEQ_D : LASX_XVFCMP<0b000011001010>,
+    LASX_3RF<"xvfcmp.sueq.d", int_loongarch_lasx_xvfcmp_sueq_d, LASX256DOpnd> {
+  bits<5> cond = 0xd;
+}
+
+def XVFCMP_SEQ_S : LASX_XVFCMP<0b000011001001>,
+    LASX_3RF<"xvfcmp.seq.s", int_loongarch_lasx_xvfcmp_seq_s, LASX256WOpnd> {
+  bits<5> cond = 0x5;
+}
+
+def XVFCMP_SEQ_D : LASX_XVFCMP<0b000011001010>,
+    LASX_3RF<"xvfcmp.seq.d", int_loongarch_lasx_xvfcmp_seq_d, LASX256DOpnd> {
+  bits<5> cond = 0x5;
+}
+
+def XVFCMP_SNE_S : LASX_XVFCMP<0b000011001001>,
+    LASX_3RF<"xvfcmp.sne.s", int_loongarch_lasx_xvfcmp_sne_s, LASX256WOpnd> {
+  bits<5> cond = 0x11;
+}
+
+def XVFCMP_SNE_D : LASX_XVFCMP<0b000011001010>,
+    LASX_3RF<"xvfcmp.sne.d", int_loongarch_lasx_xvfcmp_sne_d, LASX256DOpnd> {
+  bits<5> cond = 0x11;
+}
+
+def XVFCMP_SLT_S : LASX_XVFCMP<0b000011001001>,
+    LASX_3RF<"xvfcmp.slt.s", int_loongarch_lasx_xvfcmp_slt_s, LASX256WOpnd> {
+  bits<5> cond = 0x3;
+}
+
+def XVFCMP_SLT_D : LASX_XVFCMP<0b000011001010>,
+    LASX_3RF<"xvfcmp.slt.d", int_loongarch_lasx_xvfcmp_slt_d, LASX256DOpnd> {
+  bits<5> cond = 0x3;
+}
+
+def XVFCMP_SULT_S : LASX_XVFCMP<0b000011001001>,
+    LASX_3RF<"xvfcmp.sult.s", int_loongarch_lasx_xvfcmp_sult_s, LASX256WOpnd> {
+  bits<5> cond = 0xb;
+}
+
+def XVFCMP_SULT_D : LASX_XVFCMP<0b000011001010>,
+    LASX_3RF<"xvfcmp.sult.d", int_loongarch_lasx_xvfcmp_sult_d, LASX256DOpnd> {
+  bits<5> cond = 0xb;
+}
+
+def XVFCMP_SLE_S : LASX_XVFCMP<0b000011001001>,
+    LASX_3RF<"xvfcmp.sle.s", int_loongarch_lasx_xvfcmp_sle_s, LASX256WOpnd> {
+  bits<5> cond = 0x7;
+}
+
+def XVFCMP_SLE_D : LASX_XVFCMP<0b000011001010>,
+    LASX_3RF<"xvfcmp.sle.d", int_loongarch_lasx_xvfcmp_sle_d, LASX256DOpnd> {
+  bits<5> cond = 0x7;
+}
+
+def XVFCMP_SULE_S : LASX_XVFCMP<0b000011001001>,
+    LASX_3RF<"xvfcmp.sule.s", int_loongarch_lasx_xvfcmp_sule_s, LASX256WOpnd> {
+  bits<5> cond = 0xf;
+}
+
+def XVFCMP_SULE_D : LASX_XVFCMP<0b000011001010>,
+    LASX_3RF<"xvfcmp.sule.d", int_loongarch_lasx_xvfcmp_sule_d, LASX256DOpnd> {
+  bits<5> cond = 0xf;
+}
+
+
+def XVBITSEL_V : LASX_4R<0b000011010010>,
+    LASX_4RF<"xvbitsel.v", int_loongarch_lasx_xvbitsel_v, LASX256BOpnd>;
+
+class LASX_BSEL_PSEUDO_BASE<RegisterOperand RO, ValueType Ty> :
+    LASXPseudo<(outs RO:$xd), (ins RO:$xd_in, RO:$xs, RO:$xt),
+               [(set RO:$xd, (Ty (vselect RO:$xd_in, RO:$xt, RO:$xs)))]>,
+    PseudoInstExpansion<(XVBITSEL_V LASX256BOpnd:$xd, LASX256BOpnd:$xs,
+                                    LASX256BOpnd:$xt, LASX256BOpnd:$xd_in)> {
+  let Constraints = "$xd_in = $xd";
+}
+
+def XBSEL_B_PSEUDO : LASX_BSEL_PSEUDO_BASE<LASX256BOpnd, v32i8>;
+def XBSEL_H_PSEUDO : LASX_BSEL_PSEUDO_BASE<LASX256HOpnd, v16i16>;
+def XBSEL_W_PSEUDO : LASX_BSEL_PSEUDO_BASE<LASX256WOpnd, v8i32>;
+def XBSEL_D_PSEUDO : LASX_BSEL_PSEUDO_BASE<LASX256DOpnd, v4i64>;
+def XBSEL_FW_PSEUDO : LASX_BSEL_PSEUDO_BASE<LASX256WOpnd, v8f32>;
+def XBSEL_FD_PSEUDO : LASX_BSEL_PSEUDO_BASE<LASX256DOpnd, v4f64>;
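+
+// The XBSEL_*_PSEUDO definitions above give ISel a vselect pattern at every
+// element width while all of them expand back to the single byte-wise
+// xvbitsel.v (the mask is already a full 256-bit register value by the time
+// these patterns run). A rough IR sketch expected to take this path:
+//
+//   %r = select <8 x i1> %m, <8 x i32> %t, <8 x i32> %f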
LASX_4RF<"xvshuf.b", int_loongarch_lasx_xvshuf_b, LASX256BOpnd>; + + +def XVLD : LASX_I12_S<0b0010110010>, + LASX_LD<"xvld", load, v32i8, LASX256BOpnd, mem>; + +def XVST : LASX_I12_S<0b0010110011>, + LASX_ST<"xvst", store, v32i8, LASX256BOpnd, mem_simm12>; + + +class LASX_LD_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins MemOpnd:$addr); + string AsmString = !strconcat(instr_asm, "\t$xd, $addr"); + list Pattern = [(set ROXD:$xd, (OpNode (TyNode (load Addr:$addr))))]; + string DecoderMethod = "DecodeLASX256memlsl"; +} + + +def XVLDREPL_B : LASX_SI12_S<0b0011001010>, + LASX_LD_DESC_BASE<"xvldrepl.b", xvbroadcast_v32i8, v32i8, LASX256BOpnd>; + +def XVLDREPL_H : LASX_SI11_S<0b00110010010>, + LASX_LD_DESC_BASE<"xvldrepl.h", xvbroadcast_v16i16, v16i16, LASX256HOpnd, mem_simm11_lsl1, addrimm11lsl1>; + +def XVLDREPL_W : LASX_SI10_S<0b001100100010>, + LASX_LD_DESC_BASE<"xvldrepl.w", xvbroadcast_v8i32, v8i32, LASX256WOpnd, mem_simm10_lsl2, addrimm10lsl2>; + +def XVLDREPL_D : LASX_SI9_S<0b0011001000010>, + LASX_LD_DESC_BASE<"xvldrepl.d", xvbroadcast_v4i64, v4i64, LASX256DOpnd, mem_simm9_lsl3, addrimm9lsl3>; + + +def XVSTELM_B : LASX_SI8_idx5<0b001100111>, + LASX_I8_U5_DESC_BASE<"xvstelm.b", int_loongarch_lasx_xvstelm_b, simm8_32, immSExt8, LASX256BOpnd, GPR32Opnd>; + +def XVSTELM_H : LASX_SI8_idx4<0b0011001101>, + LASX_I8_U4_DESC_BASE<"xvstelm.h", int_loongarch_lasx_xvstelm_h, immSExt8_1_O, immSExt8, LASX256HOpnd, GPR32Opnd>; + +def XVSTELM_W : LASX_SI8_idx3<0b00110011001>, + LASX_I8_U3_DESC_BASE<"xvstelm.w", int_loongarch_lasx_xvstelm_w, immSExt8_2_O, immSExt8, LASX256WOpnd, GPR32Opnd>; + +def XVSTELM_D : LASX_SI8_idx2<0b001100110001>, + LASX_I8_U2_DESC_BASE<"xvstelm.d", int_loongarch_lasx_xvstelm_d, immSExt8_3_O, immSExt8, LASX256DOpnd, GPR32Opnd>; + +let mayLoad = 1, canFoldAsLoad = 1 in { + def XVLDX : LASX_3R_2GP<0b00111000010010000>, + LASX_LDX_LA<"xvldx", int_loongarch_lasx_xvldx, GPR64Opnd, LASX256BOpnd>; +} + +let mayStore = 1 in{ + def XVSTX : LASX_3R_2GP<0b00111000010011000>, + LASX_SDX_LA<"xvstx", int_loongarch_lasx_xvstx, GPR64Opnd, LASX256BOpnd>; +} + + +def XVSEQ_B : LASX_3R<0b01110100000000000>, IsCommutable, + LASX_3R_SETCC_DESC_BASE<"xvseq.b", SETEQ, v32i8, LASX256BOpnd>; + +def XVSEQ_H : LASX_3R<0b01110100000000001>, IsCommutable, + LASX_3R_SETCC_DESC_BASE<"xvseq.h", SETEQ, v16i16, LASX256HOpnd>; + +def XVSEQ_W : LASX_3R<0b01110100000000010>, IsCommutable, + LASX_3R_SETCC_DESC_BASE<"xvseq.w", SETEQ, v8i32, LASX256WOpnd> ; + +def XVSEQ_D : LASX_3R<0b01110100000000011>, IsCommutable, + LASX_3R_SETCC_DESC_BASE<"xvseq.d", SETEQ, v4i64, LASX256DOpnd>; + + +def XVSLE_B : LASX_3R<0b01110100000000100>, + LASX_3R_SETCC_DESC_BASE<"xvsle.b", SETLE, v32i8, LASX256BOpnd>; + +def XVSLE_H : LASX_3R<0b01110100000000101>, + LASX_3R_SETCC_DESC_BASE<"xvsle.h", SETLE, v16i16, LASX256HOpnd>; + +def XVSLE_W : LASX_3R<0b01110100000000110>, + LASX_3R_SETCC_DESC_BASE<"xvsle.w", SETLE, v8i32, LASX256WOpnd>; + +def XVSLE_D : LASX_3R<0b01110100000000111>, + LASX_3R_SETCC_DESC_BASE<"xvsle.d", SETLE, v4i64, LASX256DOpnd>; + + +def XVSLE_BU : LASX_3R<0b01110100000001000>, + LASX_3R_SETCC_DESC_BASE<"xvsle.bu", SETULE, v32i8, LASX256BOpnd>; + +def XVSLE_HU : LASX_3R<0b01110100000001001>, + LASX_3R_SETCC_DESC_BASE<"xvsle.hu", SETULE, v16i16, LASX256HOpnd>; + +def XVSLE_WU : LASX_3R<0b01110100000001010>, + LASX_3R_SETCC_DESC_BASE<"xvsle.wu", SETULE, v8i32, LASX256WOpnd>; + +def XVSLE_DU : LASX_3R<0b01110100000001011>, + LASX_3R_SETCC_DESC_BASE<"xvsle.du", SETULE, v4i64, LASX256DOpnd>; 
+
+
+def XVSLT_B : LASX_3R<0b01110100000001100>,
+    LASX_3R_SETCC_DESC_BASE<"xvslt.b", SETLT, v32i8, LASX256BOpnd>;
+
+def XVSLT_H : LASX_3R<0b01110100000001101>,
+    LASX_3R_SETCC_DESC_BASE<"xvslt.h", SETLT, v16i16, LASX256HOpnd>;
+
+def XVSLT_W : LASX_3R<0b01110100000001110>,
+    LASX_3R_SETCC_DESC_BASE<"xvslt.w", SETLT, v8i32, LASX256WOpnd>;
+
+def XVSLT_D : LASX_3R<0b01110100000001111>,
+    LASX_3R_SETCC_DESC_BASE<"xvslt.d", SETLT, v4i64, LASX256DOpnd>;
+
+
+def XVSLT_BU : LASX_3R<0b01110100000010000>,
+    LASX_3R_SETCC_DESC_BASE<"xvslt.bu", SETULT, v32i8, LASX256BOpnd>;
+
+def XVSLT_HU : LASX_3R<0b01110100000010001>,
+    LASX_3R_SETCC_DESC_BASE<"xvslt.hu", SETULT, v16i16, LASX256HOpnd>;
+
+def XVSLT_WU : LASX_3R<0b01110100000010010>,
+    LASX_3R_SETCC_DESC_BASE<"xvslt.wu", SETULT, v8i32, LASX256WOpnd>;
+
+def XVSLT_DU : LASX_3R<0b01110100000010011>,
+    LASX_3R_SETCC_DESC_BASE<"xvslt.du", SETULT, v4i64, LASX256DOpnd>;
+
+
+def XVADD_B : LASX_3R<0b01110100000010100>, IsCommutable,
+    LASX_3R_DESC_BASE<"xvadd.b", add, LASX256BOpnd>;
+
+def XVADD_H : LASX_3R<0b01110100000010101>, IsCommutable,
+    LASX_3R_DESC_BASE<"xvadd.h", add, LASX256HOpnd>;
+
+def XVADD_W : LASX_3R<0b01110100000010110>, IsCommutable,
+    LASX_3R_DESC_BASE<"xvadd.w", add, LASX256WOpnd>;
+
+def XVADD_D : LASX_3R<0b01110100000010111>, IsCommutable,
+    LASX_3R_DESC_BASE<"xvadd.d", add, LASX256DOpnd>;
+
+
+def XVSUB_B : LASX_3R<0b01110100000011000>,
+    LASX_3R_DESC_BASE<"xvsub.b", sub, LASX256BOpnd>;
+
+def XVSUB_H : LASX_3R<0b01110100000011001>,
+    LASX_3R_DESC_BASE<"xvsub.h", sub, LASX256HOpnd>;
+
+def XVSUB_W : LASX_3R<0b01110100000011010>,
+    LASX_3R_DESC_BASE<"xvsub.w", sub, LASX256WOpnd>;
+
+def XVSUB_D : LASX_3R<0b01110100000011011>,
+    LASX_3R_DESC_BASE<"xvsub.d", sub, LASX256DOpnd>;
+
+
+def XVADDWEV_H_B : LASX_3R<0b01110100000111100>,
+    LASX_3R_DESC_BASE<"xvaddwev.h.b", int_loongarch_lasx_xvaddwev_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>;
+
+def XVADDWEV_W_H : LASX_3R<0b01110100000111101>,
+    LASX_3R_DESC_BASE<"xvaddwev.w.h", int_loongarch_lasx_xvaddwev_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>;
+
+def XVADDWEV_D_W : LASX_3R<0b01110100000111110>,
+    LASX_3R_DESC_BASE<"xvaddwev.d.w", int_loongarch_lasx_xvaddwev_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>;
+
+def XVADDWEV_Q_D : LASX_3R<0b01110100000111111>,
+    LASX_3R_DESC_BASE<"xvaddwev.q.d", int_loongarch_lasx_xvaddwev_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>;
+
+
+def XVSUBWEV_H_B : LASX_3R<0b01110100001000000>,
+    LASX_3R_DESC_BASE<"xvsubwev.h.b", int_loongarch_lasx_xvsubwev_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>;
+
+def XVSUBWEV_W_H : LASX_3R<0b01110100001000001>,
+    LASX_3R_DESC_BASE<"xvsubwev.w.h", int_loongarch_lasx_xvsubwev_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>;
+
+def XVSUBWEV_D_W : LASX_3R<0b01110100001000010>,
+    LASX_3R_DESC_BASE<"xvsubwev.d.w", int_loongarch_lasx_xvsubwev_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>;
+
+def XVSUBWEV_Q_D : LASX_3R<0b01110100001000011>,
+    LASX_3R_DESC_BASE<"xvsubwev.q.d", int_loongarch_lasx_xvsubwev_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>;
+
+
+def XVADDWOD_H_B : LASX_3R<0b01110100001000100>,
+    LASX_3R_DESC_BASE<"xvaddwod.h.b", int_loongarch_lasx_xvaddwod_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>;
+
+def XVADDWOD_W_H : LASX_3R<0b01110100001000101>,
+    LASX_3R_DESC_BASE<"xvaddwod.w.h", int_loongarch_lasx_xvaddwod_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>;
+
+def XVADDWOD_D_W : LASX_3R<0b01110100001000110>,
+    LASX_3R_DESC_BASE<"xvaddwod.d.w", int_loongarch_lasx_xvaddwod_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>;
+
+def XVADDWOD_Q_D : LASX_3R<0b01110100001000111>,
+    LASX_3R_DESC_BASE<"xvaddwod.q.d", int_loongarch_lasx_xvaddwod_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>;
+
+
+def XVSUBWOD_H_B : LASX_3R<0b01110100001001000>,
+    LASX_3R_DESC_BASE<"xvsubwod.h.b", int_loongarch_lasx_xvsubwod_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>;
+
+def XVSUBWOD_W_H : LASX_3R<0b01110100001001001>,
+    LASX_3R_DESC_BASE<"xvsubwod.w.h", int_loongarch_lasx_xvsubwod_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>;
+
+def XVSUBWOD_D_W : LASX_3R<0b01110100001001010>,
+    LASX_3R_DESC_BASE<"xvsubwod.d.w", int_loongarch_lasx_xvsubwod_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>;
+
+def XVSUBWOD_Q_D : LASX_3R<0b01110100001001011>,
+    LASX_3R_DESC_BASE<"xvsubwod.q.d", int_loongarch_lasx_xvsubwod_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>;
+
+
+def XVADDWEV_H_BU : LASX_3R<0b01110100001011100>,
+    LASX_3R_DESC_BASE<"xvaddwev.h.bu", int_loongarch_lasx_xvaddwev_h_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>;
+
+def XVADDWEV_W_HU : LASX_3R<0b01110100001011101>,
+    LASX_3R_DESC_BASE<"xvaddwev.w.hu", int_loongarch_lasx_xvaddwev_w_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>;
+
+def XVADDWEV_D_WU : LASX_3R<0b01110100001011110>,
+    LASX_3R_DESC_BASE<"xvaddwev.d.wu", int_loongarch_lasx_xvaddwev_d_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>;
+
+def XVADDWEV_Q_DU : LASX_3R<0b01110100001011111>,
+    LASX_3R_DESC_BASE<"xvaddwev.q.du", int_loongarch_lasx_xvaddwev_q_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>;
+
+
+def XVSUBWEV_H_BU : LASX_3R<0b01110100001100000>,
+    LASX_3R_DESC_BASE<"xvsubwev.h.bu", int_loongarch_lasx_xvsubwev_h_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>;
+
+def XVSUBWEV_W_HU : LASX_3R<0b01110100001100001>,
+    LASX_3R_DESC_BASE<"xvsubwev.w.hu", int_loongarch_lasx_xvsubwev_w_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>;
+
+def XVSUBWEV_D_WU : LASX_3R<0b01110100001100010>,
+    LASX_3R_DESC_BASE<"xvsubwev.d.wu", int_loongarch_lasx_xvsubwev_d_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>;
+
+def XVSUBWEV_Q_DU : LASX_3R<0b01110100001100011>,
+    LASX_3R_DESC_BASE<"xvsubwev.q.du", int_loongarch_lasx_xvsubwev_q_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>;
+
+
+def XVADDWOD_H_BU : LASX_3R<0b01110100001100100>,
+    LASX_3R_DESC_BASE<"xvaddwod.h.bu", int_loongarch_lasx_xvaddwod_h_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>;
+
+def XVADDWOD_W_HU : LASX_3R<0b01110100001100101>,
+    LASX_3R_DESC_BASE<"xvaddwod.w.hu", int_loongarch_lasx_xvaddwod_w_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>;
+
+def XVADDWOD_D_WU : LASX_3R<0b01110100001100110>,
+    LASX_3R_DESC_BASE<"xvaddwod.d.wu", int_loongarch_lasx_xvaddwod_d_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>;
+
+def XVADDWOD_Q_DU : LASX_3R<0b01110100001100111>,
+    LASX_3R_DESC_BASE<"xvaddwod.q.du", int_loongarch_lasx_xvaddwod_q_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>;
+
+
+def XVSUBWOD_H_BU : LASX_3R<0b01110100001101000>,
+    LASX_3R_DESC_BASE<"xvsubwod.h.bu", int_loongarch_lasx_xvsubwod_h_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>;
+
+def XVSUBWOD_W_HU : LASX_3R<0b01110100001101001>,
+    LASX_3R_DESC_BASE<"xvsubwod.w.hu", int_loongarch_lasx_xvsubwod_w_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>;
+
+def XVSUBWOD_D_WU : LASX_3R<0b01110100001101010>,
+    LASX_3R_DESC_BASE<"xvsubwod.d.wu", int_loongarch_lasx_xvsubwod_d_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>;
+
+def XVSUBWOD_Q_DU : LASX_3R<0b01110100001101011>,
+    LASX_3R_DESC_BASE<"xvsubwod.q.du", int_loongarch_lasx_xvsubwod_q_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>;
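+
+// The xvaddwev/xvaddwod families (and the xvsubw* counterparts) widen the
+// even- or odd-indexed source elements before the add/sub; e.g. xvaddwev.h.b
+// sign-extends bytes 0, 2, 4, ... of both sources to halfwords and adds them.
+// They are reachable here only via their intrinsics; sketch (the IR name
+// follows the usual int_loongarch_lasx_* mangling):
+//
+//   %r = call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> %j,
+//                                                          <32 x i8> %k)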
+def XVADDWEV_H_BU_B : LASX_3R<0b01110100001111100>,
+    LASX_3R_DESC_BASE<"xvaddwev.h.bu.b", int_loongarch_lasx_xvaddwev_h_bu_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>;
+
+def XVADDWEV_W_HU_H : LASX_3R<0b01110100001111101>,
+    LASX_3R_DESC_BASE<"xvaddwev.w.hu.h", int_loongarch_lasx_xvaddwev_w_hu_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>;
+
+def XVADDWEV_D_WU_W : LASX_3R<0b01110100001111110>,
+    LASX_3R_DESC_BASE<"xvaddwev.d.wu.w", int_loongarch_lasx_xvaddwev_d_wu_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>;
+
+def XVADDWEV_Q_DU_D : LASX_3R<0b01110100001111111>,
+    LASX_3R_DESC_BASE<"xvaddwev.q.du.d", int_loongarch_lasx_xvaddwev_q_du_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>;
+
+
+def XVADDWOD_H_BU_B : LASX_3R<0b01110100010000000>,
+    LASX_3R_DESC_BASE<"xvaddwod.h.bu.b", int_loongarch_lasx_xvaddwod_h_bu_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>;
+
+def XVADDWOD_W_HU_H : LASX_3R<0b01110100010000001>,
+    LASX_3R_DESC_BASE<"xvaddwod.w.hu.h", int_loongarch_lasx_xvaddwod_w_hu_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>;
+
+def XVADDWOD_D_WU_W : LASX_3R<0b01110100010000010>,
+    LASX_3R_DESC_BASE<"xvaddwod.d.wu.w", int_loongarch_lasx_xvaddwod_d_wu_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>;
+
+def XVADDWOD_Q_DU_D : LASX_3R<0b01110100010000011>,
+    LASX_3R_DESC_BASE<"xvaddwod.q.du.d", int_loongarch_lasx_xvaddwod_q_du_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>;
+
+
+def XVSADD_B : LASX_3R<0b01110100010001100>, IsCommutable,
+    LASX_3R_DESC_BASE<"xvsadd.b", saddsat, LASX256BOpnd>;
+
+def XVSADD_H : LASX_3R<0b01110100010001101>, IsCommutable,
+    LASX_3R_DESC_BASE<"xvsadd.h", saddsat, LASX256HOpnd>;
+
+def XVSADD_W : LASX_3R<0b01110100010001110>, IsCommutable,
+    LASX_3R_DESC_BASE<"xvsadd.w", saddsat, LASX256WOpnd>;
+
+def XVSADD_D : LASX_3R<0b01110100010001111>, IsCommutable,
+    LASX_3R_DESC_BASE<"xvsadd.d", saddsat, LASX256DOpnd>;
+
+
+def XVSSUB_B : LASX_3R<0b01110100010010000>,
+    LASX_3R_DESC_BASE<"xvssub.b", ssubsat, LASX256BOpnd>;
+
+def XVSSUB_H : LASX_3R<0b01110100010010001>,
+    LASX_3R_DESC_BASE<"xvssub.h", ssubsat, LASX256HOpnd>;
+
+def XVSSUB_W : LASX_3R<0b01110100010010010>,
+    LASX_3R_DESC_BASE<"xvssub.w", ssubsat, LASX256WOpnd>;
+
+def XVSSUB_D : LASX_3R<0b01110100010010011>,
+    LASX_3R_DESC_BASE<"xvssub.d", ssubsat, LASX256DOpnd>;
+
+
+def XVSADD_BU : LASX_3R<0b01110100010010100>, IsCommutable,
+    LASX_3R_DESC_BASE<"xvsadd.bu", uaddsat, LASX256BOpnd>;
+
+def XVSADD_HU : LASX_3R<0b01110100010010101>, IsCommutable,
+    LASX_3R_DESC_BASE<"xvsadd.hu", uaddsat, LASX256HOpnd>;
+
+def XVSADD_WU : LASX_3R<0b01110100010010110>, IsCommutable,
+    LASX_3R_DESC_BASE<"xvsadd.wu", uaddsat, LASX256WOpnd>;
+
+def XVSADD_DU : LASX_3R<0b01110100010010111>, IsCommutable,
+    LASX_3R_DESC_BASE<"xvsadd.du", uaddsat, LASX256DOpnd>;
+
+
+def XVSSUB_BU : LASX_3R<0b01110100010011000>,
+    LASX_3R_DESC_BASE<"xvssub.bu", usubsat, LASX256BOpnd>;
+
+def XVSSUB_HU : LASX_3R<0b01110100010011001>,
+    LASX_3R_DESC_BASE<"xvssub.hu", usubsat, LASX256HOpnd>;
+
+def XVSSUB_WU : LASX_3R<0b01110100010011010>,
+    LASX_3R_DESC_BASE<"xvssub.wu", usubsat, LASX256WOpnd>;
+
+def XVSSUB_DU : LASX_3R<0b01110100010011011>,
+    LASX_3R_DESC_BASE<"xvssub.du", usubsat, LASX256DOpnd>;
+
+
+def XVHADDW_H_B : LASX_3R<0b01110100010101000>,
+    LASX_3R_DESC_BASE<"xvhaddw.h.b", int_loongarch_lasx_xvhaddw_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>;
+
+def XVHADDW_W_H : LASX_3R<0b01110100010101001>,
+    LASX_3R_DESC_BASE<"xvhaddw.w.h", int_loongarch_lasx_xvhaddw_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>;
+
+def XVHADDW_D_W : LASX_3R<0b01110100010101010>,
+    LASX_3R_DESC_BASE<"xvhaddw.d.w", int_loongarch_lasx_xvhaddw_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>;
+
+def XVHADDW_Q_D : LASX_3R<0b01110100010101011>,
+    LASX_3R_DESC_BASE<"xvhaddw.q.d", int_loongarch_lasx_xvhaddw_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>;
+
+
+def XVHSUBW_H_B : LASX_3R<0b01110100010101100>,
+    LASX_3R_DESC_BASE<"xvhsubw.h.b", int_loongarch_lasx_xvhsubw_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>;
+
+def XVHSUBW_W_H : LASX_3R<0b01110100010101101>,
+    LASX_3R_DESC_BASE<"xvhsubw.w.h", int_loongarch_lasx_xvhsubw_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>;
+
+def XVHSUBW_D_W : LASX_3R<0b01110100010101110>,
+    LASX_3R_DESC_BASE<"xvhsubw.d.w", int_loongarch_lasx_xvhsubw_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>;
+
+def XVHSUBW_Q_D : LASX_3R<0b01110100010101111>,
+    LASX_3R_DESC_BASE<"xvhsubw.q.d", int_loongarch_lasx_xvhsubw_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>;
+
+
+def XVHADDW_HU_BU : LASX_3R<0b01110100010110000>,
+    LASX_3R_DESC_BASE<"xvhaddw.hu.bu", int_loongarch_lasx_xvhaddw_hu_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>;
+
+def XVHADDW_WU_HU : LASX_3R<0b01110100010110001>,
+    LASX_3R_DESC_BASE<"xvhaddw.wu.hu", int_loongarch_lasx_xvhaddw_wu_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>;
+
+def XVHADDW_DU_WU : LASX_3R<0b01110100010110010>,
+    LASX_3R_DESC_BASE<"xvhaddw.du.wu", int_loongarch_lasx_xvhaddw_du_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>;
+
+def XVHADDW_QU_DU : LASX_3R<0b01110100010110011>,
+    LASX_3R_DESC_BASE<"xvhaddw.qu.du", int_loongarch_lasx_xvhaddw_qu_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>;
+
+
+def XVHSUBW_HU_BU : LASX_3R<0b01110100010110100>,
+    LASX_3R_DESC_BASE<"xvhsubw.hu.bu", int_loongarch_lasx_xvhsubw_hu_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>;
+
+def XVHSUBW_WU_HU : LASX_3R<0b01110100010110101>,
+    LASX_3R_DESC_BASE<"xvhsubw.wu.hu", int_loongarch_lasx_xvhsubw_wu_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>;
+
+def XVHSUBW_DU_WU : LASX_3R<0b01110100010110110>,
+    LASX_3R_DESC_BASE<"xvhsubw.du.wu", int_loongarch_lasx_xvhsubw_du_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>;
+
+def XVHSUBW_QU_DU : LASX_3R<0b01110100010110111>,
+    LASX_3R_DESC_BASE<"xvhsubw.qu.du", int_loongarch_lasx_xvhsubw_qu_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>;
+
+
+def XVADDA_B : LASX_3R<0b01110100010111000>, IsCommutable,
+    LASX_3R_DESC_BASE<"xvadda.b", int_loongarch_lasx_xvadda_b, LASX256BOpnd>;
+
+def XVADDA_H : LASX_3R<0b01110100010111001>, IsCommutable,
+    LASX_3R_DESC_BASE<"xvadda.h", int_loongarch_lasx_xvadda_h, LASX256HOpnd>;
+
+def XVADDA_W : LASX_3R<0b01110100010111010>, IsCommutable,
+    LASX_3R_DESC_BASE<"xvadda.w", int_loongarch_lasx_xvadda_w, LASX256WOpnd>;
+
+def XVADDA_D : LASX_3R<0b01110100010111011>, IsCommutable,
+    LASX_3R_DESC_BASE<"xvadda.d", int_loongarch_lasx_xvadda_d, LASX256DOpnd>;
+
+
+def XVABSD_B : LASX_3R<0b01110100011000000>,
+    LASX_3R_DESC_BASE<"xvabsd.b", int_loongarch_lasx_xvabsd_b, LASX256BOpnd>;
+
+def XVABSD_H : LASX_3R<0b01110100011000001>,
+    LASX_3R_DESC_BASE<"xvabsd.h", int_loongarch_lasx_xvabsd_h, LASX256HOpnd>;
+
+def XVABSD_W : LASX_3R<0b01110100011000010>,
+    LASX_3R_DESC_BASE<"xvabsd.w", int_loongarch_lasx_xvabsd_w, LASX256WOpnd>;
+
+def XVABSD_D : LASX_3R<0b01110100011000011>,
+    LASX_3R_DESC_BASE<"xvabsd.d", int_loongarch_lasx_xvabsd_d, LASX256DOpnd>;
+
+
+def XVABSD_BU : LASX_3R<0b01110100011000100>,
+    LASX_3R_DESC_BASE<"xvabsd.bu", int_loongarch_lasx_xvabsd_bu, LASX256BOpnd>;
+
+def XVABSD_HU : LASX_3R<0b01110100011000101>,
+    LASX_3R_DESC_BASE<"xvabsd.hu", int_loongarch_lasx_xvabsd_hu, LASX256HOpnd>;
+
+def XVABSD_WU : LASX_3R<0b01110100011000110>,
+    LASX_3R_DESC_BASE<"xvabsd.wu", int_loongarch_lasx_xvabsd_wu, LASX256WOpnd>;
+
+def XVABSD_DU : LASX_3R<0b01110100011000111>,
+    LASX_3R_DESC_BASE<"xvabsd.du", int_loongarch_lasx_xvabsd_du, LASX256DOpnd>;
+
+
+def XVAVG_B : LASX_3R<0b01110100011001000>, IsCommutable,
+    LASX_3R_DESC_BASE<"xvavg.b", int_loongarch_lasx_xvavg_b, LASX256BOpnd>;
+
+def XVAVG_H : LASX_3R<0b01110100011001001>, IsCommutable,
+    LASX_3R_DESC_BASE<"xvavg.h", int_loongarch_lasx_xvavg_h, LASX256HOpnd>;
+
+def XVAVG_W : LASX_3R<0b01110100011001010>, IsCommutable,
+    LASX_3R_DESC_BASE<"xvavg.w", int_loongarch_lasx_xvavg_w, LASX256WOpnd>;
+
+def XVAVG_D : LASX_3R<0b01110100011001011>, IsCommutable,
+    LASX_3R_DESC_BASE<"xvavg.d", int_loongarch_lasx_xvavg_d, LASX256DOpnd>;
+
+
+def XVAVG_BU : LASX_3R<0b01110100011001100>, IsCommutable,
+    LASX_3R_DESC_BASE<"xvavg.bu", int_loongarch_lasx_xvavg_bu, LASX256BOpnd>;
+
+def XVAVG_HU : LASX_3R<0b01110100011001101>, IsCommutable,
+    LASX_3R_DESC_BASE<"xvavg.hu", int_loongarch_lasx_xvavg_hu, LASX256HOpnd>;
+
+def XVAVG_WU : LASX_3R<0b01110100011001110>, IsCommutable,
+    LASX_3R_DESC_BASE<"xvavg.wu", int_loongarch_lasx_xvavg_wu, LASX256WOpnd>;
+
+def XVAVG_DU : LASX_3R<0b01110100011001111>, IsCommutable,
+    LASX_3R_DESC_BASE<"xvavg.du", int_loongarch_lasx_xvavg_du, LASX256DOpnd>;
+
+
+def XVAVGR_B : LASX_3R<0b01110100011010000>, IsCommutable,
+    LASX_3R_DESC_BASE<"xvavgr.b", int_loongarch_lasx_xvavgr_b, LASX256BOpnd>;
+
+def XVAVGR_H : LASX_3R<0b01110100011010001>, IsCommutable,
+    LASX_3R_DESC_BASE<"xvavgr.h", int_loongarch_lasx_xvavgr_h, LASX256HOpnd>;
+
+def XVAVGR_W : LASX_3R<0b01110100011010010>, IsCommutable,
+    LASX_3R_DESC_BASE<"xvavgr.w", int_loongarch_lasx_xvavgr_w, LASX256WOpnd>;
+
+def XVAVGR_D : LASX_3R<0b01110100011010011>, IsCommutable,
+    LASX_3R_DESC_BASE<"xvavgr.d", int_loongarch_lasx_xvavgr_d, LASX256DOpnd>;
+
+
+def XVAVGR_BU : LASX_3R<0b01110100011010100>, IsCommutable,
+    LASX_3R_DESC_BASE<"xvavgr.bu", int_loongarch_lasx_xvavgr_bu, LASX256BOpnd>;
+
+def XVAVGR_HU : LASX_3R<0b01110100011010101>, IsCommutable,
+    LASX_3R_DESC_BASE<"xvavgr.hu", int_loongarch_lasx_xvavgr_hu, LASX256HOpnd>;
+
+def XVAVGR_WU : LASX_3R<0b01110100011010110>, IsCommutable,
+    LASX_3R_DESC_BASE<"xvavgr.wu", int_loongarch_lasx_xvavgr_wu, LASX256WOpnd>;
+
+def XVAVGR_DU : LASX_3R<0b01110100011010111>, IsCommutable,
+    LASX_3R_DESC_BASE<"xvavgr.du", int_loongarch_lasx_xvavgr_du, LASX256DOpnd>;
+
+
+def XVMAX_B : LASX_3R<0b01110100011100000>,
+    LASX_3R_DESC_BASE<"xvmax.b", smax, LASX256BOpnd>;
+
+def XVMAX_H : LASX_3R<0b01110100011100001>,
+    LASX_3R_DESC_BASE<"xvmax.h", smax, LASX256HOpnd>;
+
+def XVMAX_W : LASX_3R<0b01110100011100010>,
+    LASX_3R_DESC_BASE<"xvmax.w", smax, LASX256WOpnd>;
+
+def XVMAX_D : LASX_3R<0b01110100011100011>,
+    LASX_3R_DESC_BASE<"xvmax.d", smax, LASX256DOpnd>;
+
+
+def XVMIN_B : LASX_3R<0b01110100011100100>,
+    LASX_3R_DESC_BASE<"xvmin.b", smin, LASX256BOpnd>;
+
+def XVMIN_H : LASX_3R<0b01110100011100101>,
+    LASX_3R_DESC_BASE<"xvmin.h", smin, LASX256HOpnd>;
+
+def XVMIN_W : LASX_3R<0b01110100011100110>,
+    LASX_3R_DESC_BASE<"xvmin.w", smin, LASX256WOpnd>;
+
+def XVMIN_D : LASX_3R<0b01110100011100111>,
+    LASX_3R_DESC_BASE<"xvmin.d", smin, LASX256DOpnd>;
+
+
+def XVMAX_BU : LASX_3R<0b01110100011101000>,
+    LASX_3R_DESC_BASE<"xvmax.bu", umax, LASX256BOpnd>;
+
+def XVMAX_HU : LASX_3R<0b01110100011101001>,
+    LASX_3R_DESC_BASE<"xvmax.hu", umax, LASX256HOpnd>;
+
+def XVMAX_WU : LASX_3R<0b01110100011101010>,
+    LASX_3R_DESC_BASE<"xvmax.wu", umax, LASX256WOpnd>;
+
+def XVMAX_DU : LASX_3R<0b01110100011101011>,
+    LASX_3R_DESC_BASE<"xvmax.du", umax, LASX256DOpnd>;
+
+
+def XVMIN_BU : LASX_3R<0b01110100011101100>,
+    LASX_3R_DESC_BASE<"xvmin.bu", umin, LASX256BOpnd>;
+
+def XVMIN_HU : LASX_3R<0b01110100011101101>,
+    LASX_3R_DESC_BASE<"xvmin.hu", umin, LASX256HOpnd>;
+
+def XVMIN_WU : LASX_3R<0b01110100011101110>,
+    LASX_3R_DESC_BASE<"xvmin.wu", umin, LASX256WOpnd>;
+
+def XVMIN_DU : LASX_3R<0b01110100011101111>,
+    LASX_3R_DESC_BASE<"xvmin.du", umin, LASX256DOpnd>;
+
+
+def XVMUL_B : LASX_3R<0b01110100100001000>,
+    LASX_3R_DESC_BASE<"xvmul.b", mul, LASX256BOpnd>, IsCommutable;
+
+def XVMUL_H : LASX_3R<0b01110100100001001>,
+    LASX_3R_DESC_BASE<"xvmul.h", mul, LASX256HOpnd>, IsCommutable;
+
+def XVMUL_W : LASX_3R<0b01110100100001010>,
+    LASX_3R_DESC_BASE<"xvmul.w", mul, LASX256WOpnd>, IsCommutable;
+
+def XVMUL_D : LASX_3R<0b01110100100001011>,
+    LASX_3R_DESC_BASE<"xvmul.d", mul, LASX256DOpnd>, IsCommutable;
+
+
+def XVMUH_B : LASX_3R<0b01110100100001100>,
+    LASX_3R_DESC_BASE<"xvmuh.b", int_loongarch_lasx_xvmuh_b, LASX256BOpnd, LASX256BOpnd, LASX256BOpnd>;
+
+def XVMUH_H : LASX_3R<0b01110100100001101>,
+    LASX_3R_DESC_BASE<"xvmuh.h", int_loongarch_lasx_xvmuh_h, LASX256HOpnd, LASX256HOpnd, LASX256HOpnd>;
+
+def XVMUH_W : LASX_3R<0b01110100100001110>,
+    LASX_3R_DESC_BASE<"xvmuh.w", int_loongarch_lasx_xvmuh_w, LASX256WOpnd, LASX256WOpnd, LASX256WOpnd>;
+
+def XVMUH_D : LASX_3R<0b01110100100001111>,
+    LASX_3R_DESC_BASE<"xvmuh.d", int_loongarch_lasx_xvmuh_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>;
+
+
+def XVMUH_BU : LASX_3R<0b01110100100010000>,
+    LASX_3R_DESC_BASE<"xvmuh.bu", int_loongarch_lasx_xvmuh_bu, LASX256BOpnd, LASX256BOpnd, LASX256BOpnd>;
+
+def XVMUH_HU : LASX_3R<0b01110100100010001>,
+    LASX_3R_DESC_BASE<"xvmuh.hu", int_loongarch_lasx_xvmuh_hu, LASX256HOpnd, LASX256HOpnd, LASX256HOpnd>;
+
+def XVMUH_WU : LASX_3R<0b01110100100010010>,
+    LASX_3R_DESC_BASE<"xvmuh.wu", int_loongarch_lasx_xvmuh_wu, LASX256WOpnd, LASX256WOpnd, LASX256WOpnd>;
+
+def XVMUH_DU : LASX_3R<0b01110100100010011>,
+    LASX_3R_DESC_BASE<"xvmuh.du", int_loongarch_lasx_xvmuh_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>;
+
+
+def XVMULWEV_H_B : LASX_3R<0b01110100100100000>,
+    LASX_3R_DESC_BASE<"xvmulwev.h.b", int_loongarch_lasx_xvmulwev_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>;
+
+def XVMULWEV_W_H : LASX_3R<0b01110100100100001>,
+    LASX_3R_DESC_BASE<"xvmulwev.w.h", int_loongarch_lasx_xvmulwev_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>;
+
+def XVMULWEV_D_W : LASX_3R<0b01110100100100010>,
+    LASX_3R_DESC_BASE<"xvmulwev.d.w", int_loongarch_lasx_xvmulwev_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>;
+
+def XVMULWEV_Q_D : LASX_3R<0b01110100100100011>,
+    LASX_3R_DESC_BASE<"xvmulwev.q.d", int_loongarch_lasx_xvmulwev_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>;
+
+
+def XVMULWOD_H_B : LASX_3R<0b01110100100100100>,
+    LASX_3R_DESC_BASE<"xvmulwod.h.b", int_loongarch_lasx_xvmulwod_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>;
+
+def XVMULWOD_W_H : LASX_3R<0b01110100100100101>,
+    LASX_3R_DESC_BASE<"xvmulwod.w.h", int_loongarch_lasx_xvmulwod_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>;
+
+def XVMULWOD_D_W : LASX_3R<0b01110100100100110>,
+    LASX_3R_DESC_BASE<"xvmulwod.d.w", int_loongarch_lasx_xvmulwod_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>;
+
+def XVMULWOD_Q_D : LASX_3R<0b01110100100100111>,
+    LASX_3R_DESC_BASE<"xvmulwod.q.d", int_loongarch_lasx_xvmulwod_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>;
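+
+// xvmulwev/xvmulwod mirror the widening adds earlier in this file: the even-
+// or odd-indexed elements are multiplied into a double-width result without a
+// separate extend step. Illustrative intrinsic call (assuming the standard
+// int_loongarch_lasx_* to llvm.loongarch.lasx.* mangling):
+//
+//   %r = call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> %j,
+//                                                         <16 x i16> %k)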
+
+
+def XVMULWEV_H_BU : LASX_3R<0b01110100100110000>,
+    LASX_3R_DESC_BASE<"xvmulwev.h.bu", int_loongarch_lasx_xvmulwev_h_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>;
+
+def XVMULWEV_W_HU : LASX_3R<0b01110100100110001>,
+    LASX_3R_DESC_BASE<"xvmulwev.w.hu", int_loongarch_lasx_xvmulwev_w_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>;
+
+def XVMULWEV_D_WU : LASX_3R<0b01110100100110010>,
+    LASX_3R_DESC_BASE<"xvmulwev.d.wu", int_loongarch_lasx_xvmulwev_d_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>;
+
+def XVMULWEV_Q_DU : LASX_3R<0b01110100100110011>,
+    LASX_3R_DESC_BASE<"xvmulwev.q.du", int_loongarch_lasx_xvmulwev_q_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>;
+
+
+def XVMULWOD_H_BU : LASX_3R<0b01110100100110100>,
+    LASX_3R_DESC_BASE<"xvmulwod.h.bu", int_loongarch_lasx_xvmulwod_h_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>;
+
+def XVMULWOD_W_HU : LASX_3R<0b01110100100110101>,
+    LASX_3R_DESC_BASE<"xvmulwod.w.hu", int_loongarch_lasx_xvmulwod_w_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>;
+
+def XVMULWOD_D_WU : LASX_3R<0b01110100100110110>,
+    LASX_3R_DESC_BASE<"xvmulwod.d.wu", int_loongarch_lasx_xvmulwod_d_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>;
+
+def XVMULWOD_Q_DU : LASX_3R<0b01110100100110111>,
+    LASX_3R_DESC_BASE<"xvmulwod.q.du", int_loongarch_lasx_xvmulwod_q_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>;
+
+
+def XVMULWEV_H_BU_B : LASX_3R<0b01110100101000000>,
+    LASX_3R_DESC_BASE<"xvmulwev.h.bu.b", int_loongarch_lasx_xvmulwev_h_bu_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>;
+
+def XVMULWEV_W_HU_H : LASX_3R<0b01110100101000001>,
+    LASX_3R_DESC_BASE<"xvmulwev.w.hu.h", int_loongarch_lasx_xvmulwev_w_hu_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>;
+
+def XVMULWEV_D_WU_W : LASX_3R<0b01110100101000010>,
+    LASX_3R_DESC_BASE<"xvmulwev.d.wu.w", int_loongarch_lasx_xvmulwev_d_wu_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>;
+
+def XVMULWEV_Q_DU_D : LASX_3R<0b01110100101000011>,
+    LASX_3R_DESC_BASE<"xvmulwev.q.du.d", int_loongarch_lasx_xvmulwev_q_du_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>;
+
+
+def XVMULWOD_H_BU_B : LASX_3R<0b01110100101000100>,
+    LASX_3R_DESC_BASE<"xvmulwod.h.bu.b", int_loongarch_lasx_xvmulwod_h_bu_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>;
+
+def XVMULWOD_W_HU_H : LASX_3R<0b01110100101000101>,
+    LASX_3R_DESC_BASE<"xvmulwod.w.hu.h", int_loongarch_lasx_xvmulwod_w_hu_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>;
+
+def XVMULWOD_D_WU_W : LASX_3R<0b01110100101000110>,
+    LASX_3R_DESC_BASE<"xvmulwod.d.wu.w", int_loongarch_lasx_xvmulwod_d_wu_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>;
+
+def XVMULWOD_Q_DU_D : LASX_3R<0b01110100101000111>,
+    LASX_3R_DESC_BASE<"xvmulwod.q.du.d", int_loongarch_lasx_xvmulwod_q_du_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>;
+
+
+def XVMADD_B : LASX_3R<0b01110100101010000>,
+    LASX_3R_4R_DESC_BASE<"xvmadd.b", muladd, LASX256BOpnd>;
+
+def XVMADD_H : LASX_3R<0b01110100101010001>,
+    LASX_3R_4R_DESC_BASE<"xvmadd.h", muladd, LASX256HOpnd>;
+
+def XVMADD_W : LASX_3R<0b01110100101010010>,
+    LASX_3R_4R_DESC_BASE<"xvmadd.w", muladd, LASX256WOpnd>;
+
+def XVMADD_D : LASX_3R<0b01110100101010011>,
+    LASX_3R_4R_DESC_BASE<"xvmadd.d", muladd, LASX256DOpnd>;
+
+
+def XVMSUB_B : LASX_3R<0b01110100101010100>,
+    LASX_3R_4R_DESC_BASE<"xvmsub.b", mulsub, LASX256BOpnd>;
+
+def XVMSUB_H : LASX_3R<0b01110100101010101>,
+    LASX_3R_4R_DESC_BASE<"xvmsub.h", mulsub, LASX256HOpnd>;
+
+def XVMSUB_W : LASX_3R<0b01110100101010110>,
+    LASX_3R_4R_DESC_BASE<"xvmsub.w", mulsub, LASX256WOpnd>;
+
+def XVMSUB_D : LASX_3R<0b01110100101010111>,
+    LASX_3R_4R_DESC_BASE<"xvmsub.d", mulsub, LASX256DOpnd>;
+
+
+def XVMADDWEV_H_B : LASX_3R<0b01110100101011000>,
+    LASX_3R_4R_DESC_BASE<"xvmaddwev.h.b", int_loongarch_lasx_xvmaddwev_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>;
+
+def XVMADDWEV_W_H : LASX_3R<0b01110100101011001>,
+    LASX_3R_4R_DESC_BASE<"xvmaddwev.w.h", int_loongarch_lasx_xvmaddwev_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>;
+
+def XVMADDWEV_D_W : LASX_3R<0b01110100101011010>,
+    LASX_3R_4R_DESC_BASE<"xvmaddwev.d.w", int_loongarch_lasx_xvmaddwev_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>;
+
+def XVMADDWEV_Q_D : LASX_3R<0b01110100101011011>,
+    LASX_3R_4R_DESC_BASE<"xvmaddwev.q.d", int_loongarch_lasx_xvmaddwev_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>;
+
+
+def XVMADDWOD_H_B : LASX_3R<0b01110100101011100>,
+    LASX_3R_4R_DESC_BASE<"xvmaddwod.h.b", int_loongarch_lasx_xvmaddwod_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>;
+
+def XVMADDWOD_W_H : LASX_3R<0b01110100101011101>,
+    LASX_3R_4R_DESC_BASE<"xvmaddwod.w.h", int_loongarch_lasx_xvmaddwod_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>;
+
+def XVMADDWOD_D_W : LASX_3R<0b01110100101011110>,
+    LASX_3R_4R_DESC_BASE<"xvmaddwod.d.w", int_loongarch_lasx_xvmaddwod_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>;
+
+def XVMADDWOD_Q_D : LASX_3R<0b01110100101011111>,
+    LASX_3R_4R_DESC_BASE<"xvmaddwod.q.d", int_loongarch_lasx_xvmaddwod_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>;
+
+
+def XVMADDWEV_H_BU : LASX_3R<0b01110100101101000>,
+    LASX_3R_4R_DESC_BASE<"xvmaddwev.h.bu", int_loongarch_lasx_xvmaddwev_h_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>;
+
+def XVMADDWEV_W_HU : LASX_3R<0b01110100101101001>,
+    LASX_3R_4R_DESC_BASE<"xvmaddwev.w.hu", int_loongarch_lasx_xvmaddwev_w_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>;
+
+def XVMADDWEV_D_WU : LASX_3R<0b01110100101101010>,
+    LASX_3R_4R_DESC_BASE<"xvmaddwev.d.wu", int_loongarch_lasx_xvmaddwev_d_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>;
+
+def XVMADDWEV_Q_DU : LASX_3R<0b01110100101101011>,
+    LASX_3R_4R_DESC_BASE<"xvmaddwev.q.du", int_loongarch_lasx_xvmaddwev_q_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>;
+
+
+def XVMADDWOD_H_BU : LASX_3R<0b01110100101101100>,
+    LASX_3R_4R_DESC_BASE<"xvmaddwod.h.bu", int_loongarch_lasx_xvmaddwod_h_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>;
+
+def XVMADDWOD_W_HU : LASX_3R<0b01110100101101101>,
+    LASX_3R_4R_DESC_BASE<"xvmaddwod.w.hu", int_loongarch_lasx_xvmaddwod_w_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>;
+
+def XVMADDWOD_D_WU : LASX_3R<0b01110100101101110>,
+    LASX_3R_4R_DESC_BASE<"xvmaddwod.d.wu", int_loongarch_lasx_xvmaddwod_d_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>;
+
+def XVMADDWOD_Q_DU : LASX_3R<0b01110100101101111>,
+    LASX_3R_4R_DESC_BASE<"xvmaddwod.q.du", int_loongarch_lasx_xvmaddwod_q_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>;
+
+
+def XVMADDWEV_H_BU_B : LASX_3R<0b01110100101111000>,
+    LASX_3R_4R_DESC_BASE<"xvmaddwev.h.bu.b", int_loongarch_lasx_xvmaddwev_h_bu_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>;
+
+def XVMADDWEV_W_HU_H : LASX_3R<0b01110100101111001>,
+    LASX_3R_4R_DESC_BASE<"xvmaddwev.w.hu.h", int_loongarch_lasx_xvmaddwev_w_hu_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>;
+
+def XVMADDWEV_D_WU_W : LASX_3R<0b01110100101111010>,
+    LASX_3R_4R_DESC_BASE<"xvmaddwev.d.wu.w", int_loongarch_lasx_xvmaddwev_d_wu_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>;
+
+def XVMADDWEV_Q_DU_D : LASX_3R<0b01110100101111011>,
+    LASX_3R_4R_DESC_BASE<"xvmaddwev.q.du.d", int_loongarch_lasx_xvmaddwev_q_du_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>;
+
+
+def XVMADDWOD_H_BU_B : LASX_3R<0b01110100101111100>,
+    LASX_3R_4R_DESC_BASE<"xvmaddwod.h.bu.b", int_loongarch_lasx_xvmaddwod_h_bu_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>;
+
+def XVMADDWOD_W_HU_H : LASX_3R<0b01110100101111101>,
+    LASX_3R_4R_DESC_BASE<"xvmaddwod.w.hu.h", int_loongarch_lasx_xvmaddwod_w_hu_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>;
+
+def XVMADDWOD_D_WU_W : LASX_3R<0b01110100101111110>,
+    LASX_3R_4R_DESC_BASE<"xvmaddwod.d.wu.w", int_loongarch_lasx_xvmaddwod_d_wu_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>;
+
+def XVMADDWOD_Q_DU_D : LASX_3R<0b01110100101111111>,
+    LASX_3R_4R_DESC_BASE<"xvmaddwod.q.du.d", int_loongarch_lasx_xvmaddwod_q_du_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>;
+
+
+def XVDIV_B : LASX_3R<0b01110100111000000>,
+    LASX_3R_DESC_BASE<"xvdiv.b", sdiv, LASX256BOpnd>;
+
+def XVDIV_H : LASX_3R<0b01110100111000001>,
+    LASX_3R_DESC_BASE<"xvdiv.h", sdiv, LASX256HOpnd>;
+
+def XVDIV_W : LASX_3R<0b01110100111000010>,
+    LASX_3R_DESC_BASE<"xvdiv.w", sdiv, LASX256WOpnd>;
+
+def XVDIV_D : LASX_3R<0b01110100111000011>,
+    LASX_3R_DESC_BASE<"xvdiv.d", sdiv, LASX256DOpnd>;
+
+
+def XVMOD_B : LASX_3R<0b01110100111000100>,
+    LASX_3R_DESC_BASE<"xvmod.b", srem, LASX256BOpnd>;
+
+def XVMOD_H : LASX_3R<0b01110100111000101>,
+    LASX_3R_DESC_BASE<"xvmod.h", srem, LASX256HOpnd>;
+
+def XVMOD_W : LASX_3R<0b01110100111000110>,
+    LASX_3R_DESC_BASE<"xvmod.w", srem, LASX256WOpnd>;
+
+def XVMOD_D : LASX_3R<0b01110100111000111>,
+    LASX_3R_DESC_BASE<"xvmod.d", srem, LASX256DOpnd>;
+
+
+def XVDIV_BU : LASX_3R<0b01110100111001000>,
+    LASX_3R_DESC_BASE<"xvdiv.bu", udiv, LASX256BOpnd>;
+
+def XVDIV_HU : LASX_3R<0b01110100111001001>,
+    LASX_3R_DESC_BASE<"xvdiv.hu", udiv, LASX256HOpnd>;
+
+def XVDIV_WU : LASX_3R<0b01110100111001010>,
+    LASX_3R_DESC_BASE<"xvdiv.wu", udiv, LASX256WOpnd>;
+
+def XVDIV_DU : LASX_3R<0b01110100111001011>,
+    LASX_3R_DESC_BASE<"xvdiv.du", udiv, LASX256DOpnd>;
+
+
+def XVMOD_BU : LASX_3R<0b01110100111001100>,
+    LASX_3R_DESC_BASE<"xvmod.bu", urem, LASX256BOpnd>;
+
+def XVMOD_HU : LASX_3R<0b01110100111001101>,
+    LASX_3R_DESC_BASE<"xvmod.hu", urem, LASX256HOpnd>;
+
+def XVMOD_WU : LASX_3R<0b01110100111001110>,
+    LASX_3R_DESC_BASE<"xvmod.wu", urem, LASX256WOpnd>;
+
+def XVMOD_DU : LASX_3R<0b01110100111001111>,
+    LASX_3R_DESC_BASE<"xvmod.du", urem, LASX256DOpnd>;
+
+
+def XVSLL_B : LASX_3R<0b01110100111010000>,
+    LASX_3R_DESC_BASE<"xvsll.b", shl, LASX256BOpnd>;
+
+def XVSLL_H : LASX_3R<0b01110100111010001>,
+    LASX_3R_DESC_BASE<"xvsll.h", shl, LASX256HOpnd>;
+
+def XVSLL_W : LASX_3R<0b01110100111010010>,
+    LASX_3R_DESC_BASE<"xvsll.w", shl, LASX256WOpnd>;
+
+def XVSLL_D : LASX_3R<0b01110100111010011>,
+    LASX_3R_DESC_BASE<"xvsll.d", shl, LASX256DOpnd>;
+
+
+def XVSRL_B : LASX_3R<0b01110100111010100>,
+    LASX_3R_DESC_BASE<"xvsrl.b", srl, LASX256BOpnd>;
+
+def XVSRL_H : LASX_3R<0b01110100111010101>,
+    LASX_3R_DESC_BASE<"xvsrl.h", srl, LASX256HOpnd>;
+
+def XVSRL_W : LASX_3R<0b01110100111010110>,
+    LASX_3R_DESC_BASE<"xvsrl.w", srl, LASX256WOpnd>;
+
+def XVSRL_D : LASX_3R<0b01110100111010111>,
+    LASX_3R_DESC_BASE<"xvsrl.d", srl, LASX256DOpnd>;
+
+
+def XVSRA_B : LASX_3R<0b01110100111011000>,
+    LASX_3R_DESC_BASE<"xvsra.b", sra, LASX256BOpnd>;
+
+def XVSRA_H : LASX_3R<0b01110100111011001>,
+    LASX_3R_DESC_BASE<"xvsra.h", sra, LASX256HOpnd>;
+
+def XVSRA_W : LASX_3R<0b01110100111011010>,
+    LASX_3R_DESC_BASE<"xvsra.w", sra, LASX256WOpnd>;
+
+def XVSRA_D : LASX_3R<0b01110100111011011>,
+    LASX_3R_DESC_BASE<"xvsra.d", sra, LASX256DOpnd>;
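+
+// xvsll/xvsrl/xvsra map straight onto the generic shl/srl/sra nodes with a
+// per-element shift amount; the rotate and rounding-shift variants that
+// follow are intrinsic-only. Illustrative sketch:
+//
+//   %r = shl <32 x i8> %v, %amt            ; expected: xvsll.b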
LASX_3R_DESC_BASE<"xvrotr.b", int_loongarch_lasx_xvrotr_b, LASX256BOpnd>; + +def XVROTR_H : LASX_3R<0b01110100111011101>, + LASX_3R_DESC_BASE<"xvrotr.h", int_loongarch_lasx_xvrotr_h, LASX256HOpnd>; + +def XVROTR_W : LASX_3R<0b01110100111011110>, + LASX_3R_DESC_BASE<"xvrotr.w", int_loongarch_lasx_xvrotr_w, LASX256WOpnd>; + +def XVROTR_D : LASX_3R<0b01110100111011111>, + LASX_3R_DESC_BASE<"xvrotr.d", int_loongarch_lasx_xvrotr_d, LASX256DOpnd>; + + +def XVSRLR_B : LASX_3R<0b01110100111100000>, + LASX_3R_DESC_BASE<"xvsrlr.b", int_loongarch_lasx_xvsrlr_b, LASX256BOpnd>; + +def XVSRLR_H : LASX_3R<0b01110100111100001>, + LASX_3R_DESC_BASE<"xvsrlr.h", int_loongarch_lasx_xvsrlr_h, LASX256HOpnd>; + +def XVSRLR_W : LASX_3R<0b01110100111100010>, + LASX_3R_DESC_BASE<"xvsrlr.w", int_loongarch_lasx_xvsrlr_w, LASX256WOpnd>; + +def XVSRLR_D : LASX_3R<0b01110100111100011>, + LASX_3R_DESC_BASE<"xvsrlr.d", int_loongarch_lasx_xvsrlr_d, LASX256DOpnd>; + + +def XVSRAR_B : LASX_3R<0b01110100111100100>, + LASX_3R_DESC_BASE<"xvsrar.b", int_loongarch_lasx_xvsrar_b, LASX256BOpnd>; + +def XVSRAR_H : LASX_3R<0b01110100111100101>, + LASX_3R_DESC_BASE<"xvsrar.h", int_loongarch_lasx_xvsrar_h, LASX256HOpnd>; + +def XVSRAR_W : LASX_3R<0b01110100111100110>, + LASX_3R_DESC_BASE<"xvsrar.w", int_loongarch_lasx_xvsrar_w, LASX256WOpnd>; + +def XVSRAR_D : LASX_3R<0b01110100111100111>, + LASX_3R_DESC_BASE<"xvsrar.d", int_loongarch_lasx_xvsrar_d, LASX256DOpnd>; + + +def XVSRLN_B_H : LASX_3R<0b01110100111101001>, + LASX_3R_DESC_BASE<"xvsrln.b.h", int_loongarch_lasx_xvsrln_b_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVSRLN_H_W : LASX_3R<0b01110100111101010>, + LASX_3R_DESC_BASE<"xvsrln.h.w", int_loongarch_lasx_xvsrln_h_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; + +def XVSRLN_W_D : LASX_3R<0b01110100111101011>, + LASX_3R_DESC_BASE<"xvsrln.w.d", int_loongarch_lasx_xvsrln_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVSRAN_B_H : LASX_3R<0b01110100111101101>, + LASX_3R_DESC_BASE<"xvsran.b.h", int_loongarch_lasx_xvsran_b_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVSRAN_H_W : LASX_3R<0b01110100111101110>, + LASX_3R_DESC_BASE<"xvsran.h.w", int_loongarch_lasx_xvsran_h_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; + +def XVSRAN_W_D : LASX_3R<0b01110100111101111>, + LASX_3R_DESC_BASE<"xvsran.w.d", int_loongarch_lasx_xvsran_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVSRLRN_B_H : LASX_3R<0b01110100111110001>, + LASX_3R_DESC_BASE<"xvsrlrn.b.h", int_loongarch_lasx_xvsrlrn_b_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVSRLRN_H_W : LASX_3R<0b01110100111110010>, + LASX_3R_DESC_BASE<"xvsrlrn.h.w", int_loongarch_lasx_xvsrlrn_h_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; + +def XVSRLRN_W_D : LASX_3R<0b01110100111110011>, + LASX_3R_DESC_BASE<"xvsrlrn.w.d", int_loongarch_lasx_xvsrlrn_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVSRARN_B_H : LASX_3R<0b01110100111110101>, + LASX_3R_DESC_BASE<"xvsrarn.b.h", int_loongarch_lasx_xvsrarn_b_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVSRARN_H_W : LASX_3R<0b01110100111110110>, + LASX_3R_DESC_BASE<"xvsrarn.h.w", int_loongarch_lasx_xvsrarn_h_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; + +def XVSRARN_W_D : LASX_3R<0b01110100111110111>, + LASX_3R_DESC_BASE<"xvsrarn.w.d", int_loongarch_lasx_xvsrarn_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVSSRLN_B_H : LASX_3R<0b01110100111111001>, + LASX_3R_DESC_BASE<"xvssrln.b.h", int_loongarch_lasx_xvssrln_b_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVSSRLN_H_W : 
+
+def XVSSRLN_H_W : LASX_3R<0b01110100111111010>,
+    LASX_3R_DESC_BASE<"xvssrln.h.w", int_loongarch_lasx_xvssrln_h_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>;
+
+def XVSSRLN_W_D : LASX_3R<0b01110100111111011>,
+    LASX_3R_DESC_BASE<"xvssrln.w.d", int_loongarch_lasx_xvssrln_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>;
+
+
+def XVSSRAN_B_H : LASX_3R<0b01110100111111101>,
+    LASX_3R_DESC_BASE<"xvssran.b.h", int_loongarch_lasx_xvssran_b_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>;
+
+def XVSSRAN_H_W : LASX_3R<0b01110100111111110>,
+    LASX_3R_DESC_BASE<"xvssran.h.w", int_loongarch_lasx_xvssran_h_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>;
+
+def XVSSRAN_W_D : LASX_3R<0b01110100111111111>,
+    LASX_3R_DESC_BASE<"xvssran.w.d", int_loongarch_lasx_xvssran_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>;
+
+
+def XVSSRLRN_B_H : LASX_3R<0b01110101000000001>,
+    LASX_3R_DESC_BASE<"xvssrlrn.b.h", int_loongarch_lasx_xvssrlrn_b_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>;
+
+def XVSSRLRN_H_W : LASX_3R<0b01110101000000010>,
+    LASX_3R_DESC_BASE<"xvssrlrn.h.w", int_loongarch_lasx_xvssrlrn_h_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>;
+
+def XVSSRLRN_W_D : LASX_3R<0b01110101000000011>,
+    LASX_3R_DESC_BASE<"xvssrlrn.w.d", int_loongarch_lasx_xvssrlrn_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>;
+
+
+def XVSSRARN_B_H : LASX_3R<0b01110101000000101>,
+    LASX_3R_DESC_BASE<"xvssrarn.b.h", int_loongarch_lasx_xvssrarn_b_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>;
+
+def XVSSRARN_H_W : LASX_3R<0b01110101000000110>,
+    LASX_3R_DESC_BASE<"xvssrarn.h.w", int_loongarch_lasx_xvssrarn_h_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>;
+
+def XVSSRARN_W_D : LASX_3R<0b01110101000000111>,
+    LASX_3R_DESC_BASE<"xvssrarn.w.d", int_loongarch_lasx_xvssrarn_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>;
+
+
+def XVSSRLN_BU_H : LASX_3R<0b01110101000001001>,
+    LASX_3R_DESC_BASE<"xvssrln.bu.h", int_loongarch_lasx_xvssrln_bu_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>;
+
+def XVSSRLN_HU_W : LASX_3R<0b01110101000001010>,
+    LASX_3R_DESC_BASE<"xvssrln.hu.w", int_loongarch_lasx_xvssrln_hu_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>;
+
+def XVSSRLN_WU_D : LASX_3R<0b01110101000001011>,
+    LASX_3R_DESC_BASE<"xvssrln.wu.d", int_loongarch_lasx_xvssrln_wu_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>;
+
+
+def XVSSRAN_BU_H : LASX_3R<0b01110101000001101>,
+    LASX_3R_DESC_BASE<"xvssran.bu.h", int_loongarch_lasx_xvssran_bu_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>;
+
+def XVSSRAN_HU_W : LASX_3R<0b01110101000001110>,
+    LASX_3R_DESC_BASE<"xvssran.hu.w", int_loongarch_lasx_xvssran_hu_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>;
+
+def XVSSRAN_WU_D : LASX_3R<0b01110101000001111>,
+    LASX_3R_DESC_BASE<"xvssran.wu.d", int_loongarch_lasx_xvssran_wu_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>;
+
+
+def XVSSRLRN_BU_H : LASX_3R<0b01110101000010001>,
+    LASX_3R_DESC_BASE<"xvssrlrn.bu.h", int_loongarch_lasx_xvssrlrn_bu_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>;
+
+def XVSSRLRN_HU_W : LASX_3R<0b01110101000010010>,
+    LASX_3R_DESC_BASE<"xvssrlrn.hu.w", int_loongarch_lasx_xvssrlrn_hu_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>;
+
+def XVSSRLRN_WU_D : LASX_3R<0b01110101000010011>,
+    LASX_3R_DESC_BASE<"xvssrlrn.wu.d", int_loongarch_lasx_xvssrlrn_wu_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>;
+
+
+def XVSSRARN_BU_H : LASX_3R<0b01110101000010101>,
+    LASX_3R_DESC_BASE<"xvssrarn.bu.h", int_loongarch_lasx_xvssrarn_bu_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>;
+
+def XVSSRARN_HU_W : LASX_3R<0b01110101000010110>,
+    LASX_3R_DESC_BASE<"xvssrarn.hu.w", int_loongarch_lasx_xvssrarn_hu_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>;
+
+def XVSSRARN_WU_D : LASX_3R<0b01110101000010111>,
+    LASX_3R_DESC_BASE<"xvssrarn.wu.d", int_loongarch_lasx_xvssrarn_wu_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>;
+
+
+def XVBITCLR_B : LASX_3R<0b01110101000011000>,
+    LASX_3R_DESC_BASE<"xvbitclr.b", xvbitclr_b, LASX256BOpnd>;
+
+def XVBITCLR_H : LASX_3R<0b01110101000011001>,
+    LASX_3R_DESC_BASE<"xvbitclr.h", xvbitclr_h, LASX256HOpnd>;
+
+def XVBITCLR_W : LASX_3R<0b01110101000011010>,
+    LASX_3R_DESC_BASE<"xvbitclr.w", xvbitclr_w, LASX256WOpnd>;
+
+def XVBITCLR_D : LASX_3R<0b01110101000011011>,
+    LASX_3R_DESC_BASE<"xvbitclr.d", xvbitclr_d, LASX256DOpnd>;
+
+
+def XVBITSET_B : LASX_3R<0b01110101000011100>,
+    LASX_3R_DESC_BASE<"xvbitset.b", int_loongarch_lasx_xvbitset_b, LASX256BOpnd>;
+
+def XVBITSET_H : LASX_3R<0b01110101000011101>,
+    LASX_3R_DESC_BASE<"xvbitset.h", int_loongarch_lasx_xvbitset_h, LASX256HOpnd>;
+
+def XVBITSET_W : LASX_3R<0b01110101000011110>,
+    LASX_3R_DESC_BASE<"xvbitset.w", int_loongarch_lasx_xvbitset_w, LASX256WOpnd>;
+
+def XVBITSET_D : LASX_3R<0b01110101000011111>,
+    LASX_3R_DESC_BASE<"xvbitset.d", int_loongarch_lasx_xvbitset_d, LASX256DOpnd>;
+
+
+def XVBITREV_B : LASX_3R<0b01110101000100000>,
+    LASX_3R_DESC_BASE<"xvbitrev.b", int_loongarch_lasx_xvbitrev_b, LASX256BOpnd>;
+
+def XVBITREV_H : LASX_3R<0b01110101000100001>,
+    LASX_3R_DESC_BASE<"xvbitrev.h", int_loongarch_lasx_xvbitrev_h, LASX256HOpnd>;
+
+def XVBITREV_W : LASX_3R<0b01110101000100010>,
+    LASX_3R_DESC_BASE<"xvbitrev.w", int_loongarch_lasx_xvbitrev_w, LASX256WOpnd>;
+
+def XVBITREV_D : LASX_3R<0b01110101000100011>,
+    LASX_3R_DESC_BASE<"xvbitrev.d", int_loongarch_lasx_xvbitrev_d, LASX256DOpnd>;
+
+
+def XVPACKEV_B : LASX_3R<0b01110101000101100>,
+    LASX_3R_DESC_BASE<"xvpackev.b", LoongArchVPACKEV, LASX256BOpnd>;
+
+def XVPACKEV_H : LASX_3R<0b01110101000101101>,
+    LASX_3R_DESC_BASE<"xvpackev.h", LoongArchVPACKEV, LASX256HOpnd>;
+
+def XVPACKEV_W : LASX_3R<0b01110101000101110>,
+    LASX_3R_DESC_BASE<"xvpackev.w", LoongArchVPACKEV, LASX256WOpnd>;
+
+def XVPACKEV_D : LASX_3R<0b01110101000101111>,
+    LASX_3R_DESC_BASE<"xvpackev.d", LoongArchVPACKEV, LASX256DOpnd>;
+
+
+def XVPACKOD_B : LASX_3R<0b01110101000110000>,
+    LASX_3R_DESC_BASE<"xvpackod.b", LoongArchVPACKOD, LASX256BOpnd>;
+
+def XVPACKOD_H : LASX_3R<0b01110101000110001>,
+    LASX_3R_DESC_BASE<"xvpackod.h", LoongArchVPACKOD, LASX256HOpnd>;
+
+def XVPACKOD_W : LASX_3R<0b01110101000110010>,
+    LASX_3R_DESC_BASE<"xvpackod.w", LoongArchVPACKOD, LASX256WOpnd>;
+
+def XVPACKOD_D : LASX_3R<0b01110101000110011>,
+    LASX_3R_DESC_BASE<"xvpackod.d", LoongArchVPACKOD, LASX256DOpnd>;
+
+
+def XVILVL_B : LASX_3R<0b01110101000110100>,
+    LASX_3R_DESC_BASE<"xvilvl.b", LoongArchVILVL, LASX256BOpnd>;
+
+def XVILVL_H : LASX_3R<0b01110101000110101>,
+    LASX_3R_DESC_BASE<"xvilvl.h", LoongArchVILVL, LASX256HOpnd>;
+
+def XVILVL_W : LASX_3R<0b01110101000110110>,
+    LASX_3R_DESC_BASE<"xvilvl.w", LoongArchVILVL, LASX256WOpnd>;
+
+def XVILVL_D : LASX_3R<0b01110101000110111>,
+    LASX_3R_DESC_BASE<"xvilvl.d", LoongArchVILVL, LASX256DOpnd>;
+
+
+def XVILVH_B : LASX_3R<0b01110101000111000>,
+    LASX_3R_DESC_BASE<"xvilvh.b", LoongArchVILVH, LASX256BOpnd>;
+
+def XVILVH_H : LASX_3R<0b01110101000111001>,
+    LASX_3R_DESC_BASE<"xvilvh.h", LoongArchVILVH, LASX256HOpnd>;
+
+def XVILVH_W : LASX_3R<0b01110101000111010>,
+    LASX_3R_DESC_BASE<"xvilvh.w", LoongArchVILVH, LASX256WOpnd>;
+
+def XVILVH_D : LASX_3R<0b01110101000111011>,
+    LASX_3R_DESC_BASE<"xvilvh.d", LoongArchVILVH, LASX256DOpnd>;
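+
+// xvpackev/xvpackod, xvilvl/xvilvh and xvpickev/xvpickod below are matched
+// through the LoongArchVPACK*/VILV*/VPICK* nodes that the shuffle lowering
+// synthesizes from suitable shufflevector masks; like most LASX permutes
+// they operate on each 128-bit half independently, which the mask analysis
+// has to respect. One plausible (illustrative, lane-respecting) mask that
+// may lower to xvpickev.w, modulo the node's operand order:
+//
+//   %r = shufflevector <8 x i32> %a, <8 x i32> %b,
+//                      <8 x i32> <i32 0, i32 2, i32 8, i32 10,
+//                                 i32 4, i32 6, i32 12, i32 14>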
+
+
+def XVPICKEV_B : LASX_3R<0b01110101000111100>,
+    LASX_3R_DESC_BASE<"xvpickev.b", LoongArchVPICKEV, LASX256BOpnd>;
+
+def XVPICKEV_H : LASX_3R<0b01110101000111101>,
+    LASX_3R_DESC_BASE<"xvpickev.h", LoongArchVPICKEV, LASX256HOpnd>;
+
+def XVPICKEV_W : LASX_3R<0b01110101000111110>,
+    LASX_3R_DESC_BASE<"xvpickev.w", LoongArchVPICKEV, LASX256WOpnd>;
+
+def XVPICKEV_D : LASX_3R<0b01110101000111111>,
+    LASX_3R_DESC_BASE<"xvpickev.d", LoongArchVPICKEV, LASX256DOpnd>;
+
+
+def XVPICKOD_B : LASX_3R<0b01110101001000000>,
+    LASX_3R_DESC_BASE<"xvpickod.b", LoongArchVPICKOD, LASX256BOpnd>;
+
+def XVPICKOD_H : LASX_3R<0b01110101001000001>,
+    LASX_3R_DESC_BASE<"xvpickod.h", LoongArchVPICKOD, LASX256HOpnd>;
+
+def XVPICKOD_W : LASX_3R<0b01110101001000010>,
+    LASX_3R_DESC_BASE<"xvpickod.w", LoongArchVPICKOD, LASX256WOpnd>;
+
+def XVPICKOD_D : LASX_3R<0b01110101001000011>,
+    LASX_3R_DESC_BASE<"xvpickod.d", LoongArchVPICKOD, LASX256DOpnd>;
+
+
+def XVREPLVE_B : LASX_3R_1GP<0b01110101001000100>,
+    LASX_3R_VREPLVE_DESC_BASE<"xvreplve.b", int_loongarch_lasx_xvreplve_b, LASX256BOpnd>;
+
+def XVREPLVE_H : LASX_3R_1GP<0b01110101001000101>,
+    LASX_3R_VREPLVE_DESC_BASE<"xvreplve.h", int_loongarch_lasx_xvreplve_h, LASX256HOpnd>;
+
+def XVREPLVE_W : LASX_3R_1GP<0b01110101001000110>,
+    LASX_3R_VREPLVE_DESC_BASE<"xvreplve.w", int_loongarch_lasx_xvreplve_w, LASX256WOpnd>;
+
+def XVREPLVE_D : LASX_3R_1GP<0b01110101001000111>,
+    LASX_3R_VREPLVE_DESC_BASE<"xvreplve.d", int_loongarch_lasx_xvreplve_d, LASX256DOpnd>;
+
+
+def XVAND_V : LASX_3R<0b01110101001001100>,
+    LASX_VEC_DESC_BASE<"xvand.v", and, LASX256BOpnd>;
+class XAND_V_H_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE<and, v16i16, LASX256HOpnd>;
+class XAND_V_W_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE<and, v8i32, LASX256WOpnd>;
+class XAND_V_D_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE<and, v4i64, LASX256DOpnd>;
+
+def XAND_V_H_PSEUDO : XAND_V_H_PSEUDO_DESC,
+    PseudoInstExpansion<(XVAND_V LASX256BOpnd:$xd,
+                                 LASX256BOpnd:$xj,
+                                 LASX256BOpnd:$xk)>;
+def XAND_V_W_PSEUDO : XAND_V_W_PSEUDO_DESC,
+    PseudoInstExpansion<(XVAND_V LASX256BOpnd:$xd,
+                                 LASX256BOpnd:$xj,
+                                 LASX256BOpnd:$xk)>;
+def XAND_V_D_PSEUDO : XAND_V_D_PSEUDO_DESC,
+    PseudoInstExpansion<(XVAND_V LASX256BOpnd:$xd,
+                                 LASX256BOpnd:$xj,
+                                 LASX256BOpnd:$xk)>;
+
+
+def XVOR_V : LASX_3R<0b01110101001001101>,
+    LASX_VEC_DESC_BASE<"xvor.v", or, LASX256BOpnd>;
+class X_OR_V_H_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE<or, v16i16, LASX256HOpnd>;
+class X_OR_V_W_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE<or, v8i32, LASX256WOpnd>;
+class X_OR_V_D_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE<or, v4i64, LASX256DOpnd>;
+
+def X_OR_V_H_PSEUDO : X_OR_V_H_PSEUDO_DESC,
+    PseudoInstExpansion<(XVOR_V LASX256BOpnd:$xd,
+                                LASX256BOpnd:$xj,
+                                LASX256BOpnd:$xk)>;
+def X_OR_V_W_PSEUDO : X_OR_V_W_PSEUDO_DESC,
+    PseudoInstExpansion<(XVOR_V LASX256BOpnd:$xd,
+                                LASX256BOpnd:$xj,
+                                LASX256BOpnd:$xk)>;
+def X_OR_V_D_PSEUDO : X_OR_V_D_PSEUDO_DESC,
+    PseudoInstExpansion<(XVOR_V LASX256BOpnd:$xd,
+                                LASX256BOpnd:$xj,
+                                LASX256BOpnd:$xk)>;
+
+
+def XVXOR_V : LASX_3R<0b01110101001001110>,
+    LASX_VEC_DESC_BASE<"xvxor.v", xor, LASX256BOpnd>;
+class XXOR_V_H_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE<xor, v16i16, LASX256HOpnd>;
+class XXOR_V_W_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE<xor, v8i32, LASX256WOpnd>;
+class XXOR_V_D_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE<xor, v4i64, LASX256DOpnd>;
+
+def XXOR_V_H_PSEUDO : XXOR_V_H_PSEUDO_DESC,
+    PseudoInstExpansion<(XVXOR_V LASX256BOpnd:$xd,
+                                 LASX256BOpnd:$xj,
+                                 LASX256BOpnd:$xk)>;
+def XXOR_V_W_PSEUDO : XXOR_V_W_PSEUDO_DESC,
+    PseudoInstExpansion<(XVXOR_V LASX256BOpnd:$xd,
+                                 LASX256BOpnd:$xj,
+                                 LASX256BOpnd:$xk)>;
+def XXOR_V_D_PSEUDO : XXOR_V_D_PSEUDO_DESC,
+    PseudoInstExpansion<(XVXOR_V LASX256BOpnd:$xd,
+                                 LASX256BOpnd:$xj,
+                                 LASX256BOpnd:$xk)>;
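+
+// xvand.v/xvor.v/xvxor.v are untyped 256-bit bitwise operations; the *_H/W/D
+// pseudos above only exist so that and/or/xor at the other element types have
+// a pattern to match, and every one of them expands back to the .v form.
+// Illustrative sketch:
+//
+//   %r = and <4 x i64> %a, %b              ; expected: xvand.v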
LASX_3R<0b01110101001001111>, + LASX_VEC_DESC_BASE<"xvnor.v", LoongArchVNOR, LASX256BOpnd>; + +class XNOR_V_H_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE; +class XNOR_V_W_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE; +class XNOR_V_D_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE; + +def XNOR_V_H_PSEUDO : XNOR_V_H_PSEUDO_DESC, + PseudoInstExpansion<(XVNOR_V LASX256BOpnd:$xd, + LASX256BOpnd:$xj, + LASX256BOpnd:$xk)>; +def XNOR_V_W_PSEUDO : XNOR_V_W_PSEUDO_DESC, + PseudoInstExpansion<(XVNOR_V LASX256BOpnd:$xd, + LASX256BOpnd:$xj, + LASX256BOpnd:$xk)>; +def XNOR_V_D_PSEUDO : XNOR_V_D_PSEUDO_DESC, + PseudoInstExpansion<(XVNOR_V LASX256BOpnd:$xd, + LASX256BOpnd:$xj, + LASX256BOpnd:$xk)>; + + +def XVANDN_V : LASX_3R<0b01110101001010000>, + LASX_3R_DESC_BASE<"xvandn.v", int_loongarch_lasx_xvandn_v, LASX256BOpnd>; + + +class LASX_ANDN_PSEUDO_BASE : + LASXPseudo<(outs RO:$xd), (ins RO:$xj, RO:$xk), + []>, + PseudoInstExpansion<(XVANDN_V LASX256BOpnd:$xd, + LASX256BOpnd:$xj, + LASX256BOpnd:$xk)>; + +def XVANDN_H_PSEUDO : LASX_ANDN_PSEUDO_BASE; +def XVANDN_W_PSEUDO : LASX_ANDN_PSEUDO_BASE; +def XVANDN_D_PSEUDO : LASX_ANDN_PSEUDO_BASE; + + +def XVORN_V : LASX_3R<0b01110101001010001>, + LASX_3R_DESC_BASE<"xvorn.v", int_loongarch_lasx_xvorn_v, LASX256BOpnd>; + + +class LASX_ORN_PSEUDO_BASE : + LASXPseudo<(outs RO:$xd), (ins RO:$xj, RO:$xk), + []>, + PseudoInstExpansion<(XVORN_V LASX256BOpnd:$xd, + LASX256BOpnd:$xj, + LASX256BOpnd:$xk)>; + +def XVORN_H_PSEUDO : LASX_ORN_PSEUDO_BASE; +def XVORN_W_PSEUDO : LASX_ORN_PSEUDO_BASE; +def XVORN_D_PSEUDO : LASX_ORN_PSEUDO_BASE; + + +def XVFRSTP_B : LASX_3R<0b01110101001010110>, + LASX_3R_4R_DESC_BASE<"xvfrstp.b", int_loongarch_lasx_xvfrstp_b, LASX256BOpnd>; + +def XVFRSTP_H : LASX_3R<0b01110101001010111>, + LASX_3R_4R_DESC_BASE<"xvfrstp.h", int_loongarch_lasx_xvfrstp_h, LASX256HOpnd>; + + +def XVADD_Q : LASX_3R<0b01110101001011010>, IsCommutable, + LASX_3R_DESC_BASE<"xvadd.q", int_loongarch_lasx_xvadd_q, LASX256DOpnd>; + +def XVSUB_Q : LASX_3R<0b01110101001011011>, + LASX_3R_DESC_BASE<"xvsub.q", int_loongarch_lasx_xvsub_q, LASX256DOpnd>; + + +def XVSIGNCOV_B : LASX_3R<0b01110101001011100>, + LASX_3R_DESC_BASE<"xvsigncov.b", int_loongarch_lasx_xvsigncov_b, LASX256BOpnd>; + +def XVSIGNCOV_H : LASX_3R<0b01110101001011101>, + LASX_3R_DESC_BASE<"xvsigncov.h", int_loongarch_lasx_xvsigncov_h, LASX256HOpnd>; + +def XVSIGNCOV_W : LASX_3R<0b01110101001011110>, + LASX_3R_DESC_BASE<"xvsigncov.w", int_loongarch_lasx_xvsigncov_w, LASX256WOpnd>; + +def XVSIGNCOV_D : LASX_3R<0b01110101001011111>, + LASX_3R_DESC_BASE<"xvsigncov.d", int_loongarch_lasx_xvsigncov_d, LASX256DOpnd>; + + +def XVFADD_S : LASX_3R<0b01110101001100001>, IsCommutable, + LASX_3RF_DESC_BASE<"xvfadd.s", fadd, LASX256WOpnd>; + +def XVFADD_D : LASX_3R<0b01110101001100010>, IsCommutable, + LASX_3RF_DESC_BASE<"xvfadd.d", fadd, LASX256DOpnd>; + + +def XVFSUB_S : LASX_3R<0b01110101001100101>, + LASX_3RF_DESC_BASE<"xvfsub.s", fsub, LASX256WOpnd>; + +def XVFSUB_D : LASX_3R<0b01110101001100110>, + LASX_3RF_DESC_BASE<"xvfsub.d", fsub, LASX256DOpnd>; + + +def XVFMUL_S : LASX_3R<0b01110101001110001>, + LASX_3RF_DESC_BASE<"xvfmul.s", fmul, LASX256WOpnd>; + +def XVFMUL_D : LASX_3R<0b01110101001110010>, + LASX_3RF_DESC_BASE<"xvfmul.d", fmul, LASX256DOpnd>; + + +def XVFDIV_S : LASX_3R<0b01110101001110101>, + LASX_3RF_DESC_BASE<"xvfdiv.s", fdiv, LASX256WOpnd>; + +def XVFDIV_D : LASX_3R<0b01110101001110110>, + LASX_3RF_DESC_BASE<"xvfdiv.d", fdiv, LASX256DOpnd>; + + +def XVFMAX_S : LASX_3R<0b01110101001111001>, + LASX_3RF_DESC_BASE<"xvfmax.s", 
int_loongarch_lasx_xvfmax_s, LASX256WOpnd>; + +def XVFMAX_D : LASX_3R<0b01110101001111010>, + LASX_3RF_DESC_BASE<"xvfmax.d", int_loongarch_lasx_xvfmax_d, LASX256DOpnd>; + + +def XVFMIN_S : LASX_3R<0b01110101001111101>, + LASX_3RF_DESC_BASE<"xvfmin.s", int_loongarch_lasx_xvfmin_s, LASX256WOpnd>; + +def XVFMIN_D : LASX_3R<0b01110101001111110>, + LASX_3RF_DESC_BASE<"xvfmin.d", int_loongarch_lasx_xvfmin_d, LASX256DOpnd>; + + +def XVFMAXA_S : LASX_3R<0b01110101010000001>, + LASX_3RF_DESC_BASE<"xvfmaxa.s", int_loongarch_lasx_xvfmaxa_s, LASX256WOpnd>; + +def XVFMAXA_D : LASX_3R<0b01110101010000010>, + LASX_3RF_DESC_BASE<"xvfmaxa.d", int_loongarch_lasx_xvfmaxa_d, LASX256DOpnd>; + + +def XVFMINA_S : LASX_3R<0b01110101010000101>, + LASX_3RF_DESC_BASE<"xvfmina.s", int_loongarch_lasx_xvfmina_s, LASX256WOpnd>; + +def XVFMINA_D : LASX_3R<0b01110101010000110>, + LASX_3RF_DESC_BASE<"xvfmina.d", int_loongarch_lasx_xvfmina_d, LASX256DOpnd>; + + +def XVFCVT_H_S : LASX_3R<0b01110101010001100>, + LASX_3RF_DESC_BASE<"xvfcvt.h.s", int_loongarch_lasx_xvfcvt_h_s, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; + +def XVFCVT_S_D : LASX_3R<0b01110101010001101>, + LASX_3RF_DESC_BASE1<"xvfcvt.s.d", int_loongarch_lasx_xvfcvt_s_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVFFINT_S_L : LASX_3R<0b01110101010010000>, + LASX_3RF_DESC_BASE<"xvffint.s.l", int_loongarch_lasx_xvffint_s_l, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; + +def XVFTINT_W_D : LASX_3R<0b01110101010010011>, + LASX_3RF_DESC_BASE<"xvftint.w.d", int_loongarch_lasx_xvftint_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVFTINTRM_W_D : LASX_3R<0b01110101010010100>, + LASX_3RF_DESC_BASE<"xvftintrm.w.d", int_loongarch_lasx_xvftintrm_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; + +def XVFTINTRP_W_D : LASX_3R<0b01110101010010101>, + LASX_3RF_DESC_BASE<"xvftintrp.w.d", int_loongarch_lasx_xvftintrp_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; + +def XVFTINTRZ_W_D : LASX_3R<0b01110101010010110>, + LASX_3RF_DESC_BASE<"xvftintrz.w.d", int_loongarch_lasx_xvftintrz_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; + +def XVFTINTRNE_W_D : LASX_3R<0b01110101010010111>, + LASX_3RF_DESC_BASE<"xvftintrne.w.d", int_loongarch_lasx_xvftintrne_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVSHUF_H : LASX_3R<0b01110101011110101>, + LASX_3R_VSHF_DESC_BASE<"xvshuf.h", LASX256HOpnd>; + +def XVSHUF_W : LASX_3R<0b01110101011110110>, + LASX_3R_VSHF_DESC_BASE<"xvshuf.w", LASX256WOpnd>; + +def XVSHUF_D : LASX_3R<0b01110101011110111>, + LASX_3R_VSHF_DESC_BASE<"xvshuf.d", LASX256DOpnd>; + + +def XVPERM_W : LASX_3R<0b01110101011111010>, + LASX_3R_DESC_BASE<"xvperm.w", int_loongarch_lasx_xvperm_w, LASX256WOpnd>; + + +def XVSEQI_B : LASX_I5<0b01110110100000000>, + LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvseqi.b", int_loongarch_lasx_xvseqi_b, simm5_32, immSExt5, LASX256BOpnd>; + +def XVSEQI_H : LASX_I5<0b01110110100000001>, + LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvseqi.h", int_loongarch_lasx_xvseqi_h, simm5_32, immSExt5, LASX256HOpnd>; + +def XVSEQI_W : LASX_I5<0b01110110100000010>, + LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvseqi.w", int_loongarch_lasx_xvseqi_w, simm5_32, immSExt5, LASX256WOpnd>; + +def XVSEQI_D : LASX_I5<0b01110110100000011>, + LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvseqi.d", int_loongarch_lasx_xvseqi_d, simm5_32, immSExt5, LASX256DOpnd>; + + +def XVSLEI_B : LASX_I5<0b01110110100000100>, + LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvslei.b", int_loongarch_lasx_xvslei_b, simm5_32, immSExt5, LASX256BOpnd>; + +def XVSLEI_H : LASX_I5<0b01110110100000101>, + 
LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvslei.h", int_loongarch_lasx_xvslei_h, simm5_32, immSExt5, LASX256HOpnd>; + +def XVSLEI_W : LASX_I5<0b01110110100000110>, + LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvslei.w", int_loongarch_lasx_xvslei_w, simm5_32, immSExt5, LASX256WOpnd>; + +def XVSLEI_D : LASX_I5<0b01110110100000111>, + LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvslei.d", int_loongarch_lasx_xvslei_d, simm5_32, immSExt5, LASX256DOpnd>; + + +def XVSLEI_BU : LASX_I5_U<0b01110110100001000>, + LASX_I5_U_SETCC_DESC_BASE_Intrinsic<"xvslei.bu", int_loongarch_lasx_xvslei_bu, uimm5, immZExt5, LASX256BOpnd>; + +def XVSLEI_HU : LASX_I5_U<0b01110110100001001>, + LASX_I5_U_SETCC_DESC_BASE_Intrinsic<"xvslei.hu", int_loongarch_lasx_xvslei_hu, uimm5, immZExt5, LASX256HOpnd>; + +def XVSLEI_WU : LASX_I5_U<0b01110110100001010>, + LASX_I5_U_SETCC_DESC_BASE_Intrinsic<"xvslei.wu", int_loongarch_lasx_xvslei_wu, uimm5, immZExt5, LASX256WOpnd>; + +def XVSLEI_DU : LASX_I5_U<0b01110110100001011>, + LASX_I5_U_SETCC_DESC_BASE_Intrinsic<"xvslei.du", int_loongarch_lasx_xvslei_du, uimm5, immZExt5, LASX256DOpnd>; + + +def XVSLTI_B : LASX_I5<0b01110110100001100>, + LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvslti.b", int_loongarch_lasx_xvslti_b, simm5_32, immSExt5, LASX256BOpnd>; + +def XVSLTI_H : LASX_I5<0b01110110100001101>, + LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvslti.h", int_loongarch_lasx_xvslti_h, simm5_32, immSExt5, LASX256HOpnd>; + +def XVSLTI_W : LASX_I5<0b01110110100001110>, + LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvslti.w", int_loongarch_lasx_xvslti_w, simm5_32, immSExt5, LASX256WOpnd>; + +def XVSLTI_D : LASX_I5<0b01110110100001111>, + LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvslti.d", int_loongarch_lasx_xvslti_d, simm5_32, immSExt5, LASX256DOpnd>; + + +def XVSLTI_BU : LASX_I5_U<0b01110110100010000>, + LASX_I5_U_SETCC_DESC_BASE_Intrinsic<"xvslti.bu", int_loongarch_lasx_xvslti_bu, uimm5, immZExt5, LASX256BOpnd>; + +def XVSLTI_HU : LASX_I5_U<0b01110110100010001>, + LASX_I5_U_SETCC_DESC_BASE_Intrinsic<"xvslti.hu", int_loongarch_lasx_xvslti_hu, uimm5, immZExt5, LASX256HOpnd>; + +def XVSLTI_WU : LASX_I5_U<0b01110110100010010>, + LASX_I5_U_SETCC_DESC_BASE_Intrinsic<"xvslti.wu", int_loongarch_lasx_xvslti_wu, uimm5, immZExt5, LASX256WOpnd>; + +def XVSLTI_DU : LASX_I5_U<0b01110110100010011>, + LASX_I5_U_SETCC_DESC_BASE_Intrinsic<"xvslti.du", int_loongarch_lasx_xvslti_du, uimm5, immZExt5, LASX256DOpnd>; + + +def XVADDI_BU : LASX_I5_U<0b01110110100010100>, + LASX_I5_U_DESC_BASE_Intrinsic<"xvaddi.bu", int_loongarch_lasx_xvaddi_bu, uimm5, immZExt5, LASX256BOpnd>; + +def XVADDI_HU : LASX_I5_U<0b01110110100010101>, + LASX_I5_U_DESC_BASE_Intrinsic<"xvaddi.hu", int_loongarch_lasx_xvaddi_hu, uimm5, immZExt5, LASX256HOpnd>; + +def XVADDI_WU : LASX_I5_U<0b01110110100010110>, + LASX_I5_U_DESC_BASE_Intrinsic<"xvaddi.wu", int_loongarch_lasx_xvaddi_wu, uimm5, immZExt5, LASX256WOpnd>; + +def XVADDI_DU : LASX_I5_U<0b01110110100010111>, + LASX_I5_U_DESC_BASE_Intrinsic<"xvaddi.du", int_loongarch_lasx_xvaddi_du, uimm5, immZExt5, LASX256DOpnd>; + + +def XVSUBI_BU : LASX_I5_U<0b01110110100011000>, + LASX_I5_U_DESC_BASE_Intrinsic<"xvsubi.bu", int_loongarch_lasx_xvsubi_bu, uimm5, immZExt5, LASX256BOpnd>; + +def XVSUBI_HU : LASX_I5_U<0b01110110100011001>, + LASX_I5_U_DESC_BASE_Intrinsic<"xvsubi.hu", int_loongarch_lasx_xvsubi_hu, uimm5, immZExt5, LASX256HOpnd>; + +def XVSUBI_WU : LASX_I5_U<0b01110110100011010>, + LASX_I5_U_DESC_BASE_Intrinsic<"xvsubi.wu", int_loongarch_lasx_xvsubi_wu, uimm5, immZExt5, LASX256WOpnd>; + +def XVSUBI_DU : LASX_I5_U<0b01110110100011011>, + 
LASX_I5_U_DESC_BASE_Intrinsic<"xvsubi.du", int_loongarch_lasx_xvsubi_du, uimm5, immZExt5, LASX256DOpnd>; + + +def XVBSLL_V : LASX_I5_U<0b01110110100011100>, + LASX_U5_DESC_BASE<"xvbsll.v", int_loongarch_lasx_xvbsll_v, LASX256BOpnd>; + +def XVBSRL_V : LASX_I5_U<0b01110110100011101>, + LASX_U5_DESC_BASE<"xvbsrl.v", int_loongarch_lasx_xvbsrl_v, LASX256BOpnd>; + + +def XVMAXI_B : LASX_I5<0b01110110100100000>, + LASX_I5_DESC_BASE_Intrinsic<"xvmaxi.b", int_loongarch_lasx_xvmaxi_b, simm5_32, immSExt5, LASX256BOpnd>; + +def XVMAXI_H : LASX_I5<0b01110110100100001>, + LASX_I5_DESC_BASE_Intrinsic<"xvmaxi.h", int_loongarch_lasx_xvmaxi_h, simm5_32, immSExt5, LASX256HOpnd>; + +def XVMAXI_W : LASX_I5<0b01110110100100010>, + LASX_I5_DESC_BASE_Intrinsic<"xvmaxi.w", int_loongarch_lasx_xvmaxi_w, simm5_32, immSExt5, LASX256WOpnd>; + +def XVMAXI_D : LASX_I5<0b01110110100100011>, + LASX_I5_DESC_BASE_Intrinsic<"xvmaxi.d", int_loongarch_lasx_xvmaxi_d, simm5_32, immSExt5, LASX256DOpnd>; + + +def XVMINI_B : LASX_I5<0b01110110100100100>, + LASX_I5_DESC_BASE_Intrinsic<"xvmini.b", int_loongarch_lasx_xvmini_b, simm5_32, immSExt5, LASX256BOpnd>; + +def XVMINI_H : LASX_I5<0b01110110100100101>, + LASX_I5_DESC_BASE_Intrinsic<"xvmini.h", int_loongarch_lasx_xvmini_h, simm5_32, immSExt5, LASX256HOpnd>; + +def XVMINI_W : LASX_I5<0b01110110100100110>, + LASX_I5_DESC_BASE_Intrinsic<"xvmini.w", int_loongarch_lasx_xvmini_w, simm5_32, immSExt5, LASX256WOpnd>; + +def XVMINI_D : LASX_I5<0b01110110100100111>, + LASX_I5_DESC_BASE_Intrinsic<"xvmini.d", int_loongarch_lasx_xvmini_d, simm5_32, immSExt5, LASX256DOpnd>; + + +def XVMAXI_BU : LASX_I5_U<0b01110110100101000>, + LASX_I5_U_DESC_BASE_Intrinsic<"xvmaxi.bu", int_loongarch_lasx_xvmaxi_bu, uimm5, immZExt5, LASX256BOpnd>; + +def XVMAXI_HU : LASX_I5_U<0b01110110100101001>, + LASX_I5_U_DESC_BASE_Intrinsic<"xvmaxi.hu", int_loongarch_lasx_xvmaxi_hu, uimm5, immZExt5, LASX256HOpnd>; + +def XVMAXI_WU : LASX_I5_U<0b01110110100101010>, + LASX_I5_U_DESC_BASE_Intrinsic<"xvmaxi.wu", int_loongarch_lasx_xvmaxi_wu, uimm5, immZExt5, LASX256WOpnd>; + +def XVMAXI_DU : LASX_I5_U<0b01110110100101011>, + LASX_I5_U_DESC_BASE_Intrinsic<"xvmaxi.du", int_loongarch_lasx_xvmaxi_du, uimm5, immZExt5, LASX256DOpnd>; + + +def XVMINI_BU : LASX_I5_U<0b01110110100101100>, + LASX_I5_U_DESC_BASE_Intrinsic<"xvmini.bu", int_loongarch_lasx_xvmini_bu, uimm5, immZExt5, LASX256BOpnd>; + +def XVMINI_HU : LASX_I5_U<0b01110110100101101>, + LASX_I5_U_DESC_BASE_Intrinsic<"xvmini.hu", int_loongarch_lasx_xvmini_hu, uimm5, immZExt5, LASX256HOpnd>; + +def XVMINI_WU : LASX_I5_U<0b01110110100101110>, + LASX_I5_U_DESC_BASE_Intrinsic<"xvmini.wu", int_loongarch_lasx_xvmini_wu, uimm5, immZExt5, LASX256WOpnd>; + +def XVMINI_DU : LASX_I5_U<0b01110110100101111>, + LASX_I5_U_DESC_BASE_Intrinsic<"xvmini.du", int_loongarch_lasx_xvmini_du, uimm5, immZExt5, LASX256DOpnd>; + + +def XVFRSTPI_B : LASX_I5_U<0b01110110100110100>, + LASX_U5_4R_DESC_BASE<"xvfrstpi.b", int_loongarch_lasx_xvfrstpi_b, LASX256BOpnd>; + +def XVFRSTPI_H : LASX_I5_U<0b01110110100110101>, + LASX_U5_4R_DESC_BASE<"xvfrstpi.h", int_loongarch_lasx_xvfrstpi_h, LASX256HOpnd>; + + +def XVCLO_B : LASX_2R<0b0111011010011100000000>, + LASX_2R_DESC_BASE<"xvclo.b", int_loongarch_lasx_xvclo_b, LASX256BOpnd>; + +def XVCLO_H : LASX_2R<0b0111011010011100000001>, + LASX_2R_DESC_BASE<"xvclo.h", int_loongarch_lasx_xvclo_h, LASX256HOpnd>; + +def XVCLO_W : LASX_2R<0b0111011010011100000010>, + LASX_2R_DESC_BASE<"xvclo.w", int_loongarch_lasx_xvclo_w, LASX256WOpnd>; + +def XVCLO_D : 
LASX_2R<0b0111011010011100000011>,
+ LASX_2R_DESC_BASE<"xvclo.d", int_loongarch_lasx_xvclo_d, LASX256DOpnd>;
+
+
+def XVCLZ_B : LASX_2R<0b0111011010011100000100>,
+ LASX_2R_DESC_BASE<"xvclz.b", ctlz, LASX256BOpnd>;
+
+def XVCLZ_H : LASX_2R<0b0111011010011100000101>,
+ LASX_2R_DESC_BASE<"xvclz.h", ctlz, LASX256HOpnd>;
+
+def XVCLZ_W : LASX_2R<0b0111011010011100000110>,
+ LASX_2R_DESC_BASE<"xvclz.w", ctlz, LASX256WOpnd>;
+
+def XVCLZ_D : LASX_2R<0b0111011010011100000111>,
+ LASX_2R_DESC_BASE<"xvclz.d", ctlz, LASX256DOpnd>;
+
+
+def XVPCNT_B : LASX_2R<0b0111011010011100001000>,
+ LASX_2R_DESC_BASE<"xvpcnt.b", ctpop, LASX256BOpnd>;
+
+def XVPCNT_H : LASX_2R<0b0111011010011100001001>,
+ LASX_2R_DESC_BASE<"xvpcnt.h", ctpop, LASX256HOpnd>;
+
+def XVPCNT_W : LASX_2R<0b0111011010011100001010>,
+ LASX_2R_DESC_BASE<"xvpcnt.w", ctpop, LASX256WOpnd>;
+
+def XVPCNT_D : LASX_2R<0b0111011010011100001011>,
+ LASX_2R_DESC_BASE<"xvpcnt.d", ctpop, LASX256DOpnd>;
+
+
+def XVNEG_B : LASX_2R<0b0111011010011100001100>,
+ LASX_2R_DESC_BASE<"xvneg.b", int_loongarch_lasx_xvneg_b, LASX256BOpnd>;
+
+def XVNEG_H : LASX_2R<0b0111011010011100001101>,
+ LASX_2R_DESC_BASE<"xvneg.h", int_loongarch_lasx_xvneg_h, LASX256HOpnd>;
+
+def XVNEG_W : LASX_2R<0b0111011010011100001110>,
+ LASX_2R_DESC_BASE<"xvneg.w", int_loongarch_lasx_xvneg_w, LASX256WOpnd>;
+
+def XVNEG_D : LASX_2R<0b0111011010011100001111>,
+ LASX_2R_DESC_BASE<"xvneg.d", int_loongarch_lasx_xvneg_d, LASX256DOpnd>;
+
+
+def XVMSKLTZ_B : LASX_2R<0b0111011010011100010000>,
+ LASX_2R_DESC_BASE<"xvmskltz.b", int_loongarch_lasx_xvmskltz_b, LASX256BOpnd>;
+
+def XVMSKLTZ_H : LASX_2R<0b0111011010011100010001>,
+ LASX_2R_DESC_BASE<"xvmskltz.h", int_loongarch_lasx_xvmskltz_h, LASX256HOpnd>;
+
+def XVMSKLTZ_W : LASX_2R<0b0111011010011100010010>,
+ LASX_2R_DESC_BASE<"xvmskltz.w", int_loongarch_lasx_xvmskltz_w, LASX256WOpnd>;
+
+def XVMSKLTZ_D : LASX_2R<0b0111011010011100010011>,
+ LASX_2R_DESC_BASE<"xvmskltz.d", int_loongarch_lasx_xvmskltz_d, LASX256DOpnd>;
+
+
+def XVMSKGEZ_B : LASX_2R<0b0111011010011100010100>,
+ LASX_2R_DESC_BASE<"xvmskgez.b", int_loongarch_lasx_xvmskgez_b, LASX256BOpnd>;
+
+def XVMSKNZ_B : LASX_2R<0b0111011010011100011000>,
+ LASX_2R_DESC_BASE<"xvmsknz.b", int_loongarch_lasx_xvmsknz_b, LASX256BOpnd>;
+
+
+def XVSETEQZ_V : LASX_SET<0b0111011010011100100110>,
+ LASX_SET_DESC_BASE<"xvseteqz.v", LASX256BOpnd>;
+
+def XVSETNEZ_V : LASX_SET<0b0111011010011100100111>,
+ LASX_SET_DESC_BASE<"xvsetnez.v", LASX256BOpnd>;
+
+
+def XVSETANYEQZ_B : LASX_SET<0b0111011010011100101000>,
+ LASX_SET_DESC_BASE<"xvsetanyeqz.b", LASX256BOpnd>;
+
+def XVSETANYEQZ_H : LASX_SET<0b0111011010011100101001>,
+ LASX_SET_DESC_BASE<"xvsetanyeqz.h", LASX256HOpnd>;
+
+def XVSETANYEQZ_W : LASX_SET<0b0111011010011100101010>,
+ LASX_SET_DESC_BASE<"xvsetanyeqz.w", LASX256WOpnd>;
+
+def XVSETANYEQZ_D : LASX_SET<0b0111011010011100101011>,
+ LASX_SET_DESC_BASE<"xvsetanyeqz.d", LASX256DOpnd>;
+
+
+def XVSETALLNEZ_B : LASX_SET<0b0111011010011100101100>,
+ LASX_SET_DESC_BASE<"xvsetallnez.b", LASX256BOpnd>;
+
+def XVSETALLNEZ_H : LASX_SET<0b0111011010011100101101>,
+ LASX_SET_DESC_BASE<"xvsetallnez.h", LASX256HOpnd>;
+
+def XVSETALLNEZ_W : LASX_SET<0b0111011010011100101110>,
+ LASX_SET_DESC_BASE<"xvsetallnez.w", LASX256WOpnd>;
+
+def XVSETALLNEZ_D : LASX_SET<0b0111011010011100101111>,
+ LASX_SET_DESC_BASE<"xvsetallnez.d", LASX256DOpnd>;
+
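+// The pseudos derived from this base read a whole-vector test (xvseteqz,
+// xvsetnez and friends above) back into a GPR32 so it can feed a conditional
+// branch; a custom inserter expands them after instruction selection. The
+// template parameter list here is inferred from the pattern body.
+class LASX_CBRANCH_PSEUDO_DESC_BASE<SDPatternOperator OpNode, ValueType TyNode,
+ RegisterClass RCWS> :
+ LoongArchPseudo<(outs GPR32:$dst),
+ (ins RCWS:$xj),
+ [(set GPR32:$dst, (OpNode (TyNode RCWS:$xj)))]> {
+ bit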
usesCustomInserter = 1; +} + +def XSNZ_B_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE; +def XSNZ_H_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE; +def XSNZ_W_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE; +def XSNZ_D_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE; +def XSNZ_V_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE; + +def XSZ_B_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE; +def XSZ_H_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE; +def XSZ_W_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE; +def XSZ_D_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE; +def XSZ_V_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE; + + +def XVFLOGB_S : LASX_2R<0b0111011010011100110001>, + LASX_2RF_DESC_BASE<"xvflogb.s", int_loongarch_lasx_xvflogb_s, LASX256WOpnd>; + +def XVFLOGB_D : LASX_2R<0b0111011010011100110010>, + LASX_2RF_DESC_BASE<"xvflogb.d", int_loongarch_lasx_xvflogb_d, LASX256DOpnd>; + + +def XVFCLASS_S : LASX_2R<0b0111011010011100110101>, + LASX_2RF_DESC_BASE<"xvfclass.s", int_loongarch_lasx_xvfclass_s, LASX256WOpnd>; + +def XVFCLASS_D : LASX_2R<0b0111011010011100110110>, + LASX_2RF_DESC_BASE<"xvfclass.d", int_loongarch_lasx_xvfclass_d, LASX256DOpnd>; + + +def XVFSQRT_S : LASX_2R<0b0111011010011100111001>, + LASX_2RF_DESC_BASE<"xvfsqrt.s", fsqrt, LASX256WOpnd>; + +def XVFSQRT_D : LASX_2R<0b0111011010011100111010>, + LASX_2RF_DESC_BASE<"xvfsqrt.d", fsqrt, LASX256DOpnd>; + + +def XVFRECIP_S : LASX_2R<0b0111011010011100111101>, + LASX_2RF_DESC_BASE<"xvfrecip.s", int_loongarch_lasx_xvfrecip_s, LASX256WOpnd>; + +def XVFRECIP_D : LASX_2R<0b0111011010011100111110>, + LASX_2RF_DESC_BASE<"xvfrecip.d", int_loongarch_lasx_xvfrecip_d, LASX256DOpnd>; + + +def XVFRSQRT_S : LASX_2R<0b0111011010011101000001>, + LASX_2RF_DESC_BASE<"xvfrsqrt.s", int_loongarch_lasx_xvfrsqrt_s, LASX256WOpnd>; + +def XVFRSQRT_D : LASX_2R<0b0111011010011101000010>, + LASX_2RF_DESC_BASE<"xvfrsqrt.d", int_loongarch_lasx_xvfrsqrt_d, LASX256DOpnd>; + + +def XVFRINT_S : LASX_2R<0b0111011010011101001101>, + LASX_2RF_DESC_BASE<"xvfrint.s", frint, LASX256WOpnd>; + +def XVFRINT_D : LASX_2R<0b0111011010011101001110>, + LASX_2RF_DESC_BASE<"xvfrint.d", frint, LASX256DOpnd>; + + +def XVFRINTRM_S : LASX_2R<0b0111011010011101010001>, + LASX_2RF_DESC_BASE<"xvfrintrm.s", int_loongarch_lasx_xvfrintrm_s, LASX256WOpnd>; + +def XVFRINTRM_D : LASX_2R<0b0111011010011101010010>, + LASX_2RF_DESC_BASE<"xvfrintrm.d", int_loongarch_lasx_xvfrintrm_d, LASX256DOpnd>; + + +def XVFRINTRP_S : LASX_2R<0b0111011010011101010101>, + LASX_2RF_DESC_BASE<"xvfrintrp.s", int_loongarch_lasx_xvfrintrp_s, LASX256WOpnd>; + +def XVFRINTRP_D : LASX_2R<0b0111011010011101010110>, + LASX_2RF_DESC_BASE<"xvfrintrp.d", int_loongarch_lasx_xvfrintrp_d, LASX256DOpnd>; + + +def XVFRINTRZ_S : LASX_2R<0b0111011010011101011001>, + LASX_2RF_DESC_BASE<"xvfrintrz.s", int_loongarch_lasx_xvfrintrz_s, LASX256WOpnd>; + +def XVFRINTRZ_D : LASX_2R<0b0111011010011101011010>, + LASX_2RF_DESC_BASE<"xvfrintrz.d", int_loongarch_lasx_xvfrintrz_d, LASX256DOpnd>; + + +def XVFRINTRNE_S : LASX_2R<0b0111011010011101011101>, + LASX_2RF_DESC_BASE<"xvfrintrne.s", int_loongarch_lasx_xvfrintrne_s, LASX256WOpnd>; + +def XVFRINTRNE_D : LASX_2R<0b0111011010011101011110>, + LASX_2RF_DESC_BASE<"xvfrintrne.d", int_loongarch_lasx_xvfrintrne_d, LASX256DOpnd>; + + +def XVFCVTL_S_H : LASX_2R<0b0111011010011101111010>, + LASX_2RF_DESC_BASE<"xvfcvtl.s.h", int_loongarch_lasx_xvfcvtl_s_h, LASX256WOpnd, LASX256HOpnd>; + +def XVFCVTH_S_H : LASX_2R<0b0111011010011101111011>, + LASX_2RF_DESC_BASE<"xvfcvth.s.h", int_loongarch_lasx_xvfcvth_s_h, LASX256WOpnd, LASX256HOpnd>; + + +def XVFCVTL_D_S : 
LASX_2R<0b0111011010011101111100>, + LASX_2RF_DESC_BASE<"xvfcvtl.d.s", int_loongarch_lasx_xvfcvtl_d_s, LASX256DOpnd, LASX256WOpnd>; + +def XVFCVTH_D_S : LASX_2R<0b0111011010011101111101>, + LASX_2RF_DESC_BASE<"xvfcvth.d.s", int_loongarch_lasx_xvfcvth_d_s, LASX256DOpnd, LASX256WOpnd>; + + +def XVFFINT_S_W : LASX_2R<0b0111011010011110000000>, + LASX_2RF_DESC_BASE<"xvffint.s.w", sint_to_fp, LASX256WOpnd>; + +def XVFFINT_S_WU : LASX_2R<0b0111011010011110000001>, + LASX_2RF_DESC_BASE<"xvffint.s.wu", uint_to_fp, LASX256WOpnd>; + + +def XVFFINT_D_L : LASX_2R<0b0111011010011110000010>, + LASX_2RF_DESC_BASE<"xvffint.d.l", sint_to_fp, LASX256DOpnd>; + +def XVFFINT_D_LU : LASX_2R<0b0111011010011110000011>, + LASX_2RF_DESC_BASE<"xvffint.d.lu", uint_to_fp, LASX256DOpnd>; + + +def XVFFINTL_D_W : LASX_2R<0b0111011010011110000100>, + LASX_2RF_DESC_BASE<"xvffintl.d.w", int_loongarch_lasx_xvffintl_d_w, LASX256DOpnd, LASX256WOpnd>; + +def XVFFINTH_D_W : LASX_2R<0b0111011010011110000101>, + LASX_2RF_DESC_BASE<"xvffinth.d.w", int_loongarch_lasx_xvffinth_d_w, LASX256DOpnd, LASX256WOpnd>; + + +def XVFTINT_W_S : LASX_2R<0b0111011010011110001100>, + LASX_2RF_DESC_BASE<"xvftint.w.s", int_loongarch_lasx_xvftint_w_s, LASX256WOpnd>; + +def XVFTINT_L_D : LASX_2R<0b0111011010011110001101>, + LASX_2RF_DESC_BASE<"xvftint.l.d", int_loongarch_lasx_xvftint_l_d, LASX256DOpnd>; + + +def XVFTINTRM_W_S : LASX_2R<0b0111011010011110001110>, + LASX_2RF_DESC_BASE<"xvftintrm.w.s", int_loongarch_lasx_xvftintrm_w_s, LASX256WOpnd>; + +def XVFTINTRM_L_D : LASX_2R<0b0111011010011110001111>, + LASX_2RF_DESC_BASE<"xvftintrm.l.d", int_loongarch_lasx_xvftintrm_l_d, LASX256DOpnd>; + + +def XVFTINTRP_W_S : LASX_2R<0b0111011010011110010000>, + LASX_2RF_DESC_BASE<"xvftintrp.w.s", int_loongarch_lasx_xvftintrp_w_s, LASX256WOpnd>; + +def XVFTINTRP_L_D : LASX_2R<0b0111011010011110010001>, + LASX_2RF_DESC_BASE<"xvftintrp.l.d", int_loongarch_lasx_xvftintrp_l_d, LASX256DOpnd>; + + +def XVFTINTRZ_W_S : LASX_2R<0b0111011010011110010010>, + LASX_2RF_DESC_BASE<"xvftintrz.w.s", fp_to_sint, LASX256WOpnd>; + +def XVFTINTRZ_L_D : LASX_2R<0b0111011010011110010011>, + LASX_2RF_DESC_BASE<"xvftintrz.l.d", fp_to_sint, LASX256DOpnd>; + + +def XVFTINTRNE_W_S : LASX_2R<0b0111011010011110010100>, + LASX_2RF_DESC_BASE<"xvftintrne.w.s", int_loongarch_lasx_xvftintrne_w_s, LASX256WOpnd>; + +def XVFTINTRNE_L_D : LASX_2R<0b0111011010011110010101>, + LASX_2RF_DESC_BASE<"xvftintrne.l.d", int_loongarch_lasx_xvftintrne_l_d, LASX256DOpnd>; + + +def XVFTINT_WU_S : LASX_2R<0b0111011010011110010110>, + LASX_2RF_DESC_BASE<"xvftint.wu.s", int_loongarch_lasx_xvftint_wu_s, LASX256WOpnd>; + +def XVFTINT_LU_D : LASX_2R<0b0111011010011110010111>, + LASX_2RF_DESC_BASE<"xvftint.lu.d", int_loongarch_lasx_xvftint_lu_d, LASX256DOpnd>; + + +def XVFTINTRZ_WU_S : LASX_2R<0b0111011010011110011100>, + LASX_2RF_DESC_BASE<"xvftintrz.wu.s", fp_to_uint, LASX256WOpnd>; + +def XVFTINTRZ_LU_D : LASX_2R<0b0111011010011110011101>, + LASX_2RF_DESC_BASE<"xvftintrz.lu.d", fp_to_uint, LASX256DOpnd>; + + +def XVFTINTL_L_S : LASX_2R<0b0111011010011110100000>, + LASX_2RF_DESC_BASE<"xvftintl.l.s", int_loongarch_lasx_xvftintl_l_s, LASX256DOpnd, LASX256WOpnd>; + +def XVFTINTH_L_S : LASX_2R<0b0111011010011110100001>, + LASX_2RF_DESC_BASE<"xvftinth.l.s", int_loongarch_lasx_xvftinth_l_s, LASX256DOpnd, LASX256WOpnd>; + + +def XVFTINTRML_L_S : LASX_2R<0b0111011010011110100010>, + LASX_2RF_DESC_BASE<"xvftintrml.l.s", int_loongarch_lasx_xvftintrml_l_s, LASX256DOpnd, LASX256WOpnd>; + +def XVFTINTRMH_L_S : 
LASX_2R<0b0111011010011110100011>, + LASX_2RF_DESC_BASE<"xvftintrmh.l.s", int_loongarch_lasx_xvftintrmh_l_s, LASX256DOpnd, LASX256WOpnd>; + + +def XVFTINTRPL_L_S : LASX_2R<0b0111011010011110100100>, + LASX_2RF_DESC_BASE<"xvftintrpl.l.s", int_loongarch_lasx_xvftintrpl_l_s, LASX256DOpnd, LASX256WOpnd>; + +def XVFTINTRPH_L_S : LASX_2R<0b0111011010011110100101>, + LASX_2RF_DESC_BASE<"xvftintrph.l.s", int_loongarch_lasx_xvftintrph_l_s, LASX256DOpnd, LASX256WOpnd>; + + +def XVFTINTRZL_L_S : LASX_2R<0b0111011010011110100110>, + LASX_2RF_DESC_BASE<"xvftintrzl.l.s", int_loongarch_lasx_xvftintrzl_l_s, LASX256DOpnd, LASX256WOpnd>; + +def XVFTINTRZH_L_S : LASX_2R<0b0111011010011110100111>, + LASX_2RF_DESC_BASE<"xvftintrzh.l.s", int_loongarch_lasx_xvftintrzh_l_s, LASX256DOpnd, LASX256WOpnd>; + + +def XVFTINTRNEL_L_S : LASX_2R<0b0111011010011110101000>, + LASX_2RF_DESC_BASE<"xvftintrnel.l.s", int_loongarch_lasx_xvftintrnel_l_s, LASX256DOpnd, LASX256WOpnd>; + +def XVFTINTRNEH_L_S : LASX_2R<0b0111011010011110101001>, + LASX_2RF_DESC_BASE<"xvftintrneh.l.s", int_loongarch_lasx_xvftintrneh_l_s, LASX256DOpnd, LASX256WOpnd>; + + +def XVEXTH_H_B : LASX_2R<0b0111011010011110111000>, + LASX_2R_DESC_BASE<"xvexth.h.b", int_loongarch_lasx_xvexth_h_b, LASX256HOpnd, LASX256BOpnd>; + +def XVEXTH_W_H : LASX_2R<0b0111011010011110111001>, + LASX_2R_DESC_BASE<"xvexth.w.h", int_loongarch_lasx_xvexth_w_h, LASX256WOpnd, LASX256HOpnd>; + +def XVEXTH_D_W : LASX_2R<0b0111011010011110111010>, + LASX_2R_DESC_BASE<"xvexth.d.w", int_loongarch_lasx_xvexth_d_w, LASX256DOpnd, LASX256WOpnd> ; + +def XVEXTH_Q_D : LASX_2R<0b0111011010011110111011>, + LASX_2R_DESC_BASE<"xvexth.q.d", int_loongarch_lasx_xvexth_q_d, LASX256DOpnd, LASX256DOpnd>; + + +def XVEXTH_HU_BU : LASX_2R<0b0111011010011110111100>, + LASX_2R_DESC_BASE<"xvexth.hu.bu", int_loongarch_lasx_xvexth_hu_bu, LASX256HOpnd, LASX256BOpnd>; + +def XVEXTH_WU_HU : LASX_2R<0b0111011010011110111101>, + LASX_2R_DESC_BASE<"xvexth.wu.hu", int_loongarch_lasx_xvexth_wu_hu, LASX256WOpnd, LASX256HOpnd>; + +def XVEXTH_DU_WU : LASX_2R<0b0111011010011110111110>, + LASX_2R_DESC_BASE<"xvexth.du.wu", int_loongarch_lasx_xvexth_du_wu, LASX256DOpnd, LASX256WOpnd> ; + +def XVEXTH_QU_DU : LASX_2R<0b0111011010011110111111>, + LASX_2R_DESC_BASE<"xvexth.qu.du", int_loongarch_lasx_xvexth_qu_du, LASX256DOpnd, LASX256DOpnd>; + + +def XVREPLGR2VR_B : LASX_2R_1GP<0b0111011010011111000000>, + LASX_2R_REPL_DESC_BASE<"xvreplgr2vr.b", v32i8, xvsplati8, LASX256BOpnd, GPR32Opnd>; + +def XVREPLGR2VR_H : LASX_2R_1GP<0b0111011010011111000001>, + LASX_2R_REPL_DESC_BASE<"xvreplgr2vr.h", v16i16, xvsplati16, LASX256HOpnd, GPR32Opnd>; + +def XVREPLGR2VR_W : LASX_2R_1GP<0b0111011010011111000010>, + LASX_2R_REPL_DESC_BASE<"xvreplgr2vr.w", v8i32, xvsplati32, LASX256WOpnd, GPR32Opnd>; + +def XVREPLGR2VR_D : LASX_2R_1GP<0b0111011010011111000011>, + LASX_2R_REPL_DESC_BASE<"xvreplgr2vr.d", v4i64, xvsplati64, LASX256DOpnd, GPR64Opnd>; + + +def VEXT2XV_H_B : LASX_2R<0b0111011010011111000100>, + LASX_XVEXTEND_DESC_BASE<"vext2xv.h.b", int_loongarch_lasx_vext2xv_h_b, v32i8, v16i16, LASX256BOpnd, LASX256HOpnd>; + +def VEXT2XV_W_B : LASX_2R<0b0111011010011111000101>, + LASX_XVEXTEND_DESC_BASE<"vext2xv.w.b", int_loongarch_lasx_vext2xv_w_b, v32i8, v8i32, LASX256BOpnd, LASX256WOpnd>; + +def VEXT2XV_D_B : LASX_2R<0b0111011010011111000110>, + LASX_XVEXTEND_DESC_BASE<"vext2xv.d.b", int_loongarch_lasx_vext2xv_d_b, v32i8, v4i64, LASX256BOpnd, LASX256DOpnd> ; + +def VEXT2XV_W_H : LASX_2R<0b0111011010011111000111>, + 
LASX_XVEXTEND_DESC_BASE<"vext2xv.w.h", int_loongarch_lasx_vext2xv_w_h, v16i16, v8i32, LASX256HOpnd, LASX256WOpnd>; + +def VEXT2XV_D_H : LASX_2R<0b0111011010011111001000>, + LASX_XVEXTEND_DESC_BASE<"vext2xv.d.h", int_loongarch_lasx_vext2xv_d_h, v16i16, v4i64, LASX256HOpnd, LASX256DOpnd> ; + +def VEXT2XV_D_W : LASX_2R<0b0111011010011111001001>, + LASX_XVEXTEND_DESC_BASE<"vext2xv.d.w", int_loongarch_lasx_vext2xv_d_w, v8i32, v4i64, LASX256WOpnd, LASX256DOpnd>; + + +def VEXT2XV_HU_BU : LASX_2R<0b0111011010011111001010>, + LASX_XVEXTEND_DESC_BASE<"vext2xv.hu.bu", int_loongarch_lasx_vext2xv_hu_bu, v32i8, v16i16, LASX256BOpnd, LASX256HOpnd>; + +def VEXT2XV_WU_BU : LASX_2R<0b0111011010011111001011>, + LASX_XVEXTEND_DESC_BASE<"vext2xv.wu.bu", int_loongarch_lasx_vext2xv_wu_bu, v32i8, v8i32, LASX256BOpnd, LASX256WOpnd>; + +def VEXT2XV_DU_BU : LASX_2R<0b0111011010011111001100>, + LASX_XVEXTEND_DESC_BASE<"vext2xv.du.bu", int_loongarch_lasx_vext2xv_du_bu, v32i8, v4i64, LASX256BOpnd, LASX256DOpnd> ; + +def VEXT2XV_WU_HU : LASX_2R<0b0111011010011111001101>, + LASX_XVEXTEND_DESC_BASE<"vext2xv.wu.hu", int_loongarch_lasx_vext2xv_wu_hu, v16i16, v8i32, LASX256HOpnd, LASX256WOpnd>; + +def VEXT2XV_DU_HU : LASX_2R<0b0111011010011111001110>, + LASX_XVEXTEND_DESC_BASE<"vext2xv.du.hu", int_loongarch_lasx_vext2xv_du_hu, v16i16, v4i64, LASX256HOpnd, LASX256DOpnd> ; + +def VEXT2XV_DU_WU : LASX_2R<0b0111011010011111001111>, + LASX_XVEXTEND_DESC_BASE<"vext2xv.du.wu", int_loongarch_lasx_vext2xv_du_wu, v8i32, v4i64, LASX256WOpnd, LASX256DOpnd>; + + +def XVHSELI_D : LASX_I5_U<0b01110110100111111>, + LASX_U5N_DESC_BASE<"xvhseli.d", LASX256DOpnd>; + + +def XVROTRI_B : LASX_I3_U<0b0111011010100000001>, + LASX_RORI_U3_DESC_BASE_Intrinsic<"xvrotri.b", int_loongarch_lasx_xvrotri_b, uimm3, immZExt3, LASX256BOpnd>; + +def XVROTRI_H : LASX_I4_U<0b011101101010000001>, + LASX_RORI_U4_DESC_BASE_Intrinsic<"xvrotri.h", int_loongarch_lasx_xvrotri_h, uimm4, immZExt4, LASX256HOpnd>; + +def XVROTRI_W : LASX_I5_U<0b01110110101000001>, + LASX_RORI_U5_DESC_BASE_Intrinsic<"xvrotri.w", int_loongarch_lasx_xvrotri_w, uimm5, immZExt5, LASX256WOpnd>; + +def XVROTRI_D : LASX_I6_U<0b0111011010100001>, + LASX_RORI_U6_DESC_BASE_Intrinsic<"xvrotri.d", int_loongarch_lasx_xvrotri_d, uimm6, immZExt6, LASX256DOpnd>; + + +def XVSRLRI_B : LASX_I3_U<0b0111011010100100001>, + LASX_BIT_3_DESC_BASE<"xvsrlri.b", int_loongarch_lasx_xvsrlri_b, uimm3, immZExt3, LASX256BOpnd>; + +def XVSRLRI_H : LASX_I4_U<0b011101101010010001>, + LASX_BIT_4_DESC_BASE<"xvsrlri.h", int_loongarch_lasx_xvsrlri_h, uimm4, immZExt4, LASX256HOpnd>; + +def XVSRLRI_W : LASX_I5_U<0b01110110101001001>, + LASX_BIT_5_DESC_BASE<"xvsrlri.w", int_loongarch_lasx_xvsrlri_w, uimm5, immZExt5, LASX256WOpnd>; + +def XVSRLRI_D : LASX_I6_U<0b0111011010100101>, + LASX_BIT_6_DESC_BASE<"xvsrlri.d", int_loongarch_lasx_xvsrlri_d, uimm6, immZExt6, LASX256DOpnd>; + + +def XVSRARI_B : LASX_I3_U<0b0111011010101000001>, + LASX_BIT_3_DESC_BASE<"xvsrari.b", int_loongarch_lasx_xvsrari_b, uimm3, immZExt3, LASX256BOpnd>; + +def XVSRARI_H : LASX_I4_U<0b011101101010100001>, + LASX_BIT_4_DESC_BASE<"xvsrari.h", int_loongarch_lasx_xvsrari_h, uimm4, immZExt4, LASX256HOpnd>; + +def XVSRARI_W : LASX_I5_U<0b01110110101010001>, + LASX_BIT_5_DESC_BASE<"xvsrari.w", int_loongarch_lasx_xvsrari_w, uimm5, immZExt5, LASX256WOpnd>; + +def XVSRARI_D : LASX_I6_U<0b0111011010101001>, + LASX_BIT_6_DESC_BASE<"xvsrari.d", int_loongarch_lasx_xvsrari_d, uimm6, immZExt6, LASX256DOpnd>; + + +def XVINSGR2VR_W : LASX_I3_R_U<0b0111011011101011110>, + 
LASX_INSERT_U3_DESC_BASE<"xvinsgr2vr.w", v8i32, uimm3_ptr, immZExt3Ptr, LASX256WOpnd, GPR32Opnd>; + +def XVINSGR2VR_D : LASX_I2_R_U<0b01110110111010111110>, + LASX_INSERT_U2_DESC_BASE<"xvinsgr2vr.d", v4i64, uimm2_ptr, immZExt2Ptr, LASX256DOpnd, GPR64Opnd>; + + +def XVPICKVE2GR_W : LASX_ELM_COPY_U3<0b0111011011101111110>, + LASX_COPY_U3_DESC_BASE<"xvpickve2gr.w", vextract_sext_i32, v8i32, uimm3_ptr, immZExt3Ptr, GPR32Opnd, LASX256WOpnd>; + +def XVPICKVE2GR_D : LASX_ELM_COPY_U2<0b01110110111011111110>, + LASX_COPY_U2_DESC_BASE<"xvpickve2gr.d", vextract_sext_i64, v4i64, uimm2_ptr, immZExt2Ptr, GPR64Opnd, LASX256DOpnd>; + + +def XVPICKVE2GR_WU : LASX_ELM_COPY_U3<0b0111011011110011110>, + LASX_COPY_U3_DESC_BASE<"xvpickve2gr.wu", vextract_zext_i32, v8i32, uimm3_ptr, immZExt3Ptr, GPR32Opnd, LASX256WOpnd>; + +def XVPICKVE2GR_DU : LASX_ELM_COPY_U2<0b01110110111100111110>, + LASX_COPY_U2_DESC_BASE<"xvpickve2gr.du", vextract_zext_i64, v4i64, uimm2_ptr, immZExt2Ptr, GPR64Opnd, LASX256DOpnd>; + + +def XVREPL128VEI_B : LASX_I4_U<0b011101101111011110>, + LASX_ELM_U4_VREPLVE_DESC_BASE_Intrinsic<"xvrepl128vei.b", int_loongarch_lasx_xvrepl128vei_b, LASX256BOpnd>; + +def XVREPL128VEI_H : LASX_I3_U<0b0111011011110111110>, + LASX_ELM_U3_VREPLVE_DESC_BASE_Intrinsic<"xvrepl128vei.h", int_loongarch_lasx_xvrepl128vei_h, LASX256HOpnd>; + +def XVREPL128VEI_W : LASX_I2_U<0b01110110111101111110>, + LASX_ELM_U2_VREPLVE_DESC_BASE_Intrinsic<"xvrepl128vei.w", int_loongarch_lasx_xvrepl128vei_w, LASX256WOpnd>; + +def XVREPL128VEI_D : LASX_I1_U<0b011101101111011111110>, + LASX_ELM_U1_VREPLVE_DESC_BASE_Intrinsic<"xvrepl128vei.d", int_loongarch_lasx_xvrepl128vei_d, LASX256DOpnd>; + + +def XVINSVE0_W : LASX_I3_U<0b0111011011111111110>, + LASX_BIT_3_4O_DESC_BASE<"xvinsve0.w", int_loongarch_lasx_xvinsve0_w, uimm3, immZExt3, LASX256WOpnd>; + +def XVINSVE0_D : LASX_I2_U<0b01110110111111111110>, + LASX_BIT_2_4O_DESC_BASE<"xvinsve0.d", int_loongarch_lasx_xvinsve0_d, uimm2, immZExt2, LASX256DOpnd>; + + +def XVPICKVE_W : LASX_I3_U<0b0111011100000011110>, + LASX_BIT_3_4ON<"xvpickve.w", uimm3, immZExt3, LASX256WOpnd>; + +def XVPICKVE_D : LASX_I2_U<0b01110111000000111110>, + LASX_BIT_2_4ON<"xvpickve.d", uimm2, immZExt2, LASX256DOpnd>; + + +def XVREPLVE0_B : LASX_2R<0b0111011100000111000000>, + LASX_XVBROADCAST_DESC_BASE<"xvreplve0.b", int_loongarch_lasx_xvreplve0_b, v32i8, LASX256BOpnd>; + +def XVREPLVE0_H : LASX_2R<0b0111011100000111100000>, + LASX_XVBROADCAST_DESC_BASE<"xvreplve0.h", int_loongarch_lasx_xvreplve0_h, v16i16, LASX256HOpnd>; + +def XVREPLVE0_W : LASX_2R<0b0111011100000111110000>, + LASX_XVBROADCAST_DESC_BASE<"xvreplve0.w", int_loongarch_lasx_xvreplve0_w, v8i32, LASX256WOpnd> ; + +def XVREPLVE0_D : LASX_2R<0b0111011100000111111000>, + LASX_XVBROADCAST_DESC_BASE<"xvreplve0.d", xvbroadcast_v4i64, v4i64, LASX256DOpnd>; + +def XVREPLVE0_Q : LASX_2R<0b0111011100000111111100>, + LASX_XVBROADCAST_DESC_BASE<"xvreplve0.q", int_loongarch_lasx_xvreplve0_q, v32i8, LASX256BOpnd>; + + +def XVSLLWIL_H_B : LASX_I3_U<0b0111011100001000001>, + LASX_2R_U3_DESC_BASE<"xvsllwil.h.b", int_loongarch_lasx_xvsllwil_h_b, LASX256HOpnd, LASX256BOpnd>; + +def XVSLLWIL_W_H : LASX_I4_U<0b011101110000100001>, + LASX_2R_U4_DESC_BASE<"xvsllwil.w.h", int_loongarch_lasx_xvsllwil_w_h, LASX256WOpnd, LASX256HOpnd>; + +def XVSLLWIL_D_W : LASX_I5_U<0b01110111000010001>, + LASX_2R_U5_DESC_BASE<"xvsllwil.d.w", int_loongarch_lasx_xvsllwil_d_w, LASX256DOpnd, LASX256WOpnd> ; + + +def XVEXTL_Q_D : LASX_2R<0b0111011100001001000000>, + LASX_2R_DESC_BASE<"xvextl.q.d", 
int_loongarch_lasx_xvextl_q_d, LASX256DOpnd, LASX256DOpnd>; + + +def XVSLLWIL_HU_BU : LASX_I3_U<0b0111011100001100001>, + LASX_2R_U3_DESC_BASE<"xvsllwil.hu.bu", int_loongarch_lasx_xvsllwil_hu_bu, LASX256HOpnd, LASX256BOpnd>; + +def XVSLLWIL_WU_HU : LASX_I4_U<0b011101110000110001>, + LASX_2R_U4_DESC_BASE<"xvsllwil.wu.hu", int_loongarch_lasx_xvsllwil_wu_hu, LASX256WOpnd, LASX256HOpnd>; + +def XVSLLWIL_DU_WU : LASX_I5_U<0b01110111000011001>, + LASX_2R_U5_DESC_BASE<"xvsllwil.du.wu", int_loongarch_lasx_xvsllwil_du_wu, LASX256DOpnd, LASX256WOpnd> ; + + +def XVEXTL_QU_DU : LASX_2R<0b0111011100001101000000>, + LASX_2R_DESC_BASE<"xvextl.qu.du", int_loongarch_lasx_xvextl_qu_du, LASX256DOpnd, LASX256DOpnd>; + + +def XVBITCLRI_B : LASX_I3_U<0b0111011100010000001>, + LASX_2R_U3_DESC_BASE<"xvbitclri.b", int_loongarch_lasx_xvbitclri_b, LASX256BOpnd, LASX256BOpnd>; + +def XVBITCLRI_H : LASX_I4_U<0b011101110001000001>, + LASX_2R_U4_DESC_BASE<"xvbitclri.h", int_loongarch_lasx_xvbitclri_h, LASX256HOpnd, LASX256HOpnd>; + +def XVBITCLRI_W : LASX_I5_U<0b01110111000100001>, + LASX_2R_U5_DESC_BASE<"xvbitclri.w", int_loongarch_lasx_xvbitclri_w, LASX256WOpnd, LASX256WOpnd>; + +def XVBITCLRI_D : LASX_I6_U<0b0111011100010001>, + LASX_2R_U6_DESC_BASE<"xvbitclri.d", int_loongarch_lasx_xvbitclri_d, LASX256DOpnd, LASX256DOpnd>; + + +def XVBITSETI_B : LASX_I3_U<0b0111011100010100001>, + LASX_2R_U3_DESC_BASE<"xvbitseti.b", int_loongarch_lasx_xvbitseti_b, LASX256BOpnd, LASX256BOpnd>; + +def XVBITSETI_H : LASX_I4_U<0b011101110001010001>, + LASX_2R_U4_DESC_BASE<"xvbitseti.h", int_loongarch_lasx_xvbitseti_h, LASX256HOpnd, LASX256HOpnd>; + +def XVBITSETI_W : LASX_I5_U<0b01110111000101001>, + LASX_2R_U5_DESC_BASE<"xvbitseti.w", int_loongarch_lasx_xvbitseti_w, LASX256WOpnd, LASX256WOpnd>; + +def XVBITSETI_D : LASX_I6_U<0b0111011100010101>, + LASX_2R_U6_DESC_BASE<"xvbitseti.d", int_loongarch_lasx_xvbitseti_d, LASX256DOpnd, LASX256DOpnd>; + + +def XVBITREVI_B : LASX_I3_U<0b0111011100011000001>, + LASX_2R_U3_DESC_BASE<"xvbitrevi.b", int_loongarch_lasx_xvbitrevi_b, LASX256BOpnd, LASX256BOpnd>; + +def XVBITREVI_H : LASX_I4_U<0b011101110001100001>, + LASX_2R_U4_DESC_BASE<"xvbitrevi.h", int_loongarch_lasx_xvbitrevi_h, LASX256HOpnd, LASX256HOpnd>; + +def XVBITREVI_W : LASX_I5_U<0b01110111000110001>, + LASX_2R_U5_DESC_BASE<"xvbitrevi.w", int_loongarch_lasx_xvbitrevi_w, LASX256WOpnd, LASX256WOpnd>; + +def XVBITREVI_D : LASX_I6_U<0b0111011100011001>, + LASX_2R_U6_DESC_BASE<"xvbitrevi.d", int_loongarch_lasx_xvbitrevi_d, LASX256DOpnd, LASX256DOpnd>; + + +def XVSAT_B : LASX_I3_U<0b0111011100100100001>, + LASX_BIT_3_DESC_BASE<"xvsat.b", int_loongarch_lasx_xvsat_b, uimm3, immZExt3, LASX256BOpnd>; + +def XVSAT_H : LASX_I4_U<0b011101110010010001>, + LASX_BIT_4_DESC_BASE<"xvsat.h", int_loongarch_lasx_xvsat_h, uimm4, immZExt4, LASX256HOpnd>; + +def XVSAT_W : LASX_I5_U<0b01110111001001001>, + LASX_BIT_5_DESC_BASE<"xvsat.w", int_loongarch_lasx_xvsat_w, uimm5, immZExt5, LASX256WOpnd>; + +def XVSAT_D : LASX_I6_U<0b0111011100100101>, + LASX_BIT_6_DESC_BASE<"xvsat.d", int_loongarch_lasx_xvsat_d, uimm6, immZExt6, LASX256DOpnd>; + + +def XVSAT_BU : LASX_I3_U<0b0111011100101000001>, + LASX_BIT_3_DESC_BASE<"xvsat.bu", int_loongarch_lasx_xvsat_bu, uimm3, immZExt3, LASX256BOpnd>; + +def XVSAT_HU : LASX_I4_U<0b011101110010100001>, + LASX_BIT_4_DESC_BASE<"xvsat.hu", int_loongarch_lasx_xvsat_hu, uimm4, immZExt4, LASX256HOpnd>; + +def XVSAT_WU : LASX_I5_U<0b01110111001010001>, + LASX_BIT_5_DESC_BASE<"xvsat.wu", int_loongarch_lasx_xvsat_wu, uimm5, immZExt5, 
LASX256WOpnd>; + +def XVSAT_DU : LASX_I6_U<0b0111011100101001>, + LASX_BIT_6_DESC_BASE<"xvsat.du", int_loongarch_lasx_xvsat_du, uimm6, immZExt6, LASX256DOpnd>; + + +def XVSLLI_B : LASX_I3_U<0b0111011100101100001>, + LASX_BIT_U3_VREPLVE_DESC_BASE_Intrinsic<"xvslli.b", int_loongarch_lasx_xvslli_b, uimm3, immZExt3, LASX256BOpnd>; + +def XVSLLI_H : LASX_I4_U<0b011101110010110001>, + LASX_BIT_U4_VREPLVE_DESC_BASE_Intrinsic<"xvslli.h", int_loongarch_lasx_xvslli_h, uimm4, immZExt4, LASX256HOpnd>; + +def XVSLLI_W : LASX_I5_U<0b01110111001011001>, + LASX_BIT_U5_VREPLVE_DESC_BASE_Intrinsic<"xvslli.w", int_loongarch_lasx_xvslli_w, uimm5, immZExt5, LASX256WOpnd>; + +def XVSLLI_D : LASX_I6_U<0b0111011100101101>, + LASX_BIT_U6_VREPLVE_DESC_BASE_Intrinsic<"xvslli.d", int_loongarch_lasx_xvslli_d, uimm6, immZExt6, LASX256DOpnd>; + + +def XVSRLI_B : LASX_I3_U<0b0111011100110000001>, + LASX_BIT_U3_VREPLVE_DESC_BASE_Intrinsic<"xvsrli.b", int_loongarch_lasx_xvsrli_b, uimm3, immZExt3, LASX256BOpnd>; + +def XVSRLI_H : LASX_I4_U<0b011101110011000001>, + LASX_BIT_U4_VREPLVE_DESC_BASE_Intrinsic<"xvsrli.h", int_loongarch_lasx_xvsrli_h, uimm4, immZExt4, LASX256HOpnd>; + +def XVSRLI_W : LASX_I5_U<0b01110111001100001>, + LASX_BIT_U5_VREPLVE_DESC_BASE_Intrinsic<"xvsrli.w", int_loongarch_lasx_xvsrli_w, uimm5, immZExt5, LASX256WOpnd>; + +def XVSRLI_D : LASX_I6_U<0b0111011100110001>, + LASX_BIT_U6_VREPLVE_DESC_BASE_Intrinsic<"xvsrli.d", int_loongarch_lasx_xvsrli_d, uimm6, immZExt6, LASX256DOpnd>; + + +def XVSRAI_B : LASX_I3_U<0b0111011100110100001>, + LASX_BIT_U3_VREPLVE_DESC_BASE_Intrinsic<"xvsrai.b", int_loongarch_lasx_xvsrai_b, uimm3, immZExt3, LASX256BOpnd>; + +def XVSRAI_H : LASX_I4_U<0b011101110011010001>, + LASX_BIT_U4_VREPLVE_DESC_BASE_Intrinsic<"xvsrai.h", int_loongarch_lasx_xvsrai_h, uimm4, immZExt4, LASX256HOpnd>; + +def XVSRAI_W : LASX_I5_U<0b01110111001101001>, + LASX_BIT_U5_VREPLVE_DESC_BASE_Intrinsic<"xvsrai.w", int_loongarch_lasx_xvsrai_w, uimm5, immZExt5, LASX256WOpnd>; + +def XVSRAI_D : LASX_I6_U<0b0111011100110101>, + LASX_BIT_U6_VREPLVE_DESC_BASE_Intrinsic<"xvsrai.d", int_loongarch_lasx_xvsrai_d, uimm6, immZExt6, LASX256DOpnd>; + + +def XVSRLNI_B_H : LASX_I4_U<0b011101110100000001>, + LASX_U4_DESC_BASE<"xvsrlni.b.h", int_loongarch_lasx_xvsrlni_b_h, uimm4, immZExt4, LASX256BOpnd>; + +def XVSRLNI_H_W : LASX_I5_U<0b01110111010000001>, + LASX_N4_U5_DESC_BASE<"xvsrlni.h.w", int_loongarch_lasx_xvsrlni_h_w, uimm5, immZExt5, LASX256HOpnd>; + +def XVSRLNI_W_D : LASX_I6_U<0b0111011101000001>, + LASX_U6_DESC_BASE<"xvsrlni.w.d", int_loongarch_lasx_xvsrlni_w_d, uimm6, immZExt6, LASX256WOpnd>; + +def XVSRLNI_D_Q : LASX_I7_U<0b011101110100001>, + LASX_D_DESC_BASE<"xvsrlni.d.q", int_loongarch_lasx_xvsrlni_d_q, LASX256DOpnd>; + + +def XVSRLRNI_B_H : LASX_I4_U<0b011101110100010001>, + LASX_U4_DESC_BASE<"xvsrlrni.b.h", int_loongarch_lasx_xvsrlrni_b_h, uimm4, immZExt4, LASX256BOpnd>; + +def XVSRLRNI_H_W : LASX_I5_U<0b01110111010001001>, + LASX_N4_U5_DESC_BASE<"xvsrlrni.h.w", int_loongarch_lasx_xvsrlrni_h_w, uimm5, immZExt5, LASX256HOpnd>; + +def XVSRLRNI_W_D : LASX_I6_U<0b0111011101000101>, + LASX_U6_DESC_BASE<"xvsrlrni.w.d", int_loongarch_lasx_xvsrlrni_w_d, uimm6, immZExt6, LASX256WOpnd>; + +def XVSRLRNI_D_Q : LASX_I7_U<0b011101110100011>, + LASX_D_DESC_BASE<"xvsrlrni.d.q", int_loongarch_lasx_xvsrlrni_d_q, LASX256DOpnd>; + + +def XVSSRLNI_B_H : LASX_I4_U<0b011101110100100001>, + LASX_U4_DESC_BASE<"xvssrlni.b.h", int_loongarch_lasx_xvssrlni_b_h, uimm4, immZExt4, LASX256BOpnd>; + +def XVSSRLNI_H_W : 
LASX_I5_U<0b01110111010010001>, + LASX_N4_U5_DESC_BASE<"xvssrlni.h.w", int_loongarch_lasx_xvssrlni_h_w, uimm5, immZExt5, LASX256HOpnd>; + +def XVSSRLNI_W_D : LASX_I6_U<0b0111011101001001>, + LASX_U6_DESC_BASE<"xvssrlni.w.d", int_loongarch_lasx_xvssrlni_w_d, uimm6, immZExt6, LASX256WOpnd>; + +def XVSSRLNI_D_Q : LASX_I7_U<0b011101110100101>, + LASX_D_DESC_BASE<"xvssrlni.d.q", int_loongarch_lasx_xvssrlni_d_q, LASX256DOpnd>; + + +def XVSSRLNI_BU_H : LASX_I4_U<0b011101110100110001>, + LASX_U4_DESC_BASE<"xvssrlni.bu.h", int_loongarch_lasx_xvssrlni_bu_h, uimm4, immZExt4, LASX256BOpnd>; + +def XVSSRLNI_HU_W : LASX_I5_U<0b01110111010011001>, + LASX_N4_U5_DESC_BASE<"xvssrlni.hu.w", int_loongarch_lasx_xvssrlni_hu_w, uimm5, immZExt5, LASX256HOpnd>; + +def XVSSRLNI_WU_D : LASX_I6_U<0b0111011101001101>, + LASX_U6_DESC_BASE<"xvssrlni.wu.d", int_loongarch_lasx_xvssrlni_wu_d, uimm6, immZExt6, LASX256WOpnd>; + +def XVSSRLNI_DU_Q : LASX_I7_U<0b011101110100111>, + LASX_D_DESC_BASE<"xvssrlni.du.q", int_loongarch_lasx_xvssrlni_du_q, LASX256DOpnd>; + + +def XVSSRLRNI_B_H : LASX_I4_U<0b011101110101000001>, + LASX_2R_3R_U4_DESC_BASE<"xvssrlrni.b.h", int_loongarch_lasx_xvssrlrni_b_h, LASX256BOpnd, LASX256BOpnd>; + +def XVSSRLRNI_H_W : LASX_I5_U<0b01110111010100001>, + LASX_2R_3R_U5_DESC_BASE<"xvssrlrni.h.w", int_loongarch_lasx_xvssrlrni_h_w, LASX256HOpnd, LASX256HOpnd>; + +def XVSSRLRNI_W_D : LASX_I6_U<0b0111011101010001>, + LASX_2R_3R_U6_DESC_BASE<"xvssrlrni.w.d", int_loongarch_lasx_xvssrlrni_w_d, LASX256WOpnd, LASX256WOpnd>; + +def XVSSRLRNI_D_Q : LASX_I7_U<0b011101110101001>, + LASX_2R_3R_U7_DESC_BASE<"xvssrlrni.d.q", int_loongarch_lasx_xvssrlrni_d_q, LASX256DOpnd, LASX256DOpnd>; + + +def XVSSRLRNI_BU_H : LASX_I4_U<0b011101110101010001>, + LASX_U4_DESC_BASE<"xvssrlrni.bu.h", int_loongarch_lasx_xvssrlrni_bu_h, uimm4, immZExt4, LASX256BOpnd>; + +def XVSSRLRNI_HU_W : LASX_I5_U<0b01110111010101001>, + LASX_N4_U5_DESC_BASE<"xvssrlrni.hu.w", int_loongarch_lasx_xvssrlrni_hu_w, uimm5, immZExt5, LASX256HOpnd>; + +def XVSSRLRNI_WU_D : LASX_I6_U<0b0111011101010101>, + LASX_U6_DESC_BASE<"xvssrlrni.wu.d", int_loongarch_lasx_xvssrlrni_wu_d, uimm6, immZExt6, LASX256WOpnd>; + +def XVSSRLRNI_DU_Q : LASX_I7_U<0b011101110101011>, + LASX_D_DESC_BASE<"xvssrlrni.du.q", int_loongarch_lasx_xvssrlrni_du_q, LASX256DOpnd>; + + +def XVSRANI_B_H : LASX_I4_U<0b011101110101100001>, + LASX_2R_3R_U4_DESC_BASE<"xvsrani.b.h", int_loongarch_lasx_xvsrani_b_h, LASX256BOpnd, LASX256BOpnd>; + +def XVSRANI_H_W : LASX_I5_U<0b01110111010110001>, + LASX_2R_3R_U5_DESC_BASE<"xvsrani.h.w", int_loongarch_lasx_xvsrani_h_w, LASX256HOpnd, LASX256HOpnd>; + +def XVSRANI_W_D : LASX_I6_U<0b0111011101011001>, + LASX_2R_3R_U6_DESC_BASE<"xvsrani.w.d", int_loongarch_lasx_xvsrani_w_d, LASX256WOpnd, LASX256WOpnd>; + +def XVSRANI_D_Q : LASX_I7_U<0b011101110101101>, + LASX_2R_3R_U7_DESC_BASE<"xvsrani.d.q", int_loongarch_lasx_xvsrani_d_q, LASX256DOpnd, LASX256DOpnd>; + + +def XVSRARNI_B_H : LASX_I4_U<0b011101110101110001>, + LASX_U4_DESC_BASE<"xvsrarni.b.h", int_loongarch_lasx_xvsrarni_b_h, uimm4, immZExt4, LASX256BOpnd>; + +def XVSRARNI_H_W : LASX_I5_U<0b01110111010111001>, + LASX_N4_U5_DESC_BASE<"xvsrarni.h.w", int_loongarch_lasx_xvsrarni_h_w, uimm5, immZExt5, LASX256HOpnd>; + +def XVSRARNI_W_D : LASX_I6_U<0b0111011101011101>, + LASX_U6_DESC_BASE<"xvsrarni.w.d", int_loongarch_lasx_xvsrarni_w_d, uimm6, immZExt6, LASX256WOpnd>; + +def XVSRARNI_D_Q : LASX_I7_U<0b011101110101111>, + LASX_D_DESC_BASE<"xvsrarni.d.q", int_loongarch_lasx_xvsrarni_d_q, LASX256DOpnd>; + + +def 
XVSSRANI_B_H : LASX_I4_U<0b011101110110000001>, + LASX_U4_DESC_BASE<"xvssrani.b.h", int_loongarch_lasx_xvssrani_b_h, uimm4, immZExt4, LASX256BOpnd>; + +def XVSSRANI_H_W : LASX_I5_U<0b01110111011000001>, + LASX_N4_U5_DESC_BASE<"xvssrani.h.w", int_loongarch_lasx_xvssrani_h_w, uimm5, immZExt5, LASX256HOpnd>; + +def XVSSRANI_W_D : LASX_I6_U<0b0111011101100001>, + LASX_U6_DESC_BASE<"xvssrani.w.d", int_loongarch_lasx_xvssrani_w_d, uimm6, immZExt6, LASX256WOpnd>; + +def XVSSRANI_D_Q : LASX_I7_U<0b011101110110001>, + LASX_D_DESC_BASE<"xvssrani.d.q", int_loongarch_lasx_xvssrani_d_q, LASX256DOpnd>; + + +def XVSSRANI_BU_H : LASX_I4_U<0b011101110110010001>, + LASX_U4_DESC_BASE<"xvssrani.bu.h", int_loongarch_lasx_xvssrani_bu_h, uimm4, immZExt4, LASX256BOpnd>; + +def XVSSRANI_HU_W : LASX_I5_U<0b01110111011001001>, + LASX_N4_U5_DESC_BASE<"xvssrani.hu.w", int_loongarch_lasx_xvssrani_hu_w, uimm5, immZExt5, LASX256HOpnd>; + +def XVSSRANI_WU_D : LASX_I6_U<0b0111011101100101>, + LASX_U6_DESC_BASE<"xvssrani.wu.d", int_loongarch_lasx_xvssrani_wu_d, uimm6, immZExt6, LASX256WOpnd>; + +def XVSSRANI_DU_Q : LASX_I7_U<0b011101110110011>, + LASX_D_DESC_BASE<"xvssrani.du.q", int_loongarch_lasx_xvssrani_du_q, LASX256DOpnd>; + + +def XVSSRARNI_B_H : LASX_I4_U<0b011101110110100001>, + LASX_U4_DESC_BASE<"xvssrarni.b.h", int_loongarch_lasx_xvssrarni_b_h, uimm4, immZExt4, LASX256BOpnd>; + +def XVSSRARNI_H_W : LASX_I5_U<0b01110111011010001>, + LASX_N4_U5_DESC_BASE<"xvssrarni.h.w", int_loongarch_lasx_xvssrarni_h_w, uimm5, immZExt5, LASX256HOpnd>; + +def XVSSRARNI_W_D : LASX_I6_U<0b0111011101101001>, + LASX_U6_DESC_BASE<"xvssrarni.w.d", int_loongarch_lasx_xvssrarni_w_d, uimm6, immZExt6, LASX256WOpnd>; + +def XVSSRARNI_D_Q : LASX_I7_U<0b011101110110101>, + LASX_D_DESC_BASE<"xvssrarni.d.q", int_loongarch_lasx_xvssrarni_d_q, LASX256DOpnd>; + + +def XVSSRARNI_BU_H : LASX_I4_U<0b011101110110110001>, + LASX_U4_DESC_BASE<"xvssrarni.bu.h", int_loongarch_lasx_xvssrarni_bu_h, uimm4, immZExt4, LASX256BOpnd>; + +def XVSSRARNI_HU_W : LASX_I5_U<0b01110111011011001>, + LASX_N4_U5_DESC_BASE<"xvssrarni.hu.w", int_loongarch_lasx_xvssrarni_hu_w, uimm5, immZExt5, LASX256HOpnd>; + +def XVSSRARNI_WU_D : LASX_I6_U<0b0111011101101101>, + LASX_U6_DESC_BASE<"xvssrarni.wu.d", int_loongarch_lasx_xvssrarni_wu_d, uimm6, immZExt6, LASX256WOpnd>; + +def XVSSRARNI_DU_Q : LASX_I7_U<0b011101110110111>, + LASX_D_DESC_BASE<"xvssrarni.du.q", int_loongarch_lasx_xvssrarni_du_q, LASX256DOpnd>; + + +def XVEXTRINS_B : LASX_I8_U<0b01110111100011>, + LASX_2R_3R_U8_DESC_BASE<"xvextrins.b", int_loongarch_lasx_xvextrins_b, LASX256BOpnd, LASX256BOpnd>; + +def XVEXTRINS_H : LASX_I8_U<0b01110111100010>, + LASX_2R_3R_U8_DESC_BASE<"xvextrins.h", int_loongarch_lasx_xvextrins_h, LASX256HOpnd, LASX256HOpnd>; + +def XVEXTRINS_W : LASX_I8_U<0b01110111100001>, + LASX_2R_3R_U8_DESC_BASE<"xvextrins.w", int_loongarch_lasx_xvextrins_w, LASX256WOpnd, LASX256WOpnd>; + +def XVEXTRINS_D : LASX_I8_U<0b01110111100000>, + LASX_2R_3R_U8_DESC_BASE<"xvextrins.d", int_loongarch_lasx_xvextrins_d, LASX256DOpnd, LASX256DOpnd>; + + +def XVSHUF4I_B : LASX_I8_U<0b01110111100100>, + LASX_I8_SHF_DESC_BASE<"xvshuf4i.b", int_loongarch_lasx_xvshuf4i_b, LASX256BOpnd>; + +def XVSHUF4I_H : LASX_I8_U<0b01110111100101>, + LASX_I8_SHF_DESC_BASE<"xvshuf4i.h", int_loongarch_lasx_xvshuf4i_h, LASX256HOpnd>; + +def XVSHUF4I_W : LASX_I8_U<0b01110111100110>, + LASX_I8_SHF_DESC_BASE<"xvshuf4i.w", int_loongarch_lasx_xvshuf4i_w, LASX256WOpnd>; + +def XVSHUF4I_D : LASX_I8_U<0b01110111100111>, + 
LASX_I8_O4_SHF_DESC_BASE<"xvshuf4i.d", int_loongarch_lasx_xvshuf4i_d, LASX256DOpnd>;
+
+
+def XVBITSELI_B : LASX_I8_U<0b01110111110001>,
+ LASX_2R_3R_U8_DESC_BASE<"xvbitseli.b", int_loongarch_lasx_xvbitseli_b, LASX256BOpnd, LASX256BOpnd>;
+
+
+def XVANDI_B : LASX_I8_U<0b01110111110100>,
+ LASX_2R_U8_DESC_BASE<"xvandi.b", int_loongarch_lasx_xvandi_b, LASX256BOpnd, LASX256BOpnd>;
+
+
+def XVORI_B : LASX_I8_U<0b01110111110101>,
+ LASX_2R_U8_DESC_BASE<"xvori.b", int_loongarch_lasx_xvori_b, LASX256BOpnd, LASX256BOpnd>;
+
+
+def XVXORI_B : LASX_I8_U<0b01110111110110>,
+ LASX_2R_U8_DESC_BASE<"xvxori.b", int_loongarch_lasx_xvxori_b, LASX256BOpnd, LASX256BOpnd>;
+
+
+def XVNORI_B : LASX_I8_U<0b01110111110111>,
+ LASX_2R_U8_DESC_BASE<"xvnori.b", int_loongarch_lasx_xvnori_b, LASX256BOpnd, LASX256BOpnd>;
+
+
+def XVLDI : LASX_1R_I13<0b01110111111000>,
+ LASX_I13_DESC_BASE<"xvldi", int_loongarch_lasx_xvldi, i32, simm13Op, LASX256DOpnd>;
+
+def XVLDI_B : LASX_1R_I13_I10<0b01110111111000000>,
+ LASX_I13_DESC_BASE_tmp<"xvldi", LASX256BOpnd>;
+
+def XVLDI_H : LASX_1R_I13_I10<0b01110111111000001>,
+ LASX_I13_DESC_BASE_tmp<"xvldi", LASX256HOpnd>;
+
+def XVLDI_W : LASX_1R_I13_I10<0b01110111111000010>,
+ LASX_I13_DESC_BASE_tmp<"xvldi", LASX256WOpnd>;
+
+def XVLDI_D : LASX_1R_I13_I10<0b01110111111000011>,
+ LASX_I13_DESC_BASE_tmp<"xvldi", LASX256DOpnd>;
+
+
+def XVPERMI_W : LASX_I8_U<0b01110111111001>,
+ LASX_2R_3R_U8_DESC_BASE<"xvpermi.w", int_loongarch_lasx_xvpermi_w, LASX256WOpnd, LASX256WOpnd>;
+
+def XVPERMI_D : LASX_I8_U<0b01110111111010>,
+ LASX_2R_U8_DESC_BASE<"xvpermi.d", int_loongarch_lasx_xvpermi_d, LASX256DOpnd, LASX256DOpnd>;
+
+def XVPERMI_Q : LASX_I8_U<0b01110111111011>,
+ LASX_2R_3R_U8_DESC_BASE<"xvpermi.q", int_loongarch_lasx_xvpermi_q, LASX256BOpnd, LASX256BOpnd>;
+
+
+// Patterns
+
+class LASXBitconvertPat<ValueType DstVT, ValueType SrcVT, RegisterClass DstRC,
+ list<Predicate> preds = [HasLASX]> :
+ LASXPat<(DstVT (bitconvert SrcVT:$src)),
+ (COPY_TO_REGCLASS SrcVT:$src, DstRC), preds>;
+
+// These are endian-independent because the element size doesn't change
+def : LASXBitconvertPat;
+def : LASXBitconvertPat;
+def : LASXBitconvertPat;
+def : LASXBitconvertPat;
+
+def : LASXBitconvertPat;
+def : LASXBitconvertPat;
+def : LASXBitconvertPat;
+def : LASXBitconvertPat;
+def : LASXBitconvertPat;
+
+def : LASXBitconvertPat;
+def : LASXBitconvertPat;
+def : LASXBitconvertPat;
+def : LASXBitconvertPat;
+def : LASXBitconvertPat;
+
+def : LASXBitconvertPat;
+def : LASXBitconvertPat;
+def : LASXBitconvertPat;
+def : LASXBitconvertPat;
+
+def : LASXBitconvertPat;
+def : LASXBitconvertPat;
+def : LASXBitconvertPat;
+def : LASXBitconvertPat;
+
+def : LASXBitconvertPat;
+def : LASXBitconvertPat;
+def : LASXBitconvertPat;
+def : LASXBitconvertPat;
+
+def : LASXBitconvertPat;
+def : LASXBitconvertPat;
+def : LASXBitconvertPat;
+def : LASXBitconvertPat;
+
+class LASX_XINSERT_PSEUDO_BASE<SDPatternOperator OpNode, ValueType Ty,
+ Operand ImmOp, PatFrag Imm,
+ RegisterOperand ROXD, RegisterOperand ROFS> :
+ LASXPseudo<(outs ROXD:$xd), (ins ROXD:$xd_in, ImmOp:$n, ROFS:$fs),
+ [(set ROXD:$xd, (OpNode (Ty ROXD:$xd_in), ROFS:$fs, Imm:$n))]> {
+ bit usesCustomInserter = 1;
+ string Constraints = "$xd = $xd_in";
+}
+
+class XINSERT_H_PSEUDO_DESC : LASX_XINSERT_PSEUDO_BASE;
+
+class XINSERT_H64_PSEUDO_DESC : LASX_XINSERT_PSEUDO_BASE;
+
+def XINSERT_H_PSEUDO : XINSERT_H_PSEUDO_DESC;
+def XINSERT_H64_PSEUDO : XINSERT_H64_PSEUDO_DESC;
+
+class XINSERT_B_PSEUDO_DESC : LASX_XINSERT_PSEUDO_BASE;
+def XINSERT_B_PSEUDO : XINSERT_B_PSEUDO_DESC;
+
+
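+// Pseudo base for copying element $n of a vector out into a scalar register
+// class; expanded by a custom inserter (template parameter kinds inferred
+// from the pattern body).
+class LASX_COPY_PSEUDO_BASE<SDPatternOperator OpNode, ValueType VecTy,
+ Operand ImmOp, PatFrag Imm,
+ RegisterClass RCD, RegisterClass RCWS> :
+ LASXPseudo<(outs RCD:$xd), (ins RCWS:$xj, ImmOp:$n),
+ [(set RCD:$xd, (OpNode (VecTy RCWS:$xj), Imm:$n))]> {
+ bit usesCustomInserter = 1;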
+}
+
+class XCOPY_FW_PSEUDO_DESC : LASX_COPY_PSEUDO_BASE;
+class XCOPY_FD_PSEUDO_DESC : LASX_COPY_PSEUDO_BASE;
+def XCOPY_FW_PSEUDO : XCOPY_FW_PSEUDO_DESC;
+def XCOPY_FD_PSEUDO : XCOPY_FD_PSEUDO_DESC;
+
+
+
+class LASX_XINSERT_VIDX_PSEUDO_BASE<SDPatternOperator OpNode, ValueType Ty,
+ RegisterOperand ROXD, RegisterOperand ROFS,
+ RegisterOperand ROIdx> :
+ LASXPseudo<(outs ROXD:$xd), (ins ROXD:$xd_in, ROIdx:$n, ROFS:$fs),
+ [(set ROXD:$xd, (OpNode (Ty ROXD:$xd_in), ROFS:$fs, ROIdx:$n))]> {
+ bit usesCustomInserter = 1;
+ string Constraints = "$xd = $xd_in";
+}
+
+
+class XINSERT_FW_PSEUDO_DESC : LASX_XINSERT_PSEUDO_BASE;
+def XINSERT_FW_PSEUDO : XINSERT_FW_PSEUDO_DESC;
+
+class XINSERT_FW_VIDX_PSEUDO_DESC :
+ LASX_XINSERT_VIDX_PSEUDO_BASE;
+class XINSERT_FW_VIDX64_PSEUDO_DESC :
+ LASX_XINSERT_VIDX_PSEUDO_BASE;
+
+def XINSERT_FW_VIDX_PSEUDO : XINSERT_FW_VIDX_PSEUDO_DESC;
+def XINSERT_FW_VIDX64_PSEUDO : XINSERT_FW_VIDX64_PSEUDO_DESC;
+
+class XINSERT_B_VIDX64_PSEUDO_DESC :
+ LASX_XINSERT_VIDX_PSEUDO_BASE;
+
+def XINSERT_B_VIDX64_PSEUDO : XINSERT_B_VIDX64_PSEUDO_DESC;
+
+
+class XINSERT_B_VIDX_PSEUDO_DESC :
+ LASX_XINSERT_VIDX_PSEUDO_BASE;
+
+def XINSERT_B_VIDX_PSEUDO : XINSERT_B_VIDX_PSEUDO_DESC;
+
+
+class XINSERTPostRA<RegisterClass RC, RegisterClass RD, RegisterClass RE> :
+ LoongArchPseudo<(outs RC:$xd), (ins RC:$xd_in, RD:$n, RE:$fs), []> {
+ let mayLoad = 1;
+ let mayStore = 1;
+}
+
+def XINSERT_B_VIDX_PSEUDO_POSTRA : XINSERTPostRA;
+def XINSERT_B_VIDX64_PSEUDO_POSTRA : XINSERTPostRA;
+def XINSERT_FW_VIDX_PSEUDO_POSTRA : XINSERTPostRA;
+def XINSERT_FW_VIDX64_PSEUDO_POSTRA : XINSERTPostRA;
+
+class XINSERT_FD_PSEUDO_DESC : LASX_XINSERT_PSEUDO_BASE;
+
+def XINSERT_FD_PSEUDO : XINSERT_FD_PSEUDO_DESC;
+
+class LASX_2R_FILL_PSEUDO_BASE<SDPatternOperator OpNode,
+ RegisterClass RCWD, RegisterClass RCWS = RCWD> :
+ LASXPseudo<(outs RCWD:$xd), (ins RCWS:$fs),
+ [(set RCWD:$xd, (OpNode RCWS:$fs))]> {
+ let usesCustomInserter = 1;
+}
+
+class XFILL_FW_PSEUDO_DESC : LASX_2R_FILL_PSEUDO_BASE;
+class XFILL_FD_PSEUDO_DESC : LASX_2R_FILL_PSEUDO_BASE;
+def XFILL_FW_PSEUDO : XFILL_FW_PSEUDO_DESC;
+def XFILL_FD_PSEUDO : XFILL_FD_PSEUDO_DESC;
+
+class LASX_CONCAT_VECTORS_PSEUDO_BASE<ValueType Ty, ValueType SubTy,
+ RegisterOperand ROXD, RegisterOperand ROXJ,
+ RegisterOperand ROXK> :
+ LASXPseudo<(outs ROXD:$xd), (ins ROXJ:$xs, ROXK:$xt),
+ [(set ROXD:$xd, (Ty (concat_vectors (SubTy ROXJ:$xs), (SubTy ROXK:$xt))))]> {
+ bit usesCustomInserter = 1;
+}
+
+class CONCAT_VECTORS_B_PSEUDO_DESC :
+ LASX_CONCAT_VECTORS_PSEUDO_BASE;
+class CONCAT_VECTORS_H_PSEUDO_DESC :
+ LASX_CONCAT_VECTORS_PSEUDO_BASE;
+class CONCAT_VECTORS_W_PSEUDO_DESC :
+ LASX_CONCAT_VECTORS_PSEUDO_BASE;
+class CONCAT_VECTORS_D_PSEUDO_DESC :
+ LASX_CONCAT_VECTORS_PSEUDO_BASE;
+
+class CONCAT_VECTORS_FW_PSEUDO_DESC :
+ LASX_CONCAT_VECTORS_PSEUDO_BASE;
+class CONCAT_VECTORS_FD_PSEUDO_DESC :
+ LASX_CONCAT_VECTORS_PSEUDO_BASE;
+
+def CONCAT_VECTORS_B_PSEUDO : CONCAT_VECTORS_B_PSEUDO_DESC;
+def CONCAT_VECTORS_H_PSEUDO : CONCAT_VECTORS_H_PSEUDO_DESC;
+def CONCAT_VECTORS_W_PSEUDO : CONCAT_VECTORS_W_PSEUDO_DESC;
+def CONCAT_VECTORS_D_PSEUDO : CONCAT_VECTORS_D_PSEUDO_DESC;
+def CONCAT_VECTORS_FW_PSEUDO : CONCAT_VECTORS_FW_PSEUDO_DESC;
+def CONCAT_VECTORS_FD_PSEUDO : CONCAT_VECTORS_FD_PSEUDO_DESC;
+
+
+class LASX_COPY_GPR_PSEUDO_BASE<SDPatternOperator OpNode, ValueType VecTy,
+ RegisterOperand ROXD, RegisterOperand ROFS,
+ RegisterOperand ROIdx> :
+ LASXPseudo<(outs ROXD:$xd), (ins ROFS:$xj, ROIdx:$n),
+ [(set ROXD:$xd, (OpNode (VecTy ROFS:$xj), ROIdx:$n))]> {
+ bit usesCustomInserter = 1;
+}
+
+class XCOPY_FW_GPR_PSEUDO_DESC : LASX_COPY_GPR_PSEUDO_BASE;
+def XCOPY_FW_GPR_PSEUDO : XCOPY_FW_GPR_PSEUDO_DESC;
+
+
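+// isCodeGenOnly copies of instructions defined earlier: the same encodings
+// re-typed for the remaining vector types, or re-patterned so that codegen
+// can select them from generic DAG nodes (load/store, shifts, min/max,
+// setcc, add/sub) instead of the LASX intrinsics.
+let isCodeGenOnly = 1 in {
+
+def XVLD_H : LASX_I12_S<0b0010110010>,
+ LASX_LD<"xvld", load, v16i16, LASX256HOpnd, mem>;
+
+def XVLD_W : LASX_I12_S<0b0010110010>,
+ LASX_LD<"xvld", load, v8i32, LASX256WOpnd, mem>;
+
+def XVLD_D : LASX_I12_S<0b0010110010>,
+ LASX_LD<"xvld", load, v4i64,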
LASX256DOpnd, mem>; + + +def XVST_H : LASX_I12_S<0b0010110011>, + LASX_ST<"xvst", store, v16i16, LASX256HOpnd, mem_simm12>; + +def XVST_W : LASX_I12_S<0b0010110011>, + LASX_ST<"xvst", store, v8i32, LASX256WOpnd, mem_simm12>; + +def XVST_D : LASX_I12_S<0b0010110011>, + LASX_ST<"xvst", store, v4i64, LASX256DOpnd, mem_simm12>; + + +def XVREPLVE_W_N : LASX_3R_1GP<0b01110101001000110>, + LASX_3R_VREPLVE_DESC_BASE_N<"xvreplve.w", LASX256WOpnd>; + + +def XVANDI_B_N : LASX_I8_U<0b01110111110100>, + LASX_BIT_U8_DESC_BASE<"xvandi.b", and, xvsplati8_uimm8, LASX256BOpnd>; + + +def XVXORI_B_N : LASX_I8_U<0b01110111110110>, + LASX_BIT_U8_DESC_BASE<"xvxori.b", xor, xvsplati8_uimm8, LASX256BOpnd>; + + +def XVSRAI_B_N : LASX_I3_U<0b0111011100110100001>, + LASX_BIT_U3_VREPLVE_DESC_BASE<"xvsrai.b", sra, xvsplati8_uimm3, LASX256BOpnd>; + +def XVSRAI_H_N : LASX_I4_U<0b011101110011010001>, + LASX_BIT_U4_VREPLVE_DESC_BASE<"xvsrai.h", sra, xvsplati16_uimm4, LASX256HOpnd>; + +def XVSRAI_W_N : LASX_I5_U<0b01110111001101001>, + LASX_BIT_U5_VREPLVE_DESC_BASE<"xvsrai.w", sra, xvsplati32_uimm5, LASX256WOpnd>; + +def XVSRAI_D_N : LASX_I6_U<0b0111011100110101>, + LASX_BIT_U6_VREPLVE_DESC_BASE<"xvsrai.d", sra, xvsplati64_uimm6, LASX256DOpnd>; + + +def XVSLLI_B_N : LASX_I3_U<0b0111011100101100001>, + LASX_BIT_U3_VREPLVE_DESC_BASE<"xvslli.b", shl, xvsplati8_uimm3, LASX256BOpnd>; + +def XVSLLI_H_N : LASX_I4_U<0b011101110010110001>, + LASX_BIT_U4_VREPLVE_DESC_BASE<"xvslli.h", shl, xvsplati16_uimm4, LASX256HOpnd>; + +def XVSLLI_W_N : LASX_I5_U<0b01110111001011001>, + LASX_BIT_U5_VREPLVE_DESC_BASE<"xvslli.w", shl, xvsplati32_uimm5, LASX256WOpnd>; + +def XVSLLI_D_N : LASX_I6_U<0b0111011100101101>, + LASX_BIT_U6_VREPLVE_DESC_BASE<"xvslli.d", shl, xvsplati64_uimm6, LASX256DOpnd>; + + +def XVSRLI_B_N : LASX_I3_U<0b0111011100110000001>, + LASX_BIT_U3_VREPLVE_DESC_BASE<"xvsrli.b", srl, xvsplati8_uimm3, LASX256BOpnd>; + +def XVSRLI_H_N : LASX_I4_U<0b011101110011000001>, + LASX_BIT_U4_VREPLVE_DESC_BASE<"xvsrli.h", srl, xvsplati16_uimm4, LASX256HOpnd>; + +def XVSRLI_W_N : LASX_I5_U<0b01110111001100001>, + LASX_BIT_U5_VREPLVE_DESC_BASE<"xvsrli.w", srl, xvsplati32_uimm5, LASX256WOpnd>; + +def XVSRLI_D_N : LASX_I6_U<0b0111011100110001>, + LASX_BIT_U6_VREPLVE_DESC_BASE<"xvsrli.d", srl, xvsplati64_uimm6, LASX256DOpnd>; + + +def XVMAXI_B_N : LASX_I5<0b01110110100100000>, + LASX_I5_DESC_BASE<"xvmaxi.b", smax, xvsplati8_simm5, LASX256BOpnd>; + +def XVMAXI_H_N : LASX_I5<0b01110110100100001>, + LASX_I5_DESC_BASE<"xvmaxi.h", smax, xvsplati16_simm5, LASX256HOpnd>; + +def XVMAXI_W_N : LASX_I5<0b01110110100100010>, + LASX_I5_DESC_BASE<"xvmaxi.w", smax, xvsplati32_simm5, LASX256WOpnd>; + +def XVMAXI_D_N : LASX_I5<0b01110110100100011>, + LASX_I5_DESC_BASE<"xvmaxi.d", smax, xvsplati64_simm5, LASX256DOpnd>; + + +def XVMINI_B_N : LASX_I5<0b01110110100100100>, + LASX_I5_DESC_BASE<"xvmini.b", smin, xvsplati8_simm5, LASX256BOpnd>; + +def XVMINI_H_N : LASX_I5<0b01110110100100101>, + LASX_I5_DESC_BASE<"xvmini.h", smin, xvsplati16_simm5, LASX256HOpnd>; + +def XVMINI_W_N : LASX_I5<0b01110110100100110>, + LASX_I5_DESC_BASE<"xvmini.w", smin, xvsplati32_simm5, LASX256WOpnd>; + +def XVMINI_D_N : LASX_I5<0b01110110100100111>, + LASX_I5_DESC_BASE<"xvmini.d", smin, xvsplati64_simm5, LASX256DOpnd>; + + +def XVMAXI_BU_N : LASX_I5_U<0b01110110100101000>, + LASX_I5_U_DESC_BASE<"xvmaxi.bu", umax, xvsplati8_uimm5, LASX256BOpnd>; + +def XVMAXI_HU_N : LASX_I5_U<0b01110110100101001>, + LASX_I5_U_DESC_BASE<"xvmaxi.hu", umax, xvsplati16_uimm5, LASX256HOpnd>; + +def XVMAXI_WU_N : 
LASX_I5_U<0b01110110100101010>, + LASX_I5_U_DESC_BASE<"xvmaxi.wu", umax, xvsplati32_uimm5, LASX256WOpnd>; + +def XVMAXI_DU_N : LASX_I5_U<0b01110110100101011>, + LASX_I5_U_DESC_BASE<"xvmaxi.du", umax, xvsplati64_uimm5, LASX256DOpnd>; + + +def XVMINI_BU_N : LASX_I5_U<0b01110110100101100>, + LASX_I5_U_DESC_BASE<"xvmini.bu", umin, xvsplati8_uimm5, LASX256BOpnd>; + +def XVMINI_HU_N : LASX_I5_U<0b01110110100101101>, + LASX_I5_U_DESC_BASE<"xvmini.hu", umin, xvsplati16_uimm5, LASX256HOpnd>; + +def XVMINI_WU_N : LASX_I5_U<0b01110110100101110>, + LASX_I5_U_DESC_BASE<"xvmini.wu", umin, xvsplati32_uimm5, LASX256WOpnd>; + +def XVMINI_DU_N : LASX_I5_U<0b01110110100101111>, + LASX_I5_U_DESC_BASE<"xvmini.du", umin, xvsplati64_uimm5, LASX256DOpnd>; + + +def XVSEQI_B_N : LASX_I5<0b01110110100000000>, + LASX_I5_SETCC_DESC_BASE<"xvseqi.b", SETEQ, v32i8, xvsplati8_simm5, LASX256BOpnd>; + +def XVSEQI_H_N : LASX_I5<0b01110110100000001>, + LASX_I5_SETCC_DESC_BASE<"xvseqi.h", SETEQ, v16i16, xvsplati16_simm5, LASX256HOpnd>; + +def XVSEQI_W_N : LASX_I5<0b01110110100000010>, + LASX_I5_SETCC_DESC_BASE<"xvseqi.w", SETEQ, v8i32, xvsplati32_simm5, LASX256WOpnd>; + +def XVSEQI_D_N : LASX_I5<0b01110110100000011>, + LASX_I5_SETCC_DESC_BASE<"xvseqi.d", SETEQ, v4i64, xvsplati64_simm5, LASX256DOpnd>; + + +def XVSLEI_B_N : LASX_I5<0b01110110100000100>, + LASX_I5_SETCC_DESC_BASE<"xvslei.b", SETLE, v32i8, xvsplati8_simm5, LASX256BOpnd>; + +def XVSLEI_H_N : LASX_I5<0b01110110100000101>, + LASX_I5_SETCC_DESC_BASE<"xvslei.h", SETLE, v16i16, xvsplati16_simm5, LASX256HOpnd>; + +def XVSLEI_W_N : LASX_I5<0b01110110100000110>, + LASX_I5_SETCC_DESC_BASE<"xvslei.w", SETLE, v8i32, xvsplati32_simm5, LASX256WOpnd>; + +def XVSLEI_D_N : LASX_I5<0b01110110100000111>, + LASX_I5_SETCC_DESC_BASE<"xvslei.d", SETLE, v4i64, xvsplati64_simm5, LASX256DOpnd>; + + +def XVSLEI_BU_N : LASX_I5_U<0b01110110100001000>, + LASX_I5_U_SETCC_DESC_BASE<"xvslei.bu", SETULE, v32i8, xvsplati8_uimm5, LASX256BOpnd>; + +def XVSLEI_HU_N : LASX_I5_U<0b01110110100001001>, + LASX_I5_U_SETCC_DESC_BASE<"xvslei.hu", SETULE, v16i16, xvsplati16_uimm5, LASX256HOpnd>; + +def XVSLEI_WU_N : LASX_I5_U<0b01110110100001010>, + LASX_I5_U_SETCC_DESC_BASE<"xvslei.wu", SETULE, v8i32, xvsplati32_uimm5, LASX256WOpnd>; + +def XVSLEI_DU_N : LASX_I5_U<0b01110110100001011>, + LASX_I5_U_SETCC_DESC_BASE<"xvslei.du", SETULE, v4i64, xvsplati64_uimm5, LASX256DOpnd>; + + +def XVSLTI_B_N : LASX_I5<0b01110110100001100>, + LASX_I5_SETCC_DESC_BASE<"xvslti.b", SETLT, v32i8, xvsplati8_simm5, LASX256BOpnd>; + +def XVSLTI_H_N : LASX_I5<0b01110110100001101>, + LASX_I5_SETCC_DESC_BASE<"xvslti.h", SETLT, v16i16, xvsplati16_simm5, LASX256HOpnd>; + +def XVSLTI_W_N : LASX_I5<0b01110110100001110>, + LASX_I5_SETCC_DESC_BASE<"xvslti.w", SETLT, v8i32, xvsplati32_simm5, LASX256WOpnd>; + +def XVSLTI_D_N : LASX_I5<0b01110110100001111>, + LASX_I5_SETCC_DESC_BASE<"xvslti.d", SETLT, v4i64, xvsplati64_simm5, LASX256DOpnd>; + + +def XVSLTI_BU_N : LASX_I5_U<0b01110110100010000>, + LASX_I5_U_SETCC_DESC_BASE<"xvslti.bu", SETULT, v32i8, xvsplati8_uimm5, LASX256BOpnd>; + +def XVSLTI_HU_N : LASX_I5_U<0b01110110100010001>, + LASX_I5_U_SETCC_DESC_BASE<"xvslti.hu", SETULT, v16i16, xvsplati16_uimm5, LASX256HOpnd>; + +def XVSLTI_WU_N : LASX_I5_U<0b01110110100010010>, + LASX_I5_U_SETCC_DESC_BASE<"xvslti.wu", SETULT, v8i32, xvsplati32_uimm5, LASX256WOpnd>; + +def XVSLTI_DU_N : LASX_I5_U<0b01110110100010011>, + LASX_I5_U_SETCC_DESC_BASE<"xvslti.du", SETULT, v4i64, xvsplati64_uimm5, LASX256DOpnd>; + + +def XVADDI_BU_N : 
LASX_I5_U<0b01110110100010100>, + LASX_I5_U_DESC_BASE<"xvaddi.bu", add, xvsplati8_uimm5, LASX256BOpnd>; + +def XVADDI_HU_N : LASX_I5_U<0b01110110100010101>, + LASX_I5_U_DESC_BASE<"xvaddi.hu", add, xvsplati16_uimm5, LASX256HOpnd>; + +def XVADDI_WU_N : LASX_I5_U<0b01110110100010110>, + LASX_I5_U_DESC_BASE<"xvaddi.wu", add, xvsplati32_uimm5, LASX256WOpnd>; + +def XVADDI_DU_N : LASX_I5_U<0b01110110100010111>, + LASX_I5_U_DESC_BASE<"xvaddi.du", add, xvsplati64_uimm5, LASX256DOpnd>; + + +def XVSUBI_BU_N : LASX_I5_U<0b01110110100011000>, + LASX_I5_U_DESC_BASE<"xvsubi.bu", sub, xvsplati8_uimm5, LASX256BOpnd>; + +def XVSUBI_HU_N : LASX_I5_U<0b01110110100011001>, + LASX_I5_U_DESC_BASE<"xvsubi.hu", sub, xvsplati16_uimm5, LASX256HOpnd>; + +def XVSUBI_WU_N : LASX_I5_U<0b01110110100011010>, + LASX_I5_U_DESC_BASE<"xvsubi.wu", sub, xvsplati32_uimm5, LASX256WOpnd>; + +def XVSUBI_DU_N : LASX_I5_U<0b01110110100011011>, + LASX_I5_U_DESC_BASE<"xvsubi.du", sub, xvsplati64_uimm5, LASX256DOpnd>; + + +def XVPERMI_QH : LASX_I8_U<0b01110111111011>, + LASX_2RN_3R_U8_DESC_BASE<"xvpermi.q", LASX256HOpnd, LASX256HOpnd>; + +def XVPERMI_QW : LASX_I8_U<0b01110111111011>, + LASX_2RN_3R_U8_DESC_BASE<"xvpermi.q", LASX256WOpnd, LASX256WOpnd>; + +def XVPERMI_QD : LASX_I8_U<0b01110111111011>, + LASX_2RN_3R_U8_DESC_BASE<"xvpermi.q", LASX256DOpnd, LASX256DOpnd>; + + +def XVBITSELI_B_N : LASX_I8_U<0b01110111110001>, + LASX_2R_3R_U8_SELECT<"xvbitseli.b", vselect, LASX256BOpnd, LASX256BOpnd>; + +} + + +def : LASXPat<(v8f32 (load addrimm12:$addr)), (XVLD_W addrimm12:$addr)>; +def : LASXPat<(v4f64 (load addrimm12:$addr)), (XVLD_D addrimm12:$addr)>; + +def XVST_FW : LASXPat<(store (v8f32 LASX256W:$xj), addrimm12:$addr), + (XVST_W LASX256W:$xj, addrimm12:$addr)>; +def XVST_FD : LASXPat<(store (v4f64 LASX256D:$xj), addrimm12:$addr), + (XVST_D LASX256D:$xj, addrimm12:$addr)>; + +def XVNEG_FW : LASXPat<(fneg (v8f32 LASX256W:$xj)), + (XVBITREVI_W LASX256W:$xj, 31)>; +def XVNEG_FD : LASXPat<(fneg (v4f64 LASX256D:$xj)), + (XVBITREVI_D LASX256D:$xj, 63)>; + + +def : LASXPat<(v4i64 (LoongArchVABSD v4i64:$xj, v4i64:$xk, (i32 0))), + (v4i64 (XVABSD_D $xj, $xk))>; + +def : LASXPat<(v8i32 (LoongArchVABSD v8i32:$xj, v8i32:$xk, (i32 0))), + (v8i32 (XVABSD_W $xj, $xk))>; + +def : LASXPat<(v16i16 (LoongArchVABSD v16i16:$xj, v16i16:$xk, (i32 0))), + (v16i16 (XVABSD_H $xj, $xk))>; + +def : LASXPat<(v32i8 (LoongArchVABSD v32i8:$xj, v32i8:$xk, (i32 0))), + (v32i8 (XVABSD_B $xj, $xk))>; + +def : LASXPat<(v4i64 (LoongArchUVABSD v4i64:$xj, v4i64:$xk, (i32 0))), + (v4i64 (XVABSD_DU $xj, $xk))>; + +def : LASXPat<(v8i32 (LoongArchUVABSD v8i32:$xj, v8i32:$xk, (i32 0))), + (v8i32 (XVABSD_WU $xj, $xk))>; + +def : LASXPat<(v16i16 (LoongArchUVABSD v16i16:$xj, v16i16:$xk, (i32 0))), + (v16i16 (XVABSD_HU $xj, $xk))>; + +def : LASXPat<(v32i8 (LoongArchUVABSD v32i8:$xj, v32i8:$xk, (i32 0))), + (v32i8 (XVABSD_BU $xj, $xk))>; + + +def : LASXPat<(or v32i8:$vj, (shl vsplat_imm_eq_1, v32i8:$vk)), + (XVBITSET_B v32i8:$vj, v32i8:$vk)>; +def : LASXPat<(or v16i16:$vj, (shl vsplat_imm_eq_1, v16i16:$vk)), + (XVBITSET_H v16i16:$vj, v16i16:$vk)>; +def : LASXPat<(or v8i32:$vj, (shl vsplat_imm_eq_1, v8i32:$vk)), + (XVBITSET_W v8i32:$vj, v8i32:$vk)>; +def : LASXPat<(or v4i64:$vj, (shl vsplat_imm_eq_1, v4i64:$vk)), + (XVBITSET_D v4i64:$vj, v4i64:$vk)>; + +def : LASXPat<(xor v32i8:$vj, (shl xvsplat_imm_eq_1, v32i8:$vk)), + (XVBITREV_B v32i8:$vj, v32i8:$vk)>; +def : LASXPat<(xor v16i16:$vj, (shl xvsplat_imm_eq_1, v16i16:$vk)), + (XVBITREV_H v16i16:$vj, v16i16:$vk)>; +def : LASXPat<(xor 
v8i32:$vj, (shl xvsplat_imm_eq_1, v8i32:$vk)),
+              (XVBITREV_W v8i32:$vj, v8i32:$vk)>;
+def : LASXPat<(xor v4i64:$vj, (shl (v4i64 xvsplati64_imm_eq_1), v4i64:$vk)),
+              (XVBITREV_D v4i64:$vj, v4i64:$vk)>;
+
+def : LASXPat<(and v32i8:$vj, (xor (shl vsplat_imm_eq_1, v32i8:$vk), immAllOnesV)),
+              (XVBITCLR_B v32i8:$vj, v32i8:$vk)>;
+def : LASXPat<(and v16i16:$vj, (xor (shl vsplat_imm_eq_1, v16i16:$vk), immAllOnesV)),
+              (XVBITCLR_H v16i16:$vj, v16i16:$vk)>;
+def : LASXPat<(and v8i32:$vj, (xor (shl vsplat_imm_eq_1, v8i32:$vk), immAllOnesV)),
+              (XVBITCLR_W v8i32:$vj, v8i32:$vk)>;
+def : LASXPat<(and v4i64:$vj, (xor (shl (v4i64 vsplati64_imm_eq_1), v4i64:$vk),
+                                   (bitconvert (v8i32 immAllOnesV)))),
+              (XVBITCLR_D v4i64:$vj, v4i64:$vk)>;
+
+def xvsplati64_imm_eq_63 : PatLeaf<(bitconvert (v8i32 (build_vector))), [{
+  APInt Imm;
+  SDNode *BV = N->getOperand(0).getNode();
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+
+  return selectVSplat(BV, Imm, EltTy.getSizeInBits()) &&
+         Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 63;
+}]>;
+
+def xvsplati8imm7 : PatFrag<(ops node:$wt),
+                            (and node:$wt, (xvsplati8 immi32Cst7))>;
+def xvsplati16imm15 : PatFrag<(ops node:$wt),
+                              (and node:$wt, (xvsplati16 immi32Cst15))>;
+def xvsplati32imm31 : PatFrag<(ops node:$wt),
+                              (and node:$wt, (xvsplati32 immi32Cst31))>;
+def xvsplati64imm63 : PatFrag<(ops node:$wt),
+                              (and node:$wt, xvsplati64_imm_eq_63)>;
+
+class LASXShiftPat<SDNode Node, ValueType VT, LASXInst Insn, dag Vec> :
+  LASXPat<(VT (Node VT:$vs, (VT (and VT:$vt, Vec)))),
+          (VT (Insn VT:$vs, VT:$vt))>;
+
+class LASXBitPat<SDNode Node, ValueType VT, LASXInst Insn, PatFrag Frag> :
+  LASXPat<(VT (Node VT:$vs, (shl vsplat_imm_eq_1, (Frag VT:$vt)))),
+          (VT (Insn VT:$vs, VT:$vt))>;
+
+multiclass LASXShiftPats<SDNode Node, string Insn> {
+  def : LASXShiftPat<Node, v32i8, !cast<LASXInst>(Insn#"_B"),
+                     (xvsplati8 immi32Cst7)>;
+  def : LASXShiftPat<Node, v16i16, !cast<LASXInst>(Insn#"_H"),
+                     (xvsplati16 immi32Cst15)>;
+  def : LASXShiftPat<Node, v8i32, !cast<LASXInst>(Insn#"_W"),
+                     (xvsplati32 immi32Cst31)>;
+  def : LASXPat<(v4i64 (Node v4i64:$vs, (v4i64 (and v4i64:$vt,
+                                                    xvsplati64_imm_eq_63)))),
+                (v4i64 (!cast<LASXInst>(Insn#"_D") v4i64:$vs, v4i64:$vt))>;
+}
+
+multiclass LASXBitPats<SDNode Node, string Insn> {
+  def : LASXBitPat<Node, v32i8, !cast<LASXInst>(Insn#"_B"), xvsplati8imm7>;
+  def : LASXBitPat<Node, v16i16, !cast<LASXInst>(Insn#"_H"), xvsplati16imm15>;
+  def : LASXBitPat<Node, v8i32, !cast<LASXInst>(Insn#"_W"), xvsplati32imm31>;
+  def : LASXPat<(Node v4i64:$vs, (shl (v4i64 xvsplati64_imm_eq_1),
+                                      (xvsplati64imm63 v4i64:$vt))),
+                (v4i64 (!cast<LASXInst>(Insn#"_D") v4i64:$vs, v4i64:$vt))>;
+}
+
+defm : LASXShiftPats<shl, "XVSLL">;
+defm : LASXShiftPats<srl, "XVSRL">;
+defm : LASXShiftPats<sra, "XVSRA">;
+defm : LASXBitPats<or, "XVBITSET">;
+defm : LASXBitPats<xor, "XVBITREV">;
+
+def : LASXPat<(and v32i8:$vs, (xor (shl xvsplat_imm_eq_1,
+                                        (xvsplati8imm7 v32i8:$vt)),
+                                   immAllOnesV)),
+              (v32i8 (XVBITCLR_B v32i8:$vs, v32i8:$vt))>;
+def : LASXPat<(and v16i16:$vs, (xor (shl xvsplat_imm_eq_1,
+                                         (xvsplati16imm15 v16i16:$vt)),
+                                    immAllOnesV)),
+              (v16i16 (XVBITCLR_H v16i16:$vs, v16i16:$vt))>;
+def : LASXPat<(and v8i32:$vs, (xor (shl xvsplat_imm_eq_1,
+                                        (xvsplati32imm31 v8i32:$vt)),
+                                   immAllOnesV)),
+              (v8i32 (XVBITCLR_W v8i32:$vs, v8i32:$vt))>;
+def : LASXPat<(and v4i64:$vs, (xor (shl (v4i64 xvsplati64_imm_eq_1),
+                                        (xvsplati64imm63 v4i64:$vt)),
+                                   (bitconvert (v8i32 immAllOnesV)))),
+              (v4i64 (XVBITCLR_D v4i64:$vs, v4i64:$vt))>;
+
+def : LASXPat<(fdiv (v8f32 (build_vector (f32 fpimm1), (f32 fpimm1), (f32 fpimm1),
+                                         (f32 fpimm1), (f32 fpimm1), (f32 fpimm1),
+                                         (f32 fpimm1), (f32 fpimm1))), v8f32:$v),
+              (XVFRECIP_S v8f32:$v)>;
+
+def : LASXPat<(fdiv (v4f64 (build_vector (f64 fpimm1), (f64 fpimm1), (f64 fpimm1),
+                                         (f64 fpimm1))), v4f64:$v),
+              (XVFRECIP_D v4f64:$v)>;
+
+def : LASXPat<(fdiv (v8f32 fpimm1), v8f32:$v),
+              (XVFRECIP_S v8f32:$v)>;
+
+def : LASXPat<(fdiv (v4f64 fpimm1), v4f64:$v),
+              (XVFRECIP_D v4f64:$v)>;
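+
+// Editorial note, not part of the upstream patch: the multiclass parameters
+// and !cast element types above were reconstructed from the surrounding
+// patterns (the LASXInst class name is an assumption mirroring LSXInst in
+// the formats file below). Under that reading,
+// `defm : LASXShiftPats<shl, "XVSLL">;` expands, for the byte case, to:
+//
+//   def : LASXPat<(v32i8 (shl v32i8:$vs,
+//                             (v32i8 (and v32i8:$vt, (xvsplati8 immi32Cst7))))),
+//                 (v32i8 (XVSLL_B v32i8:$vs, v32i8:$vt))>;
+//
+// i.e. a vector shift whose per-lane amount has already been masked to the
+// element width can be selected directly to xvsll.b, since the hardware
+// shift itself only consumes the low bits of each shift amount.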
+
+
+def : LASXPat<(fdiv (v8f32 (build_vector (f32 fpimm1), (f32 fpimm1), (f32 fpimm1),
+                                         (f32 fpimm1), (f32 fpimm1), (f32 fpimm1),
+                                         (f32 fpimm1), (f32 fpimm1))),
+                    (fsqrt v8f32:$v)),
+              (XVFRSQRT_S v8f32:$v)>;
+
+def : LASXPat<(fdiv (v4f64 (build_vector (f64 fpimm1), (f64 fpimm1), (f64 fpimm1),
+                                         (f64 fpimm1))), (fsqrt v4f64:$v)),
+              (XVFRSQRT_D v4f64:$v)>;
+
+def : LASXPat<(fdiv (v8f32 fpimm1), (fsqrt v8f32:$v)),
+              (XVFRSQRT_S v8f32:$v)>;
+
+def : LASXPat<(fdiv (v4f64 fpimm1), (fsqrt v4f64:$v)),
+              (XVFRSQRT_D v4f64:$v)>;
+
+
+def : LASXPat <(extract_subvector v4f64:$vec, (i32 0)),
+               (v2f64 (EXTRACT_SUBREG v4f64:$vec, sub_128))>;
+
+def : LASXPat <(extract_subvector v8f32:$vec, (i32 0)),
+               (v4f32 (EXTRACT_SUBREG v8f32:$vec, sub_128))>;
+
+def : LASXPat <(extract_subvector v4i64:$vec, (i32 0)),
+               (v2i64 (EXTRACT_SUBREG v4i64:$vec, sub_128))>;
+
+def : LASXPat <(extract_subvector v8i32:$vec, (i32 0)),
+               (v4i32 (EXTRACT_SUBREG v8i32:$vec, sub_128))>;
+
+def : LASXPat <(extract_subvector v16i16:$vec, (i32 0)),
+               (v8i16 (EXTRACT_SUBREG v16i16:$vec, sub_128))>;
+
+def : LASXPat <(extract_subvector v32i8:$vec, (i32 0)),
+               (v16i8 (EXTRACT_SUBREG v32i8:$vec, sub_128))>;
+
+
+def : LASXPat <(extract_subvector v4f64:$vec, (i64 0)),
+               (v2f64 (EXTRACT_SUBREG v4f64:$vec, sub_128))>;
+
+def : LASXPat <(extract_subvector v8f32:$vec, (i64 0)),
+               (v4f32 (EXTRACT_SUBREG v8f32:$vec, sub_128))>;
+
+def : LASXPat <(extract_subvector v4i64:$vec, (i64 0)),
+               (v2i64 (EXTRACT_SUBREG v4i64:$vec, sub_128))>;
+
+def : LASXPat <(extract_subvector v8i32:$vec, (i64 0)),
+               (v4i32 (EXTRACT_SUBREG v8i32:$vec, sub_128))>;
+
+def : LASXPat <(extract_subvector v16i16:$vec, (i64 0)),
+               (v8i16 (EXTRACT_SUBREG v16i16:$vec, sub_128))>;
+
+def : LASXPat <(extract_subvector v32i8:$vec, (i64 0)),
+               (v16i8 (EXTRACT_SUBREG v32i8:$vec, sub_128))>;
+
+
+def : LASXPat <(extract_subvector v4i64:$vec, (i32 2)),
+               (v2i64 (EXTRACT_SUBREG (v4i64 (XVPERMI_QD v4i64:$vec, v4i64:$vec, (i32 1))), sub_128))>;
+
+def : LASXPat <(extract_subvector v8i32:$vec, (i32 4)),
+               (v4i32 (EXTRACT_SUBREG (v8i32 (XVPERMI_QW v8i32:$vec, v8i32:$vec, (i32 1))), sub_128))>;
+
+def : LASXPat <(extract_subvector v16i16:$vec, (i32 8)),
+               (v8i16 (EXTRACT_SUBREG (v16i16 (XVPERMI_QH v16i16:$vec, v16i16:$vec, (i32 1))), sub_128))>;
+
+def : LASXPat <(extract_subvector v32i8:$vec, (i32 16)),
+               (v16i8 (EXTRACT_SUBREG (v32i8 (XVPERMI_Q v32i8:$vec, v32i8:$vec, (i32 1))), sub_128))>;
+
+
+def : LASXPat <(extract_subvector v4i64:$vec, (i64 2)),
+               (v2i64 (EXTRACT_SUBREG (v4i64 (XVPERMI_QD v4i64:$vec, v4i64:$vec, (i32 1))), sub_128))>;
+
+def : LASXPat <(extract_subvector v8i32:$vec, (i64 4)),
+               (v4i32 (EXTRACT_SUBREG (v8i32 (XVPERMI_QW v8i32:$vec, v8i32:$vec, (i32 1))), sub_128))>;
+
+def : LASXPat <(extract_subvector v16i16:$vec, (i64 8)),
+               (v8i16 (EXTRACT_SUBREG (v16i16 (XVPERMI_QH v16i16:$vec, v16i16:$vec, (i32 1))), sub_128))>;
+
+def : LASXPat <(extract_subvector v32i8:$vec, (i64 16)),
+               (v16i8 (EXTRACT_SUBREG (v32i8 (XVPERMI_Q v32i8:$vec, v32i8:$vec, (i32 1))), sub_128))>;
+
+
+def : LASXPat<(abs v4i64:$v),
+              (XVMAX_D v4i64:$v, (XVNEG_D v4i64:$v))>;
+
+def : LASXPat<(abs v8i32:$v),
+              (XVMAX_W v8i32:$v, (XVNEG_W v8i32:$v))>;
+
+def : LASXPat<(abs v16i16:$v),
+              (XVMAX_H v16i16:$v, (XVNEG_H v16i16:$v))>;
+
+def : LASXPat<(abs v32i8:$v),
+              (XVMAX_B v32i8:$v, (XVNEG_B v32i8:$v))>;
+
+
+def : LASXPat<(sub (v32i8 immAllZerosV), v32i8:$v),
+              (XVNEG_B v32i8:$v)>;
+
+def : LASXPat<(sub (v16i16 immAllZerosV), v16i16:$v),
+              (XVNEG_H v16i16:$v)>;
+
+def : LASXPat<(sub (v8i32
immAllZerosV), v8i32:$v), + (XVNEG_W v8i32:$v)>; + +def : LASXPat<(sub (v4i64 immAllZerosV), v4i64:$v), + (XVNEG_D v4i64:$v)>; + + + +def : LASXPat<(insert_subvector undef, (v2i64 LSX128D:$src), (i32 0)), + (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), LSX128D:$src, sub_128)>; + +def : LASXPat<(insert_subvector undef, (v4i32 LSX128W:$src), (i32 0)), + (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), LSX128W:$src, sub_128)>; + +def : LASXPat<(insert_subvector undef, (v8i16 LSX128H:$src), (i32 0)), + (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), LSX128H:$src, sub_128)>; + +def : LASXPat<(insert_subvector undef, (v16i8 LSX128B:$src), (i32 0)), + (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), LSX128B:$src, sub_128)>; + + +def : LASXPat<(insert_subvector undef, (v2i64 LSX128D:$src), (i64 0)), + (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), LSX128D:$src, sub_128)>; + +def : LASXPat<(insert_subvector undef, (v4i32 LSX128W:$src), (i64 0)), + (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), LSX128W:$src, sub_128)>; + +def : LASXPat<(insert_subvector undef, (v8i16 LSX128H:$src), (i64 0)), + (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), LSX128H:$src, sub_128)>; + +def : LASXPat<(insert_subvector undef, (v16i8 LSX128B:$src), (i64 0)), + (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), LSX128B:$src, sub_128)>; + + +def : LASXPat<(insert_subvector + (v4i64 immAllZerosV), (v2i64 LSX128D:$src), (i32 0)), + (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), LSX128D:$src, sub_128)>; + +def : LASXPat<(insert_subvector + (v8i32 immAllZerosV), (v4i32 LSX128W:$src), (i32 0)), + (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), LSX128W:$src, sub_128)>; + +def : LASXPat<(insert_subvector + (v16i16 immAllZerosV), (v8i16 LSX128H:$src), (i32 0)), + (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), LSX128H:$src, sub_128)>; + +def : LASXPat<(insert_subvector + (v32i8 immAllZerosV), (v16i8 LSX128B:$src), (i32 0)), + (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), LSX128B:$src, sub_128)>; + +def : LASXPat<(insert_subvector + (v4i64 immAllZerosV), (v2i64 LSX128D:$src), (i64 0)), + (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), LSX128D:$src, sub_128)>; + +def : LASXPat<(insert_subvector + (v8i32 immAllZerosV), (v4i32 LSX128W:$src), (i64 0)), + (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), LSX128W:$src, sub_128)>; + +def : LASXPat<(insert_subvector + (v16i16 immAllZerosV), (v8i16 LSX128H:$src), (i64 0)), + (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), LSX128H:$src, sub_128)>; + +def : LASXPat<(insert_subvector + (v32i8 immAllZerosV), (v16i8 LSX128B:$src), (i64 0)), + (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), LSX128B:$src, sub_128)>; + + +def : LASXPat<(insert_subvector + (v4i64 immAllZerosV), (v2i64 LSX128D:$src), (i32 2)), + (XVPERMI_QD (v4i64 (XVREPLGR2VR_D ZERO_64)), + (v4i64 (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), + LSX128D:$src, sub_128)), (i32 32))>; + +def : LASXPat<(insert_subvector (v8i32 immAllZerosV), + (v4i32 LSX128W:$src), (i32 4)), + (XVPERMI_QW (v8i32 (XVREPLGR2VR_W ZERO)), + (v8i32 (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), + LSX128W:$src, sub_128)), (i32 32))>; + +def : LASXPat<(insert_subvector + (v16i16 immAllZerosV), (v8i16 LSX128H:$src), (i32 8)), + (XVPERMI_QH (v16i16 (XVREPLGR2VR_H ZERO)), + (v16i16 (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), + LSX128H:$src, sub_128)), (i32 32))>; + +def : LASXPat<(insert_subvector + (v32i8 immAllZerosV), (v16i8 LSX128B:$src), (i32 16)), + (XVPERMI_Q (v32i8 (XVREPLGR2VR_B ZERO)), + (v32i8 (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), + LSX128B:$src, sub_128)), (i32 32))>; + + +def : LASXPat<(insert_subvector + (v4i64 immAllZerosV), (v2i64 LSX128D:$src), (i64 2)), + (XVPERMI_QD (v4i64 (XVREPLGR2VR_D ZERO_64)), 
+ (v4i64 (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), + LSX128D:$src, sub_128)), (i32 32))>; + +def : LASXPat<(insert_subvector + (v8i32 immAllZerosV), (v4i32 LSX128W:$src), (i64 4)), + (XVPERMI_QW (v8i32 (XVREPLGR2VR_W ZERO)), + (v8i32 (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), + LSX128W:$src, sub_128)), (i32 32))>; + +def : LASXPat<(insert_subvector + (v16i16 immAllZerosV), (v8i16 LSX128H:$src), (i64 8)), + (XVPERMI_QH (v16i16 (XVREPLGR2VR_H ZERO)), + (v16i16 (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), + LSX128H:$src, sub_128)), (i32 32))>; + +def : LASXPat<(insert_subvector + (v32i8 immAllZerosV), (v16i8 LSX128B:$src), (i64 16)), + (XVPERMI_Q (v32i8 (XVREPLGR2VR_B ZERO)), + (v32i8 (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), + LSX128B:$src, sub_128)), (i32 32))>; + + +def : LASXPat<(insert_subvector undef, (v2i64 LSX128D:$src), (i32 2)), + (XVPERMI_QD (v4i64 (IMPLICIT_DEF)), + (v4i64 (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), + LSX128D:$src, sub_128)), (i32 32))>; + +def : LASXPat<(insert_subvector undef, (v4i32 LSX128W:$src), (i32 4)), + (XVPERMI_QW (v8i32 (IMPLICIT_DEF)), + (v8i32 (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), + LSX128W:$src, sub_128)), (i32 32))>; + +def : LASXPat<(insert_subvector undef, (v8i16 LSX128H:$src), (i32 8)), + (XVPERMI_QH (v16i16 (IMPLICIT_DEF)), + (v16i16 (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), + LSX128H:$src, sub_128)), (i32 32))>; + +def : LASXPat<(insert_subvector undef, (v16i8 LSX128B:$src), (i32 16)), + (XVPERMI_Q (v32i8 (IMPLICIT_DEF)), + (v32i8 (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), + LSX128B:$src, sub_128)), (i32 32))>; + + +def : LASXPat<(insert_subvector undef, (v2i64 LSX128D:$src), (i64 2)), + (XVPERMI_QD (v4i64 (IMPLICIT_DEF)), + (v4i64 (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), + LSX128D:$src, sub_128)), (i32 32))>; + +def : LASXPat<(insert_subvector undef, (v4i32 LSX128W:$src), (i64 4)), + (XVPERMI_QW (v8i32 (IMPLICIT_DEF)), + (v8i32 (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), + LSX128W:$src, sub_128)), (i32 32))>; + +def : LASXPat<(insert_subvector undef, (v8i16 LSX128H:$src), (i64 8)), + (XVPERMI_QH (v16i16 (IMPLICIT_DEF)), + (v16i16 (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), + LSX128H:$src, sub_128)), (i32 32))>; + +def : LASXPat<(insert_subvector undef, (v16i8 LSX128B:$src), (i64 16)), + (XVPERMI_Q (v32i8 (IMPLICIT_DEF)), + (v32i8 (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), + LSX128B:$src, sub_128)), (i32 32))>; + + +def : LASXPat<(sra + (v32i8 (add + (v32i8 (add LASX256B:$a, LASX256B:$b)), + (v32i8 (srl + (v32i8 (add LASX256B:$a, LASX256B:$b)), + (v32i8 (build_vector (i32 7),(i32 7),(i32 7),(i32 7), + (i32 7),(i32 7),(i32 7),(i32 7), + (i32 7),(i32 7),(i32 7),(i32 7), + (i32 7),(i32 7),(i32 7),(i32 7), + (i32 7),(i32 7),(i32 7),(i32 7), + (i32 7),(i32 7),(i32 7),(i32 7), + (i32 7),(i32 7),(i32 7),(i32 7), + (i32 7),(i32 7),(i32 7),(i32 7)) + ) + ) + ) + ) + ), + (v32i8 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1) + ))), + (XVAVG_B (v32i8 LASX256B:$a), (v32i8 LASX256B:$b))>; + +def : LASXPat<(sra + (v16i16 (add + (v16i16 (add LASX256H:$a, LASX256H:$b)), + (v16i16 (srl + (v16i16 (add LASX256H:$a, LASX256H:$b)), + (v16i16 (build_vector (i32 15),(i32 15),(i32 15),(i32 15), + (i32 15),(i32 15),(i32 15),(i32 15), + (i32 15),(i32 15),(i32 15),(i32 15), + (i32 15),(i32 15),(i32 15),(i32 15)) + ) + ) + ) + ) + ), + (v16i16 (build_vector (i32 1),(i32 
1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1) + ))), + (XVAVG_H (v16i16 LASX256H:$a), (v16i16 LASX256H:$b))>; + +def : LASXPat<(sra + (v8i32 (add + (v8i32 (add LASX256W:$a, LASX256W:$b)), + (v8i32 (srl + (v8i32 (add LASX256W:$a, LASX256W:$b)), + (v8i32 (build_vector (i32 31),(i32 31),(i32 31),(i32 31), + (i32 31),(i32 31),(i32 31),(i32 31)) + ) + ) + ) + ) + ), + (v8i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1) + ))), + (XVAVG_W (v8i32 LASX256W:$a), (v8i32 LASX256W:$b))>; + +def : LASXPat<(sra + (v4i64 (add + (v4i64 (add LASX256D:$a, LASX256D:$b)), + (v4i64 (srl + (v4i64 (add LASX256D:$a, LASX256D:$b)), + (v4i64 (build_vector (i64 63),(i64 63),(i64 63),(i64 63))) + ) + ) + ) + ), + (v4i64 (build_vector (i64 1),(i64 1),(i64 1),(i64 1)))), + (XVAVG_D (v4i64 LASX256D:$a), (v4i64 LASX256D:$b))>; + + + +def : LASXPat<(srl + (v32i8 (add LASX256B:$a, LASX256B:$b)), + (v32i8 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1)) + ) + ), + (XVAVG_BU (v32i8 LASX256B:$a), (v32i8 LASX256B:$b))>; + +def : LASXPat<(srl + (v16i16 (add LASX256H:$a, LASX256H:$b)), + (v16i16 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1)) + ) + ), + (XVAVG_HU (v16i16 LASX256H:$a), (v16i16 LASX256H:$b))>; + +def : LASXPat<(srl + (v8i32 (add LASX256W:$a, LASX256W:$b)), + (v8i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1)) + ) + ), + (XVAVG_WU (v8i32 LASX256W:$a), (v8i32 LASX256W:$b))>; + +def : LASXPat<(srl + (v4i64 (add LASX256D:$a, LASX256D:$b)), + (v4i64 (build_vector (i64 1),(i64 1),(i64 1),(i64 1)) + ) + ), + (XVAVG_DU (v4i64 LASX256D:$a), (v4i64 LASX256D:$b))>; + + + +def : LASXPat<(sra + (v32i8 (add + (v32i8 (add (v32i8 ( + build_vector (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1) + )), + (v32i8 (add LASX256B:$a, LASX256B:$b)) + )), + (v32i8 (srl + (v32i8 ( add (v32i8( + build_vector (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1) + )), + (v32i8 (add LASX256B:$a, LASX256B:$b)) + )), + (v32i8 (build_vector (i32 7),(i32 7),(i32 7),(i32 7), + (i32 7),(i32 7),(i32 7),(i32 7), + (i32 7),(i32 7),(i32 7),(i32 7), + (i32 7),(i32 7),(i32 7),(i32 7), + (i32 7),(i32 7),(i32 7),(i32 7), + (i32 7),(i32 7),(i32 7),(i32 7), + (i32 7),(i32 7),(i32 7),(i32 7), + (i32 7),(i32 7),(i32 7),(i32 7)) + ) + ) + ) + ) + ), + (v32i8 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1) + ))), + (XVAVGR_B (v32i8 LASX256B:$a), (v32i8 LASX256B:$b))>; + + +def : LASXPat<(sra + 
(v16i16 (add + (v16i16 (add (v16i16 ( + build_vector (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1) + )), + (v16i16 (add LASX256H:$a, LASX256H:$b)) + )), + (v16i16 (srl + (v16i16 (add (v16i16 ( + build_vector (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1) + )), + (v16i16 (add LASX256H:$a, LASX256H:$b)) + )), + (v16i16 (build_vector + (i32 15),(i32 15),(i32 15),(i32 15), + (i32 15),(i32 15),(i32 15),(i32 15), + (i32 15),(i32 15),(i32 15),(i32 15), + (i32 15),(i32 15),(i32 15),(i32 15)) + ) + ) + ) + ) + ), + (v16i16 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1) + ))), + (XVAVGR_H (v16i16 LASX256H:$a), (v16i16 LASX256H:$b))>; + + +def : LASXPat<(sra + (v8i32 (add + (v8i32 (add (v8i32 ( + build_vector (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1) + )), + (v8i32 (add LASX256W:$a, LASX256W:$b)) + )), + (v8i32 (srl + (v8i32 (add (v8i32 ( + build_vector (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1) + )), + (v8i32 (add LASX256W:$a, LASX256W:$b)) + )), + (v8i32 (build_vector + (i32 31),(i32 31),(i32 31),(i32 31), + (i32 31),(i32 31),(i32 31),(i32 31) + ) + ) + ) + ) + ) + ), + (v8i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1)))), + (XVAVGR_W (v8i32 LASX256W:$a), (v8i32 LASX256W:$b))>; + +def : LASXPat<(sra + (v4i64 (add + (v4i64 (add (v4i64 ( + build_vector (i64 1),(i64 1),(i64 1),(i64 1) + )), + (v4i64 (add LASX256D:$a, LASX256D:$b)) + )), + (v4i64 (srl + (v4i64 (add (v4i64 ( + build_vector (i64 1),(i64 1),(i64 1),(i64 1) + )), + (v4i64 (add LASX256D:$a, LASX256D:$b)) + )), + (v4i64 (build_vector + (i64 63),(i64 63),(i64 63),(i64 63))) + ) + ) + ) + ), + (v4i64 (build_vector (i64 1),(i64 1),(i64 1),(i64 1)))), + (XVAVGR_D (v4i64 LASX256D:$a), (v4i64 LASX256D:$b))>; + + + +def : LASXPat<(srl + (v32i8 (add (v32i8 ( + build_vector (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1) + )), + (v32i8 (add LASX256B:$a, LASX256B:$b)) + )), + (v32i8 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1)) + ) + ), + (XVAVGR_BU (v32i8 LASX256B:$a), (v32i8 LASX256B:$b))>; + +def : LASXPat<(srl + (v16i16 (add (v16i16 ( + build_vector (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1) + )), + (v16i16 (add LASX256H:$a, LASX256H:$b)) + )), + (v16i16 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1)) + ) + ), + (XVAVGR_HU (v16i16 LASX256H:$a), (v16i16 LASX256H:$b))>; + +def : LASXPat<(srl + (v8i32 (add (v8i32 ( + build_vector (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1) + )), + (v8i32 (add LASX256W:$a, LASX256W:$b)) + )), + (v8i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 
1),(i32 1)) + ) + ), + (XVAVGR_WU (v8i32 LASX256W:$a), (v8i32 LASX256W:$b))>; + +def : LASXPat<(srl + (v4i64 (add (v4i64 ( + build_vector (i64 1),(i64 1),(i64 1),(i64 1) + )), + (v4i64 (add LASX256D:$a, LASX256D:$b)) + )), + (v4i64 (build_vector (i64 1),(i64 1),(i64 1),(i64 1)) + ) + ), + (XVAVGR_DU (v4i64 LASX256D:$a), (v4i64 LASX256D:$b))>; + + +def : LASXPat<(mulhs LASX256D:$a, LASX256D:$b), + (XVMUH_D LASX256D:$a, LASX256D:$b)>; + +def : LASXPat<(mulhs LASX256W:$a, LASX256W:$b), + (XVMUH_W LASX256W:$a, LASX256W:$b)>; + +def : LASXPat<(mulhs LASX256H:$a, LASX256H:$b), + (XVMUH_H LASX256H:$a, LASX256H:$b)>; + +def : LASXPat<(mulhs LASX256B:$a, LASX256B:$b), + (XVMUH_B LASX256B:$a, LASX256B:$b)>; + + +def : LASXPat<(mulhu LASX256D:$a, LASX256D:$b), + (XVMUH_DU LASX256D:$a, LASX256D:$b)>; + +def : LASXPat<(mulhu LASX256W:$a, LASX256W:$b), + (XVMUH_WU LASX256W:$a, LASX256W:$b)>; + +def : LASXPat<(mulhu LASX256H:$a, LASX256H:$b), + (XVMUH_HU LASX256H:$a, LASX256H:$b)>; + +def : LASXPat<(mulhu LASX256B:$a, LASX256B:$b), + (XVMUH_BU LASX256B:$a, LASX256B:$b)>; + + +def : LASXPat<(LoongArchINSVE (v8i32 LASX256W:$a), (v8i32 LASX256W:$b), uimm3:$ui3), + (XVINSVE0_W LASX256W:$a, LASX256W:$b, uimm3:$ui3)>; + +def : LASXPat<(LoongArchINSVE (v4i64 LASX256D:$a), (v4i64 LASX256D:$b), uimm2:$ui2), + (XVINSVE0_D LASX256D:$a, LASX256D:$b, uimm2:$ui2)>; + + +def : LASXPat<(LoongArchXVPICKVE (v8i32 (bitconvert (v32i8 (build_vector + (i32 0),(i32 0),(i32 0),(i32 0), + (i32 0),(i32 0),(i32 0),(i32 0), + (i32 0),(i32 0),(i32 0),(i32 0), + (i32 0),(i32 0),(i32 0),(i32 0), + (i32 0),(i32 0),(i32 0),(i32 0), + (i32 0),(i32 0),(i32 0),(i32 0), + (i32 0),(i32 0),(i32 0),(i32 0), + (i32 0),(i32 0),(i32 0),(i32 0) + )))), (v8i32 LASX256W:$b), uimm3:$ui3), + (XVPICKVE_W (v8i32 (IMPLICIT_DEF)), LASX256W:$b, uimm3:$ui3)>; + +def : LASXPat<(LoongArchXVPICKVE (v4i64 (bitconvert (v32i8 (build_vector + (i32 0),(i32 0),(i32 0),(i32 0), + (i32 0),(i32 0),(i32 0),(i32 0), + (i32 0),(i32 0),(i32 0),(i32 0), + (i32 0),(i32 0),(i32 0),(i32 0), + (i32 0),(i32 0),(i32 0),(i32 0), + (i32 0),(i32 0),(i32 0),(i32 0), + (i32 0),(i32 0),(i32 0),(i32 0), + (i32 0),(i32 0),(i32 0),(i32 0) + )))), (v4i64 LASX256D:$b), uimm2:$ui2), + (XVPICKVE_D (v4i64 (IMPLICIT_DEF)), LASX256D:$b, uimm2:$ui2)>; + + +def : LASXPat<(LoongArchXVPICKVE (v8i32 (build_vector + (i32 0),(i32 0),(i32 0),(i32 0), + (i32 0),(i32 0),(i32 0),(i32 0) + )), (v8i32 LASX256W:$b), uimm3:$ui3), + (XVPICKVE_W (v8i32 (IMPLICIT_DEF)), LASX256W:$b, uimm3:$ui3)>; + +def : LASXPat<(LoongArchXVPICKVE (v4i64 (build_vector + (i64 0),(i64 0),(i64 0),(i64 0) + )), (v4i64 LASX256D:$b), uimm2:$ui2), + (XVPICKVE_D (v4i64 (IMPLICIT_DEF)), LASX256D:$b, uimm2:$ui2)>; + + +def : LASXPat<(LoongArchXVPICKVE (v8i32 LASX256W:$a), (v8i32 LASX256W:$b), uimm3:$ui3), + (XVPICKVE_W LASX256W:$a, LASX256W:$b, uimm3:$ui3)>; + +def : LASXPat<(LoongArchXVPICKVE (v4i64 LASX256D:$a), (v4i64 LASX256D:$b), uimm2:$ui2), + (XVPICKVE_D LASX256D:$a, LASX256D:$b, uimm2:$ui2)>; + + +def : LASXPat<(LoongArchXVSHUF4I (v4i64 LASX256D:$a), (v4i64 LASX256D:$b), uimm8_32:$ui8), + (XVSHUF4I_D LASX256D:$a, LASX256D:$b, uimm8_32:$ui8)>; + +def : LASXPat<(LoongArchXVPERMI (v4i64 LASX256D:$a), uimm8_32:$ui8), + (XVPERMI_D LASX256D:$a, uimm8_32:$ui8)>; + + + + +//===----------------------------------------------------------------------===// +// Intrinsics +//===----------------------------------------------------------------------===// + +def : LASXPat<(int_loongarch_lasx_xvfcmp_cor_s (v8f32 LASX256W:$xj), (v8f32 
LASX256W:$xk)), + (XVFCMP_COR_S LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvfcmp_cor_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), + (XVFCMP_COR_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvfcmp_cun_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), + (XVFCMP_CUN_S LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvfcmp_cun_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), + (XVFCMP_CUN_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvfcmp_cune_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), + (XVFCMP_CUNE_S LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvfcmp_cune_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), + (XVFCMP_CUNE_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvfcmp_cueq_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), + (XVFCMP_CUEQ_S LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvfcmp_cueq_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), + (XVFCMP_CUEQ_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvfcmp_ceq_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), + (XVFCMP_CEQ_S LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvfcmp_ceq_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), + (XVFCMP_CEQ_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvfcmp_cne_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), + (XVFCMP_CNE_S LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvfcmp_cne_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), + (XVFCMP_CNE_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvfcmp_clt_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), + (XVFCMP_CLT_S LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvfcmp_clt_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), + (XVFCMP_CLT_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvfcmp_cult_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), + (XVFCMP_CULT_S LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvfcmp_cult_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), + (XVFCMP_CULT_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvfcmp_cle_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), + (XVFCMP_CLE_S LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvfcmp_cle_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), + (XVFCMP_CLE_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvfcmp_cule_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), + (XVFCMP_CULE_S LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvfcmp_cule_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), + (XVFCMP_CULE_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvseq_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVSEQ_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvseq_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVSEQ_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvseq_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVSEQ_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvseq_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVSEQ_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvsle_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVSLE_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsle_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVSLE_H LASX256H:$xj, LASX256H:$xk)>; +def : 
LASXPat<(int_loongarch_lasx_xvsle_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVSLE_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsle_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVSLE_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvsle_bu (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVSLE_BU LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsle_hu (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVSLE_HU LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsle_wu (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVSLE_WU LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsle_du (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVSLE_DU LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvslt_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVSLT_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvslt_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVSLT_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvslt_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVSLT_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvslt_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVSLT_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvslt_bu (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVSLT_BU LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvslt_hu (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVSLT_HU LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvslt_wu (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVSLT_WU LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvslt_du (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVSLT_DU LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvadd_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVADD_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvadd_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVADD_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvadd_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVADD_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvadd_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVADD_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvsub_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVSUB_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsub_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVSUB_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsub_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVSUB_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsub_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVSUB_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvmax_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVMAX_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmax_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVMAX_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmax_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVMAX_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmax_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVMAX_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvmin_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVMIN_B LASX256B:$xj, LASX256B:$xk)>; +def : 
LASXPat<(int_loongarch_lasx_xvmin_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVMIN_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmin_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVMIN_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmin_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVMIN_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvmin_bu (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVMIN_BU LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmin_hu (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVMIN_HU LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmin_wu (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVMIN_WU LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmin_du (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVMIN_DU LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvmul_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVMUL_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmul_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVMUL_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmul_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVMUL_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmul_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVMUL_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvdiv_bu (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVDIV_BU LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvdiv_hu (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVDIV_HU LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvdiv_wu (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVDIV_WU LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvdiv_du (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVDIV_DU LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvsll_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVSLL_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsll_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVSLL_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsll_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVSLL_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsll_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVSLL_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvsrl_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVSRL_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsrl_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVSRL_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsrl_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVSRL_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsrl_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVSRL_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvsra_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVSRA_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsra_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVSRA_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsra_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVSRA_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsra_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVSRA_D LASX256D:$xj, LASX256D:$xk)>; + +def : 
LASXPat<(int_loongarch_lasx_xvfadd_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), + (XVFADD_S LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvfadd_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), + (XVFADD_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvfsub_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), + (XVFSUB_S LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvfsub_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), + (XVFSUB_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvfmul_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), + (XVFMUL_S LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvfmul_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), + (XVFMUL_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvfdiv_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), + (XVFDIV_S LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvfdiv_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), + (XVFDIV_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvfsqrt_s (v8f32 LASX256W:$xj)), + (XVFSQRT_S LASX256W:$xj)>; +def : LASXPat<(int_loongarch_lasx_xvfsqrt_d (v4f64 LASX256D:$xj)), + (XVFSQRT_D LASX256D:$xj)>; + +def : LASXPat<(v8f32 (int_loongarch_lasx_xvffint_s_w (v8i32 LASX256W:$xj))), + (XVFFINT_S_W (v8i32 LASX256W:$xj))>; +def : LASXPat<(v8f32 (int_loongarch_lasx_xvffint_s_wu (v8i32 LASX256W:$xj))), + (XVFFINT_S_WU (v8i32 LASX256W:$xj))>; + +def : LASXPat<(v4f64 (int_loongarch_lasx_xvffint_d_l (v4i64 LASX256D:$xj))), + (XVFFINT_D_L (v4i64 LASX256D:$xj))>; +def : LASXPat<(v4f64 (int_loongarch_lasx_xvffint_d_lu (v4i64 LASX256D:$xj))), + (XVFFINT_D_LU (v4i64 LASX256D:$xj))>; + +def : LASXPat<(int_loongarch_lasx_xvreplgr2vr_b GPR32Opnd:$rj), + (XVREPLGR2VR_B GPR32Opnd:$rj)>; +def : LASXPat<(int_loongarch_lasx_xvreplgr2vr_h GPR32Opnd:$rj), + (XVREPLGR2VR_H GPR32Opnd:$rj)>; +def : LASXPat<(int_loongarch_lasx_xvreplgr2vr_w GPR32Opnd:$rj), + (XVREPLGR2VR_W GPR32Opnd:$rj)>; +def : LASXPat<(int_loongarch_lasx_xvreplgr2vr_d GPR64Opnd:$rj), + (XVREPLGR2VR_D GPR64Opnd:$rj)>; + +def : LASXPat<(int_loongarch_lasx_xvpickve2gr_w (v8i32 LASX256W:$xj), (immZExt3:$ui3)), + (XVPICKVE2GR_W LASX256W:$xj, uimm3:$ui3)>; +def : LASXPat<(int_loongarch_lasx_xvpickve2gr_d (v4i64 LASX256D:$xj), (immZExt2:$ui2)), + (XVPICKVE2GR_D LASX256D:$xj, uimm2:$ui2)>; + +def : LASXPat<(int_loongarch_lasx_xvpickve2gr_wu (v8i32 LASX256W:$xj), (immZExt3:$ui3)), + (XVPICKVE2GR_WU LASX256W:$xj, uimm3:$ui3)>; +def : LASXPat<(int_loongarch_lasx_xvpickve2gr_du (v4i64 LASX256D:$xj), (immZExt2:$ui2)), + (XVPICKVE2GR_DU LASX256D:$xj, uimm2:$ui2)>; + +def : LASXPat<(int_loongarch_lasx_xvreplve0_d (v4i64 LASX256D:$xj)), + (XVREPLVE0_D (v4i64 LASX256D:$xj))>; + +def : LASXPat<(int_loongarch_lasx_xvinsgr2vr_w (v8i32 LASX256W:$xj), GPR32Opnd:$rj, (immZExt3:$ui3)), + (XVINSGR2VR_W LASX256W:$xj, GPR32Opnd:$rj, uimm3:$ui3)>; +def : LASXPat<(int_loongarch_lasx_xvinsgr2vr_d (v4i64 LASX256D:$xj), GPR64Opnd:$rj, (immZExt2:$ui2)), + (XVINSGR2VR_D LASX256D:$xj, GPR64Opnd:$rj, uimm2:$ui2)>; + +def : LASXPat<(int_loongarch_lasx_xvpickve_w (v8i32 LASX256W:$xj), (immZExt3:$ui3)), + (XVPICKVE_W (v8i32 (IMPLICIT_DEF)), LASX256W:$xj, uimm3:$ui3)>; +def : LASXPat<(int_loongarch_lasx_xvpickve_d (v4i64 LASX256D:$xj), (immZExt2:$ui2)), + (XVPICKVE_D (v4i64 (IMPLICIT_DEF)), LASX256D:$xj, uimm2:$ui2)>; + +def : LASXPat<(int_loongarch_lasx_xvdiv_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVDIV_B LASX256B:$xj, LASX256B:$xk)>; +def : 
LASXPat<(int_loongarch_lasx_xvdiv_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVDIV_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvdiv_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVDIV_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvdiv_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVDIV_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvmod_bu (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVMOD_BU LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmod_hu (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVMOD_HU LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmod_wu (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVMOD_WU LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmod_du (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVMOD_DU LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvmod_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVMOD_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmod_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVMOD_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmod_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVMOD_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmod_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVMOD_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvmax_bu (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVMAX_BU LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmax_hu (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVMAX_HU LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmax_wu (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVMAX_WU LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmax_du (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVMAX_DU LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvfrint_s (v8f32 LASX256W:$xj)), + (XVFRINT_S LASX256W:$xj)>; +def : LASXPat<(int_loongarch_lasx_xvfrint_d (v4f64 LASX256D:$xj)), + (XVFRINT_D LASX256D:$xj)>; + +def : LASXPat<(int_loongarch_lasx_xvpackod_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVPACKOD_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvpackod_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVPACKOD_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvpackod_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVPACKOD_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvpackod_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVPACKOD_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvpackev_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVPACKEV_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvpackev_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVPACKEV_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvpackev_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVPACKEV_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvpackev_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVPACKEV_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvilvh_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVILVH_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvilvh_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVILVH_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvilvh_w 
(v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVILVH_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvilvh_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVILVH_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvilvl_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVILVL_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvilvl_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVILVL_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvilvl_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVILVL_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvilvl_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVILVL_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvpickev_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVPICKEV_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvpickev_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVPICKEV_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvpickev_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVPICKEV_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvpickev_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVPICKEV_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvpickod_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVPICKOD_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvpickod_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVPICKOD_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvpickod_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVPICKOD_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvpickod_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVPICKOD_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvsadd_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVSADD_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsadd_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVSADD_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsadd_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVSADD_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsadd_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVSADD_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvssub_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVSSUB_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvssub_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVSSUB_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvssub_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVSSUB_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvssub_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVSSUB_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvsadd_bu (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVSADD_BU LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsadd_hu (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVSADD_HU LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsadd_wu (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVSADD_WU LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsadd_du (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVSADD_DU LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvssub_bu (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVSSUB_BU LASX256B:$xj, 
LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvssub_hu (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVSSUB_HU LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvssub_wu (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVSSUB_WU LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvssub_du (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVSSUB_DU LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvmadd_b (v32i8 LASX256B:$xd_in), (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVMADD_B LASX256B:$xd_in, LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmadd_h (v16i16 LASX256H:$xd_in), (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVMADD_H LASX256H:$xd_in, LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmadd_w (v8i32 LASX256W:$xd_in), (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVMADD_W LASX256W:$xd_in, LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmadd_d (v4i64 LASX256D:$xd_in), (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVMADD_D LASX256D:$xd_in, LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvmsub_b (v32i8 LASX256B:$xd_in), (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVMSUB_B LASX256B:$xd_in, LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmsub_h (v16i16 LASX256H:$xd_in), (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVMSUB_H LASX256H:$xd_in, LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmsub_w (v8i32 LASX256W:$xd_in), (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVMSUB_W LASX256W:$xd_in, LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmsub_d (v4i64 LASX256D:$xd_in), (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVMSUB_D LASX256D:$xd_in, LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(v8i32 (int_loongarch_lasx_xvftintrz_wu_s (v8f32 LASX256W:$xj))), + (XVFTINTRZ_WU_S (v8f32 LASX256W:$xj))>; +def : LASXPat<(v4i64 (int_loongarch_lasx_xvftintrz_lu_d (v4f64 LASX256D:$xj))), + (XVFTINTRZ_LU_D (v4f64 LASX256D:$xj))>; + +def : LASXPat<(v8i32 (int_loongarch_lasx_xvftintrz_w_s (v8f32 LASX256W:$xj))), + (XVFTINTRZ_W_S (v8f32 LASX256W:$xj))>; +def : LASXPat<(v4i64 (int_loongarch_lasx_xvftintrz_l_d (v4f64 LASX256D:$xj))), + (XVFTINTRZ_L_D (v4f64 LASX256D:$xj))>; + +def : LASXPat<(int_loongarch_lasx_xvbitclr_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVBITCLR_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvbitclr_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVBITCLR_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvbitclr_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVBITCLR_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvbitclr_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVBITCLR_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvclz_b (v32i8 LASX256B:$xj)), + (XVCLZ_B LASX256B:$xj)>; +def : LASXPat<(int_loongarch_lasx_xvclz_h (v16i16 LASX256H:$xj)), + (XVCLZ_H LASX256H:$xj)>; +def : LASXPat<(int_loongarch_lasx_xvclz_w (v8i32 LASX256W:$xj)), + (XVCLZ_W LASX256W:$xj)>; +def : LASXPat<(int_loongarch_lasx_xvclz_d (v4i64 LASX256D:$xj)), + (XVCLZ_D LASX256D:$xj)>; + +def : LASXPat<(int_loongarch_lasx_xvpcnt_b (v32i8 LASX256B:$xj)), + (XVPCNT_B LASX256B:$xj)>; +def : LASXPat<(int_loongarch_lasx_xvpcnt_h (v16i16 LASX256H:$xj)), + (XVPCNT_H LASX256H:$xj)>; +def : LASXPat<(int_loongarch_lasx_xvpcnt_w (v8i32 LASX256W:$xj)), + (XVPCNT_W LASX256W:$xj)>; +def : LASXPat<(int_loongarch_lasx_xvpcnt_d 
(v4i64 LASX256D:$xj)),
+              (XVPCNT_D LASX256D:$xj)>;
+
+
+def : LASXPat<(v32i8 (load (add iPTR:$xj, iPTR:$xk))),
+              (XVLDX PtrRC:$xj, PtrRC:$xk)>;
+
+def : LASXPat<(store (v32i8 LASX256B:$xd), (add iPTR:$xj, iPTR:$xk)),
+              (XVSTX LASX256B:$xd, PtrRC:$xj, PtrRC:$xk)>;
+
+
+def : LASXPat<(v4i64 (sext_invec (v8i32 LASX256W:$xj))),
+              (VEXT2XV_D_W LASX256W:$xj)>;
+def : LASXPat<(v8i32 (sext_invec (v16i16 LASX256H:$xj))),
+              (VEXT2XV_W_H LASX256H:$xj)>;
+def : LASXPat<(v16i16 (sext_invec (v32i8 LASX256B:$xj))),
+              (VEXT2XV_H_B LASX256B:$xj)>;
+
+
+def : LASXPat<(v4i64 (zext_invec (v8i32 LASX256W:$xj))),
+              (VEXT2XV_DU_WU LASX256W:$xj)>;
+def : LASXPat<(v8i32 (zext_invec (v16i16 LASX256H:$xj))),
+              (VEXT2XV_WU_HU LASX256H:$xj)>;
+def : LASXPat<(v16i16 (zext_invec (v32i8 LASX256B:$xj))),
+              (VEXT2XV_HU_BU LASX256B:$xj)>;
+
+
+def : LASXPat<(v4i64 (sext_invec (v16i16 LASX256H:$xj))),
+              (VEXT2XV_D_H LASX256H:$xj)>;
+def : LASXPat<(v4i64 (sext_invec (v32i8 LASX256B:$xj))),
+              (VEXT2XV_D_B LASX256B:$xj)>;
+def : LASXPat<(v8i32 (sext_invec (v32i8 LASX256B:$xj))),
+              (VEXT2XV_W_B LASX256B:$xj)>;
+
+
+def : LASXPat<(v4i64 (zext_invec (v16i16 LASX256H:$xj))),
+              (VEXT2XV_DU_HU LASX256H:$xj)>;
+def : LASXPat<(v4i64 (zext_invec (v32i8 LASX256B:$xj))),
+              (VEXT2XV_DU_BU LASX256B:$xj)>;
+def : LASXPat<(v8i32 (zext_invec (v32i8 LASX256B:$xj))),
+              (VEXT2XV_WU_BU LASX256B:$xj)>;
+
+
+def : LASXPat<(v16i16 (sext (v16i8 LSX128B:$vj))),
+              (VEXT2XV_H_B
+                (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), LSX128B:$vj, sub_128))>;
+
+def : LASXPat<(v8i32 (sext (v8i16 LSX128H:$vj))),
+              (VEXT2XV_W_H
+                (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), LSX128H:$vj, sub_128))>;
+
+def : LASXPat<(v4i64 (sext (v4i32 LSX128W:$vj))),
+              (VEXT2XV_D_W
+                (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), LSX128W:$vj, sub_128))>;
+
+def : LASXPat<(v16i16 (zext (v16i8 LSX128B:$vj))),
+              (VEXT2XV_HU_BU
+                (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), LSX128B:$vj, sub_128))>;
+
+def : LASXPat<(v8i32 (zext (v8i16 LSX128H:$vj))),
+              (VEXT2XV_WU_HU
+                (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), LSX128H:$vj, sub_128))>;
+
+def : LASXPat<(v4i64 (zext (v4i32 LSX128W:$vj))),
+              (VEXT2XV_DU_WU
+                (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), LSX128W:$vj, sub_128))>;
+
+
+def : LASXPat<(xor
+                (v16i16 LASX256H:$xj), (xvsplati16 imm_mask)
+              ),
+              (XNOR_V_H_PSEUDO (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xj))>;
+
+def : LASXPat<(xor
+                (v8i32 LASX256W:$xj), (xvsplati32 imm_mask)
+              ),
+              (XNOR_V_W_PSEUDO (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xj))>;
+
+def : LASXPat<(xor
+                (v4i64 LASX256D:$xj), (xvsplati64 imm_mask_64)
+              ),
+              (XNOR_V_D_PSEUDO (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xj))>;
+
+
+def : LASXPat<(and
+                (v32i8 (xor (v32i8 LASX256B:$xj), (xvsplati8 imm_mask))),
+                (v32i8 LASX256B:$xk)
+              ),
+              (XVANDN_V (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk))>;
+
+def : LASXPat<(and
+                (v16i16 (xor (v16i16 LASX256H:$xj), (xvsplati16 imm_mask))),
+                (v16i16 LASX256H:$xk)
+              ),
+              (XVANDN_H_PSEUDO (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk))>;
+
+def : LASXPat<(and
+                (v8i32 (xor (v8i32
LASX256W:$xj), (xvsplati32 imm_mask))), + (v8i32 LASX256W:$xk) + ), + (XVANDN_W_PSEUDO (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk))>; + +def : LASXPat<(and + (v4i64 (xor (v4i64 LASX256D:$xj), (xvsplati64 imm_mask_64))), + (v4i64 LASX256D:$xk) + ), + (XVANDN_D_PSEUDO (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk))>; + + +def : LASXPat<(or + (v32i8 LASX256B:$xj), + (v32i8 (xor (v32i8 LASX256B:$xk), (xvsplati8 imm_mask))) + ), + (XVORN_V (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk))>; + +def : LASXPat<(or + (v16i16 LASX256H:$xj), + (v16i16 (xor (v16i16 LASX256H:$xk), (xvsplati16 imm_mask))) + ), + (XVORN_H_PSEUDO (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk))>; + +def : LASXPat<(or + (v8i32 LASX256W:$xj), + (v8i32 (xor (v8i32 LASX256W:$xk), (xvsplati32 imm_mask))) + ), + (XVORN_W_PSEUDO (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk))>; + +def : LASXPat<(or + (v4i64 LASX256D:$xj), + (v4i64 (xor (v4i64 LASX256D:$xk), (xvsplati64 imm_mask_64))) + ), + (XVORN_D_PSEUDO (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk))>; + + +def : LASXPat<(add (v4i64 (abs LASX256D:$a)), (v4i64 (abs LASX256D:$b))), + (XVADDA_D (v4i64 LASX256D:$a),(v4i64 LASX256D:$b))>; + +def : LASXPat<(add (v8i32 (abs LASX256W:$a)), (v8i32 (abs LASX256W:$b))), + (XVADDA_W (v8i32 LASX256W:$a),(v8i32 LASX256W:$b))>; + +def : LASXPat<(add (v16i16 (abs LASX256H:$a)), (v16i16 (abs LASX256H:$b))), + (XVADDA_H (v16i16 LASX256H:$a),(v16i16 LASX256H:$b))>; + +def : LASXPat<(add (v32i8 (abs LASX256B:$a)), (v32i8 (abs LASX256B:$b))), + (XVADDA_B (v32i8 LASX256B:$a),(v32i8 LASX256B:$b))>; + + +def : LASXPat<(and v32i8:$xj, (xor (shl xvsplat_imm_eq_1, v32i8:$xk), + (xvsplati8 imm_mask))), + (XVBITCLR_B v32i8:$xj, v32i8:$xk)>; + +def : LASXPat<(and v16i16:$xj, (xor (shl xvsplat_imm_eq_1, v16i16:$xk), + (xvsplati16 imm_mask))), + (XVBITCLR_H v16i16:$xj, v16i16:$xk)>; + +def : LASXPat<(and v8i32:$xj, (xor (shl xvsplat_imm_eq_1, v8i32:$xk), + (xvsplati32 imm_mask))), + (XVBITCLR_W v8i32:$xj, v8i32:$xk)>; + +def : LASXPat<(and v4i64:$xj, (xor (shl xvsplat_imm_eq_1, v4i64:$xk), + (xvsplati64 imm_mask_64))), + (XVBITCLR_D v4i64:$xj, v4i64:$xk)>; diff --git a/lib/Target/LoongArch/LoongArchLSXInstrFormats.td b/lib/Target/LoongArch/LoongArchLSXInstrFormats.td new file mode 100644 index 00000000..50df4d72 --- /dev/null +++ b/lib/Target/LoongArch/LoongArchLSXInstrFormats.td @@ -0,0 +1,449 @@ +//===- LoongArchLSXInstrFormats.td - LoongArch LSX Instruction Formats ---*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +class LSXInst : InstLA<(outs), (ins), "", [], FrmOther>, + EXT_LSX { +} + +class LSXCBranch : LSXInst { +} + +class LSXSpecial : LSXInst { +} + +class LSXPseudo<dag outs, dag ins, list<dag> pattern>: + LoongArchPseudo<outs, ins, pattern> { + let Predicates = [HasLSX]; +} + +class LSX_3R<bits<17> op>: LSXInst { + bits<5> vk; + bits<5> vj; + bits<5> vd; + + let Inst{31-15} = op; + let Inst{14-10} = vk; + let Inst{9-5} = vj; + let Inst{4-0} = vd; +} + +class LSX_3R_1GP<bits<17> op>: LSXInst { + bits<5> rk; + bits<5> vj; + bits<5> vd; + + let Inst{31-15} = op; + let Inst{14-10} = rk; + let Inst{9-5} = vj; + let Inst{4-0} = vd; +} + +class LSX_I5<bits<17> op>: LSXInst { + bits<5> vd; + bits<5> vj; + bits<5> si5; + + let Inst{31-15} = op; + let Inst{14-10} = si5; + let Inst{9-5} = vj; + let Inst{4-0} = vd; +} + +class LSX_I5_U<bits<17> op>: LSXInst { + bits<5> vd; + bits<5> vj; + bits<5> ui5; + + let Inst{31-15} = op; + let Inst{14-10} = ui5; + let Inst{9-5} = vj; + let Inst{4-0} = vd; +} + +class LSX_2R<bits<22> op>: LSXInst { + bits<5> vj; + bits<5> vd; + + let Inst{31-10} = op; + let Inst{9-5} = vj; + let Inst{4-0} = vd; +} + +class LSX_2R_1GP<bits<22> op>: LSXInst { + bits<5> rj; + bits<5> vd; + + let Inst{31-10} = op; + let Inst{9-5} = rj; + let Inst{4-0} = vd; +} + +class LSX_I1_U<bits<21> op>: LSXInst { + bits<5> vd; + bits<5> vj; + bits<1> ui1; + + let Inst{31-11} = op; + let Inst{10} = ui1; + let Inst{9-5} = vj; + let Inst{4-0} = vd; +} + +class LSX_I2_U<bits<20> op>: LSXInst { + bits<5> vd; + bits<5> vj; + bits<2> ui2; + + let Inst{31-12} = op; + let Inst{11-10} = ui2; + let Inst{9-5} = vj; + let Inst{4-0} = vd; +} + +class LSX_I3_U<bits<19> op>: LSXInst { + bits<5> vd; + bits<5> vj; + bits<3> ui3; + + let Inst{31-13} = op; + let Inst{12-10} = ui3; + let Inst{9-5} = vj; + let Inst{4-0} = vd; +} + +class LSX_I4_U<bits<18> op>: LSXInst { + bits<5> vd; + bits<5> vj; + bits<4> ui4; + + let Inst{31-14} = op; + let Inst{13-10} = ui4; + let Inst{9-5} = vj; + let Inst{4-0} = vd; +} + +class LSX_I6_U<bits<16> op>: LSXInst { + bits<5> vd; + bits<5> vj; + bits<6> ui6; + + let Inst{31-16} = op; + let Inst{15-10} = ui6; + let Inst{9-5} = vj; + let Inst{4-0} = vd; +} + +class LSX_I1_R_U<bits<21> op>: LSXInst { + bits<5> vd; + bits<5> rj; + bits<1> ui1; + + let Inst{31-11} = op; + let Inst{10} = ui1; + let Inst{9-5} = rj; + let Inst{4-0} = vd; +} + +class LSX_I2_R_U<bits<20> op>: LSXInst { + bits<5> vd; + bits<5> rj; + bits<2> ui2; + + let Inst{31-12} = op; + let Inst{11-10} = ui2; + let Inst{9-5} = rj; + let Inst{4-0} = vd; +} + +class LSX_I3_R_U<bits<19> op>: LSXInst { + bits<5> vd; + bits<5> rj; + bits<3> ui3; + + let Inst{31-13} = op; + let Inst{12-10} = ui3; + let Inst{9-5} = rj; + let Inst{4-0} = vd; +} + +class LSX_I4_R_U<bits<18> op>: LSXInst { + bits<5> vd; + bits<5> rj; + bits<4> ui4; + + let Inst{31-14} = op; + let Inst{13-10} = ui4; + let Inst{9-5} = rj; + let Inst{4-0} = vd; +} + +class LSX_ELM_COPY_B<bits<18> op>: LSXInst { + bits<5> rd; + bits<5> vj; + bits<4> ui4; + + let Inst{31-14} = op; + let Inst{13-10} = ui4; + let Inst{9-5} = vj; + let Inst{4-0} = rd; +} + +class LSX_ELM_COPY_H<bits<19> op>: LSXInst { + bits<5> rd; + bits<5> vj; + bits<3> ui3; + + let Inst{31-13} = op; + let Inst{12-10} = ui3; + let Inst{9-5} = vj; + let Inst{4-0} = rd; +} + +class LSX_ELM_COPY_W<bits<20> op>: LSXInst { + bits<5> rd; + bits<5> vj; + bits<2> ui2; + + let Inst{31-12} = op; + let Inst{11-10} = ui2; + let Inst{9-5} = vj; + let Inst{4-0} = rd; +} + +class LSX_ELM_COPY_D<bits<21> op>: LSXInst { + bits<5> rd; + bits<5> vj; + bits<1> ui1; + + let Inst{31-11} = op; + let Inst{10} = ui1; + let Inst{9-5} = vj; + let Inst{4-0} = rd; +}
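+ +// Note: in the LSX_ELM_COPY_* formats above, the element-index field narrows +// as the element widens (ui4 for byte elements down to ui1 for doubleword +// elements), so the opcode field widens correspondingly to keep bits 31-10 +// fully assigned.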
+ +class LSX_I8_U<bits<14> op>: LSXInst { + bits<5> vd; + bits<5> vj; + bits<8> ui8; + + let Inst{31-18} = op; + let Inst{17-10} = ui8; + let Inst{9-5} = vj; + let Inst{4-0} = vd; +} + +class LSX_I7_U<bits<15> op>: LSXInst { + bits<5> vd; + bits<5> vj; + bits<7> ui7; + + let Inst{31-17} = op; + let Inst{16-10} = ui7; + let Inst{9-5} = vj; + let Inst{4-0} = vd; +} + +class LSX_I12_S<bits<10> op>: LSXInst { + bits<5> vd; +// bits<5> rj; +// bits<12> si12; + bits<17> addr; + + let Inst{31-22} = op; + let Inst{21-10} = addr{11-0}; + let Inst{9-5} = addr{16-12}; + let Inst{4-0} = vd; +} + +class LSX_SI12_S<bits<10> op>: LSXInst { + bits<5> vd; + bits<17> addr; + + let Inst{31-22} = op; + let Inst{21-10} = addr{11-0}; + let Inst{9-5} = addr{16-12}; + let Inst{4-0} = vd; +} + +class LSX_SI11_S<bits<11> op>: LSXInst { + bits<5> vd; + bits<16> addr; + + let Inst{31-21} = op; + let Inst{20-10} = addr{10-0}; + let Inst{9-5} = addr{15-11}; + let Inst{4-0} = vd; +} + +class LSX_SI10_S<bits<12> op>: LSXInst { + bits<5> vd; + bits<15> addr; + + let Inst{31-20} = op; + let Inst{19-10} = addr{9-0}; + let Inst{9-5} = addr{14-10}; + let Inst{4-0} = vd; +} + +class LSX_SI9_S<bits<13> op>: LSXInst { + bits<5> vd; + bits<14> addr; + + let Inst{31-19} = op; + let Inst{18-10} = addr{8-0}; + let Inst{9-5} = addr{13-9}; + let Inst{4-0} = vd; +} + +class LSX_SET<bits<22> op>: LSXInst { + bits<5> vj; + bits<3> cd; + + let Inst{31-10} = op; + let Inst{9-5} = vj; + let Inst{4-3} = 0b00; + let Inst{2-0} = cd; +} + +class LSX_VR4MUL<bits<12> op>: LSXInst { + bits<5> vd; + bits<5> vj; + bits<5> vk; + bits<5> va; + + let Inst{31-20} = op; + let Inst{19-15} = va; + let Inst{14-10} = vk; + let Inst{9-5} = vj; + let Inst{4-0} = vd; +} + +class LSX_VFCMP<bits<12> op>: LSXInst { + bits<5> vd; + bits<5> vj; + bits<5> vk; + bits<5> cond; + + let Inst{31-20} = op; + let Inst{19-15} = cond; + let Inst{14-10} = vk; + let Inst{9-5} = vj; + let Inst{4-0} = vd; +} + +class LSX_Addr_SI8_idx1<bits<13> op>: LSXInst { + bits<5> vd; + bits<13> addr; + bits<1> idx; + + let Inst{31-19} = op; + let Inst{18-11} = addr{7-0}; + let Inst{10} = idx; + let Inst{9-5} = addr{12-8}; + let Inst{4-0} = vd; +} + +class LSX_SI8_idx1<bits<13> op>: LSXInst { + bits<5> vd; + bits<5> rj; + bits<8> si8; + bits<1> idx; + + let Inst{31-19} = op; + let Inst{18} = idx; + let Inst{17-10} = si8; + let Inst{9-5} = rj; + let Inst{4-0} = vd; +} + +class LSX_SI8_idx2<bits<12> op>: LSXInst { + bits<5> vd; + bits<5> rj; + bits<8> si8; + bits<2> idx; + + let Inst{31-20} = op; + let Inst{19-18} = idx; + let Inst{17-10} = si8; + let Inst{9-5} = rj; + let Inst{4-0} = vd; +} + +class LSX_SI8_idx3<bits<11> op>: LSXInst { + bits<5> vd; + bits<5> rj; + bits<8> si8; + bits<3> idx; + + let Inst{31-21} = op; + let Inst{20-18} = idx; + let Inst{17-10} = si8; + let Inst{9-5} = rj; + let Inst{4-0} = vd; +} + +class LSX_SI8_idx4<bits<10> op>: LSXInst { + bits<5> vd; + bits<5> rj; + bits<8> si8; + bits<4> idx; + + let Inst{31-22} = op; + let Inst{21-18} = idx; + let Inst{17-10} = si8; + let Inst{9-5} = rj; + let Inst{4-0} = vd; +} + +class LSX_3R_2GP<bits<17> op>: LSXInst { + bits<5> rk; + bits<5> rj; + bits<5> vd; + + let Inst{31-15} = op; + let Inst{14-10} = rk; + let Inst{9-5} = rj; + let Inst{4-0} = vd; +} + +class LSX_I5_mode_U<bits<17> op>: LSXInst { + bits<5> vd; + bits<5> mode; + bits<5> ui5; + + let Inst{31-15} = op; + let Inst{14-10} = ui5; + let Inst{9-5} = mode; + let Inst{4-0} = vd; +} + +class LSX_1R_I13<bits<14> op>: LSXInst { + bits<13> i13; + bits<5> vd; + + let Inst{31-18} = op; + let Inst{17-5} = i13; + let Inst{4-0} = vd; +} + +class LSX_1R_I13_I10<bits<17> op>: LSXInst { + bits<10> i10; + bits<5> vd; + + let Inst{31-15} = op; + let
Inst{14-5} = i10; + let Inst{4-0} = vd; +} + + + + + + + diff --git a/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/lib/Target/LoongArch/LoongArchLSXInstrInfo.td new file mode 100644 index 00000000..69fdc3a8 --- /dev/null +++ b/lib/Target/LoongArch/LoongArchLSXInstrInfo.td @@ -0,0 +1,5904 @@ +//===- LoongArchLSXInstrInfo.td - LSX instructions -*- tablegen ------------*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes LoongArch LSX instructions. +// +//===----------------------------------------------------------------------===// + +def SDT_LoongArchVecCond : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>]>; +def SDT_VSetCC : SDTypeProfile<1, 3, [SDTCisInt<0>, + SDTCisInt<1>, + SDTCisSameAs<1, 2>, + SDTCisVT<3, OtherVT>]>; +def SDT_VFSetCC : SDTypeProfile<1, 3, [SDTCisInt<0>, + SDTCisFP<1>, + SDTCisSameAs<1, 2>, + SDTCisVT<3, OtherVT>]>; +def SDT_VSHF : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisVec<0>, + SDTCisInt<1>, SDTCisVec<1>, + SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>]>; +def SDT_SHF : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>, + SDTCisVT<1, i32>, SDTCisSameAs<0, 2>]>; +def SDT_ILV : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>, + SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>; +def SDTVABSD : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>; + +def SDT_VBROADCAST : SDTypeProfile<1, 1, [SDTCisVec<0>]>; +def LoongArchVBROADCAST : SDNode<"LoongArchISD::VBROADCAST", SDT_VBROADCAST>; + +def LoongArchVAllNonZero : SDNode<"LoongArchISD::VALL_NONZERO", SDT_LoongArchVecCond>; +def LoongArchVAnyNonZero : SDNode<"LoongArchISD::VANY_NONZERO", SDT_LoongArchVecCond>; +def LoongArchVAllZero : SDNode<"LoongArchISD::VALL_ZERO", SDT_LoongArchVecCond>; +def LoongArchVAnyZero : SDNode<"LoongArchISD::VANY_ZERO", SDT_LoongArchVecCond>; +def LoongArchVNOR : SDNode<"LoongArchISD::VNOR", SDTIntBinOp, + [SDNPCommutative, SDNPAssociative]>; +def LoongArchVSHF : SDNode<"LoongArchISD::VSHF", SDT_VSHF>; +def LoongArchSHF : SDNode<"LoongArchISD::SHF", SDT_SHF>; +def LoongArchVPACKEV : SDNode<"LoongArchISD::VPACKEV", SDT_ILV>; +def LoongArchVPACKOD : SDNode<"LoongArchISD::VPACKOD", SDT_ILV>; +def LoongArchVILVH : SDNode<"LoongArchISD::VILVH", SDT_ILV>; +def LoongArchVILVL : SDNode<"LoongArchISD::VILVL", SDT_ILV>; +def LoongArchVPICKEV : SDNode<"LoongArchISD::VPICKEV", SDT_ILV>; +def LoongArchVPICKOD : SDNode<"LoongArchISD::VPICKOD", SDT_ILV>; +def LoongArchVABSD : SDNode<"LoongArchISD::VABSD", SDTVABSD>; +def LoongArchUVABSD : SDNode<"LoongArchISD::UVABSD", SDTVABSD>; + +def vsetcc : SDNode<"ISD::SETCC", SDT_VSetCC>; +def vfsetcc : SDNode<"ISD::SETCC", SDT_VFSetCC>; + +def LoongArchVExtractSExt : SDNode<"LoongArchISD::VEXTRACT_SEXT_ELT", + SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>, []>; +def LoongArchVExtractZExt : SDNode<"LoongArchISD::VEXTRACT_ZEXT_ELT", + SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>, []>; + +def immZExt1Ptr : ImmLeaf<iPTR, [{return isUInt<1>(Imm);}]>; +def immZExt2Ptr : ImmLeaf<iPTR, [{return isUInt<2>(Imm);}]>; +def immZExt3Ptr : ImmLeaf<iPTR, [{return isUInt<3>(Imm);}]>; +def immZExt4Ptr : ImmLeaf<iPTR, [{return isUInt<4>(Imm);}]>; +def immZExt5Ptr : ImmLeaf<iPTR, [{return isUInt<5>(Imm);}]>; +def immZExt8 : ImmLeaf<i32, [{return isUInt<8>(Imm);}]>; +def immZExt7 : PatLeaf<(imm), [{ return isUInt<7>(N->getZExtValue()); }]>; +def immZExt6 : ImmLeaf<i32, [{return Imm == (Imm & 0x3f);}]>; +def immZExt4 : ImmLeaf<i32, [{return isUInt<4>(Imm);}]>; +def immZExt3 : ImmLeaf<i32, [{return isUInt<3>(Imm);}]>; +def immZExt2 : ImmLeaf<i32, [{return isUInt<2>(Imm);}]>; +def immZExt1 :
ImmLeaf<i32, [{return isUInt<1>(Imm);}]>; +def immSExt12_l : ImmLeaf<i32, [{return isInt<12>(Imm);}]>; +def immSExt11Ptr : ImmLeaf<iPTR, [{return isInt<11>(Imm);}]>; + +def immSExt11_1 : ImmLeaf<i32, [{return isInt<11>(Imm<<1);}]>; +def immSExt10Ptr : ImmLeaf<iPTR, [{return isInt<10>(Imm);}]>; +def immSExt10_2 : ImmLeaf<i32, [{return isInt<10>(Imm<<2);}]>; +def immSExt9Ptr : ImmLeaf<iPTR, [{return isInt<9>(Imm);}]>; +def immSExt9_3 : ImmLeaf<i32, [{return isInt<9>(Imm<<3);}]>; +def immSExt8 : ImmLeaf<i32, [{return isInt<8>(Imm);}]>; +def immSExt5 : ImmLeaf<i32, [{return isInt<5>(Imm);}]>; +def immSExt8_1 : ImmLeaf<i32, [{return isInt<8>(Imm<<1);}]>; +def immSExt8_2 : ImmLeaf<i32, [{return isInt<8>(Imm<<2);}]>; +def immSExt8_3 : ImmLeaf<i32, [{return isInt<8>(Imm<<3);}]>; + +def addrimm10 : ComplexPattern; +def addrimm10lsl2 : ComplexPattern; +def addrimm9lsl3 : ComplexPattern; +def addrimm11lsl1 : ComplexPattern; + + +class SimmLslAsmOperandClass<int Bits, list<AsmOperandClass> Supers = [], + int Shift = 0> : AsmOperandClass { + let Name = "Simm" # Bits # "_Lsl" # Shift; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isScaledSImm<" # Bits # ", " # Shift # ">"; + let SuperClasses = Supers; + let DiagnosticType = "SImm" # Bits # "_Lsl" # Shift; +} + +def Simm11Lsl1AsmOperand + : SimmLslAsmOperandClass<11, [], 1>; + +def immSExt11_1_O : Operand<i32> { + let EncoderMethod = "getSImm11Lsl1Encoding"; + let DecoderMethod = "DecodeSImmWithOffsetAndScale<2>"; + let ParserMatchClass = Simm11Lsl1AsmOperand; +} + +def Simm10Lsl2AsmOperand + : SimmLslAsmOperandClass<10, [], 2>; + +def immSExt10_2_O : Operand<i32> { + let EncoderMethod = "getSImm10Lsl2Encoding"; + let DecoderMethod = "DecodeSImmWithOffsetAndScale<4>"; + let ParserMatchClass = Simm10Lsl2AsmOperand; +} + +def Simm9Lsl3AsmOperand + : SimmLslAsmOperandClass<9, [], 3>; + +def immSExt9_3_O : Operand<i32> { + let EncoderMethod = "getSImm9Lsl3Encoding"; + let DecoderMethod = "DecodeSImmWithOffsetAndScale<8>"; + let ParserMatchClass = Simm9Lsl3AsmOperand; +} + +def Simm8Lsl3AsmOperand + : SimmLslAsmOperandClass<8, [], 3>; + +def immSExt8_3_O : Operand<i32> { + let EncoderMethod = "getSImm8Lsl3Encoding"; + let DecoderMethod = "DecodeSImmWithOffsetAndScale<8>"; + let ParserMatchClass = Simm8Lsl3AsmOperand; +} + +def Simm8Lsl2AsmOperand + : SimmLslAsmOperandClass<8, [], 2>; + +def immSExt8_2_O : Operand<i32> { + let EncoderMethod = "getSImm8Lsl2Encoding"; + let DecoderMethod = "DecodeSImmWithOffsetAndScale<4>"; + let ParserMatchClass = Simm8Lsl2AsmOperand; +} + +def Simm8Lsl1AsmOperand + : SimmLslAsmOperandClass<8, [], 1>; + +def immSExt8_1_O : Operand<i32> { + let EncoderMethod = "getSImm8Lsl1Encoding"; + let DecoderMethod = "DecodeSImmWithOffsetAndScale<2>"; + let ParserMatchClass = Simm8Lsl1AsmOperand; +} + + +class ConstantSImmAsmOperandClass<int Bits, list<AsmOperandClass> Supers = [], + int Offset = 0> : AsmOperandClass { + let Name = "ConstantSImm" # Bits # "_" # Offset; + let RenderMethod = "addConstantSImmOperands<" # Bits # ", " # Offset # ">"; + let PredicateMethod = "isConstantSImm<" # Bits # ", " # Offset # ">"; + let SuperClasses = Supers; + let DiagnosticType = "SImm" # Bits # "_" # Offset; +} + +class ConstantUImmRangeAsmOperandClass<int Bottom, int Top, + list<AsmOperandClass> Supers = []> + : AsmOperandClass { + let Name = "ConstantUImmRange" # Bottom # "_" # Top; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isConstantUImmRange<" # Bottom # ", " # Top # ">"; + let SuperClasses = Supers; + let DiagnosticType = "UImmRange" # Bottom # "_" # Top; +} + +def SImm16RelaxedAsmOperandClass + : SImmAsmOperandClass<16, [UImm16RelaxedAsmOperandClass]> { + let Name = "SImm16_Relaxed"; + let PredicateMethod = "isAnyImm<16>"; + let DiagnosticType = "SImm16_Relaxed"; +} + +def ConstantSImm11Lsl1AsmOperandClass : AsmOperandClass { + let Name = "SImm11Lsl1"; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isScaledSImm<11, 1>"; + let
SuperClasses = [SImm12Operand]; + let DiagnosticType = "SImm11_Lsl1"; +} + +def ConstantSImm9Lsl3AsmOperandClass : AsmOperandClass { + let Name = "SImm9Lsl3"; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isScaledSImm<9, 3>"; + let SuperClasses = [SImm12Operand]; + let DiagnosticType = "SImm9_Lsl3"; +} + +def ConstantSImm10Lsl2AsmOperandClass : AsmOperandClass { + let Name = "SImm10Lsl2"; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isScaledSImm<10, 2>"; + let SuperClasses = [SImm12Operand]; + let DiagnosticType = "SImm10_Lsl2"; +} +def ConstantSImm11AsmOperandClass + : ConstantSImmAsmOperandClass<11, [ConstantSImm10Lsl2AsmOperandClass]>; +def ConstantSImm10Lsl1AsmOperandClass : AsmOperandClass { + let Name = "SImm10Lsl1"; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isScaledSImm<10, 1>"; + let SuperClasses = [ConstantSImm11AsmOperandClass]; + let DiagnosticType = "SImm10_Lsl1"; +} +def ConstantUImm10AsmOperandClass + : ConstantUImmAsmOperandClass<10, [ConstantSImm10Lsl1AsmOperandClass]>; +def ConstantSImm10AsmOperandClass + : ConstantSImmAsmOperandClass<10, [ConstantUImm10AsmOperandClass]>; +def ConstantSImm9AsmOperandClass + : ConstantSImmAsmOperandClass<9, [ConstantSImm10AsmOperandClass]>; +def ConstantSImm7Lsl2AsmOperandClass : AsmOperandClass { + let Name = "SImm7Lsl2"; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isScaledSImm<7, 2>"; + let SuperClasses = [ConstantSImm9AsmOperandClass]; + let DiagnosticType = "SImm7_Lsl2"; +} +def ConstantUImm8AsmOperandClass + : ConstantUImmAsmOperandClass<8, [ConstantSImm7Lsl2AsmOperandClass]>; +def ConstantUImm7Sub1AsmOperandClass + : ConstantUImmAsmOperandClass<7, [ConstantUImm8AsmOperandClass], -1> { + // Specify the names since the -1 offset causes invalid identifiers otherwise. + let Name = "UImm7_N1"; + let DiagnosticType = "UImm7_N1"; +} +def ConstantUImm7AsmOperandClass + : ConstantUImmAsmOperandClass<7, [ConstantUImm7Sub1AsmOperandClass]>; +def ConstantUImm6Lsl2AsmOperandClass : AsmOperandClass { + let Name = "UImm6Lsl2"; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isScaledUImm<6, 2>"; + let SuperClasses = [ConstantUImm7AsmOperandClass]; + let DiagnosticType = "UImm6_Lsl2"; +} +def ConstantUImm6AsmOperandClass + : ConstantUImmAsmOperandClass<6, [ConstantUImm6Lsl2AsmOperandClass]>; +def ConstantSImm6AsmOperandClass + : ConstantSImmAsmOperandClass<6, [ConstantUImm6AsmOperandClass]>; +def ConstantUImm5Lsl2AsmOperandClass : AsmOperandClass { + let Name = "UImm5Lsl2"; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isScaledUImm<5, 2>"; + let SuperClasses = [ConstantSImm6AsmOperandClass]; + let DiagnosticType = "UImm5_Lsl2"; +} +def ConstantUImm5_Range2_64AsmOperandClass + : ConstantUImmRangeAsmOperandClass<2, 64, [ConstantUImm5Lsl2AsmOperandClass]>; +def ConstantUImm5Plus33AsmOperandClass + : ConstantUImmAsmOperandClass<5, [ConstantUImm5_Range2_64AsmOperandClass], + 33>; +def ConstantUImm5ReportUImm6AsmOperandClass + : ConstantUImmAsmOperandClass<5, [ConstantUImm5Plus33AsmOperandClass]> { + let Name = "ConstantUImm5_0_Report_UImm6"; + let DiagnosticType = "UImm5_0_Report_UImm6"; +} +def ConstantUImm5Plus32AsmOperandClass + : ConstantUImmAsmOperandClass< + 5, [ConstantUImm5ReportUImm6AsmOperandClass], 32>; +def ConstantUImm5Plus32NormalizeAsmOperandClass + : ConstantUImmAsmOperandClass<5, [ConstantUImm5Plus32AsmOperandClass], 32> { + let Name = "ConstantUImm5_32_Norm"; + // We must also subtract 32 when we render the operand. 
+ let RenderMethod = "addConstantUImmOperands<5, 32, -32>"; +} +def ConstantUImm5Plus1ReportUImm6AsmOperandClass + : ConstantUImmAsmOperandClass< + 5, [ConstantUImm5Plus32NormalizeAsmOperandClass], 1>{ + let Name = "ConstantUImm5_Plus1_Report_UImm6"; +} +def ConstantUImm5Plus1AsmOperandClass + : ConstantUImmAsmOperandClass< + 5, [ConstantUImm5Plus1ReportUImm6AsmOperandClass], 1>; +def ConstantUImm5AsmOperandClass + : ConstantUImmAsmOperandClass<5, [ConstantUImm5Plus1AsmOperandClass]>; +def ConstantSImm5AsmOperandClass + : ConstantSImmAsmOperandClass<5, [ConstantUImm5AsmOperandClass]>; +def ConstantUImm4AsmOperandClass + : ConstantUImmAsmOperandClass<4, [ConstantSImm5AsmOperandClass]>; +def ConstantSImm4AsmOperandClass + : ConstantSImmAsmOperandClass<4, [ConstantUImm4AsmOperandClass]>; +def ConstantUImm3AsmOperandClass + : ConstantUImmAsmOperandClass<3, [ConstantSImm4AsmOperandClass]>; +def ConstantUImm2AsmOperandClass + : ConstantUImmAsmOperandClass<2, [ConstantUImm3AsmOperandClass]>; +def ConstantUImm1AsmOperandClass + : ConstantUImmAsmOperandClass<1, [ConstantUImm2AsmOperandClass]>; +def ConstantImmzAsmOperandClass : AsmOperandClass { + let Name = "ConstantImmz"; + let RenderMethod = "addConstantUImmOperands<1>"; + let PredicateMethod = "isConstantImmz"; + let SuperClasses = [ConstantUImm1AsmOperandClass]; + let DiagnosticType = "Immz"; +} + +foreach I = {1, 2, 3, 4, 5, 6, 8} in + def vsplat_uimm # I : Operand<vAny> { + let PrintMethod = "printUImm<" # I # ">"; + let ParserMatchClass = + !cast<AsmOperandClass>("ConstantUImm" # I # "AsmOperandClass"); + } + +foreach I = {5, 10} in + def vsplat_simm # I : Operand<vAny> { + let ParserMatchClass = + !cast<AsmOperandClass>("ConstantSImm" # I # "AsmOperandClass"); + } + +foreach I = {1, 4, 7, 8, 10, 20, 26} in + def uimm # I : Operand<i32> { + let PrintMethod = "printUImm<" # I # ">"; + let ParserMatchClass = + !cast<AsmOperandClass>("ConstantUImm" # I # "AsmOperandClass"); + } + +foreach I = {1, 2, 3, 4, 5, 6, 7, 8} in + def uimm # I # _ptr : Operand<iPTR> { + let PrintMethod = "printUImm<" # I # ">"; + let ParserMatchClass = + !cast<AsmOperandClass>("ConstantUImm" # I # "AsmOperandClass"); + } + + +def addrimm12 : ComplexPattern; + + +def LoongArchMemSimm12AsmOperand : AsmOperandClass { + let Name = "MemOffsetSimm12"; + let SuperClasses = [LoongArchMemAsmOperand]; + let RenderMethod = "addMemOperands"; + let ParserMethod = "parseMemOperand"; + let PredicateMethod = "isMemWithSimmOffset<12>"; + let DiagnosticType = "MemSImm12"; +} + +def mem_simm12 : mem_generic { + let MIOperandInfo = (ops ptr_rc, simm12); + let EncoderMethod = "getMemEncoding"; + let ParserMatchClass = LoongArchMemSimm12AsmOperand; +} + +foreach I = {4, 6, 9, 10, 11} in + def simm # I : Operand<i32> { + let DecoderMethod = "DecodeSImmWithOffsetAndScale<" # I # ">"; + let ParserMatchClass = + !cast<AsmOperandClass>("ConstantSImm" # I # "AsmOperandClass"); + } + +def LoongArchMemSimm9AsmOperand : AsmOperandClass { + let Name = "MemOffsetSimm9"; + let SuperClasses = [LoongArchMemAsmOperand]; + let RenderMethod = "addMemOperands"; + let ParserMethod = "parseMemOperand"; + let PredicateMethod = "isMemWithSimmOffset<9>"; + let DiagnosticType = "MemSImm9"; +} + +def LoongArchMemSimm10AsmOperand : AsmOperandClass { + let Name = "MemOffsetSimm10"; + let SuperClasses = [LoongArchMemAsmOperand]; + let RenderMethod = "addMemOperands"; + let ParserMethod = "parseMemOperand"; + let PredicateMethod = "isMemWithSimmOffset<10>"; + let DiagnosticType = "MemSImm10"; +} + +def LoongArchMemSimm11AsmOperand : AsmOperandClass { + let Name = "MemOffsetSimm11"; + let SuperClasses = [LoongArchMemAsmOperand]; +
let RenderMethod = "addMemOperands"; + let ParserMethod = "parseMemOperand"; + let PredicateMethod = "isMemWithSimmOffset<11>"; + let DiagnosticType = "MemSImm11"; +} + +def simm13 : PatLeaf<(imm), [{ return isInt<13>(N->getSExtValue()); }]>; + +def simm10Op : Operand<i32> { + let DecoderMethod = "DecodeSIMM10"; +} + +def simm13Op : Operand<i32> { + let DecoderMethod = "DecodeSIMM13"; +} + +def LoongArchMemSimm10Lsl2AsmOperand : AsmOperandClass { + let Name = "MemOffsetSimm10_2"; + let SuperClasses = [LoongArchMemAsmOperand]; + let RenderMethod = "addMemOperands"; + let ParserMethod = "parseMemOperand"; + let PredicateMethod = "isMemWithSimmOffset<10, 2>"; + let DiagnosticType = "MemSImm10Lsl2"; +} + + +def simm10_lsl2 : Operand<i32> { +// let DecoderMethod = "DecodeSImmWithOffsetAndScale<10, 2>"; + let ParserMatchClass = + !cast<AsmOperandClass>("ConstantSImm10Lsl2AsmOperandClass"); +} + +def mem_simm10_lsl2 : mem_generic { + let MIOperandInfo = (ops ptr_rc, !cast<Operand>("simm10_lsl2")); + let EncoderMethod = "getMemEncoding10l2"; + let ParserMatchClass = + !cast<AsmOperandClass>("LoongArchMemSimm10Lsl2AsmOperand"); +} + + +def LoongArchMemSimm11Lsl1AsmOperand : AsmOperandClass { + let Name = "MemOffsetSimm11_1"; + let SuperClasses = [LoongArchMemAsmOperand]; + let RenderMethod = "addMemOperands"; + let ParserMethod = "parseMemOperand"; + let PredicateMethod = "isMemWithSimmOffset<11, 1>"; + let DiagnosticType = "MemSImm11Lsl1"; +} + + +def simm11_lsl1 : Operand<i32> { + // let DecoderMethod = "DecodeSImmWithOffsetAndScale<11, 1>"; + let ParserMatchClass = + !cast<AsmOperandClass>("ConstantSImm11Lsl1AsmOperandClass"); +} + +def mem_simm11_lsl1 : mem_generic { + let MIOperandInfo = (ops ptr_rc, !cast<Operand>("simm11_lsl1")); + let EncoderMethod = "getMemEncoding11l1"; + let ParserMatchClass = + !cast<AsmOperandClass>("LoongArchMemSimm11Lsl1AsmOperand"); +} + +def LoongArchMemSimm9Lsl3AsmOperand : AsmOperandClass { + let Name = "MemOffsetSimm9_3"; + let SuperClasses = [LoongArchMemAsmOperand]; + let RenderMethod = "addMemOperands"; + let ParserMethod = "parseMemOperand"; + let PredicateMethod = "isMemWithSimmOffset<9, 3>"; + let DiagnosticType = "MemSImm9Lsl3"; +} + + +def simm9_lsl3 : Operand<i32> { + // let DecoderMethod = "DecodeSImmWithOffsetAndScale<9, 3>"; + let ParserMatchClass = + !cast<AsmOperandClass>("ConstantSImm9Lsl3AsmOperandClass"); +} + +def mem_simm9_lsl3 : mem_generic { + let MIOperandInfo = (ops ptr_rc, !cast<Operand>("simm9_lsl3")); + let EncoderMethod = "getMemEncoding9l3"; + let ParserMatchClass = + !cast<AsmOperandClass>("LoongArchMemSimm9Lsl3AsmOperand"); +} + + + + +// Operands + +def immZExt2Lsa : ImmLeaf<i32, [{return isUInt<2>(Imm - 1);}]>; + +// Pattern fragments +def vextract_sext_i8 : PatFrag<(ops node:$vec, node:$idx), + (LoongArchVExtractSExt node:$vec, node:$idx, i8)>; +def vextract_sext_i16 : PatFrag<(ops node:$vec, node:$idx), + (LoongArchVExtractSExt node:$vec, node:$idx, i16)>; +def vextract_sext_i32 : PatFrag<(ops node:$vec, node:$idx), + (LoongArchVExtractSExt node:$vec, node:$idx, i32)>; +def vextract_sext_i64 : PatFrag<(ops node:$vec, node:$idx), + (LoongArchVExtractSExt node:$vec, node:$idx, i64)>; + +def vextract_zext_i8 : PatFrag<(ops node:$vec, node:$idx), + (LoongArchVExtractZExt node:$vec, node:$idx, i8)>; +def vextract_zext_i16 : PatFrag<(ops node:$vec, node:$idx), + (LoongArchVExtractZExt node:$vec, node:$idx, i16)>; +def vextract_zext_i32 : PatFrag<(ops node:$vec, node:$idx), + (LoongArchVExtractZExt node:$vec, node:$idx, i32)>; +def vextract_zext_i64 : PatFrag<(ops node:$vec, node:$idx), + (LoongArchVExtractZExt node:$vec, node:$idx, i64)>; + +def vldrepl_v16i8 : PatFrag<(ops node:$v1), + (v16i8
(LoongArchVBROADCAST node:$v1))>; +def vldrepl_v8i16 : PatFrag<(ops node:$v1), + (v8i16 (LoongArchVBROADCAST node:$v1))>; +def vldrepl_v4i32 : PatFrag<(ops node:$v1), + (v4i32 (LoongArchVBROADCAST node:$v1))>; +def vldrepl_v2i64 : PatFrag<(ops node:$v1), + (v2i64 (LoongArchVBROADCAST node:$v1))>; + +def vinsert_v16i8 : PatFrag<(ops node:$vec, node:$val, node:$idx), + (v16i8 (vector_insert node:$vec, node:$val, node:$idx))>; +def vinsert_v8i16 : PatFrag<(ops node:$vec, node:$val, node:$idx), + (v8i16 (vector_insert node:$vec, node:$val, node:$idx))>; +def vinsert_v4i32 : PatFrag<(ops node:$vec, node:$val, node:$idx), + (v4i32 (vector_insert node:$vec, node:$val, node:$idx))>; +def vinsert_v2i64 : PatFrag<(ops node:$vec, node:$val, node:$idx), + (v2i64 (vector_insert node:$vec, node:$val, node:$idx))>; + +class vfsetcc_type<ValueType ResTy, ValueType OpTy, CondCode CC> : + PatFrag<(ops node:$lhs, node:$rhs), + (ResTy (vfsetcc (OpTy node:$lhs), (OpTy node:$rhs), CC))>; + +// ISD::SETFALSE cannot occur +def vfseteq_v4f32 : vfsetcc_type<v4i32, v4f32, SETEQ>; +def vfseteq_v2f64 : vfsetcc_type<v2i64, v2f64, SETEQ>; +def vfsetge_v4f32 : vfsetcc_type<v4i32, v4f32, SETGE>; +def vfsetge_v2f64 : vfsetcc_type<v2i64, v2f64, SETGE>; +def vfsetgt_v4f32 : vfsetcc_type<v4i32, v4f32, SETGT>; +def vfsetgt_v2f64 : vfsetcc_type<v2i64, v2f64, SETGT>; +def vfsetle_v4f32 : vfsetcc_type<v4i32, v4f32, SETLE>; +def vfsetle_v2f64 : vfsetcc_type<v2i64, v2f64, SETLE>; +def vfsetlt_v4f32 : vfsetcc_type<v4i32, v4f32, SETLT>; +def vfsetlt_v2f64 : vfsetcc_type<v2i64, v2f64, SETLT>; +def vfsetne_v4f32 : vfsetcc_type<v4i32, v4f32, SETNE>; +def vfsetne_v2f64 : vfsetcc_type<v2i64, v2f64, SETNE>; +def vfsetoeq_v4f32 : vfsetcc_type<v4i32, v4f32, SETOEQ>; +def vfsetoeq_v2f64 : vfsetcc_type<v2i64, v2f64, SETOEQ>; +def vfsetoge_v4f32 : vfsetcc_type<v4i32, v4f32, SETOGE>; +def vfsetoge_v2f64 : vfsetcc_type<v2i64, v2f64, SETOGE>; +def vfsetogt_v4f32 : vfsetcc_type<v4i32, v4f32, SETOGT>; +def vfsetogt_v2f64 : vfsetcc_type<v2i64, v2f64, SETOGT>; +def vfsetole_v4f32 : vfsetcc_type<v4i32, v4f32, SETOLE>; +def vfsetole_v2f64 : vfsetcc_type<v2i64, v2f64, SETOLE>; +def vfsetolt_v4f32 : vfsetcc_type<v4i32, v4f32, SETOLT>; +def vfsetolt_v2f64 : vfsetcc_type<v2i64, v2f64, SETOLT>; +def vfsetone_v4f32 : vfsetcc_type<v4i32, v4f32, SETONE>; +def vfsetone_v2f64 : vfsetcc_type<v2i64, v2f64, SETONE>; +def vfsetord_v4f32 : vfsetcc_type<v4i32, v4f32, SETO>; +def vfsetord_v2f64 : vfsetcc_type<v2i64, v2f64, SETO>; +def vfsetun_v4f32 : vfsetcc_type<v4i32, v4f32, SETUO>; +def vfsetun_v2f64 : vfsetcc_type<v2i64, v2f64, SETUO>; +def vfsetueq_v4f32 : vfsetcc_type<v4i32, v4f32, SETUEQ>; +def vfsetueq_v2f64 : vfsetcc_type<v2i64, v2f64, SETUEQ>; +def vfsetuge_v4f32 : vfsetcc_type<v4i32, v4f32, SETUGE>; +def vfsetuge_v2f64 : vfsetcc_type<v2i64, v2f64, SETUGE>; +def vfsetugt_v4f32 : vfsetcc_type<v4i32, v4f32, SETUGT>; +def vfsetugt_v2f64 : vfsetcc_type<v2i64, v2f64, SETUGT>; +def vfsetule_v4f32 : vfsetcc_type<v4i32, v4f32, SETULE>; +def vfsetule_v2f64 : vfsetcc_type<v2i64, v2f64, SETULE>; +def vfsetult_v4f32 : vfsetcc_type<v4i32, v4f32, SETULT>; +def vfsetult_v2f64 : vfsetcc_type<v2i64, v2f64, SETULT>; +def vfsetune_v4f32 : vfsetcc_type<v4i32, v4f32, SETUNE>; +def vfsetune_v2f64 : vfsetcc_type<v2i64, v2f64, SETUNE>; + + + +// ISD::SETTRUE cannot occur +// ISD::SETFALSE2 cannot occur +// ISD::SETTRUE2 cannot occur + +class vsetcc_type<ValueType ResTy, CondCode CC> : + PatFrag<(ops node:$lhs, node:$rhs), + (ResTy (vsetcc node:$lhs, node:$rhs, CC))>; + +def vseteq_v16i8 : vsetcc_type<v16i8, SETEQ>; +def vseteq_v8i16 : vsetcc_type<v8i16, SETEQ>; +def vseteq_v4i32 : vsetcc_type<v4i32, SETEQ>; +def vseteq_v2i64 : vsetcc_type<v2i64, SETEQ>; +def vsetle_v16i8 : vsetcc_type<v16i8, SETLE>; +def vsetle_v8i16 : vsetcc_type<v8i16, SETLE>; +def vsetle_v4i32 : vsetcc_type<v4i32, SETLE>; +def vsetle_v2i64 : vsetcc_type<v2i64, SETLE>; +def vsetlt_v16i8 : vsetcc_type<v16i8, SETLT>; +def vsetlt_v8i16 : vsetcc_type<v8i16, SETLT>; +def vsetlt_v4i32 : vsetcc_type<v4i32, SETLT>; +def vsetlt_v2i64 : vsetcc_type<v2i64, SETLT>; +def vsetule_v16i8 : vsetcc_type<v16i8, SETULE>; +def vsetule_v8i16 : vsetcc_type<v8i16, SETULE>; +def vsetule_v4i32 : vsetcc_type<v4i32, SETULE>; +def vsetule_v2i64 : vsetcc_type<v2i64, SETULE>; +def vsetult_v16i8 : vsetcc_type<v16i8, SETULT>; +def vsetult_v8i16 : vsetcc_type<v8i16, SETULT>; +def vsetult_v4i32 : vsetcc_type<v4i32, SETULT>; +def vsetult_v2i64 : vsetcc_type<v2i64, SETULT>; + +def vsplati8 : PatFrag<(ops node:$e0), + (v16i8 (build_vector node:$e0, node:$e0, + node:$e0, node:$e0, + node:$e0, node:$e0, + node:$e0, node:$e0, + node:$e0, node:$e0, + node:$e0, node:$e0, + node:$e0, node:$e0, + node:$e0, node:$e0))>; +def vsplati16 : PatFrag<(ops node:$e0), +
(v8i16 (build_vector node:$e0, node:$e0, + node:$e0, node:$e0, + node:$e0, node:$e0, + node:$e0, node:$e0))>; +def vsplati32 : PatFrag<(ops node:$e0), + (v4i32 (build_vector node:$e0, node:$e0, + node:$e0, node:$e0))>; + +def vsplati64_imm_eq_1 : PatLeaf<(bitconvert (v4i32 (build_vector))), [{ + APInt Imm; + SDNode *BV = N->getOperand(0).getNode(); + EVT EltTy = N->getValueType(0).getVectorElementType(); + + return selectVSplat(BV, Imm, EltTy.getSizeInBits()) && + Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1; +}]>; + +def vsplati64 : PatFrag<(ops node:$e0), + (v2i64 (build_vector node:$e0, node:$e0))>; + +def vsplati64_splat_d : PatFrag<(ops node:$e0), + (v2i64 (bitconvert + (v4i32 (and + (v4i32 (build_vector node:$e0, + node:$e0, + node:$e0, + node:$e0)), + vsplati64_imm_eq_1))))>; + +def vsplatf32 : PatFrag<(ops node:$e0), + (v4f32 (build_vector node:$e0, node:$e0, + node:$e0, node:$e0))>; +def vsplatf64 : PatFrag<(ops node:$e0), + (v2f64 (build_vector node:$e0, node:$e0))>; + +def vsplati8_elt : PatFrag<(ops node:$v, node:$i), + (LoongArchVSHF (vsplati8 node:$i), node:$v, node:$v)>; +def vsplati16_elt : PatFrag<(ops node:$v, node:$i), + (LoongArchVSHF (vsplati16 node:$i), node:$v, node:$v)>; +def vsplati32_elt : PatFrag<(ops node:$v, node:$i), + (LoongArchVSHF (vsplati32 node:$i), node:$v, node:$v)>; +def vsplati64_elt : PatFrag<(ops node:$v, node:$i), + (LoongArchVSHF (vsplati64_splat_d node:$i),node:$v, node:$v)>; + +class SplatPatLeaf<Operand opclass, dag frag, code pred = [{}], + SDNodeXForm xform = NOOP_SDNodeXForm> + : PatLeaf<frag, pred, xform> { + Operand OpClass = opclass; +} + +class SplatComplexPattern<Operand opclass, ValueType ty, int numops, string fn, + list<SDNode> roots = [], + list<SDNodeProperty> props = []> : + ComplexPattern<ty, numops, fn, roots, props> { + Operand OpClass = opclass; +} + +def vsplati8_uimm3 : SplatComplexPattern<vsplat_uimm3, v16i8, 1, "selectVSplatUimm3", [build_vector, bitconvert]>; + +def vsplati8_uimm4 : SplatComplexPattern<vsplat_uimm4, v16i8, 1, "selectVSplatUimm4", [build_vector, bitconvert]>; + +def vsplati8_uimm5 : SplatComplexPattern<vsplat_uimm5, v16i8, 1, "selectVSplatUimm5", [build_vector, bitconvert]>; + +def vsplati8_uimm8 : SplatComplexPattern<vsplat_uimm8, v16i8, 1, "selectVSplatUimm8", [build_vector, bitconvert]>; + +def vsplati8_simm5 : SplatComplexPattern<vsplat_simm5, v16i8, 1, "selectVSplatSimm5", [build_vector, bitconvert]>; + +def vsplati16_uimm3 : SplatComplexPattern<vsplat_uimm3, v8i16, 1, "selectVSplatUimm3", [build_vector, bitconvert]>; + +def vsplati16_uimm4 : SplatComplexPattern<vsplat_uimm4, v8i16, 1, "selectVSplatUimm4", [build_vector, bitconvert]>; + +def vsplati16_uimm5 : SplatComplexPattern<vsplat_uimm5, v8i16, 1, "selectVSplatUimm5", [build_vector, bitconvert]>; + +def vsplati16_simm5 : SplatComplexPattern<vsplat_simm5, v8i16, 1, "selectVSplatSimm5", [build_vector, bitconvert]>; + +def vsplati32_uimm2 : SplatComplexPattern<vsplat_uimm2, v4i32, 1, "selectVSplatUimm2", [build_vector, bitconvert]>; + +def vsplati32_uimm5 : SplatComplexPattern<vsplat_uimm5, v4i32, 1, "selectVSplatUimm5", [build_vector, bitconvert]>; + +def vsplati32_simm5 : SplatComplexPattern<vsplat_simm5, v4i32, 1, "selectVSplatSimm5", [build_vector, bitconvert]>; + +def vsplati64_uimm1 : SplatComplexPattern<vsplat_uimm1, v2i64, 1, "selectVSplatUimm1", [build_vector, bitconvert]>; + +def vsplati64_uimm5 : SplatComplexPattern<vsplat_uimm5, v2i64, 1, "selectVSplatUimm5", [build_vector, bitconvert]>; + +def vsplati64_uimm6 : SplatComplexPattern<vsplat_uimm6, v2i64, 1, "selectVSplatUimm6", [build_vector, bitconvert]>; + +def vsplati64_simm5 : SplatComplexPattern<vsplat_simm5, v2i64, 1, "selectVSplatSimm5", [build_vector, bitconvert]>; + + +// Any build_vector that is a constant splat with a value that equals 1 +// FIXME: These should be a ComplexPattern but we can't use them because the +// ISel generator requires the uses to have a name, but providing a name +// causes other errors ("used in pattern but not operand list") +def vsplat_imm_eq_1 : PatLeaf<(build_vector), [{ + APInt Imm; + EVT EltTy = N->getValueType(0).getVectorElementType(); + + return selectVSplat(N, Imm, EltTy.getSizeInBits()) && + Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1; +}]>; + +def vbitclr_b : PatFrag<(ops node:$vj, node:$vk), + (and node:$vj, (xor (shl vsplat_imm_eq_1, node:$vk), + immAllOnesV))>; +def vbitclr_h : PatFrag<(ops node:$vj, node:$vk), + (and node:$vj, (xor (shl vsplat_imm_eq_1, node:$vk), + immAllOnesV))>; +def vbitclr_w : PatFrag<(ops node:$vj, node:$vk), + (and node:$vj, (xor (shl vsplat_imm_eq_1, node:$vk), + immAllOnesV))>; +def vbitclr_d : PatFrag<(ops node:$vj, node:$vk), + (and node:$vj, (xor (shl (v2i64 vsplati64_imm_eq_1), + node:$vk), + (bitconvert (v4i32 immAllOnesV))))>; + +def vbneg_b : PatFrag<(ops node:$vj, node:$vk), + (xor node:$vj, (shl vsplat_imm_eq_1, node:$vk))>; +def vbneg_h :
PatFrag<(ops node:$vj, node:$vk), + (xor node:$vj, (shl vsplat_imm_eq_1, node:$vk))>; +def vbneg_w : PatFrag<(ops node:$vj, node:$vk), + (xor node:$vj, (shl vsplat_imm_eq_1, node:$vk))>; +def vbneg_d : PatFrag<(ops node:$vj, node:$vk), + (xor node:$vj, (shl (v2i64 vsplati64_imm_eq_1), + node:$vk))>; + +def vbset_b : PatFrag<(ops node:$vj, node:$vk), + (or node:$vj, (shl vsplat_imm_eq_1, node:$vk))>; +def vbset_h : PatFrag<(ops node:$vj, node:$vk), + (or node:$vj, (shl vsplat_imm_eq_1, node:$vk))>; +def vbset_w : PatFrag<(ops node:$vj, node:$vk), + (or node:$vj, (shl vsplat_imm_eq_1, node:$vk))>; +def vbset_d : PatFrag<(ops node:$vj, node:$vk), + (or node:$vj, (shl (v2i64 vsplati64_imm_eq_1), + node:$vk))>; + +def muladd : PatFrag<(ops node:$vd, node:$vj, node:$vk), + (add node:$vd, (mul node:$vj, node:$vk))>; + +def mulsub : PatFrag<(ops node:$vd, node:$vj, node:$vk), + (sub node:$vd, (mul node:$vj, node:$vk))>; + +class IsCommutable { + bit isCommutable = 1; +} + + + +//class +class LSX_3R_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + RegisterOperand ROVD, RegisterOperand ROVJ = ROVD, + RegisterOperand ROVK = ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, ROVK:$vk); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, ROVK:$vk))]; +} + +class LSX_3RN_DESC_BASE<string instr_asm, RegisterOperand ROVD, + RegisterOperand ROVJ = ROVD, + RegisterOperand ROVK = ROVD> : + LSX_3R_DESC_BASE<instr_asm, null_frag, ROVD, ROVJ, ROVK>; + +class LSX_3R_4R_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + RegisterOperand ROVD, RegisterOperand ROVJ = ROVD, + RegisterOperand ROVK = ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ROVK:$vk); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, + ROVK:$vk))]; + string Constraints = "$vd = $vd_in"; +} + +class LSX_3R_VREPLVE_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + RegisterOperand ROVD, + RegisterOperand ROVJ = ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, GPR32Opnd:$rk); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $rk"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, GPR32Opnd:$rk))]; +} + +class LSX_VEC_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + RegisterOperand ROVD, RegisterOperand ROVJ = ROVD, + RegisterOperand ROVK = ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, ROVK:$vk); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, ROVK:$vk))]; +} + +class LSX_VEC_PSEUDO_BASE<SDPatternOperator OpNode, RegisterOperand ROVD, + RegisterOperand ROVJ = ROVD, + RegisterOperand ROVK = ROVD> : + LSXPseudo<(outs ROVD:$vd), (ins ROVJ:$vj, ROVK:$vk), + [(set ROVD:$vd, (OpNode ROVJ:$vj, ROVK:$vk))]>; + +class LSX_3RF_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + RegisterOperand ROVD, RegisterOperand ROVJ = ROVD, + RegisterOperand ROVK = ROVD> : + LSX_3R_DESC_BASE<instr_asm, OpNode, ROVD, ROVJ, ROVK>; + +class LSX_3RFN_DESC_BASE<string instr_asm, RegisterOperand ROVD, + RegisterOperand ROVJ = ROVD, + RegisterOperand ROVK = ROVD> : + LSX_3R_DESC_BASE<instr_asm, null_frag, ROVD, ROVJ, ROVK>; + +class LSX_3R_DESC_BASE1<string instr_asm, SDPatternOperator OpNode, + RegisterOperand ROVD, RegisterOperand ROVJ = ROVD, + RegisterOperand ROVK = ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, ROVK:$vk); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vk, ROVK:$vj))]; +} + +class LSX_3RF_DESC_BASE1<string instr_asm, SDPatternOperator OpNode, + RegisterOperand ROVD, RegisterOperand ROVJ = ROVD, + RegisterOperand ROVK = ROVD> : + LSX_3R_DESC_BASE1<instr_asm, OpNode, ROVD, ROVJ, ROVK>; + +class LSX_3R_VSHF_DESC_BASE<string instr_asm, RegisterOperand ROVD, + RegisterOperand ROVJ = ROVD, + RegisterOperand ROVK = ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ROVK:$vk); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk"); + list<dag> Pattern = [(set ROVD:$vd, (LoongArchVSHF ROVD:$vd_in, ROVJ:$vj, + ROVK:$vk))]; + string Constraints = "$vd = $vd_in"; +} + +class LSX_3R_4R_VSHF_DESC_BASE<string instr_asm, RegisterOperand ROVD, + RegisterOperand ROVJ = ROVD, + RegisterOperand ROVK = ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, ROVK:$vk, ROVD:$va); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk, $va"); + list<dag> Pattern = [(set ROVD:$vd, (LoongArchVSHF ROVD:$va, ROVJ:$vj, + ROVK:$vk))]; +} + +class LSX_I5_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + SplatComplexPattern SplatImm, RegisterOperand ROVD, + RegisterOperand ROVJ = ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$si5); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $si5"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, SplatImm:$si5))]; +} + +class LSX_I5_U_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + SplatComplexPattern SplatImm, RegisterOperand ROVD, + RegisterOperand ROVJ = ROVD> { +
dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui5); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, SplatImm:$ui5))]; +} + +class LSX_BIT_3_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + Operand ImmOp, SDPatternOperator Imm, + RegisterOperand ROVD, RegisterOperand ROVJ = ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui3); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui3"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, Imm:$ui3))]; +} + +class LSX_BIT_3N_DESC_BASE<string instr_asm, RegisterOperand ROVD, + RegisterOperand ROVJ = ROVD> : + LSX_BIT_3_DESC_BASE<instr_asm, null_frag, uimm3, immZExt3, ROVD, ROVJ>; + +class LSX_BIT_4_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + Operand ImmOp, SDPatternOperator Imm, + RegisterOperand ROVD, RegisterOperand ROVJ = ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui4); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, Imm:$ui4))]; +} + +class LSX_BIT_4N_DESC_BASE<string instr_asm, RegisterOperand ROVD, + RegisterOperand ROVJ = ROVD> : + LSX_BIT_4_DESC_BASE<instr_asm, null_frag, uimm4, immZExt4, ROVD, ROVJ>; + +class LSX_BIT_5_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + Operand ImmOp, SDPatternOperator Imm, + RegisterOperand ROVD, RegisterOperand ROVJ = ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui5); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, Imm:$ui5))]; +} + +class LSX_BIT_5N_DESC_BASE<string instr_asm, RegisterOperand ROVD, + RegisterOperand ROVJ = ROVD> : + LSX_BIT_5_DESC_BASE<instr_asm, null_frag, uimm5, immZExt5, ROVD, ROVJ>; + +class LSX_BIT_6_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + Operand ImmOp, SDPatternOperator Imm, + RegisterOperand ROVD, RegisterOperand ROVJ = ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui6); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui6"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, Imm:$ui6))]; +} + +class LSX_BIT_6N_DESC_BASE<string instr_asm, RegisterOperand ROVD, + RegisterOperand ROVJ = ROVD> : + LSX_BIT_6_DESC_BASE<instr_asm, null_frag, uimm6, immZExt6, ROVD, ROVJ>; + +class LSX_2R_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + RegisterOperand ROVD, RegisterOperand ROVJ = ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj))]; +} + +class LSX_2RN_DESC_BASE<string instr_asm, RegisterOperand ROVD, + RegisterOperand ROVJ = ROVD> : + LSX_2R_DESC_BASE<instr_asm, null_frag, ROVD, ROVJ>; + +class LSX_2RF_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + RegisterOperand ROVD, RegisterOperand ROVJ = ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj))]; +} + +class LSX_2RFN_DESC_BASE<string instr_asm, RegisterOperand ROVD, + RegisterOperand ROVJ = ROVD> : + LSX_2R_DESC_BASE<instr_asm, null_frag, ROVD, ROVJ>; + +class LSX_2RF_DESC_BASE_CVT<string instr_asm, SDPatternOperator OpNode, + RegisterOperand ROVD, RegisterOperand ROVJ> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj))]; +} + +class LSX_2RFN_DESC_BASE_CVT<string instr_asm, RegisterOperand ROVD, + RegisterOperand ROVJ> : + LSX_2RF_DESC_BASE_CVT<instr_asm, null_frag, ROVD, ROVJ>; + +class LSX_2RF_DESC_BASE_tmp<string instr_asm, RegisterOperand ROVD, + RegisterOperand ROVJ = ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj"); + list<dag> Pattern = []; +} + +class LSX_2R_REPL_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + ValueType VT, RegisterOperand ROVD, + RegisterOperand ROS> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROS:$rj); + string AsmString = !strconcat(instr_asm, "\t$vd, $rj"); + list<dag> Pattern = [(set ROVD:$vd, (VT (OpNode ROS:$rj)))]; +} + +class LSX_INSERT_U4_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + Operand ImmOp, SDPatternOperator Imm, + RegisterOperand ROVD, RegisterOperand ROVJ> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, ROVJ:$rj, ImmOp:$ui4); + string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $ui4"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$rj, Imm:$ui4))]; + string Constraints = "$vd = $vd_in"; +} + +class LSX_INSERT_U4N_DESC_BASE<string instr_asm, RegisterOperand ROVD, + RegisterOperand ROVJ> : + LSX_INSERT_U4_DESC_BASE<instr_asm, null_frag, uimm4, immZExt4, ROVD, ROVJ>; + +class LSX_INSERT_U3_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + Operand ImmOp, SDPatternOperator Imm, + RegisterOperand ROVD, RegisterOperand ROS> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, ROS:$rj, ImmOp:$ui3); + string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $ui3"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROS:$rj, Imm:$ui3))]; + string Constraints = "$vd = $vd_in"; +} + +class LSX_INSERT_U3N_DESC_BASE<string instr_asm, RegisterOperand ROVD, + RegisterOperand ROS> : + LSX_INSERT_U3_DESC_BASE<instr_asm, null_frag, uimm3, immZExt3, ROVD, ROS>; + +class LSX_INSERT_U2_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + Operand ImmOp, SDPatternOperator Imm, + RegisterOperand ROVD, RegisterOperand ROS> { + dag OutOperandList = (outs ROVD:$vd); +
dag InOperandList = (ins ROVD:$vd_in, ROS:$rj, ImmOp:$ui2); + string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $ui2"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROS:$rj, Imm:$ui2))]; + string Constraints = "$vd = $vd_in"; +} + +class LSX_INSERT_U2N_DESC_BASE<string instr_asm, RegisterOperand ROVD, + RegisterOperand ROS> : + LSX_INSERT_U2_DESC_BASE<instr_asm, null_frag, uimm2, immZExt2, ROVD, ROS>; + +class LSX_INSERT_U1_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + Operand ImmOp, SDPatternOperator Imm, + RegisterOperand ROVD, RegisterOperand ROS> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, ROS:$rj, ImmOp:$ui1); + string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $ui1"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROS:$rj, Imm:$ui1))]; + string Constraints = "$vd = $vd_in"; +} + +class LSX_INSERT_U1N_DESC_BASE<string instr_asm, RegisterOperand ROVD, + RegisterOperand ROS> : + LSX_INSERT_U1_DESC_BASE<instr_asm, null_frag, uimm1, immZExt1, ROVD, ROS>; + +class LSX_PICK_U1_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + ValueType VecTy, Operand ImmOp, + SDPatternOperator Imm, RegisterOperand ROD, + RegisterOperand ROVJ> { + dag OutOperandList = (outs ROD:$rd); + dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui1); + string AsmString = !strconcat(instr_asm, "\t$rd, $vj, $ui1"); + list<dag> Pattern = [(set ROD:$rd, (OpNode (VecTy ROVJ:$vj), Imm:$ui1))]; +} + +class LSX_PICK_U2_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + ValueType VecTy, Operand ImmOp, + SDPatternOperator Imm, RegisterOperand ROD, + RegisterOperand ROVJ> { + dag OutOperandList = (outs ROD:$rd); + dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui2); + string AsmString = !strconcat(instr_asm, "\t$rd, $vj, $ui2"); + list<dag> Pattern = [(set ROD:$rd, (OpNode (VecTy ROVJ:$vj), Imm:$ui2))]; +} + +class LSX_PICK_U3_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + ValueType VecTy, Operand ImmOp, + SDPatternOperator Imm, RegisterOperand ROD, + RegisterOperand ROVJ> { + dag OutOperandList = (outs ROD:$rd); + dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui3); + string AsmString = !strconcat(instr_asm, "\t$rd, $vj, $ui3"); + list<dag> Pattern = [(set ROD:$rd, (OpNode (VecTy ROVJ:$vj), Imm:$ui3))]; +} + +class LSX_PICK_U4_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + ValueType VecTy, Operand ImmOp, + SDPatternOperator Imm, RegisterOperand ROD, + RegisterOperand ROVJ> { + dag OutOperandList = (outs ROD:$rd); + dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui4); + string AsmString = !strconcat(instr_asm, "\t$rd, $vj, $ui4"); + list<dag> Pattern = [(set ROD:$rd, (OpNode (VecTy ROVJ:$vj), Imm:$ui4))]; +} + +class LSX_ELM_U3_VREPLVE_DESC_BASE<string instr_asm, + SplatComplexPattern SplatImm, + RegisterOperand ROVD, + RegisterOperand ROVJ = ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui3); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui3"); + list<dag> Pattern = [(set ROVD:$vd, (LoongArchVSHF SplatImm:$ui3, ROVJ:$vj, + ROVJ:$vj))]; +} + +class LSX_ELM_U2_VREPLVE_DESC_BASE<string instr_asm, + SplatComplexPattern SplatImm, + RegisterOperand ROVD, + RegisterOperand ROVJ = ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui2); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui2"); + list<dag> Pattern = [(set ROVD:$vd, (LoongArchVSHF SplatImm:$ui2, ROVJ:$vj, + ROVJ:$vj))]; +} + +class LSX_ELM_U1_VREPLVE_DESC_BASE<string instr_asm, + SplatComplexPattern SplatImm, + RegisterOperand ROVD, + RegisterOperand ROVJ = ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui1); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui1"); + list<dag> Pattern = [(set ROVD:$vd, (LoongArchVSHF SplatImm:$ui1, ROVJ:$vj, + ROVJ:$vj))]; +} + +class LSX_ELM_U4_VREPLVE_DESC_BASE<string instr_asm, + SplatComplexPattern SplatImm, + RegisterOperand ROVD, + RegisterOperand ROVJ = ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui4); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); + list<dag> Pattern = [(set ROVD:$vd, (LoongArchVSHF SplatImm:$ui4, ROVJ:$vj, + ROVJ:$vj))]; +} + +class LSX_ELM_U4_SLD_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + Operand ImmOp, SDPatternOperator Imm, + RegisterOperand ROVD, + RegisterOperand ROVJ = ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ImmOp:$ui4); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, + Imm:$ui4))]; + string Constraints = "$vd = $vd_in"; +} + +class LSX_ELM_U3_SLD_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + Operand ImmOp, SDPatternOperator Imm, + RegisterOperand ROVD, + RegisterOperand ROVJ = ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ImmOp:$ui3); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui3"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, + Imm:$ui3))]; + string Constraints = "$vd =
$vd_in"; +} + +class LSX_ELM_U2_SLD_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ImmOp:$ui2); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui2"); + list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, + Imm:$ui2))]; + string Constraints = "$vd = $vd_in"; +} + +class LSX_ELM_U1_SLD_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ImmOp:$ui1); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui1"); + list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, + Imm:$ui1))]; + string Constraints = "$vd = $vd_in"; +} + +class LSX_BIT_U3_VREPLVE_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui3); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui3"); + list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, SplatImm:$ui3))]; +} + +class LSX_BIT_U4_VREPLVE_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui4); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); + list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, SplatImm:$ui4))]; +} + +class LSX_BIT_U5_VREPLVE_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui5); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); + list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, SplatImm:$ui5))]; +} + +class LSX_BIT_U6_VREPLVE_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui6); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui6"); + list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, SplatImm:$ui6))]; +} + +class LSX_BIT_U6_VREPLVE_DESC_BASE_Intrinsic { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, uimm6:$ui6); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui6"); + list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt6:$ui6))]; +} + +class LSX_BIT_U3_VREPLVE_DESC_BASE_Intrinsic { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, uimm3:$ui3); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui3"); + list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt3:$ui3))]; +} + +class LSX_BIT_U4_VREPLVE_DESC_BASE_Intrinsic { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, uimm4:$ui4); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); + list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt4:$ui4))]; +} + +class LSX_BIT_U5_VREPLVE_DESC_BASE_Intrinsic { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, uimm5:$ui5); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); + list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt5:$ui5))]; +} + +class LSX_I8_SHF_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, uimm8:$ui8); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui8"); + list Pattern = [(set ROVD:$vd, (LoongArchSHF immZExt8:$ui8, ROVJ:$vj))]; +} + +class LSX_I8_SHUF_DESC_BASE_D { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, uimm8:$ui8); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui8"); + list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, immZExt8:$ui8))]; + string Constraints = "$vd = $vd_in"; +} + +def LoongArchSelect : SDNode<"LoongArchISD::VSELECT" ,SDTSelect>; +def LoongArchVROR : 
SDNode<"LoongArchISD::VROR", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<0>, + SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>]>, []>; +def LoongArchVRORI : SDNode<"LoongArchISD::VRORI", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<0>, + SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>, []>; + +class LSX2_RORI_U3_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui3); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui3"); + list Pattern = [(set ROVD:$vd, (LoongArchVRORI ROVJ:$vj, Imm:$ui3))]; +} + +class LSX2_RORI_U4_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui4); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); + list Pattern = [(set ROVD:$vd, (LoongArchVRORI ROVJ:$vj, Imm:$ui4))]; +} + +class LSX2_RORI_U5_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui5); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); + list Pattern = [(set ROVD:$vd, (LoongArchVRORI ROVJ:$vj, Imm:$ui5))]; +} + +class LSX2_RORI_U6_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui6); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui6"); + list Pattern = [(set ROVD:$vd, (LoongArchVRORI ROVJ:$vj, Imm:$ui6))]; +} + +class LSX_BIND_U4_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ImmOp:$ui4); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); + list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, Imm:$ui4))]; + string Constraints = "$vd = $vd_in"; +} + +class LSX_BIND_U4N_DESC_BASE : + LSX_BIND_U4_DESC_BASE; + +class LSX_BIND_U5_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ImmOp:$ui5); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); + list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, Imm:$ui5))]; + string Constraints = "$vd = $vd_in"; +} + +class LSX_BIND_U5N_DESC_BASE : + LSX_BIND_U5_DESC_BASE; + +class LSX_BIND_U6_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ImmOp:$ui6); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui6"); + list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, Imm:$ui6))]; + string Constraints = "$vd = $vd_in"; +} + +class LSX_BIND_U6N_DESC_BASE : + LSX_BIND_U6_DESC_BASE; + +class LSX_BIND_U7_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, uimm7:$ui7); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui7"); + list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, immZExt7:$ui7))]; + string Constraints = "$vd = $vd_in"; +} + +class LSX_BIND_U7N_DESC_BASE : + LSX_BIND_U7_DESC_BASE; + + +class LD_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins MemOpnd:$addr); + string AsmString = !strconcat(instr_asm, "\t$vd, $addr"); + list Pattern = [(set ROVD:$vd, (TyNode (OpNode Addr:$addr)))]; + string DecoderMethod = "DecodeLSX128Mem"; +} + +class ST_DESC_BASE { + dag OutOperandList = (outs); + dag InOperandList = (ins ROVD:$vd, MemOpnd:$addr); + string AsmString = !strconcat(instr_asm, "\t$vd, $addr"); + list Pattern = [(OpNode (TyNode ROVD:$vd), Addr:$addr)]; + string DecoderMethod = "DecodeLSX128Mem"; +} + +class LSX_VEC_ADDR_PSEUDO_BASE : + LSXPseudo<(outs), (ins ROVD:$vd, MemOpnd:$addr), + [(OpNode (TyNode ROVD:$vd), MemOpnd:$addr)]>; + + +class 
LSX_SET_DESC_BASE<string instr_asm, RegisterOperand ROVD> { + dag OutOperandList = (outs FCFROpnd:$cd); + dag InOperandList = (ins ROVD:$vj); + string AsmString = !strconcat(instr_asm, "\t$cd, $vj"); + list<dag> Pattern = []; +} + +class LSX_SET_DESC_BASE_tmp<string instr_asm, RegisterOperand ROVD> { + dag OutOperandList = (outs FCFROpnd:$cd); + dag InOperandList = (ins ROVD:$vj); + string AsmString = !strconcat(instr_asm, "\t$cd, $vj"); + list<dag> Pattern = []; +} + +class LSX_VMul_Reg4<string instr_asm, SDPatternOperator OpNode, + RegisterOperand ROVD, RegisterOperand ROVJ, + RegisterOperand ROVK, RegisterOperand ROVA> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, ROVK:$vk, ROVA:$va); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk, $va"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, ROVK:$vk, ROVA:$va))]; +} + +class LSX_4RF<string instr_asm, SDPatternOperator OpNode, + RegisterOperand ROVD, RegisterOperand ROVJ, + RegisterOperand ROVK, RegisterOperand ROVA> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, ROVK:$vk, ROVA:$va); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk, $va"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, ROVK:$vk, ROVA:$va))]; +} + + +class LSX_VFCMP_Reg3<string instr_asm, SDPatternOperator OpNode, + RegisterOperand ROVD, RegisterOperand ROVJ = ROVD, + RegisterOperand ROVK = ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, ROVK:$vk); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, ROVK:$vk))]; +} + +class LSX_I12_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + Operand ImmOp, SDPatternOperator Imm, + RegisterOperand ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins PtrRC:$rj, ImmOp:$si12); + string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $si12"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode iPTR:$rj, Imm:$si12))]; +} + +class LSX_I11_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + Operand ImmOp, SDPatternOperator Imm, + RegisterOperand ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins PtrRC:$rj, ImmOp:$si11); + string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $si11"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode iPTR:$rj, Imm:$si11))]; +} + +class LSX_I10_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + Operand ImmOp, SDPatternOperator Imm, + RegisterOperand ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins PtrRC:$rj, ImmOp:$si10); + string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $si10"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode iPTR:$rj, Imm:$si10))]; +} + +class LSX_I9_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + Operand ImmOp, SDPatternOperator Imm, + RegisterOperand ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins PtrRC:$rj, ImmOp:$si9); + string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $si9"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode iPTR:$rj, Imm:$si9))]; +} + + +class LSX_I8_U1_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + Operand ImmOp, SDPatternOperator Imm, + RegisterOperand ROVD> { + dag OutOperandList = (outs); + dag InOperandList = (ins ROVD:$vd, PtrRC:$rj, ImmOp:$si8, uimm1:$idx); + string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $si8, $idx"); + list<dag> Pattern = [(OpNode ROVD:$vd, iPTR:$rj, Imm:$si8, immZExt1:$idx)]; + string DecoderMethod = "DecodeLSX128memstl"; +} + + +class LSX_I8_U2_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + Operand ImmOp, SDPatternOperator Imm, + RegisterOperand ROVD> { + dag OutOperandList = (outs); + dag InOperandList = (ins ROVD:$vd, PtrRC:$rj, ImmOp:$si8, uimm2:$idx); + string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $si8, $idx"); + list<dag> Pattern = [(OpNode ROVD:$vd, iPTR:$rj, Imm:$si8, immZExt2:$idx)]; + string DecoderMethod = "DecodeLSX128memstl"; +} + +class LSX_I8_U3_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + Operand ImmOp, SDPatternOperator Imm, + RegisterOperand ROVD> { + dag OutOperandList = (outs); + dag InOperandList = (ins ROVD:$vd, PtrRC:$rj, ImmOp:$si8, uimm3:$idx); + string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $si8, $idx"); + list<dag> Pattern = [(OpNode ROVD:$vd, iPTR:$rj, Imm:$si8, immZExt3:$idx)]; + string DecoderMethod = "DecodeLSX128memstl"; +} + +class LSX_I8_U4_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + Operand ImmOp, SDPatternOperator Imm, + RegisterOperand ROVD> { + dag OutOperandList = (outs); + dag InOperandList = (ins ROVD:$vd, PtrRC:$rj, ImmOp:$si8, uimm4:$idx); + string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $si8, $idx"); + list<dag> Pattern = [(OpNode ROVD:$vd, iPTR:$rj, Imm:$si8, immZExt4:$idx)]; + string DecoderMethod = "DecodeLSX128memstl"; +} + +class LSX_I5_U_DESC_BASE_Intrinsic<string instr_asm, SDPatternOperator OpNode, + Operand ImmOp, SDPatternOperator Imm, + RegisterOperand ROVD, + RegisterOperand ROVJ = ROVD> { + dag OutOperandList = (outs
ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui5); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, Imm:$ui5))]; +} + +class LSX_I5_DESC_BASE_Intrinsic<string instr_asm, SDPatternOperator OpNode, + Operand ImmOp, SDPatternOperator Imm, + RegisterOperand ROVD, + RegisterOperand ROVJ = ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, ImmOp:$si5); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $si5"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, Imm:$si5))]; +} + +class LSX_LDX_LA<string instr_asm, SDPatternOperator OpNode, + RegisterOperand ROVD, RegisterOperand RORK> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins PtrRC:$rj, RORK:$rk); + string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $rk"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode iPTR:$rj, RORK:$rk))]; +} + +class LSX_SDX_LA<string instr_asm, SDPatternOperator OpNode, + RegisterOperand ROVD, RegisterOperand RORK> { + dag OutOperandList = (outs); + dag InOperandList = (ins ROVD:$vd, PtrRC:$rj, RORK:$rk); + string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $rk"); + list<dag> Pattern = [(OpNode ROVD:$vd, iPTR:$rj, RORK:$rk)]; +} + +class LSX_U5_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + RegisterOperand ROVD, RegisterOperand ROVJ = ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, uimm5:$ui5); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt5:$ui5))]; +} + +class LSX_U5_4R_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + RegisterOperand ROVD, RegisterOperand ROVJ = ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, uimm5:$ui5); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, immZExt5:$ui5))]; + string Constraints = "$vd = $vd_in"; +} + +class LSX_2R_U3_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + RegisterOperand ROVD, RegisterOperand ROVJ = ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, uimm3:$ui3); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui3"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt3:$ui3))]; +} + +class LSX_2R_U4_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + RegisterOperand ROVD, RegisterOperand ROVJ = ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, uimm4:$ui4); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt4:$ui4))]; +} + +class LSX_2R_U5_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + RegisterOperand ROVD, RegisterOperand ROVJ = ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, uimm5:$ui5); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt5:$ui5))]; +} + +class LSX_2R_U6_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + RegisterOperand ROVD, RegisterOperand ROVJ = ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, uimm6:$ui6); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui6"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt6:$ui6))]; +} + +class LSX_2R_3R_U4_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + RegisterOperand ROVD, RegisterOperand ROVJ = ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, uimm4:$ui4); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, immZExt4:$ui4))]; + string Constraints = "$vd = $vd_in"; +} + +class LSX_2R_3R_U5_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + RegisterOperand ROVD, RegisterOperand ROVJ = ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, uimm5:$ui5); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, immZExt5:$ui5))]; + string Constraints = "$vd = $vd_in"; +} + +class LSX_2R_3R_U6_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + RegisterOperand ROVD, RegisterOperand ROVJ = ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, uimm6:$ui6); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui6"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, immZExt6:$ui6))]; + string Constraints = "$vd = $vd_in"; +} + +class LSX_2R_3R_U7_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + RegisterOperand ROVD, RegisterOperand ROVJ = ROVD> { + dag
OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, uimm7:$ui7); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui7"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, immZExt7:$ui7))]; + string Constraints = "$vd = $vd_in"; +} + +class LSX_2R_3R_U8_DESC_BASE<string instr_asm, SDPatternOperator OpNode, RegisterOperand ROVD, RegisterOperand ROVJ = ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, uimm8:$ui8); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui8"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, immZExt8:$ui8))]; + string Constraints = "$vd = $vd_in"; +} + +class LSX_2R_3R_SELECT<string instr_asm, SDPatternOperator OpNode, RegisterOperand ROVD, RegisterOperand ROVJ = ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, vsplat_uimm8:$ui8); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui8"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, vsplati8_uimm8:$ui8, ROVJ:$vj))]; + string Constraints = "$vd = $vd_in"; +} + +class LSX_2R_U8_DESC_BASE<string instr_asm, SDPatternOperator OpNode, RegisterOperand ROVD, RegisterOperand ROVJ = ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, uimm8:$ui8); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui8"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt8:$ui8))]; +} + +class LSX_I13_DESC_BASE<string instr_asm, SDPatternOperator OpNode, ValueType Ty, Operand immOp, RegisterOperand ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins immOp:$i13); + string AsmString = !strconcat(instr_asm, "\t$vd, $i13"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode (Ty simm13:$i13)))]; + string DecoderMethod = "DecodeLSX128Mem13"; +} + +class LSX_I13_DESC_BASE_10<string instr_asm, RegisterOperand ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins vsplat_simm10:$i10); + string AsmString = !strconcat(instr_asm, "\t$vd, $i10"); + list<dag> Pattern = []; + bit hasSideEffects = 0; + string DecoderMethod = "DecodeLSX128Mem10"; +} + +class LSX_BIT_U8_VREPLVE_DESC_BASE<string instr_asm, SDPatternOperator OpNode, SplatComplexPattern SplatImm, RegisterOperand ROVD, RegisterOperand ROVJ = ROVD> { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui8); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui8"); + list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, SplatImm:$ui8))]; +} + + +class LSXPat<dag pattern, dag result, list<Predicate> pred = [HasLSX]> : + Pat<pattern, result>, Requires<pred>; + +// Instruction encoding. 
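+// Each def below multiply-inherits from an encoding class (LSX_3R, LSX_I5_U, +// etc., whose template argument is the fixed opcode bit pattern) and from a +// *_DESC_BASE descriptor that supplies the operand lists, the assembly string +// and the ISel pattern; e.g. VSADD_B pairs the three-register encoding +// 0b01110000010001100 with the "vsadd.b" descriptor over the 128-bit +// byte-vector operand LSX128BOpnd.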
+ + +def VSADD_B : LSX_3R<0b01110000010001100>, IsCommutable, + LSX_3RN_DESC_BASE<"vsadd.b", LSX128BOpnd>; + +def VSADD_H : LSX_3R<0b01110000010001101>, IsCommutable, + LSX_3RN_DESC_BASE<"vsadd.h", LSX128HOpnd>; + +def VSADD_W : LSX_3R<0b01110000010001110>, IsCommutable, + LSX_3RN_DESC_BASE<"vsadd.w", LSX128WOpnd>; + +def VSADD_D : LSX_3R<0b01110000010001111>, IsCommutable, + LSX_3RN_DESC_BASE<"vsadd.d", LSX128DOpnd>; + + +def VSSUB_B : LSX_3R<0b01110000010010000>, + LSX_3RN_DESC_BASE<"vssub.b", LSX128BOpnd>; + +def VSSUB_H : LSX_3R<0b01110000010010001>, + LSX_3RN_DESC_BASE<"vssub.h", LSX128HOpnd>; + +def VSSUB_W : LSX_3R<0b01110000010010010>, + LSX_3RN_DESC_BASE<"vssub.w", LSX128WOpnd>; + +def VSSUB_D : LSX_3R<0b01110000010010011>, + LSX_3RN_DESC_BASE<"vssub.d", LSX128DOpnd>; + + +def VSADD_BU : LSX_3R<0b01110000010010100>, IsCommutable, + LSX_3RN_DESC_BASE<"vsadd.bu", LSX128BOpnd>; + +def VSADD_HU : LSX_3R<0b01110000010010101>, IsCommutable, + LSX_3RN_DESC_BASE<"vsadd.hu", LSX128HOpnd>; + +def VSADD_WU : LSX_3R<0b01110000010010110>, IsCommutable, + LSX_3RN_DESC_BASE<"vsadd.wu", LSX128WOpnd>; + +def VSADD_DU : LSX_3R<0b01110000010010111>, IsCommutable, + LSX_3RN_DESC_BASE<"vsadd.du", LSX128DOpnd>; + + +def VSSUB_BU : LSX_3R<0b01110000010011000>, + LSX_3RN_DESC_BASE<"vssub.bu", LSX128BOpnd>; + +def VSSUB_HU : LSX_3R<0b01110000010011001>, + LSX_3RN_DESC_BASE<"vssub.hu", LSX128HOpnd>; + +def VSSUB_WU : LSX_3R<0b01110000010011010>, + LSX_3RN_DESC_BASE<"vssub.wu", LSX128WOpnd>; + +def VSSUB_DU : LSX_3R<0b01110000010011011>, + LSX_3RN_DESC_BASE<"vssub.du", LSX128DOpnd>; + + +def VHADDW_H_B : LSX_3R<0b01110000010101000>, + LSX_3RN_DESC_BASE<"vhaddw.h.b", LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; + +def VHADDW_W_H : LSX_3R<0b01110000010101001>, + LSX_3RN_DESC_BASE<"vhaddw.w.h", LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VHADDW_D_W : LSX_3R<0b01110000010101010>, + LSX_3RN_DESC_BASE<"vhaddw.d.w", LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>; + + +def VHSUBW_H_B : LSX_3R<0b01110000010101100>, + LSX_3RN_DESC_BASE<"vhsubw.h.b", LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; + +def VHSUBW_W_H : LSX_3R<0b01110000010101101>, + LSX_3RN_DESC_BASE<"vhsubw.w.h", LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VHSUBW_D_W : LSX_3R<0b01110000010101110>, + LSX_3RN_DESC_BASE<"vhsubw.d.w", LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>; + + +def VHADDW_HU_BU : LSX_3R<0b01110000010110000>, + LSX_3RN_DESC_BASE<"vhaddw.hu.bu", LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; + +def VHADDW_WU_HU : LSX_3R<0b01110000010110001>, + LSX_3RN_DESC_BASE<"vhaddw.wu.hu", LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VHADDW_DU_WU : LSX_3R<0b01110000010110010>, + LSX_3RN_DESC_BASE<"vhaddw.du.wu", LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>; + + +def VHSUBW_HU_BU : LSX_3R<0b01110000010110100>, + LSX_3RN_DESC_BASE<"vhsubw.hu.bu", LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; + +def VHSUBW_WU_HU : LSX_3R<0b01110000010110101>, + LSX_3RN_DESC_BASE<"vhsubw.wu.hu", LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VHSUBW_DU_WU : LSX_3R<0b01110000010110110>, + LSX_3RN_DESC_BASE<"vhsubw.du.wu", LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>; + + +def VADDA_B : LSX_3R<0b01110000010111000>, IsCommutable, + LSX_3RN_DESC_BASE<"vadda.b", LSX128BOpnd>; + +def VADDA_H : LSX_3R<0b01110000010111001>, IsCommutable, + LSX_3RN_DESC_BASE<"vadda.h", LSX128HOpnd>; + +def VADDA_W : LSX_3R<0b01110000010111010>, IsCommutable, + LSX_3RN_DESC_BASE<"vadda.w", LSX128WOpnd>; + +def VADDA_D : LSX_3R<0b01110000010111011>, IsCommutable, + LSX_3RN_DESC_BASE<"vadda.d", LSX128DOpnd>; + + +def VABSD_B : 
LSX_3R<0b01110000011000000>, + LSX_3RN_DESC_BASE<"vabsd.b", LSX128BOpnd>; + +def VABSD_H : LSX_3R<0b01110000011000001>, + LSX_3RN_DESC_BASE<"vabsd.h", LSX128HOpnd>; + +def VABSD_W : LSX_3R<0b01110000011000010>, + LSX_3RN_DESC_BASE<"vabsd.w", LSX128WOpnd>; + +def VABSD_D : LSX_3R<0b01110000011000011>, + LSX_3RN_DESC_BASE<"vabsd.d", LSX128DOpnd>; + + +def VABSD_BU : LSX_3R<0b01110000011000100>, + LSX_3RN_DESC_BASE<"vabsd.bu", LSX128BOpnd>; + +def VABSD_HU : LSX_3R<0b01110000011000101>, + LSX_3RN_DESC_BASE<"vabsd.hu", LSX128HOpnd>; + +def VABSD_WU : LSX_3R<0b01110000011000110>, + LSX_3RN_DESC_BASE<"vabsd.wu", LSX128WOpnd>; + +def VABSD_DU : LSX_3R<0b01110000011000111>, + LSX_3RN_DESC_BASE<"vabsd.du", LSX128DOpnd>; + + +def VAVG_B : LSX_3R<0b01110000011001000>, IsCommutable, + LSX_3RN_DESC_BASE<"vavg.b", LSX128BOpnd>; + +def VAVG_H : LSX_3R<0b01110000011001001>, IsCommutable, + LSX_3RN_DESC_BASE<"vavg.h", LSX128HOpnd>; + +def VAVG_W : LSX_3R<0b01110000011001010>, IsCommutable, + LSX_3RN_DESC_BASE<"vavg.w", LSX128WOpnd>; + +def VAVG_D : LSX_3R<0b01110000011001011>, IsCommutable, + LSX_3RN_DESC_BASE<"vavg.d", LSX128DOpnd>; + + +def VAVG_BU : LSX_3R<0b01110000011001100>, IsCommutable, + LSX_3RN_DESC_BASE<"vavg.bu", LSX128BOpnd>; + +def VAVG_HU : LSX_3R<0b01110000011001101>, IsCommutable, + LSX_3RN_DESC_BASE<"vavg.hu", LSX128HOpnd>; + +def VAVG_WU : LSX_3R<0b01110000011001110>, IsCommutable, + LSX_3RN_DESC_BASE<"vavg.wu", LSX128WOpnd>; + +def VAVG_DU : LSX_3R<0b01110000011001111>, IsCommutable, + LSX_3RN_DESC_BASE<"vavg.du", LSX128DOpnd>; + + +def VAVGR_B : LSX_3R<0b01110000011010000>, IsCommutable, + LSX_3RN_DESC_BASE<"vavgr.b", LSX128BOpnd>; + +def VAVGR_H : LSX_3R<0b01110000011010001>, IsCommutable, + LSX_3RN_DESC_BASE<"vavgr.h", LSX128HOpnd>; + +def VAVGR_W : LSX_3R<0b01110000011010010>, IsCommutable, + LSX_3RN_DESC_BASE<"vavgr.w", LSX128WOpnd>; + +def VAVGR_D : LSX_3R<0b01110000011010011>, IsCommutable, + LSX_3RN_DESC_BASE<"vavgr.d", LSX128DOpnd>; + + +def VAVGR_BU : LSX_3R<0b01110000011010100>, IsCommutable, + LSX_3RN_DESC_BASE<"vavgr.bu", LSX128BOpnd>; + +def VAVGR_HU : LSX_3R<0b01110000011010101>, IsCommutable, + LSX_3RN_DESC_BASE<"vavgr.hu", LSX128HOpnd>; + +def VAVGR_WU : LSX_3R<0b01110000011010110>, IsCommutable, + LSX_3RN_DESC_BASE<"vavgr.wu", LSX128WOpnd>; + +def VAVGR_DU : LSX_3R<0b01110000011010111>, IsCommutable, + LSX_3RN_DESC_BASE<"vavgr.du", LSX128DOpnd>; + + +def VMAX_B : LSX_3R<0b01110000011100000>, + LSX_3R_DESC_BASE<"vmax.b", smax, LSX128BOpnd>; + +def VMAX_H : LSX_3R<0b01110000011100001>, + LSX_3R_DESC_BASE<"vmax.h", smax, LSX128HOpnd>; + +def VMAX_W : LSX_3R<0b01110000011100010>, + LSX_3R_DESC_BASE<"vmax.w", smax, LSX128WOpnd>; + +def VMAX_D : LSX_3R<0b01110000011100011>, + LSX_3R_DESC_BASE<"vmax.d", smax, LSX128DOpnd>; + + +def VMIN_B : LSX_3R<0b01110000011100100>, + LSX_3R_DESC_BASE<"vmin.b", smin, LSX128BOpnd>; + +def VMIN_H : LSX_3R<0b01110000011100101>, + LSX_3R_DESC_BASE<"vmin.h", smin, LSX128HOpnd>; + +def VMIN_W : LSX_3R<0b01110000011100110>, + LSX_3R_DESC_BASE<"vmin.w", smin, LSX128WOpnd>; + +def VMIN_D : LSX_3R<0b01110000011100111>, + LSX_3R_DESC_BASE<"vmin.d", smin, LSX128DOpnd>; + + +def VMAX_BU : LSX_3R<0b01110000011101000>, + LSX_3R_DESC_BASE<"vmax.bu", umax, LSX128BOpnd>; + +def VMAX_HU : LSX_3R<0b01110000011101001>, + LSX_3R_DESC_BASE<"vmax.hu", umax, LSX128HOpnd>; + +def VMAX_WU : LSX_3R<0b01110000011101010>, + LSX_3R_DESC_BASE<"vmax.wu", umax, LSX128WOpnd>; + +def VMAX_DU : LSX_3R<0b01110000011101011>, + LSX_3R_DESC_BASE<"vmax.du", umax, LSX128DOpnd>; + + 
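+// The vmax/vmin families select directly from the generic smax/smin (and, for +// the .bu/.hu/.wu/.du variants below, umax/umin) ISD nodes, so a vector min +// or max becomes a single LSX instruction rather than a compare-plus-select +// sequence.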
+def VMIN_BU : LSX_3R<0b01110000011101100>, + LSX_3R_DESC_BASE<"vmin.bu", umin, LSX128BOpnd>; + +def VMIN_HU : LSX_3R<0b01110000011101101>, + LSX_3R_DESC_BASE<"vmin.hu", umin, LSX128HOpnd>; + +def VMIN_WU : LSX_3R<0b01110000011101110>, + LSX_3R_DESC_BASE<"vmin.wu", umin, LSX128WOpnd>; + +def VMIN_DU : LSX_3R<0b01110000011101111>, + LSX_3R_DESC_BASE<"vmin.du", umin, LSX128DOpnd>; + + +def VMUL_B : LSX_3R<0b01110000100001000>, + LSX_3R_DESC_BASE<"vmul.b", mul, LSX128BOpnd>; + +def VMUL_H : LSX_3R<0b01110000100001001>, + LSX_3R_DESC_BASE<"vmul.h", mul, LSX128HOpnd>; + +def VMUL_W : LSX_3R<0b01110000100001010>, + LSX_3R_DESC_BASE<"vmul.w", mul, LSX128WOpnd>; + +def VMUL_D : LSX_3R<0b01110000100001011>, + LSX_3R_DESC_BASE<"vmul.d", mul, LSX128DOpnd>; + + +def VMADD_B : LSX_3R<0b01110000101010000>, + LSX_3R_4R_DESC_BASE<"vmadd.b", muladd, LSX128BOpnd>; + +def VMADD_H : LSX_3R<0b01110000101010001>, + LSX_3R_4R_DESC_BASE<"vmadd.h", muladd, LSX128HOpnd>; + +def VMADD_W : LSX_3R<0b01110000101010010>, + LSX_3R_4R_DESC_BASE<"vmadd.w", muladd, LSX128WOpnd>; + +def VMADD_D : LSX_3R<0b01110000101010011>, + LSX_3R_4R_DESC_BASE<"vmadd.d", muladd, LSX128DOpnd>; + + +def VMSUB_B : LSX_3R<0b01110000101010100>, + LSX_3R_4R_DESC_BASE<"vmsub.b", mulsub, LSX128BOpnd>; + +def VMSUB_H : LSX_3R<0b01110000101010101>, + LSX_3R_4R_DESC_BASE<"vmsub.h", mulsub, LSX128HOpnd>; + +def VMSUB_W : LSX_3R<0b01110000101010110>, + LSX_3R_4R_DESC_BASE<"vmsub.w", mulsub, LSX128WOpnd>; + +def VMSUB_D : LSX_3R<0b01110000101010111>, + LSX_3R_4R_DESC_BASE<"vmsub.d", mulsub, LSX128DOpnd>; + + +def VDIV_B : LSX_3R<0b01110000111000000>, + LSX_3R_DESC_BASE<"vdiv.b", sdiv, LSX128BOpnd>; + +def VDIV_H : LSX_3R<0b01110000111000001>, + LSX_3R_DESC_BASE<"vdiv.h", sdiv, LSX128HOpnd>; + +def VDIV_W : LSX_3R<0b01110000111000010>, + LSX_3R_DESC_BASE<"vdiv.w", sdiv, LSX128WOpnd>; + +def VDIV_D : LSX_3R<0b01110000111000011>, + LSX_3R_DESC_BASE<"vdiv.d", sdiv, LSX128DOpnd>; + + +def VMOD_B : LSX_3R<0b01110000111000100>, + LSX_3R_DESC_BASE<"vmod.b", srem, LSX128BOpnd>; + +def VMOD_H : LSX_3R<0b01110000111000101>, + LSX_3R_DESC_BASE<"vmod.h", srem, LSX128HOpnd>; + +def VMOD_W : LSX_3R<0b01110000111000110>, + LSX_3R_DESC_BASE<"vmod.w", srem, LSX128WOpnd>; + +def VMOD_D : LSX_3R<0b01110000111000111>, + LSX_3R_DESC_BASE<"vmod.d", srem, LSX128DOpnd>; + + +def VDIV_BU : LSX_3R<0b01110000111001000>, + LSX_3R_DESC_BASE<"vdiv.bu", udiv, LSX128BOpnd>; + +def VDIV_HU : LSX_3R<0b01110000111001001>, + LSX_3R_DESC_BASE<"vdiv.hu", udiv, LSX128HOpnd>; + +def VDIV_WU : LSX_3R<0b01110000111001010>, + LSX_3R_DESC_BASE<"vdiv.wu", udiv, LSX128WOpnd>; + +def VDIV_DU : LSX_3R<0b01110000111001011>, + LSX_3R_DESC_BASE<"vdiv.du", udiv, LSX128DOpnd>; + + +def VMOD_BU : LSX_3R<0b01110000111001100>, + LSX_3R_DESC_BASE<"vmod.bu", urem, LSX128BOpnd>; + +def VMOD_HU : LSX_3R<0b01110000111001101>, + LSX_3R_DESC_BASE<"vmod.hu", urem, LSX128HOpnd>; + +def VMOD_WU : LSX_3R<0b01110000111001110>, + LSX_3R_DESC_BASE<"vmod.wu", urem, LSX128WOpnd>; + +def VMOD_DU : LSX_3R<0b01110000111001111>, + LSX_3R_DESC_BASE<"vmod.du", urem, LSX128DOpnd>; + + +def VSLL_B : LSX_3R<0b01110000111010000>, + LSX_3R_DESC_BASE<"vsll.b", shl, LSX128BOpnd>; + +def VSLL_H : LSX_3R<0b01110000111010001>, + LSX_3R_DESC_BASE<"vsll.h", shl, LSX128HOpnd>; + +def VSLL_W : LSX_3R<0b01110000111010010>, + LSX_3R_DESC_BASE<"vsll.w", shl, LSX128WOpnd>; + +def VSLL_D : LSX_3R<0b01110000111010011>, + LSX_3R_DESC_BASE<"vsll.d", shl, LSX128DOpnd>; + + +def VSRL_B : LSX_3R<0b01110000111010100>, + LSX_3R_DESC_BASE<"vsrl.b", srl, 
LSX128BOpnd>; + +def VSRL_H : LSX_3R<0b01110000111010101>, + LSX_3R_DESC_BASE<"vsrl.h", srl, LSX128HOpnd>; + +def VSRL_W : LSX_3R<0b01110000111010110>, + LSX_3R_DESC_BASE<"vsrl.w", srl, LSX128WOpnd>; + +def VSRL_D : LSX_3R<0b01110000111010111>, + LSX_3R_DESC_BASE<"vsrl.d", srl, LSX128DOpnd>; + + +def VSRA_B : LSX_3R<0b01110000111011000>, + LSX_3R_DESC_BASE<"vsra.b", sra, LSX128BOpnd>; + +def VSRA_H : LSX_3R<0b01110000111011001>, + LSX_3R_DESC_BASE<"vsra.h", sra, LSX128HOpnd>; + +def VSRA_W : LSX_3R<0b01110000111011010>, + LSX_3R_DESC_BASE<"vsra.w", sra, LSX128WOpnd>; + +def VSRA_D : LSX_3R<0b01110000111011011>, + LSX_3R_DESC_BASE<"vsra.d", sra, LSX128DOpnd>; + + +def VSRLR_B : LSX_3R<0b01110000111100000>, + LSX_3RN_DESC_BASE<"vsrlr.b", LSX128BOpnd>; + +def VSRLR_H : LSX_3R<0b01110000111100001>, + LSX_3RN_DESC_BASE<"vsrlr.h", LSX128HOpnd>; + +def VSRLR_W : LSX_3R<0b01110000111100010>, + LSX_3RN_DESC_BASE<"vsrlr.w", LSX128WOpnd>; + +def VSRLR_D : LSX_3R<0b01110000111100011>, + LSX_3RN_DESC_BASE<"vsrlr.d", LSX128DOpnd>; + + +def VSRAR_B : LSX_3R<0b01110000111100100>, + LSX_3RN_DESC_BASE<"vsrar.b", LSX128BOpnd>; + +def VSRAR_H : LSX_3R<0b01110000111100101>, + LSX_3RN_DESC_BASE<"vsrar.h", LSX128HOpnd>; + +def VSRAR_W : LSX_3R<0b01110000111100110>, + LSX_3RN_DESC_BASE<"vsrar.w", LSX128WOpnd>; + +def VSRAR_D : LSX_3R<0b01110000111100111>, + LSX_3RN_DESC_BASE<"vsrar.d", LSX128DOpnd>; + + +def VBITCLR_B : LSX_3R<0b01110001000011000>, + LSX_3R_DESC_BASE<"vbitclr.b", vbitclr_b, LSX128BOpnd>; + +def VBITCLR_H : LSX_3R<0b01110001000011001>, + LSX_3R_DESC_BASE<"vbitclr.h", vbitclr_h, LSX128HOpnd>; + +def VBITCLR_W : LSX_3R<0b01110001000011010>, + LSX_3R_DESC_BASE<"vbitclr.w", vbitclr_w, LSX128WOpnd>; + +def VBITCLR_D : LSX_3R<0b01110001000011011>, + LSX_3R_DESC_BASE<"vbitclr.d", vbitclr_d, LSX128DOpnd>; + + +def VBITSET_B : LSX_3R<0b01110001000011100>, + LSX_3RN_DESC_BASE<"vbitset.b", LSX128BOpnd>; + +def VBITSET_H : LSX_3R<0b01110001000011101>, + LSX_3RN_DESC_BASE<"vbitset.h", LSX128HOpnd>; + +def VBITSET_W : LSX_3R<0b01110001000011110>, + LSX_3RN_DESC_BASE<"vbitset.w", LSX128WOpnd>; + +def VBITSET_D : LSX_3R<0b01110001000011111>, + LSX_3RN_DESC_BASE<"vbitset.d", LSX128DOpnd>; + + +def VBITREV_B : LSX_3R<0b01110001000100000>, + LSX_3RN_DESC_BASE<"vbitrev.b", LSX128BOpnd>; + +def VBITREV_H : LSX_3R<0b01110001000100001>, + LSX_3RN_DESC_BASE<"vbitrev.h", LSX128HOpnd>; + +def VBITREV_W : LSX_3R<0b01110001000100010>, + LSX_3RN_DESC_BASE<"vbitrev.w", LSX128WOpnd>; + +def VBITREV_D : LSX_3R<0b01110001000100011>, + LSX_3RN_DESC_BASE<"vbitrev.d", LSX128DOpnd>; + + +def VPACKEV_B : LSX_3R<0b01110001000101100>, + LSX_3R_DESC_BASE<"vpackev.b", LoongArchVPACKEV, LSX128BOpnd>; + +def VPACKEV_H : LSX_3R<0b01110001000101101>, + LSX_3R_DESC_BASE<"vpackev.h", LoongArchVPACKEV, LSX128HOpnd>; + +def VPACKEV_W : LSX_3R<0b01110001000101110>, + LSX_3R_DESC_BASE<"vpackev.w", LoongArchVPACKEV, LSX128WOpnd>; + +def VPACKEV_D : LSX_3R<0b01110001000101111>, + LSX_3R_DESC_BASE<"vpackev.d", LoongArchVPACKEV, LSX128DOpnd>; + + +def VPACKOD_B : LSX_3R<0b01110001000110000>, + LSX_3R_DESC_BASE<"vpackod.b", LoongArchVPACKOD, LSX128BOpnd>; + +def VPACKOD_H : LSX_3R<0b01110001000110001>, + LSX_3R_DESC_BASE<"vpackod.h", LoongArchVPACKOD, LSX128HOpnd>; + +def VPACKOD_W : LSX_3R<0b01110001000110010>, + LSX_3R_DESC_BASE<"vpackod.w", LoongArchVPACKOD, LSX128WOpnd>; + +def VPACKOD_D : LSX_3R<0b01110001000110011>, + LSX_3R_DESC_BASE<"vpackod.d", LoongArchVPACKOD, LSX128DOpnd>; + + +def VILVL_B : LSX_3R<0b01110001000110100>, + 
LSX_3R_DESC_BASE<"vilvl.b", LoongArchVILVL, LSX128BOpnd>; + +def VILVL_H : LSX_3R<0b01110001000110101>, + LSX_3R_DESC_BASE<"vilvl.h", LoongArchVILVL, LSX128HOpnd>; + +def VILVL_W : LSX_3R<0b01110001000110110>, + LSX_3R_DESC_BASE<"vilvl.w", LoongArchVILVL, LSX128WOpnd>; + +def VILVL_D : LSX_3R<0b01110001000110111>, + LSX_3R_DESC_BASE<"vilvl.d", LoongArchVILVL, LSX128DOpnd>; + + +def VILVH_B : LSX_3R<0b01110001000111000>, + LSX_3R_DESC_BASE<"vilvh.b", LoongArchVILVH, LSX128BOpnd>; + +def VILVH_H : LSX_3R<0b01110001000111001>, + LSX_3R_DESC_BASE<"vilvh.h", LoongArchVILVH, LSX128HOpnd>; + +def VILVH_W : LSX_3R<0b01110001000111010>, + LSX_3R_DESC_BASE<"vilvh.w", LoongArchVILVH, LSX128WOpnd>; + +def VILVH_D : LSX_3R<0b01110001000111011>, + LSX_3R_DESC_BASE<"vilvh.d", LoongArchVILVH, LSX128DOpnd>; + + +def VPICKEV_B : LSX_3R<0b01110001000111100>, + LSX_3R_DESC_BASE<"vpickev.b", LoongArchVPICKEV, LSX128BOpnd>; + +def VPICKEV_H : LSX_3R<0b01110001000111101>, + LSX_3R_DESC_BASE<"vpickev.h", LoongArchVPICKEV, LSX128HOpnd>; + +def VPICKEV_W : LSX_3R<0b01110001000111110>, + LSX_3R_DESC_BASE<"vpickev.w", LoongArchVPICKEV, LSX128WOpnd>; + +def VPICKEV_D : LSX_3R<0b01110001000111111>, + LSX_3R_DESC_BASE<"vpickev.d", LoongArchVPICKEV, LSX128DOpnd>; + + +def VPICKOD_B : LSX_3R<0b01110001001000000>, + LSX_3R_DESC_BASE<"vpickod.b", LoongArchVPICKOD, LSX128BOpnd>; + +def VPICKOD_H : LSX_3R<0b01110001001000001>, + LSX_3R_DESC_BASE<"vpickod.h", LoongArchVPICKOD, LSX128HOpnd>; + +def VPICKOD_W : LSX_3R<0b01110001001000010>, + LSX_3R_DESC_BASE<"vpickod.w", LoongArchVPICKOD, LSX128WOpnd>; + +def VPICKOD_D : LSX_3R<0b01110001001000011>, + LSX_3R_DESC_BASE<"vpickod.d", LoongArchVPICKOD, LSX128DOpnd>; + + +def VREPLVE_B : LSX_3R_1GP<0b01110001001000100>, + LSX_3R_VREPLVE_DESC_BASE<"vreplve.b", vsplati8_elt, LSX128BOpnd>; + +def VREPLVE_H : LSX_3R_1GP<0b01110001001000101>, + LSX_3R_VREPLVE_DESC_BASE<"vreplve.h", vsplati16_elt, LSX128HOpnd>; + +def VREPLVE_W : LSX_3R_1GP<0b01110001001000110>, + LSX_3R_VREPLVE_DESC_BASE<"vreplve.w", vsplati32_elt, LSX128WOpnd>; + +def VREPLVE_D : LSX_3R_1GP<0b01110001001000111>, + LSX_3R_VREPLVE_DESC_BASE<"vreplve.d", vsplati64_elt, LSX128DOpnd>; + + +def VAND_V : LSX_3R<0b01110001001001100>, + LSX_VEC_DESC_BASE<"vand.v", and, LSX128BOpnd>; +class AND_V_H_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE<and, v8i16, LSX128HOpnd>; +class AND_V_W_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE<and, v4i32, LSX128WOpnd>; +class AND_V_D_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE<and, v2i64, LSX128DOpnd>; + +def AND_V_H_PSEUDO : AND_V_H_PSEUDO_DESC, + PseudoInstExpansion<(VAND_V LSX128BOpnd:$vd, + LSX128BOpnd:$vj, + LSX128BOpnd:$vk)>; +def AND_V_W_PSEUDO : AND_V_W_PSEUDO_DESC, + PseudoInstExpansion<(VAND_V LSX128BOpnd:$vd, + LSX128BOpnd:$vj, + LSX128BOpnd:$vk)>; +def AND_V_D_PSEUDO : AND_V_D_PSEUDO_DESC, + PseudoInstExpansion<(VAND_V LSX128BOpnd:$vd, + LSX128BOpnd:$vj, + LSX128BOpnd:$vk)>; + + +def VOR_V : LSX_3R<0b01110001001001101>, + LSX_VEC_DESC_BASE<"vor.v", or, LSX128BOpnd>; +class OR_V_H_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE<or, v8i16, LSX128HOpnd>; +class OR_V_W_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE<or, v4i32, LSX128WOpnd>; +class OR_V_D_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE<or, v2i64, LSX128DOpnd>; + +def OR_V_H_PSEUDO : OR_V_H_PSEUDO_DESC, + PseudoInstExpansion<(VOR_V LSX128BOpnd:$vd, + LSX128BOpnd:$vj, + LSX128BOpnd:$vk)>; +def OR_V_W_PSEUDO : OR_V_W_PSEUDO_DESC, + PseudoInstExpansion<(VOR_V LSX128BOpnd:$vd, + LSX128BOpnd:$vj, + LSX128BOpnd:$vk)>; +def OR_V_D_PSEUDO : OR_V_D_PSEUDO_DESC, + PseudoInstExpansion<(VOR_V LSX128BOpnd:$vd, + LSX128BOpnd:$vj, + LSX128BOpnd:$vk)>; + + +def VXOR_V : LSX_3R<0b01110001001001110>, + LSX_VEC_DESC_BASE<"vxor.v", xor, LSX128BOpnd>; +class 
XOR_V_H_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE<xor, v8i16, LSX128HOpnd>; +class XOR_V_W_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE<xor, v4i32, LSX128WOpnd>; +class XOR_V_D_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE<xor, v2i64, LSX128DOpnd>; + +def XOR_V_H_PSEUDO : XOR_V_H_PSEUDO_DESC, + PseudoInstExpansion<(VXOR_V LSX128BOpnd:$vd, + LSX128BOpnd:$vj, + LSX128BOpnd:$vk)>; +def XOR_V_W_PSEUDO : XOR_V_W_PSEUDO_DESC, + PseudoInstExpansion<(VXOR_V LSX128BOpnd:$vd, + LSX128BOpnd:$vj, + LSX128BOpnd:$vk)>; +def XOR_V_D_PSEUDO : XOR_V_D_PSEUDO_DESC, + PseudoInstExpansion<(VXOR_V LSX128BOpnd:$vd, + LSX128BOpnd:$vj, + LSX128BOpnd:$vk)>; + + +def VNOR_V : LSX_3R<0b01110001001001111>, + LSX_VEC_DESC_BASE<"vnor.v", LoongArchVNOR, LSX128BOpnd>; +class NOR_V_H_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE<LoongArchVNOR, v8i16, LSX128HOpnd>; +class NOR_V_W_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE<LoongArchVNOR, v4i32, LSX128WOpnd>; +class NOR_V_D_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE<LoongArchVNOR, v2i64, LSX128DOpnd>; + +def NOR_V_H_PSEUDO : NOR_V_H_PSEUDO_DESC, + PseudoInstExpansion<(VNOR_V LSX128BOpnd:$vd, + LSX128BOpnd:$vj, + LSX128BOpnd:$vk)>; +def NOR_V_W_PSEUDO : NOR_V_W_PSEUDO_DESC, + PseudoInstExpansion<(VNOR_V LSX128BOpnd:$vd, + LSX128BOpnd:$vj, + LSX128BOpnd:$vk)>; +def NOR_V_D_PSEUDO : NOR_V_D_PSEUDO_DESC, + PseudoInstExpansion<(VNOR_V LSX128BOpnd:$vd, + LSX128BOpnd:$vj, + LSX128BOpnd:$vk)>; + + +def VFADD_S : LSX_3R<0b01110001001100001>, IsCommutable, + LSX_3RF_DESC_BASE<"vfadd.s", fadd, LSX128WOpnd>; + +def VFADD_D : LSX_3R<0b01110001001100010>, IsCommutable, + LSX_3RF_DESC_BASE<"vfadd.d", fadd, LSX128DOpnd>; + + +def VFSUB_S : LSX_3R<0b01110001001100101>, + LSX_3RF_DESC_BASE<"vfsub.s", fsub, LSX128WOpnd>; + +def VFSUB_D : LSX_3R<0b01110001001100110>, + LSX_3RF_DESC_BASE<"vfsub.d", fsub, LSX128DOpnd>; + + +def VFMUL_S : LSX_3R<0b01110001001110001>, + LSX_3RF_DESC_BASE<"vfmul.s", fmul, LSX128WOpnd>; + +def VFMUL_D : LSX_3R<0b01110001001110010>, + LSX_3RF_DESC_BASE<"vfmul.d", fmul, LSX128DOpnd>; + + +def VFDIV_S : LSX_3R<0b01110001001110101>, + LSX_3RF_DESC_BASE<"vfdiv.s", fdiv, LSX128WOpnd>; + +def VFDIV_D : LSX_3R<0b01110001001110110>, + LSX_3RF_DESC_BASE<"vfdiv.d", fdiv, LSX128DOpnd>; + + +def VFMAX_S : LSX_3R<0b01110001001111001>, + LSX_3RFN_DESC_BASE<"vfmax.s", LSX128WOpnd>; + +def VFMAX_D : LSX_3R<0b01110001001111010>, + LSX_3RFN_DESC_BASE<"vfmax.d", LSX128DOpnd>; + + +def VFMIN_S : LSX_3R<0b01110001001111101>, + LSX_3RFN_DESC_BASE<"vfmin.s", LSX128WOpnd>; + +def VFMIN_D : LSX_3R<0b01110001001111110>, + LSX_3RFN_DESC_BASE<"vfmin.d", LSX128DOpnd>; + + +def VFMAXA_S : LSX_3R<0b01110001010000001>, + LSX_3RFN_DESC_BASE<"vfmaxa.s", LSX128WOpnd>; + +def VFMAXA_D : LSX_3R<0b01110001010000010>, + LSX_3RFN_DESC_BASE<"vfmaxa.d", LSX128DOpnd>; + + +def VFMINA_S : LSX_3R<0b01110001010000101>, + LSX_3RFN_DESC_BASE<"vfmina.s", LSX128WOpnd>; + +def VFMINA_D : LSX_3R<0b01110001010000110>, + LSX_3RFN_DESC_BASE<"vfmina.d", LSX128DOpnd>; + + +def VSHUF_H : LSX_3R<0b01110001011110101>, + LSX_3R_VSHF_DESC_BASE<"vshuf.h", LSX128HOpnd>; + +def VSHUF_W : LSX_3R<0b01110001011110110>, + LSX_3R_VSHF_DESC_BASE<"vshuf.w", LSX128WOpnd>; + +def VSHUF_D : LSX_3R<0b01110001011110111>, + LSX_3R_VSHF_DESC_BASE<"vshuf.d", LSX128DOpnd>; + + +def VSEQI_B : LSX_I5<0b01110010100000000>, + LSX_I5_DESC_BASE_Intrinsic<"vseqi.b", int_loongarch_lsx_vseqi_b, simm5_32, immSExt5, LSX128BOpnd>; + +def VSEQI_H : LSX_I5<0b01110010100000001>, + LSX_I5_DESC_BASE_Intrinsic<"vseqi.h", int_loongarch_lsx_vseqi_h, simm5_32, immSExt5, LSX128HOpnd>; + +def VSEQI_W : LSX_I5<0b01110010100000010>, + LSX_I5_DESC_BASE_Intrinsic<"vseqi.w", int_loongarch_lsx_vseqi_w, simm5_32, immSExt5, LSX128WOpnd>; + +def VSEQI_D : LSX_I5<0b01110010100000011>, + 
LSX_I5_DESC_BASE_Intrinsic<"vseqi.d", int_loongarch_lsx_vseqi_d, simm5_32, immSExt5, LSX128DOpnd>; + + +def VSLEI_B : LSX_I5<0b01110010100000100>, + LSX_I5_DESC_BASE_Intrinsic<"vslei.b", int_loongarch_lsx_vslei_b, simm5_32, immSExt5, LSX128BOpnd>; + +def VSLEI_H : LSX_I5<0b01110010100000101>, + LSX_I5_DESC_BASE_Intrinsic<"vslei.h", int_loongarch_lsx_vslei_h, simm5_32, immSExt5, LSX128HOpnd>; + +def VSLEI_W : LSX_I5<0b01110010100000110>, + LSX_I5_DESC_BASE_Intrinsic<"vslei.w", int_loongarch_lsx_vslei_w, simm5_32, immSExt5, LSX128WOpnd>; + +def VSLEI_D : LSX_I5<0b01110010100000111>, + LSX_I5_DESC_BASE_Intrinsic<"vslei.d", int_loongarch_lsx_vslei_d, simm5_32, immSExt5, LSX128DOpnd>; + + +def VSLEI_BU : LSX_I5_U<0b01110010100001000>, + LSX_I5_U_DESC_BASE_Intrinsic<"vslei.bu", int_loongarch_lsx_vslei_bu, uimm5, immZExt5, LSX128BOpnd>; + +def VSLEI_HU : LSX_I5_U<0b01110010100001001>, + LSX_I5_U_DESC_BASE_Intrinsic<"vslei.hu", int_loongarch_lsx_vslei_hu, uimm5, immZExt5, LSX128HOpnd>; + +def VSLEI_WU : LSX_I5_U<0b01110010100001010>, + LSX_I5_U_DESC_BASE_Intrinsic<"vslei.wu", int_loongarch_lsx_vslei_wu, uimm5, immZExt5, LSX128WOpnd>; + +def VSLEI_DU : LSX_I5_U<0b01110010100001011>, + LSX_I5_U_DESC_BASE_Intrinsic<"vslei.du", int_loongarch_lsx_vslei_du, uimm5, immZExt5, LSX128DOpnd>; + + +def VSLTI_B : LSX_I5<0b01110010100001100>, + LSX_I5_DESC_BASE_Intrinsic<"vslti.b", int_loongarch_lsx_vslti_b, simm5_32, immSExt5, LSX128BOpnd>; + +def VSLTI_H : LSX_I5<0b01110010100001101>, + LSX_I5_DESC_BASE_Intrinsic<"vslti.h", int_loongarch_lsx_vslti_h, simm5_32, immSExt5, LSX128HOpnd>; + +def VSLTI_W : LSX_I5<0b01110010100001110>, + LSX_I5_DESC_BASE_Intrinsic<"vslti.w", int_loongarch_lsx_vslti_w, simm5_32, immSExt5, LSX128WOpnd>; + +def VSLTI_D : LSX_I5<0b01110010100001111>, + LSX_I5_DESC_BASE_Intrinsic<"vslti.d", int_loongarch_lsx_vslti_d, simm5_32, immSExt5, LSX128DOpnd>; + + +def VSLTI_BU : LSX_I5_U<0b01110010100010000>, + LSX_I5_U_DESC_BASE_Intrinsic<"vslti.bu", int_loongarch_lsx_vslti_bu, uimm5, immZExt5, LSX128BOpnd>; + +def VSLTI_HU : LSX_I5_U<0b01110010100010001>, + LSX_I5_U_DESC_BASE_Intrinsic<"vslti.hu", int_loongarch_lsx_vslti_hu, uimm5, immZExt5, LSX128HOpnd>; + +def VSLTI_WU : LSX_I5_U<0b01110010100010010>, + LSX_I5_U_DESC_BASE_Intrinsic<"vslti.wu", int_loongarch_lsx_vslti_wu, uimm5, immZExt5, LSX128WOpnd>; + +def VSLTI_DU : LSX_I5_U<0b01110010100010011>, + LSX_I5_U_DESC_BASE_Intrinsic<"vslti.du", int_loongarch_lsx_vslti_du, uimm5, immZExt5, LSX128DOpnd>; + + +def VADDI_BU : LSX_I5_U<0b01110010100010100>, + LSX_I5_U_DESC_BASE<"vaddi.bu", add, vsplati8_uimm5, LSX128BOpnd>; + +def VADDI_HU : LSX_I5_U<0b01110010100010101>, + LSX_I5_U_DESC_BASE<"vaddi.hu", add, vsplati16_uimm5, LSX128HOpnd>; + +def VADDI_WU : LSX_I5_U<0b01110010100010110>, + LSX_I5_U_DESC_BASE<"vaddi.wu", add, vsplati32_uimm5, LSX128WOpnd>; + +def VADDI_DU : LSX_I5_U<0b01110010100010111>, + LSX_I5_U_DESC_BASE<"vaddi.du", add, vsplati64_uimm5, LSX128DOpnd>; + + +def VSUBI_BU : LSX_I5_U<0b01110010100011000>, + LSX_I5_U_DESC_BASE<"vsubi.bu", sub, vsplati8_uimm5, LSX128BOpnd>; + +def VSUBI_HU : LSX_I5_U<0b01110010100011001>, + LSX_I5_U_DESC_BASE<"vsubi.hu", sub, vsplati16_uimm5, LSX128HOpnd>; + +def VSUBI_WU : LSX_I5_U<0b01110010100011010>, + LSX_I5_U_DESC_BASE<"vsubi.wu", sub, vsplati32_uimm5, LSX128WOpnd>; + +def VSUBI_DU : LSX_I5_U<0b01110010100011011>, + LSX_I5_U_DESC_BASE<"vsubi.du", sub, vsplati64_uimm5, LSX128DOpnd>; + + +def VMAXI_B : LSX_I5<0b01110010100100000>, + LSX_I5_DESC_BASE_Intrinsic<"vmaxi.b", int_loongarch_lsx_vmaxi_b, 
simm5_32, immSExt5, LSX128BOpnd>; + +def VMAXI_H : LSX_I5<0b01110010100100001>, + LSX_I5_DESC_BASE_Intrinsic<"vmaxi.h", int_loongarch_lsx_vmaxi_h, simm5_32, immSExt5, LSX128HOpnd>; + +def VMAXI_W : LSX_I5<0b01110010100100010>, + LSX_I5_DESC_BASE_Intrinsic<"vmaxi.w", int_loongarch_lsx_vmaxi_w, simm5_32, immSExt5, LSX128WOpnd>; + +def VMAXI_D : LSX_I5<0b01110010100100011>, + LSX_I5_DESC_BASE_Intrinsic<"vmaxi.d", int_loongarch_lsx_vmaxi_d, simm5_32, immSExt5, LSX128DOpnd>; + + +def VMINI_B : LSX_I5<0b01110010100100100>, + LSX_I5_DESC_BASE_Intrinsic<"vmini.b", int_loongarch_lsx_vmini_b, simm5_32, immSExt5, LSX128BOpnd>; + +def VMINI_H : LSX_I5<0b01110010100100101>, + LSX_I5_DESC_BASE_Intrinsic<"vmini.h", int_loongarch_lsx_vmini_h, simm5_32, immSExt5, LSX128HOpnd>; + +def VMINI_W : LSX_I5<0b01110010100100110>, + LSX_I5_DESC_BASE_Intrinsic<"vmini.w", int_loongarch_lsx_vmini_w, simm5_32, immSExt5, LSX128WOpnd>; + +def VMINI_D : LSX_I5<0b01110010100100111>, + LSX_I5_DESC_BASE_Intrinsic<"vmini.d", int_loongarch_lsx_vmini_d, simm5_32, immSExt5, LSX128DOpnd>; + + +def VMAXI_BU : LSX_I5_U<0b01110010100101000>, + LSX_I5_U_DESC_BASE_Intrinsic<"vmaxi.bu", int_loongarch_lsx_vmaxi_bu, uimm5, immZExt5, LSX128BOpnd>; + +def VMAXI_HU : LSX_I5_U<0b01110010100101001>, + LSX_I5_U_DESC_BASE_Intrinsic<"vmaxi.hu", int_loongarch_lsx_vmaxi_hu, uimm5, immZExt5, LSX128HOpnd>; + +def VMAXI_WU : LSX_I5_U<0b01110010100101010>, + LSX_I5_U_DESC_BASE_Intrinsic<"vmaxi.wu", int_loongarch_lsx_vmaxi_wu, uimm5, immZExt5, LSX128WOpnd>; + +def VMAXI_DU : LSX_I5_U<0b01110010100101011>, + LSX_I5_U_DESC_BASE_Intrinsic<"vmaxi.du", int_loongarch_lsx_vmaxi_du, uimm5, immZExt5, LSX128DOpnd>; + + +def VMINI_BU : LSX_I5_U<0b01110010100101100>, + LSX_I5_U_DESC_BASE<"vmini.bu", umin, vsplati8_uimm5, LSX128BOpnd>; + +def VMINI_HU : LSX_I5_U<0b01110010100101101>, + LSX_I5_U_DESC_BASE<"vmini.hu", umin, vsplati16_uimm5, LSX128HOpnd>; + +def VMINI_WU : LSX_I5_U<0b01110010100101110>, + LSX_I5_U_DESC_BASE<"vmini.wu", umin, vsplati32_uimm5, LSX128WOpnd>; + +def VMINI_DU : LSX_I5_U<0b01110010100101111>, + LSX_I5_U_DESC_BASE<"vmini.du", umin, vsplati64_uimm5, LSX128DOpnd>; + + +def VCLO_B : LSX_2R<0b0111001010011100000000>, + LSX_2RN_DESC_BASE<"vclo.b", LSX128BOpnd>; + +def VCLO_H : LSX_2R<0b0111001010011100000001>, + LSX_2RN_DESC_BASE<"vclo.h", LSX128HOpnd>; + +def VCLO_W : LSX_2R<0b0111001010011100000010>, + LSX_2RN_DESC_BASE<"vclo.w", LSX128WOpnd>; + +def VCLO_D : LSX_2R<0b0111001010011100000011>, + LSX_2RN_DESC_BASE<"vclo.d", LSX128DOpnd>; + + +def VCLZ_B : LSX_2R<0b0111001010011100000100>, + LSX_2R_DESC_BASE<"vclz.b", ctlz, LSX128BOpnd>; + +def VCLZ_H : LSX_2R<0b0111001010011100000101>, + LSX_2R_DESC_BASE<"vclz.h", ctlz, LSX128HOpnd>; + +def VCLZ_W : LSX_2R<0b0111001010011100000110>, + LSX_2R_DESC_BASE<"vclz.w", ctlz, LSX128WOpnd>; + +def VCLZ_D : LSX_2R<0b0111001010011100000111>, + LSX_2R_DESC_BASE<"vclz.d", ctlz, LSX128DOpnd>; + + +def VPCNT_B : LSX_2R<0b0111001010011100001000>, + LSX_2R_DESC_BASE<"vpcnt.b", ctpop, LSX128BOpnd>; + +def VPCNT_H : LSX_2R<0b0111001010011100001001>, + LSX_2R_DESC_BASE<"vpcnt.h", ctpop, LSX128HOpnd>; + +def VPCNT_W : LSX_2R<0b0111001010011100001010>, + LSX_2R_DESC_BASE<"vpcnt.w", ctpop, LSX128WOpnd>; + +def VPCNT_D : LSX_2R<0b0111001010011100001011>, + LSX_2R_DESC_BASE<"vpcnt.d", ctpop, LSX128DOpnd>; + + +def VFLOGB_S : LSX_2R<0b0111001010011100110001>, + LSX_2RFN_DESC_BASE<"vflogb.s", LSX128WOpnd>; + +def VFLOGB_D : LSX_2R<0b0111001010011100110010>, + LSX_2RFN_DESC_BASE<"vflogb.d", LSX128DOpnd>; + + +def VFCLASS_S : 
LSX_2R<0b0111001010011100110101>, + LSX_2RFN_DESC_BASE<"vfclass.s", LSX128WOpnd>; + +def VFCLASS_D : LSX_2R<0b0111001010011100110110>, + LSX_2RFN_DESC_BASE<"vfclass.d", LSX128DOpnd>; + + +def VFSQRT_S : LSX_2R<0b0111001010011100111001>, + LSX_2RF_DESC_BASE<"vfsqrt.s", fsqrt, LSX128WOpnd>; + +def VFSQRT_D : LSX_2R<0b0111001010011100111010>, + LSX_2RF_DESC_BASE<"vfsqrt.d", fsqrt, LSX128DOpnd>; + + +def VFRECIP_S : LSX_2R<0b0111001010011100111101>, + LSX_2RFN_DESC_BASE<"vfrecip.s", LSX128WOpnd>; + +def VFRECIP_D : LSX_2R<0b0111001010011100111110>, + LSX_2RFN_DESC_BASE<"vfrecip.d", LSX128DOpnd>; + + +def VFRSQRT_S : LSX_2R<0b0111001010011101000001>, + LSX_2RFN_DESC_BASE<"vfrsqrt.s", LSX128WOpnd>; + +def VFRSQRT_D : LSX_2R<0b0111001010011101000010>, + LSX_2RFN_DESC_BASE<"vfrsqrt.d", LSX128DOpnd>; + + +def VFRINT_S : LSX_2R<0b0111001010011101001101>, + LSX_2RF_DESC_BASE<"vfrint.s", frint, LSX128WOpnd>; + +def VFRINT_D : LSX_2R<0b0111001010011101001110>, + LSX_2RF_DESC_BASE<"vfrint.d", frint, LSX128DOpnd>; + + +def VFCVTL_S_H : LSX_2R<0b0111001010011101111010>, + LSX_2RFN_DESC_BASE_CVT<"vfcvtl.s.h", LSX128WOpnd, LSX128HOpnd>; + +def VFCVTH_S_H : LSX_2R<0b0111001010011101111011>, + LSX_2RFN_DESC_BASE_CVT<"vfcvth.s.h", LSX128WOpnd, LSX128HOpnd>; + + +def VFCVTL_D_S : LSX_2R<0b0111001010011101111100>, + LSX_2RFN_DESC_BASE_CVT<"vfcvtl.d.s", LSX128DOpnd, LSX128WOpnd>; + +def VFCVTH_D_S : LSX_2R<0b0111001010011101111101>, + LSX_2RFN_DESC_BASE_CVT<"vfcvth.d.s", LSX128DOpnd, LSX128WOpnd>; + + +def VFFINT_S_W : LSX_2R<0b0111001010011110000000>, + LSX_2RF_DESC_BASE<"vffint.s.w", sint_to_fp, LSX128WOpnd>; + +def VFFINT_S_WU : LSX_2R<0b0111001010011110000001>, + LSX_2RF_DESC_BASE<"vffint.s.wu", uint_to_fp, LSX128WOpnd>; + + +def VFFINT_D_L : LSX_2R<0b0111001010011110000010>, + LSX_2RF_DESC_BASE<"vffint.d.l", sint_to_fp, LSX128DOpnd>; + +def VFFINT_D_LU : LSX_2R<0b0111001010011110000011>, + LSX_2RF_DESC_BASE<"vffint.d.lu", uint_to_fp, LSX128DOpnd>; + + +def VFTINT_W_S : LSX_2R<0b0111001010011110001100>, + LSX_2RFN_DESC_BASE<"vftint.w.s", LSX128WOpnd>; + +def VFTINT_L_D : LSX_2R<0b0111001010011110001101>, + LSX_2RFN_DESC_BASE<"vftint.l.d", LSX128DOpnd>; + + +def VFTINT_WU_S : LSX_2R<0b0111001010011110010110>, + LSX_2RFN_DESC_BASE<"vftint.wu.s", LSX128WOpnd>; + +def VFTINT_LU_D : LSX_2R<0b0111001010011110010111>, + LSX_2RFN_DESC_BASE<"vftint.lu.d", LSX128DOpnd>; + + +def VFTINTRZ_WU_S : LSX_2R<0b0111001010011110011100>, + LSX_2RF_DESC_BASE<"vftintrz.wu.s", fp_to_uint, LSX128WOpnd>; + +def VFTINTRZ_LU_D : LSX_2R<0b0111001010011110011101>, + LSX_2RF_DESC_BASE<"vftintrz.lu.d", fp_to_uint, LSX128DOpnd>; + + +def VREPLGR2VR_B : LSX_2R_1GP<0b0111001010011111000000>, + LSX_2R_REPL_DESC_BASE<"vreplgr2vr.b", v16i8, vsplati8, LSX128BOpnd, GPR32Opnd>; + +def VREPLGR2VR_H : LSX_2R_1GP<0b0111001010011111000001>, + LSX_2R_REPL_DESC_BASE<"vreplgr2vr.h", v8i16, vsplati16, LSX128HOpnd, GPR32Opnd>; + +def VREPLGR2VR_W : LSX_2R_1GP<0b0111001010011111000010>, + LSX_2R_REPL_DESC_BASE<"vreplgr2vr.w", v4i32, vsplati32, LSX128WOpnd, GPR32Opnd>; + +def VREPLGR2VR_D : LSX_2R_1GP<0b0111001010011111000011>, + LSX_2R_REPL_DESC_BASE<"vreplgr2vr.d", v2i64, vsplati64, LSX128DOpnd, GPR64Opnd>; + + +class LSX_2R_FILL_PSEUDO_BASE<ValueType VT, SDPatternOperator OpNode, RegisterClass RCVD, RegisterClass RCVS = RCVD> : + LSXPseudo<(outs RCVD:$vd), (ins RCVS:$fs), + [(set RCVD:$vd, (OpNode RCVS:$fs))]> { + let usesCustomInserter = 1; +} + +class FILL_FW_PSEUDO_DESC : LSX_2R_FILL_PSEUDO_BASE<v4f32, vsplatf32, LSX128W, FGR32>; +class FILL_FD_PSEUDO_DESC : LSX_2R_FILL_PSEUDO_BASE<v2f64, vsplatf64, LSX128D, FGR64>; + +def FILL_FW_PSEUDO : FILL_FW_PSEUDO_DESC; +def FILL_FD_PSEUDO : FILL_FD_PSEUDO_DESC; + 
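+// vreplgr2vr.* splats a general-purpose register into every vector lane; +// splats of a floating-point register instead go through the FILL_FW/FILL_FD +// pseudos above, which usesCustomInserter expands after instruction +// selection.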
+ +def VSRLRI_B : LSX_I3_U<0b0111001010100100001>, + LSX_BIT_3N_DESC_BASE<"vsrlri.b", uimm3, immZExt3, LSX128BOpnd>; + +def VSRLRI_H : LSX_I4_U<0b011100101010010001>, + LSX_BIT_4N_DESC_BASE<"vsrlri.h", uimm4, immZExt4, LSX128HOpnd>; + +def VSRLRI_W : LSX_I5_U<0b01110010101001001>, + LSX_BIT_5N_DESC_BASE<"vsrlri.w", uimm5, immZExt5, LSX128WOpnd>; + +def VSRLRI_D : LSX_I6_U<0b0111001010100101>, + LSX_BIT_6N_DESC_BASE<"vsrlri.d", uimm6, immZExt6, LSX128DOpnd>; + + +def VSRARI_B : LSX_I3_U<0b0111001010101000001>, + LSX_BIT_3N_DESC_BASE<"vsrari.b", uimm3, immZExt3, LSX128BOpnd>; + +def VSRARI_H : LSX_I4_U<0b011100101010100001>, + LSX_BIT_4N_DESC_BASE<"vsrari.h", uimm4, immZExt4, LSX128HOpnd>; + +def VSRARI_W : LSX_I5_U<0b01110010101010001>, + LSX_BIT_5N_DESC_BASE<"vsrari.w", uimm5, immZExt5, LSX128WOpnd>; + +def VSRARI_D : LSX_I6_U<0b0111001010101001>, + LSX_BIT_6N_DESC_BASE<"vsrari.d", uimm6, immZExt6, LSX128DOpnd>; + + +def VINSGR2VR_B : LSX_I4_R_U<0b011100101110101110>, + LSX_INSERT_U4_DESC_BASE<"vinsgr2vr.b", vinsert_v16i8, uimm4, immZExt4Ptr, LSX128BOpnd, GPR32Opnd>; + +def VINSGR2VR_H : LSX_I3_R_U<0b0111001011101011110>, + LSX_INSERT_U3_DESC_BASE<"vinsgr2vr.h", vinsert_v8i16, uimm3, immZExt3Ptr, LSX128HOpnd, GPR32Opnd>; + +def VINSGR2VR_W : LSX_I2_R_U<0b01110010111010111110>, + LSX_INSERT_U2_DESC_BASE<"vinsgr2vr.w", vinsert_v4i32, uimm2, immZExt2Ptr, LSX128WOpnd, GPR32Opnd>; + +def VINSGR2VR_D : LSX_I1_R_U<0b011100101110101111110>, + LSX_INSERT_U1_DESC_BASE<"vinsgr2vr.d", vinsert_v2i64, uimm1, immZExt1Ptr, LSX128DOpnd, GPR64Opnd>; + + +def VPICKVE2GR_B : LSX_ELM_COPY_B<0b011100101110111110>, + LSX_PICK_U4_DESC_BASE<"vpickve2gr.b", vextract_sext_i8, v16i8, uimm4_ptr, immZExt4Ptr, GPR32Opnd, LSX128BOpnd>; + +def VPICKVE2GR_H : LSX_ELM_COPY_H<0b0111001011101111110>, + LSX_PICK_U3_DESC_BASE<"vpickve2gr.h", vextract_sext_i16, v8i16, uimm3_ptr, immZExt3Ptr, GPR32Opnd, LSX128HOpnd>; + +def VPICKVE2GR_W : LSX_ELM_COPY_W<0b01110010111011111110>, + LSX_PICK_U2_DESC_BASE<"vpickve2gr.w", vextract_sext_i32, v4i32, uimm2_ptr, immZExt2Ptr, GPR32Opnd, LSX128WOpnd>; + +def VPICKVE2GR_D : LSX_ELM_COPY_D<0b011100101110111111110>, + LSX_PICK_U1_DESC_BASE<"vpickve2gr.d", vextract_sext_i64, v2i64, uimm1_ptr, immZExt1Ptr, GPR64Opnd, LSX128DOpnd>; + + +def VPICKVE2GR_BU : LSX_ELM_COPY_B<0b011100101111001110>, + LSX_PICK_U4_DESC_BASE<"vpickve2gr.bu", vextract_zext_i8, v16i8, uimm4_ptr, immZExt4Ptr, GPR32Opnd, LSX128BOpnd>; + +def VPICKVE2GR_HU : LSX_ELM_COPY_H<0b0111001011110011110>, + LSX_PICK_U3_DESC_BASE<"vpickve2gr.hu", vextract_zext_i16, v8i16, uimm3_ptr, immZExt3Ptr, GPR32Opnd, LSX128HOpnd>; + +def VPICKVE2GR_WU : LSX_ELM_COPY_W<0b01110010111100111110>, + LSX_PICK_U2_DESC_BASE<"vpickve2gr.wu", vextract_zext_i32, v4i32, uimm2_ptr, immZExt2Ptr, GPR32Opnd, LSX128WOpnd>; + +def VPICKVE2GR_DU : LSX_ELM_COPY_D<0b011100101111001111110>, + LSX_PICK_U1_DESC_BASE<"vpickve2gr.du", int_loongarch_lsx_vpickve2gr_du, v2i64, uimm1, immZExt1, GPR64Opnd, LSX128DOpnd>; + + +def : LSXPat<(vextract_zext_i64 (v2i64 LSX128D:$vj), immZExt1Ptr:$idx), + (VPICKVE2GR_D LSX128D:$vj, immZExt1:$idx)>; +def : LSXPat<(vextract_zext_i64 (v2f64 LSX128D:$vj), immZExt1Ptr:$idx), + (VPICKVE2GR_D LSX128D:$vj, immZExt1:$idx)>; + + +def VREPLVEI_B : LSX_I4_U<0b011100101111011110>, + LSX_ELM_U4_VREPLVE_DESC_BASE<"vreplvei.b", vsplati8_uimm4, LSX128BOpnd>; + +def VREPLVEI_H : LSX_I3_U<0b0111001011110111110>, + LSX_ELM_U3_VREPLVE_DESC_BASE<"vreplvei.h", vsplati16_uimm3, LSX128HOpnd>; + +def VREPLVEI_W : LSX_I2_U<0b01110010111101111110>, + 
LSX_ELM_U2_VREPLVE_DESC_BASE<"vreplvei.w", vsplati32_uimm2, LSX128WOpnd>; + +def VREPLVEI_D : LSX_I1_U<0b011100101111011111110>, + LSX_ELM_U1_VREPLVE_DESC_BASE<"vreplvei.d", vsplati64_uimm1, LSX128DOpnd>; + + +def VSAT_B : LSX_I3_U<0b0111001100100100001>, + LSX_BIT_3N_DESC_BASE<"vsat.b", uimm3, immZExt3, LSX128BOpnd>; + +def VSAT_H : LSX_I4_U<0b011100110010010001>, + LSX_BIT_4N_DESC_BASE<"vsat.h", uimm4, immZExt4, LSX128HOpnd>; + +def VSAT_W : LSX_I5_U<0b01110011001001001>, + LSX_BIT_5N_DESC_BASE<"vsat.w", uimm5, immZExt5, LSX128WOpnd>; + +def VSAT_D : LSX_I6_U<0b0111001100100101>, + LSX_BIT_6N_DESC_BASE<"vsat.d", uimm6, immZExt6, LSX128DOpnd>; + + +def VSAT_BU : LSX_I3_U<0b0111001100101000001>, + LSX_BIT_3N_DESC_BASE<"vsat.bu", uimm3, immZExt3, LSX128BOpnd>; + +def VSAT_HU : LSX_I4_U<0b011100110010100001>, + LSX_BIT_4N_DESC_BASE<"vsat.hu", uimm4, immZExt4, LSX128HOpnd>; + +def VSAT_WU : LSX_I5_U<0b01110011001010001>, + LSX_BIT_5N_DESC_BASE<"vsat.wu", uimm5, immZExt5, LSX128WOpnd>; + +def VSAT_DU : LSX_I6_U<0b0111001100101001>, + LSX_BIT_6N_DESC_BASE<"vsat.du", uimm6, immZExt6, LSX128DOpnd>; + + +def VSLLI_B : LSX_I3_U<0b0111001100101100001>, + LSX_BIT_U3_VREPLVE_DESC_BASE<"vslli.b", shl, vsplati8_uimm3, LSX128BOpnd>; + +def VSLLI_H : LSX_I4_U<0b011100110010110001>, + LSX_BIT_U4_VREPLVE_DESC_BASE<"vslli.h", shl, vsplati16_uimm4, LSX128HOpnd>; + +def VSLLI_W : LSX_I5_U<0b01110011001011001>, + LSX_BIT_U5_VREPLVE_DESC_BASE<"vslli.w", shl, vsplati32_uimm5, LSX128WOpnd>; + +def VSLLI_D : LSX_I6_U<0b0111001100101101>, + LSX_BIT_U6_VREPLVE_DESC_BASE<"vslli.d", shl, vsplati64_uimm6, LSX128DOpnd>; + + +def VSRLI_B : LSX_I3_U<0b0111001100110000001>, + LSX_BIT_U3_VREPLVE_DESC_BASE<"vsrli.b", srl, vsplati8_uimm3, LSX128BOpnd>; + +def VSRLI_H : LSX_I4_U<0b011100110011000001>, + LSX_BIT_U4_VREPLVE_DESC_BASE<"vsrli.h", srl, vsplati16_uimm4, LSX128HOpnd>; + +def VSRLI_W : LSX_I5_U<0b01110011001100001>, + LSX_BIT_U5_VREPLVE_DESC_BASE<"vsrli.w", srl, vsplati32_uimm5, LSX128WOpnd>; + +def VSRLI_D : LSX_I6_U<0b0111001100110001>, + LSX_BIT_U6_VREPLVE_DESC_BASE<"vsrli.d", srl, vsplati64_uimm6, LSX128DOpnd>; + + +def VSRAI_B : LSX_I3_U<0b0111001100110100001>, + LSX_BIT_U3_VREPLVE_DESC_BASE_Intrinsic<"vsrai.b", int_loongarch_lsx_vsrai_b, LSX128BOpnd>; + +def VSRAI_H : LSX_I4_U<0b011100110011010001>, + LSX_BIT_U4_VREPLVE_DESC_BASE_Intrinsic<"vsrai.h", int_loongarch_lsx_vsrai_h, LSX128HOpnd>; + +def VSRAI_W : LSX_I5_U<0b01110011001101001>, + LSX_BIT_U5_VREPLVE_DESC_BASE_Intrinsic<"vsrai.w", int_loongarch_lsx_vsrai_w, LSX128WOpnd>; + +def VSRAI_D : LSX_I6_U<0b0111001100110101>, + LSX_BIT_U6_VREPLVE_DESC_BASE_Intrinsic<"vsrai.d", int_loongarch_lsx_vsrai_d, LSX128DOpnd>; + + +def VSHUF4I_B : LSX_I8_U<0b01110011100100>, + LSX_I8_SHF_DESC_BASE<"vshuf4i.b", LSX128BOpnd>; + +def VSHUF4I_H : LSX_I8_U<0b01110011100101>, + LSX_I8_SHF_DESC_BASE<"vshuf4i.h", LSX128HOpnd>; + +def VSHUF4I_W : LSX_I8_U<0b01110011100110>, + LSX_I8_SHF_DESC_BASE<"vshuf4i.w", LSX128WOpnd>; + +def VSHUF4I_D : LSX_I8_U<0b01110011100111>, + LSX_I8_SHUF_DESC_BASE_D<"vshuf4i.d", int_loongarch_lsx_vshuf4i_d, LSX128DOpnd>; + + +def VROTR_B : LSX_3R<0b01110000111011100>, + LSX_3R_DESC_BASE<"vrotr.b", LoongArchVROR, LSX128BOpnd>; + +def VROTR_H : LSX_3R<0b01110000111011101>, + LSX_3R_DESC_BASE<"vrotr.h", LoongArchVROR, LSX128HOpnd>; + +def VROTR_W : LSX_3R<0b01110000111011110>, + LSX_3R_DESC_BASE<"vrotr.w", LoongArchVROR, LSX128WOpnd>; + +def VROTR_D : LSX_3R<0b01110000111011111>, + LSX_3R_DESC_BASE<"vrotr.d", LoongArchVROR, LSX128DOpnd>; + + +def VMSKLTZ_B 
: LSX_2R<0b0111001010011100010000>, + LSX_2RN_DESC_BASE<"vmskltz.b", LSX128BOpnd>; + +def VMSKLTZ_H : LSX_2R<0b0111001010011100010001>, + LSX_2RN_DESC_BASE<"vmskltz.h", LSX128HOpnd>; + +def VMSKLTZ_W : LSX_2R<0b0111001010011100010010>, + LSX_2RN_DESC_BASE<"vmskltz.w", LSX128WOpnd>; + +def VMSKLTZ_D : LSX_2R<0b0111001010011100010011>, + LSX_2RN_DESC_BASE<"vmskltz.d", LSX128DOpnd>; + + +def VROTRI_B : LSX_I3_U<0b0111001010100000001>, + LSX2_RORI_U3_DESC_BASE<"vrotri.b", uimm3, immZExt3, LSX128BOpnd>; + +def VROTRI_H : LSX_I4_U<0b011100101010000001>, + LSX2_RORI_U4_DESC_BASE<"vrotri.h", uimm4, immZExt4, LSX128HOpnd>; + +def VROTRI_W : LSX_I5_U<0b01110010101000001>, + LSX2_RORI_U5_DESC_BASE<"vrotri.w", uimm5, immZExt5, LSX128WOpnd>; + +def VROTRI_D : LSX_I6_U<0b0111001010100001>, + LSX2_RORI_U6_DESC_BASE<"vrotri.d", uimm6, immZExt6, LSX128DOpnd>; + + +def VSRLNI_B_H : LSX_I4_U<0b011100110100000001>, + LSX_BIND_U4N_DESC_BASE<"vsrlni.b.h", uimm4, immZExt4, LSX128BOpnd>; + +def VSRLNI_H_W : LSX_I5_U<0b01110011010000001>, + LSX_BIND_U5N_DESC_BASE<"vsrlni.h.w", uimm5, immZExt5, LSX128HOpnd>; + +def VSRLNI_W_D : LSX_I6_U<0b0111001101000001>, + LSX_BIND_U6N_DESC_BASE<"vsrlni.w.d", uimm6, immZExt6, LSX128WOpnd>; + +def VSRLNI_D_Q : LSX_I7_U<0b011100110100001>, + LSX_BIND_U7N_DESC_BASE<"vsrlni.d.q", LSX128DOpnd>; + + +def VSRLRNI_B_H : LSX_I4_U<0b011100110100010001>, + LSX_BIND_U4N_DESC_BASE<"vsrlrni.b.h", uimm4, immZExt4, LSX128BOpnd>; + +def VSRLRNI_H_W : LSX_I5_U<0b01110011010001001>, + LSX_BIND_U5N_DESC_BASE<"vsrlrni.h.w", uimm5, immZExt5, LSX128HOpnd>; + +def VSRLRNI_W_D : LSX_I6_U<0b0111001101000101>, + LSX_BIND_U6N_DESC_BASE<"vsrlrni.w.d", uimm6, immZExt6, LSX128WOpnd>; + +def VSRLRNI_D_Q : LSX_I7_U<0b011100110100011>, + LSX_BIND_U7N_DESC_BASE<"vsrlrni.d.q", LSX128DOpnd>; + + +def VSSRLNI_B_H : LSX_I4_U<0b011100110100100001>, + LSX_BIND_U4N_DESC_BASE<"vssrlni.b.h", uimm4, immZExt4, LSX128BOpnd>; + +def VSSRLNI_H_W : LSX_I5_U<0b01110011010010001>, + LSX_BIND_U5N_DESC_BASE<"vssrlni.h.w", uimm5, immZExt5, LSX128HOpnd>; + +def VSSRLNI_W_D : LSX_I6_U<0b0111001101001001>, + LSX_BIND_U6N_DESC_BASE<"vssrlni.w.d", uimm6, immZExt6, LSX128WOpnd>; + +def VSSRLNI_D_Q : LSX_I7_U<0b011100110100101>, + LSX_BIND_U7N_DESC_BASE<"vssrlni.d.q", LSX128DOpnd>; + + +def VSSRLNI_BU_H : LSX_I4_U<0b011100110100110001>, + LSX_BIND_U4N_DESC_BASE<"vssrlni.bu.h", uimm4, immZExt4, LSX128BOpnd> ; + +def VSSRLNI_HU_W : LSX_I5_U<0b01110011010011001>, + LSX_BIND_U5N_DESC_BASE<"vssrlni.hu.w", uimm5, immZExt5, LSX128HOpnd>; + +def VSSRLNI_WU_D : LSX_I6_U<0b0111001101001101>, + LSX_BIND_U6N_DESC_BASE<"vssrlni.wu.d", uimm6, immZExt6, LSX128WOpnd>; + +def VSSRLNI_DU_Q : LSX_I7_U<0b011100110100111>, + LSX_BIND_U7N_DESC_BASE<"vssrlni.du.q", LSX128DOpnd>; + + +def VSSRLRNI_BU_H : LSX_I4_U<0b011100110101010001>, + LSX_BIND_U4N_DESC_BASE<"vssrlrni.bu.h", uimm4, immZExt4, LSX128BOpnd>; + +def VSSRLRNI_HU_W : LSX_I5_U<0b01110011010101001>, + LSX_BIND_U5N_DESC_BASE<"vssrlrni.hu.w", uimm5, immZExt5, LSX128HOpnd>; + +def VSSRLRNI_WU_D : LSX_I6_U<0b0111001101010101>, + LSX_BIND_U6N_DESC_BASE<"vssrlrni.wu.d", uimm6, immZExt6, LSX128WOpnd>; + +def VSSRLRNI_DU_Q : LSX_I7_U<0b011100110101011>, + LSX_BIND_U7N_DESC_BASE<"vssrlrni.du.q", LSX128DOpnd>; + + +def VSRARNI_B_H : LSX_I4_U<0b011100110101110001>, + LSX_BIND_U4N_DESC_BASE<"vsrarni.b.h", uimm4, immZExt4, LSX128BOpnd>; + +def VSRARNI_H_W : LSX_I5_U<0b01110011010111001>, + LSX_BIND_U5N_DESC_BASE<"vsrarni.h.w", uimm5, immZExt5, LSX128HOpnd>; + +def VSRARNI_W_D : LSX_I6_U<0b0111001101011101>, + 
LSX_BIND_U6N_DESC_BASE<"vsrarni.w.d", uimm6, immZExt6, LSX128WOpnd>; + +def VSRARNI_D_Q : LSX_I7_U<0b011100110101111>, + LSX_BIND_U7N_DESC_BASE<"vsrarni.d.q", LSX128DOpnd>; + + +def VSSRANI_B_H : LSX_I4_U<0b011100110110000001>, + LSX_BIND_U4N_DESC_BASE<"vssrani.b.h", uimm4, immZExt4, LSX128BOpnd>; + +def VSSRANI_H_W : LSX_I5_U<0b01110011011000001>, + LSX_BIND_U5N_DESC_BASE<"vssrani.h.w", uimm5, immZExt5, LSX128HOpnd>; + +def VSSRANI_W_D : LSX_I6_U<0b0111001101100001>, + LSX_BIND_U6N_DESC_BASE<"vssrani.w.d", uimm6, immZExt6, LSX128WOpnd>; + +def VSSRANI_D_Q : LSX_I7_U<0b011100110110001>, + LSX_BIND_U7N_DESC_BASE<"vssrani.d.q", LSX128DOpnd>; + + +def VSSRANI_BU_H : LSX_I4_U<0b011100110110010001>, + LSX_BIND_U4N_DESC_BASE<"vssrani.bu.h", uimm4, immZExt4, LSX128BOpnd>; + +def VSSRANI_HU_W : LSX_I5_U<0b01110011011001001>, + LSX_BIND_U5N_DESC_BASE<"vssrani.hu.w", uimm5, immZExt5, LSX128HOpnd>; + +def VSSRANI_WU_D : LSX_I6_U<0b0111001101100101>, + LSX_BIND_U6N_DESC_BASE<"vssrani.wu.d", uimm6, immZExt6, LSX128WOpnd>; + +def VSSRANI_DU_Q : LSX_I7_U<0b011100110110011>, + LSX_BIND_U7N_DESC_BASE<"vssrani.du.q", LSX128DOpnd>; + + +def VSSRARNI_B_H : LSX_I4_U<0b011100110110100001>, + LSX_BIND_U4N_DESC_BASE<"vssrarni.b.h", uimm4, immZExt4, LSX128BOpnd>; + +def VSSRARNI_H_W : LSX_I5_U<0b01110011011010001>, + LSX_BIND_U5N_DESC_BASE<"vssrarni.h.w", uimm5, immZExt5, LSX128HOpnd>; + +def VSSRARNI_W_D : LSX_I6_U<0b0111001101101001>, + LSX_BIND_U6N_DESC_BASE<"vssrarni.w.d", uimm6, immZExt6, LSX128WOpnd>; + +def VSSRARNI_D_Q : LSX_I7_U<0b011100110110101>, + LSX_BIND_U7N_DESC_BASE<"vssrarni.d.q", LSX128DOpnd>; + + +def VSSRARNI_BU_H : LSX_I4_U<0b011100110110110001>, + LSX_BIND_U4N_DESC_BASE<"vssrarni.bu.h", uimm4, immZExt4, LSX128BOpnd>; + +def VSSRARNI_HU_W : LSX_I5_U<0b01110011011011001>, + LSX_BIND_U5N_DESC_BASE<"vssrarni.hu.w", uimm5, immZExt5, LSX128HOpnd>; + +def VSSRARNI_WU_D : LSX_I6_U<0b0111001101101101>, + LSX_BIND_U6N_DESC_BASE<"vssrarni.wu.d", uimm6, immZExt6, LSX128WOpnd>; + +def VSSRARNI_DU_Q : LSX_I7_U<0b011100110110111>, + LSX_BIND_U7N_DESC_BASE<"vssrarni.du.q", LSX128DOpnd>; + + + +def VLD : LSX_I12_S<0b0010110000>, + LD_DESC_BASE<"vld", load, v16i8, LSX128BOpnd, mem>; + +def VST : LSX_I12_S<0b0010110001>, + ST_DESC_BASE<"vst", store, v16i8, LSX128BOpnd, mem_simm12>; + + +def VSETEQZ_V : LSX_SET<0b0111001010011100100110>, + LSX_SET_DESC_BASE<"vseteqz.v", LSX128BOpnd>; + +def VSETNEZ_V : LSX_SET<0b0111001010011100100111>, + LSX_SET_DESC_BASE<"vsetnez.v", LSX128BOpnd>; + + +def VSETANYEQZ_B : LSX_SET<0b0111001010011100101000>, + LSX_SET_DESC_BASE<"vsetanyeqz.b", LSX128BOpnd>; + +def VSETANYEQZ_H : LSX_SET<0b0111001010011100101001>, + LSX_SET_DESC_BASE<"vsetanyeqz.h", LSX128HOpnd>; + +def VSETANYEQZ_W : LSX_SET<0b0111001010011100101010>, + LSX_SET_DESC_BASE<"vsetanyeqz.w", LSX128WOpnd>; + +def VSETANYEQZ_D : LSX_SET<0b0111001010011100101011>, + LSX_SET_DESC_BASE<"vsetanyeqz.d", LSX128DOpnd>; + + +def VSETALLNEZ_B : LSX_SET<0b0111001010011100101100>, + LSX_SET_DESC_BASE<"vsetallnez.b", LSX128BOpnd>; + +def VSETALLNEZ_H : LSX_SET<0b0111001010011100101101>, + LSX_SET_DESC_BASE<"vsetallnez.h", LSX128HOpnd>; + +def VSETALLNEZ_W : LSX_SET<0b0111001010011100101110>, + LSX_SET_DESC_BASE<"vsetallnez.w", LSX128WOpnd>; + +def VSETALLNEZ_D : LSX_SET<0b0111001010011100101111>, + LSX_SET_DESC_BASE<"vsetallnez.d", LSX128DOpnd>; + +class LSX_CBRANCH_PSEUDO_DESC_BASE<SDPatternOperator OpNode, ValueType TyNode, RegisterClass RCVS> : + LoongArchPseudo<(outs GPR32Opnd:$rd), + (ins RCVS:$vj), + [(set GPR32Opnd:$rd, (OpNode (TyNode RCVS:$vj)))]> { + bit usesCustomInserter = 1; +} + 
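+// The vset* instructions above write a condition-flag register (FCFROpnd $cd +// in LSX_SET_DESC_BASE) rather than a GPR. The SNZ_*/SZ_* pseudos below give +// instruction selection a GPR32-producing node for the vector any/all-zero +// tests; usesCustomInserter lets the target expand each of them after +// selection.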
+def SNZ_B_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE<LoongArchVAllNonZero, v16i8, LSX128B>; +def SNZ_H_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE<LoongArchVAllNonZero, v8i16, LSX128H>; +def SNZ_W_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE<LoongArchVAllNonZero, v4i32, LSX128W>; +def SNZ_D_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE<LoongArchVAllNonZero, v2i64, LSX128D>; +def SNZ_V_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE<LoongArchVAnyNonZero, v16i8, LSX128B>; + +def SZ_B_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE<LoongArchVAllZero, v16i8, LSX128B>; +def SZ_H_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE<LoongArchVAllZero, v8i16, LSX128H>; +def SZ_W_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE<LoongArchVAllZero, v4i32, LSX128W>; +def SZ_D_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE<LoongArchVAllZero, v2i64, LSX128D>; +def SZ_V_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE<LoongArchVAnyZero, v16i8, LSX128B>; + + +def VFMADD_S : LSX_VR4MUL<0b000010010001>, + LSX_4RF<"vfmadd.s", int_loongarch_lsx_vfmadd_s, LSX128WOpnd>; + +def VFMADD_D : LSX_VR4MUL<0b000010010010>, + LSX_4RF<"vfmadd.d", int_loongarch_lsx_vfmadd_d, LSX128DOpnd>; + +def VFMSUB_S : LSX_VR4MUL<0b000010010101>, + LSX_4RF<"vfmsub.s", int_loongarch_lsx_vfmsub_s, LSX128WOpnd>; + +def VFMSUB_D : LSX_VR4MUL<0b000010010110>, + LSX_4RF<"vfmsub.d", int_loongarch_lsx_vfmsub_d, LSX128DOpnd>; + +def VFNMADD_S : LSX_VR4MUL<0b000010011001>, + LSX_4RF<"vfnmadd.s", int_loongarch_lsx_vfnmadd_s, LSX128WOpnd>; + +def VFNMADD_D : LSX_VR4MUL<0b000010011010>, + LSX_4RF<"vfnmadd.d", int_loongarch_lsx_vfnmadd_d, LSX128DOpnd>; + +def VFNMSUB_S : LSX_VR4MUL<0b000010011101>, + LSX_4RF<"vfnmsub.s", int_loongarch_lsx_vfnmsub_s, LSX128WOpnd>; + +def VFNMSUB_D : LSX_VR4MUL<0b000010011110>, + LSX_4RF<"vfnmsub.d", int_loongarch_lsx_vfnmsub_d, LSX128DOpnd>; + + +// vfmadd: vj * vk + va +def : LSXPat<(fma v2f64:$vj, v2f64:$vk, v2f64:$va), + (VFMADD_D $vj, $vk, $va)>; + +def : LSXPat<(fma v4f32:$vj, v4f32:$vk, v4f32:$va), + (VFMADD_S $vj, $vk, $va)>; + + +// vfmsub: vj * vk - va +def : LSXPat<(fma v2f64:$vj, v2f64:$vk, (fneg v2f64:$va)), + (VFMSUB_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; + +def : LSXPat<(fma v4f32:$vj, v4f32:$vk, (fneg v4f32:$va)), + (VFMSUB_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; + + +// vfnmadd: -(vj * vk + va) +def : LSXPat<(fma (fneg v2f64:$vj), v2f64:$vk, (fneg v2f64:$va)), + (VFNMADD_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; + +def : LSXPat<(fma (fneg v4f32:$vj), v4f32:$vk, (fneg v4f32:$va)), + (VFNMADD_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; + +// vfnmsub: -(vj * vk - va) +def : LSXPat<(fma (fneg v2f64:$vj), v2f64:$vk, v2f64:$va), + (VFNMSUB_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; + +def : LSXPat<(fma (fneg v4f32:$vj), v4f32:$vk, v4f32:$va), + (VFNMSUB_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; + + +def VFCMP_CAF_S : LSX_VFCMP<0b000011000101>, + LSX_VFCMP_Reg3<"vfcmp.caf.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_caf_s>{ + bits<5> cond=0x0; + } + +def VFCMP_CAF_D : LSX_VFCMP<0b000011000110>, + LSX_VFCMP_Reg3<"vfcmp.caf.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_caf_d>{ + bits<5> cond=0x0; + } + + +def VFCMP_COR_S : LSX_VFCMP<0b000011000101>, + LSX_VFCMP_Reg3<"vfcmp.cor.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetord_v4f32>{ + bits<5> cond=0x14; + } + +def VFCMP_COR_D : LSX_VFCMP<0b000011000110>, + LSX_VFCMP_Reg3<"vfcmp.cor.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetord_v2f64>{ + bits<5> cond=0x14; + } + + +def VFCMP_CUN_S : LSX_VFCMP<0b000011000101>, + LSX_VFCMP_Reg3<"vfcmp.cun.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetun_v4f32>{ + bits<5> cond=0x8; + } + +def VFCMP_CUN_D : LSX_VFCMP<0b000011000110>, + LSX_VFCMP_Reg3<"vfcmp.cun.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetun_v2f64>{ + bits<5> cond=0x8; + } + + +def VFCMP_CUNE_S : LSX_VFCMP<0b000011000101>, + LSX_VFCMP_Reg3<"vfcmp.cune.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetune_v4f32>{ + bits<5> cond=0x18; + } + +def VFCMP_CUNE_D : LSX_VFCMP<0b000011000110>, 
+ LSX_VFCMP_Reg3<"vfcmp.cune.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetune_v2f64>{ + bits<5> cond=0x18; + } + + +def VFCMP_CUEQ_S : LSX_VFCMP<0b000011000101>, + LSX_VFCMP_Reg3<"vfcmp.cueq.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetueq_v4f32>{ + bits<5> cond=0xc; + } + +def VFCMP_CUEQ_D : LSX_VFCMP<0b000011000110>, + LSX_VFCMP_Reg3<"vfcmp.cueq.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetueq_v2f64>{ + bits<5> cond=0xc; + } + +def VFCMP_CEQ_S : LSX_VFCMP<0b000011000101>, + LSX_VFCMP_Reg3<"vfcmp.ceq.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetoeq_v4f32>{ + bits<5> cond=0x4; + } + +def VFCMP_CEQ_D : LSX_VFCMP<0b000011000110>, + LSX_VFCMP_Reg3<"vfcmp.ceq.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetoeq_v2f64>{ + bits<5> cond=0x4; + } + + +def VFCMP_CNE_S : LSX_VFCMP<0b000011000101>, + LSX_VFCMP_Reg3<"vfcmp.cne.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetone_v4f32>{ + bits<5> cond=0x10; + } + +def VFCMP_CNE_D : LSX_VFCMP<0b000011000110>, + LSX_VFCMP_Reg3<"vfcmp.cne.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetone_v2f64>{ + bits<5> cond=0x10; + } + + +def VFCMP_CLT_S : LSX_VFCMP<0b000011000101>, + LSX_VFCMP_Reg3<"vfcmp.clt.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetolt_v4f32>{ + bits<5> cond=0x2; + } + +def VFCMP_CLT_D : LSX_VFCMP<0b000011000110>, + LSX_VFCMP_Reg3<"vfcmp.clt.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetolt_v2f64>{ + bits<5> cond=0x2; + } + + +def VFCMP_CULT_S : LSX_VFCMP<0b000011000101>, + LSX_VFCMP_Reg3<"vfcmp.cult.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetult_v4f32>{ + bits<5> cond=0xa; + } + +def VFCMP_CULT_D : LSX_VFCMP<0b000011000110>, + LSX_VFCMP_Reg3<"vfcmp.cult.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetult_v2f64>{ + bits<5> cond=0xa; + } + + +def VFCMP_CLE_S : LSX_VFCMP<0b000011000101>, + LSX_VFCMP_Reg3<"vfcmp.cle.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetole_v4f32>{ + bits<5> cond=0x6; + } + +def VFCMP_CLE_D : LSX_VFCMP<0b000011000110>, + LSX_VFCMP_Reg3<"vfcmp.cle.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetole_v2f64>{ + bits<5> cond=0x6; + } + + +def VFCMP_CULE_S : LSX_VFCMP<0b000011000101>, + LSX_VFCMP_Reg3<"vfcmp.cule.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetule_v4f32>{ + bits<5> cond=0xe; + } + +def VFCMP_CULE_D : LSX_VFCMP<0b000011000110>, + LSX_VFCMP_Reg3<"vfcmp.cule.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetule_v2f64>{ + bits<5> cond=0xe; + } + + +def VFCMP_SAF_S : LSX_VFCMP<0b000011000101>, + LSX_VFCMP_Reg3<"vfcmp.saf.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_saf_s>{ + bits<5> cond=0x1; + } + +def VFCMP_SAF_D : LSX_VFCMP<0b000011000110>, + LSX_VFCMP_Reg3<"vfcmp.saf.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_saf_d>{ + bits<5> cond=0x1; + } + +def VFCMP_SOR_S : LSX_VFCMP<0b000011000101>, + LSX_VFCMP_Reg3<"vfcmp.sor.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_sor_s>{ + bits<5> cond=0x15; + } + +def VFCMP_SOR_D : LSX_VFCMP<0b000011000110>, + LSX_VFCMP_Reg3<"vfcmp.sor.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_sor_d>{ + bits<5> cond=0x15; + } + +def VFCMP_SUN_S : LSX_VFCMP<0b000011000101>, + LSX_VFCMP_Reg3<"vfcmp.sun.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_sun_s>{ + bits<5> cond=0x9; + } + +def VFCMP_SUN_D : LSX_VFCMP<0b000011000110>, + LSX_VFCMP_Reg3<"vfcmp.sun.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_sun_d>{ + bits<5> cond=0x9; + } + +def VFCMP_SUNE_S : LSX_VFCMP<0b000011000101>, + LSX_VFCMP_Reg3<"vfcmp.sune.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, 
int_loongarch_lsx_vfcmp_sune_s>{ + bits<5> cond=0x19; + } + +def VFCMP_SUNE_D : LSX_VFCMP<0b000011000110>, + LSX_VFCMP_Reg3<"vfcmp.sune.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_sune_d>{ + bits<5> cond=0x19; + } + +def VFCMP_SUEQ_S : LSX_VFCMP<0b000011000101>, + LSX_VFCMP_Reg3<"vfcmp.sueq.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_sueq_s>{ + bits<5> cond=0xd; + } + +def VFCMP_SUEQ_D : LSX_VFCMP<0b000011000110>, + LSX_VFCMP_Reg3<"vfcmp.sueq.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_sueq_d>{ + bits<5> cond=0xd; + } + +def VFCMP_SEQ_S : LSX_VFCMP<0b000011000101>, + LSX_VFCMP_Reg3<"vfcmp.seq.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_seq_s>{ + bits<5> cond=0x5; + } + +def VFCMP_SEQ_D : LSX_VFCMP<0b000011000110>, + LSX_VFCMP_Reg3<"vfcmp.seq.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_seq_d>{ + bits<5> cond=0x5; + } + +def VFCMP_SNE_S : LSX_VFCMP<0b000011000101>, + LSX_VFCMP_Reg3<"vfcmp.sne.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_sne_s>{ + bits<5> cond=0x11; + } + +def VFCMP_SNE_D : LSX_VFCMP<0b000011000110>, + LSX_VFCMP_Reg3<"vfcmp.sne.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_sne_d>{ + bits<5> cond=0x11; + } + +def VFCMP_SLT_S : LSX_VFCMP<0b000011000101>, + LSX_VFCMP_Reg3<"vfcmp.slt.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_slt_s>{ + bits<5> cond=0x3; + } + +def VFCMP_SLT_D : LSX_VFCMP<0b000011000110>, + LSX_VFCMP_Reg3<"vfcmp.slt.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_slt_d>{ + bits<5> cond=0x3; + } + +def VFCMP_SULT_S : LSX_VFCMP<0b000011000101>, + LSX_VFCMP_Reg3<"vfcmp.sult.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_sult_s>{ + bits<5> cond=0xb; + } + +def VFCMP_SULT_D : LSX_VFCMP<0b000011000110>, + LSX_VFCMP_Reg3<"vfcmp.sult.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_sult_d>{ + bits<5> cond=0xb; + } + +def VFCMP_SLE_S : LSX_VFCMP<0b000011000101>, + LSX_VFCMP_Reg3<"vfcmp.sle.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_sle_s>{ + bits<5> cond=0x7; + } + +def VFCMP_SLE_D : LSX_VFCMP<0b000011000110>, + LSX_VFCMP_Reg3<"vfcmp.sle.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_sle_d>{ + bits<5> cond=0x7; + } + +def VFCMP_SULE_S : LSX_VFCMP<0b000011000101>, + LSX_VFCMP_Reg3<"vfcmp.sule.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_sule_s>{ + bits<5> cond=0xf; + } + +def VFCMP_SULE_D : LSX_VFCMP<0b000011000110>, + LSX_VFCMP_Reg3<"vfcmp.sule.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_sule_d>{ + bits<5> cond=0xf; + } + +def VBITSEL_V : LSX_VR4MUL<0b000011010001>, + LSX_VMul_Reg4<"vbitsel.v", LSX128BOpnd, LSX128BOpnd, LSX128BOpnd, LSX128BOpnd, int_loongarch_lsx_vbitsel_v>; + +def VSHUF_B : LSX_VR4MUL<0b000011010101>, + LSX_3R_4R_VSHF_DESC_BASE<"vshuf.b", LSX128BOpnd>; + + +class LSX_BSEL_PSEUDO_BASE<RegisterOperand RO, ValueType Ty> : + LSXPseudo<(outs RO:$vd), (ins RO:$vd_in, RO:$vs, RO:$vt), + [(set RO:$vd, (Ty (vselect RO:$vd_in, RO:$vt, RO:$vs)))]>, + PseudoInstExpansion<(VBITSEL_V LSX128BOpnd:$vd, LSX128BOpnd:$vs, + LSX128BOpnd:$vt, LSX128BOpnd:$vd_in)> { + let Constraints = "$vd_in = $vd"; +} + +def BSEL_B_PSEUDO : LSX_BSEL_PSEUDO_BASE<LSX128BOpnd, v16i8>; +def BSEL_H_PSEUDO : LSX_BSEL_PSEUDO_BASE<LSX128HOpnd, v8i16>; +def BSEL_W_PSEUDO : LSX_BSEL_PSEUDO_BASE<LSX128WOpnd, v4i32>; +def BSEL_D_PSEUDO : LSX_BSEL_PSEUDO_BASE<LSX128DOpnd, v2i64>; +def BSEL_FW_PSEUDO : LSX_BSEL_PSEUDO_BASE<LSX128WOpnd, v4f32>; +def BSEL_FD_PSEUDO : LSX_BSEL_PSEUDO_BASE<LSX128DOpnd, v2f64>; + + +class LSX_LD_DESC_BASE<string instr_asm, SDPatternOperator OpNode, ValueType TyNode, RegisterOperand ROVD, Operand MemOpnd = mem_simm12, ComplexPattern Addr = addrimm12> 
+class LSX_LD_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+                       ValueType TyNode, RegisterOperand ROVD,
+                       Operand MemOpnd = mem_simm12,
+                       ComplexPattern Addr = addrimm12> {
+  dag OutOperandList = (outs ROVD:$vd);
+  dag InOperandList = (ins MemOpnd:$addr);
+  string AsmString = !strconcat(instr_asm, "\t$vd, $addr");
+  list<dag> Pattern = [(set ROVD:$vd, (OpNode (TyNode (load Addr:$addr))))];
+  string DecoderMethod = "DecodeLSX128memlsl";
+}
+
+def VLDREPL_B : LSX_SI12_S<0b0011000010>,
+                LSX_LD_DESC_BASE<"vldrepl.b", vldrepl_v16i8, v16i8, LSX128BOpnd>;
+
+def VLDREPL_H : LSX_SI11_S<0b00110000010>,
+                LSX_LD_DESC_BASE<"vldrepl.h", vldrepl_v8i16, v8i16, LSX128HOpnd, mem_simm11_lsl1, addrimm11lsl1>;
+
+def VLDREPL_W : LSX_SI10_S<0b001100000010>,
+                LSX_LD_DESC_BASE<"vldrepl.w", vldrepl_v4i32, v4i32, LSX128WOpnd, mem_simm10_lsl2, addrimm10lsl2>;
+
+def VLDREPL_D : LSX_SI9_S<0b0011000000010>,
+                LSX_LD_DESC_BASE<"vldrepl.d", vldrepl_v2i64, v2i64, LSX128DOpnd, mem_simm9_lsl3, addrimm9lsl3>;
+
+
+def VSTELM_B : LSX_SI8_idx4<0b0011000110>,
+               LSX_I8_U4_DESC_BASE<"vstelm.b", int_loongarch_lsx_vstelm_b, simm8_32, immSExt8, LSX128BOpnd>;
+
+def VSTELM_H : LSX_SI8_idx3<0b00110001010>,
+               LSX_I8_U3_DESC_BASE<"vstelm.h", int_loongarch_lsx_vstelm_h, immSExt8_1_O, immSExt8, LSX128HOpnd>;
+
+def VSTELM_W : LSX_SI8_idx2<0b001100010010>,
+               LSX_I8_U2_DESC_BASE<"vstelm.w", int_loongarch_lsx_vstelm_w, immSExt8_2_O, immSExt8, LSX128WOpnd>;
+
+def VSTELM_D : LSX_SI8_idx1<0b0011000100010>,
+               LSX_I8_U1_DESC_BASE<"vstelm.d", int_loongarch_lsx_vstelm_d, immSExt8_3_O, immSExt8, LSX128DOpnd>;
+
+
+let mayLoad = 1, canFoldAsLoad = 1 in {
+  def VLDX : LSX_3R_2GP<0b00111000010000000>,
+             LSX_LDX_LA<"vldx", int_loongarch_lsx_vldx, GPR64Opnd, LSX128BOpnd>;
+}
+
+let mayStore = 1 in {
+  def VSTX : LSX_3R_2GP<0b00111000010001000>,
+             LSX_SDX_LA<"vstx", int_loongarch_lsx_vstx, GPR64Opnd, LSX128BOpnd>;
+}
+
+
+def VADDWEV_H_B : LSX_3R<0b01110000000111100>,
+                  LSX_3R_DESC_BASE<"vaddwev.h.b", int_loongarch_lsx_vaddwev_h_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>;
+
+def VADDWEV_W_H : LSX_3R<0b01110000000111101>,
+                  LSX_3R_DESC_BASE<"vaddwev.w.h", int_loongarch_lsx_vaddwev_w_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>;
+
+def VADDWEV_D_W : LSX_3R<0b01110000000111110>,
+                  LSX_3R_DESC_BASE<"vaddwev.d.w", int_loongarch_lsx_vaddwev_d_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>;
+
+def VADDWEV_Q_D : LSX_3R<0b01110000000111111>,
+                  LSX_3R_DESC_BASE<"vaddwev.q.d", int_loongarch_lsx_vaddwev_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>;
+
+
+def VSUBWEV_H_B : LSX_3R<0b01110000001000000>,
+                  LSX_3R_DESC_BASE<"vsubwev.h.b", int_loongarch_lsx_vsubwev_h_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>;
+
+def VSUBWEV_W_H : LSX_3R<0b01110000001000001>,
+                  LSX_3R_DESC_BASE<"vsubwev.w.h", int_loongarch_lsx_vsubwev_w_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>;
+
+def VSUBWEV_D_W : LSX_3R<0b01110000001000010>,
+                  LSX_3R_DESC_BASE<"vsubwev.d.w", int_loongarch_lsx_vsubwev_d_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>;
+
+def VSUBWEV_Q_D : LSX_3R<0b01110000001000011>,
+                  LSX_3R_DESC_BASE<"vsubwev.q.d", int_loongarch_lsx_vsubwev_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>;
+
+
+def VADDWOD_H_B : LSX_3R<0b01110000001000100>,
+                  LSX_3R_DESC_BASE<"vaddwod.h.b", int_loongarch_lsx_vaddwod_h_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>;
+
+def VADDWOD_W_H : LSX_3R<0b01110000001000101>,
+                  LSX_3R_DESC_BASE<"vaddwod.w.h", int_loongarch_lsx_vaddwod_w_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>;
+
+def VADDWOD_D_W : LSX_3R<0b01110000001000110>,
+                  LSX_3R_DESC_BASE<"vaddwod.d.w", int_loongarch_lsx_vaddwod_d_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>;
+
+def VADDWOD_Q_D : LSX_3R<0b01110000001000111>,
+                  LSX_3R_DESC_BASE<"vaddwod.q.d", int_loongarch_lsx_vaddwod_q_d, LSX128DOpnd,
LSX128DOpnd, LSX128DOpnd>; + + +def VSUBWOD_H_B : LSX_3R<0b01110000001001000>, + LSX_3R_DESC_BASE<"vsubwod.h.b", int_loongarch_lsx_vsubwod_h_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; + +def VSUBWOD_W_H : LSX_3R<0b01110000001001001>, + LSX_3R_DESC_BASE<"vsubwod.w.h", int_loongarch_lsx_vsubwod_w_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VSUBWOD_D_W : LSX_3R<0b01110000001001010>, + LSX_3R_DESC_BASE<"vsubwod.d.w", int_loongarch_lsx_vsubwod_d_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; + +def VSUBWOD_Q_D : LSX_3R<0b01110000001001011>, + LSX_3R_DESC_BASE<"vsubwod.q.d", int_loongarch_lsx_vsubwod_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VADDWEV_H_BU : LSX_3R<0b01110000001011100>, + LSX_3R_DESC_BASE<"vaddwev.h.bu", int_loongarch_lsx_vaddwev_h_bu, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; + +def VADDWEV_W_HU : LSX_3R<0b01110000001011101>, + LSX_3R_DESC_BASE<"vaddwev.w.hu", int_loongarch_lsx_vaddwev_w_hu, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VADDWEV_D_WU : LSX_3R<0b01110000001011110>, + LSX_3R_DESC_BASE<"vaddwev.d.wu", int_loongarch_lsx_vaddwev_d_wu, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; + +def VADDWEV_Q_DU : LSX_3R<0b01110000001011111>, + LSX_3R_DESC_BASE<"vaddwev.q.du", int_loongarch_lsx_vaddwev_q_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VSUBWEV_H_BU : LSX_3R<0b01110000001100000>, + LSX_3R_DESC_BASE<"vsubwev.h.bu", int_loongarch_lsx_vsubwev_h_bu, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; + +def VSUBWEV_W_HU : LSX_3R<0b01110000001100001>, + LSX_3R_DESC_BASE<"vsubwev.w.hu", int_loongarch_lsx_vsubwev_w_hu, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VSUBWEV_D_WU : LSX_3R<0b01110000001100010>, + LSX_3R_DESC_BASE<"vsubwev.d.wu", int_loongarch_lsx_vsubwev_d_wu, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; + +def VSUBWEV_Q_DU : LSX_3R<0b01110000001100011>, + LSX_3R_DESC_BASE<"vsubwev.q.du", int_loongarch_lsx_vsubwev_q_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VADDWOD_H_BU : LSX_3R<0b01110000001100100>, + LSX_3R_DESC_BASE<"vaddwod.h.bu", int_loongarch_lsx_vaddwod_h_bu, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; + +def VADDWOD_W_HU : LSX_3R<0b01110000001100101>, + LSX_3R_DESC_BASE<"vaddwod.w.hu", int_loongarch_lsx_vaddwod_w_hu, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VADDWOD_D_WU : LSX_3R<0b01110000001100110>, + LSX_3R_DESC_BASE<"vaddwod.d.wu", int_loongarch_lsx_vaddwod_d_wu, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; + +def VADDWOD_Q_DU : LSX_3R<0b01110000001100111>, + LSX_3R_DESC_BASE<"vaddwod.q.du", int_loongarch_lsx_vaddwod_q_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VSUBWOD_H_BU : LSX_3R<0b01110000001101000>, + LSX_3R_DESC_BASE<"vsubwod.h.bu", int_loongarch_lsx_vsubwod_h_bu, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; + +def VSUBWOD_W_HU : LSX_3R<0b01110000001101001>, + LSX_3R_DESC_BASE<"vsubwod.w.hu", int_loongarch_lsx_vsubwod_w_hu, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VSUBWOD_D_WU : LSX_3R<0b01110000001101010>, + LSX_3R_DESC_BASE<"vsubwod.d.wu", int_loongarch_lsx_vsubwod_d_wu, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; + +def VSUBWOD_Q_DU : LSX_3R<0b01110000001101011>, + LSX_3R_DESC_BASE<"vsubwod.q.du", int_loongarch_lsx_vsubwod_q_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VADDWEV_H_BU_B : LSX_3R<0b01110000001111100>, + LSX_3R_DESC_BASE<"vaddwev.h.bu.b", int_loongarch_lsx_vaddwev_h_bu_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; + +def VADDWEV_W_HU_H : LSX_3R<0b01110000001111101>, + LSX_3R_DESC_BASE<"vaddwev.w.hu.h", int_loongarch_lsx_vaddwev_w_hu_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VADDWEV_D_WU_W : 
LSX_3R<0b01110000001111110>, + LSX_3R_DESC_BASE<"vaddwev.d.wu.w", int_loongarch_lsx_vaddwev_d_wu_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; + +def VADDWEV_Q_DU_D : LSX_3R<0b01110000001111111>, + LSX_3R_DESC_BASE<"vaddwev.q.du.d", int_loongarch_lsx_vaddwev_q_du_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VADDWOD_H_BU_B : LSX_3R<0b01110000010000000>, + LSX_3R_DESC_BASE<"vaddwod.h.bu.b", int_loongarch_lsx_vaddwod_h_bu_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; + +def VADDWOD_W_HU_H : LSX_3R<0b01110000010000001>, + LSX_3R_DESC_BASE<"vaddwod.w.hu.h", int_loongarch_lsx_vaddwod_w_hu_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VADDWOD_D_WU_W : LSX_3R<0b01110000010000010>, + LSX_3R_DESC_BASE<"vaddwod.d.wu.w", int_loongarch_lsx_vaddwod_d_wu_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; + +def VADDWOD_Q_DU_D : LSX_3R<0b01110000010000011>, + LSX_3R_DESC_BASE<"vaddwod.q.du.d", int_loongarch_lsx_vaddwod_q_du_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VHADDW_Q_D : LSX_3R<0b01110000010101011>, + LSX_3R_DESC_BASE<"vhaddw.q.d", int_loongarch_lsx_vhaddw_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + +def VHSUBW_Q_D : LSX_3R<0b01110000010101111>, + LSX_3R_DESC_BASE<"vhsubw.q.d", int_loongarch_lsx_vhsubw_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VHADDW_QU_DU : LSX_3R<0b01110000010110011>, + LSX_3R_DESC_BASE<"vhaddw.qu.du", int_loongarch_lsx_vhaddw_qu_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + +def VHSUBW_QU_DU : LSX_3R<0b01110000010110111>, + LSX_3R_DESC_BASE<"vhsubw.qu.du", int_loongarch_lsx_vhsubw_qu_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VMUH_B : LSX_3R<0b01110000100001100>, + LSX_3R_DESC_BASE<"vmuh.b", int_loongarch_lsx_vmuh_b, LSX128BOpnd, LSX128BOpnd, LSX128BOpnd>; + +def VMUH_H : LSX_3R<0b01110000100001101>, + LSX_3R_DESC_BASE<"vmuh.h", int_loongarch_lsx_vmuh_h, LSX128HOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VMUH_W : LSX_3R<0b01110000100001110>, + LSX_3R_DESC_BASE<"vmuh.w", int_loongarch_lsx_vmuh_w, LSX128WOpnd, LSX128WOpnd, LSX128WOpnd> ; + +def VMUH_D : LSX_3R<0b01110000100001111>, + LSX_3R_DESC_BASE<"vmuh.d", int_loongarch_lsx_vmuh_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VMUH_BU : LSX_3R<0b01110000100010000>, + LSX_3R_DESC_BASE<"vmuh.bu", int_loongarch_lsx_vmuh_bu, LSX128BOpnd, LSX128BOpnd, LSX128BOpnd>; + +def VMUH_HU : LSX_3R<0b01110000100010001>, + LSX_3R_DESC_BASE<"vmuh.hu", int_loongarch_lsx_vmuh_hu, LSX128HOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VMUH_WU : LSX_3R<0b01110000100010010>, + LSX_3R_DESC_BASE<"vmuh.wu", int_loongarch_lsx_vmuh_wu, LSX128WOpnd, LSX128WOpnd, LSX128WOpnd> ; + +def VMUH_DU : LSX_3R<0b01110000100010011>, + LSX_3R_DESC_BASE<"vmuh.du", int_loongarch_lsx_vmuh_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VMULWEV_H_B : LSX_3R<0b01110000100100000>, + LSX_3R_DESC_BASE<"vmulwev.h.b", int_loongarch_lsx_vmulwev_h_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; + +def VMULWEV_W_H : LSX_3R<0b01110000100100001>, + LSX_3R_DESC_BASE<"vmulwev.w.h", int_loongarch_lsx_vmulwev_w_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VMULWEV_D_W : LSX_3R<0b01110000100100010>, + LSX_3R_DESC_BASE<"vmulwev.d.w", int_loongarch_lsx_vmulwev_d_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; + +def VMULWEV_Q_D : LSX_3R<0b01110000100100011>, + LSX_3R_DESC_BASE<"vmulwev.q.d", int_loongarch_lsx_vmulwev_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VMULWOD_H_B : LSX_3R<0b01110000100100100>, + LSX_3R_DESC_BASE<"vmulwod.h.b", int_loongarch_lsx_vmulwod_h_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; + +def VMULWOD_W_H : 
LSX_3R<0b01110000100100101>, + LSX_3R_DESC_BASE<"vmulwod.w.h", int_loongarch_lsx_vmulwod_w_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VMULWOD_D_W : LSX_3R<0b01110000100100110>, + LSX_3R_DESC_BASE<"vmulwod.d.w", int_loongarch_lsx_vmulwod_d_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; + +def VMULWOD_Q_D : LSX_3R<0b01110000100100111>, + LSX_3R_DESC_BASE<"vmulwod.q.d", int_loongarch_lsx_vmulwod_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VMULWEV_H_BU : LSX_3R<0b01110000100110000>, + LSX_3R_DESC_BASE<"vmulwev.h.bu", int_loongarch_lsx_vmulwev_h_bu, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; + +def VMULWEV_W_HU : LSX_3R<0b01110000100110001>, + LSX_3R_DESC_BASE<"vmulwev.w.hu", int_loongarch_lsx_vmulwev_w_hu, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VMULWEV_D_WU : LSX_3R<0b01110000100110010>, + LSX_3R_DESC_BASE<"vmulwev.d.wu", int_loongarch_lsx_vmulwev_d_wu, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; + +def VMULWEV_Q_DU : LSX_3R<0b01110000100110011>, + LSX_3R_DESC_BASE<"vmulwev.q.du", int_loongarch_lsx_vmulwev_q_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VMULWOD_H_BU : LSX_3R<0b01110000100110100>, + LSX_3R_DESC_BASE<"vmulwod.h.bu", int_loongarch_lsx_vmulwod_h_bu, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; + +def VMULWOD_W_HU : LSX_3R<0b01110000100110101>, + LSX_3R_DESC_BASE<"vmulwod.w.hu", int_loongarch_lsx_vmulwod_w_hu, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VMULWOD_D_WU : LSX_3R<0b01110000100110110>, + LSX_3R_DESC_BASE<"vmulwod.d.wu", int_loongarch_lsx_vmulwod_d_wu, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; + +def VMULWOD_Q_DU : LSX_3R<0b01110000100110111>, + LSX_3R_DESC_BASE<"vmulwod.q.du", int_loongarch_lsx_vmulwod_q_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VMULWEV_H_BU_B : LSX_3R<0b01110000101000000>, + LSX_3R_DESC_BASE<"vmulwev.h.bu.b", int_loongarch_lsx_vmulwev_h_bu_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; + +def VMULWEV_W_HU_H : LSX_3R<0b01110000101000001>, + LSX_3R_DESC_BASE<"vmulwev.w.hu.h", int_loongarch_lsx_vmulwev_w_hu_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VMULWEV_D_WU_W : LSX_3R<0b01110000101000010>, + LSX_3R_DESC_BASE<"vmulwev.d.wu.w", int_loongarch_lsx_vmulwev_d_wu_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; + +def VMULWEV_Q_DU_D : LSX_3R<0b01110000101000011>, + LSX_3R_DESC_BASE<"vmulwev.q.du.d", int_loongarch_lsx_vmulwev_q_du_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VMULWOD_H_BU_B : LSX_3R<0b01110000101000100>, + LSX_3R_DESC_BASE<"vmulwod.h.bu.b", int_loongarch_lsx_vmulwod_h_bu_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; + +def VMULWOD_W_HU_H : LSX_3R<0b01110000101000101>, + LSX_3R_DESC_BASE<"vmulwod.w.hu.h", int_loongarch_lsx_vmulwod_w_hu_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VMULWOD_D_WU_W : LSX_3R<0b01110000101000110>, + LSX_3R_DESC_BASE<"vmulwod.d.wu.w", int_loongarch_lsx_vmulwod_d_wu_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; + +def VMULWOD_Q_DU_D : LSX_3R<0b01110000101000111>, + LSX_3R_DESC_BASE<"vmulwod.q.du.d", int_loongarch_lsx_vmulwod_q_du_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VMADDWEV_H_B : LSX_3R<0b01110000101011000>, + LSX_3R_4R_DESC_BASE<"vmaddwev.h.b", int_loongarch_lsx_vmaddwev_h_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; + +def VMADDWEV_W_H : LSX_3R<0b01110000101011001>, + LSX_3R_4R_DESC_BASE<"vmaddwev.w.h", int_loongarch_lsx_vmaddwev_w_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VMADDWEV_D_W : LSX_3R<0b01110000101011010>, + LSX_3R_4R_DESC_BASE<"vmaddwev.d.w", int_loongarch_lsx_vmaddwev_d_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>; + +def VMADDWEV_Q_D : 
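+// The vmaddw{ev,od}.* defs use LSX_3R_4R_DESC_BASE, which ties $vd as both
+// source and destination, so the intrinsic carries the accumulator as its
+// first operand.  Assumed IR signature, derived from the operand classes
+// rather than from the tests in this patch:
+//   declare <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16>, <16 x i8>, <16 x i8>)
+// i.e. acc + widen(even bytes of %a) * widen(even bytes of %b).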
LSX_3R<0b01110000101011011>, + LSX_3R_4R_DESC_BASE<"vmaddwev.q.d", int_loongarch_lsx_vmaddwev_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VMADDWOD_H_B : LSX_3R<0b01110000101011100>, + LSX_3R_4R_DESC_BASE<"vmaddwod.h.b", int_loongarch_lsx_vmaddwod_h_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; + +def VMADDWOD_W_H : LSX_3R<0b01110000101011101>, + LSX_3R_4R_DESC_BASE<"vmaddwod.w.h", int_loongarch_lsx_vmaddwod_w_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VMADDWOD_D_W : LSX_3R<0b01110000101011110>, + LSX_3R_4R_DESC_BASE<"vmaddwod.d.w", int_loongarch_lsx_vmaddwod_d_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>; + +def VMADDWOD_Q_D : LSX_3R<0b01110000101011111>, + LSX_3R_4R_DESC_BASE<"vmaddwod.q.d", int_loongarch_lsx_vmaddwod_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VMADDWEV_H_BU : LSX_3R<0b01110000101101000>, + LSX_3R_4R_DESC_BASE<"vmaddwev.h.bu", int_loongarch_lsx_vmaddwev_h_bu, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; + +def VMADDWEV_W_HU : LSX_3R<0b01110000101101001>, + LSX_3R_4R_DESC_BASE<"vmaddwev.w.hu", int_loongarch_lsx_vmaddwev_w_hu, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VMADDWEV_D_WU : LSX_3R<0b01110000101101010>, + LSX_3R_4R_DESC_BASE<"vmaddwev.d.wu", int_loongarch_lsx_vmaddwev_d_wu, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>; + +def VMADDWEV_Q_DU : LSX_3R<0b01110000101101011>, + LSX_3R_4R_DESC_BASE<"vmaddwev.q.du", int_loongarch_lsx_vmaddwev_q_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VMADDWOD_H_BU : LSX_3R<0b01110000101101100>, + LSX_3R_4R_DESC_BASE<"vmaddwod.h.bu", int_loongarch_lsx_vmaddwod_h_bu, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; + +def VMADDWOD_W_HU : LSX_3R<0b01110000101101101>, + LSX_3R_4R_DESC_BASE<"vmaddwod.w.hu", int_loongarch_lsx_vmaddwod_w_hu, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VMADDWOD_D_WU : LSX_3R<0b01110000101101110>, + LSX_3R_4R_DESC_BASE<"vmaddwod.d.wu", int_loongarch_lsx_vmaddwod_d_wu, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>; + +def VMADDWOD_Q_DU : LSX_3R<0b01110000101101111>, + LSX_3R_4R_DESC_BASE<"vmaddwod.q.du", int_loongarch_lsx_vmaddwod_q_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VMADDWEV_H_BU_B : LSX_3R<0b01110000101111000>, + LSX_3R_4R_DESC_BASE<"vmaddwev.h.bu.b", int_loongarch_lsx_vmaddwev_h_bu_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; + +def VMADDWEV_W_HU_H : LSX_3R<0b01110000101111001>, + LSX_3R_4R_DESC_BASE<"vmaddwev.w.hu.h", int_loongarch_lsx_vmaddwev_w_hu_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VMADDWEV_D_WU_W : LSX_3R<0b01110000101111010>, + LSX_3R_4R_DESC_BASE<"vmaddwev.d.wu.w", int_loongarch_lsx_vmaddwev_d_wu_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; + +def VMADDWEV_Q_DU_D : LSX_3R<0b01110000101111011>, + LSX_3R_4R_DESC_BASE<"vmaddwev.q.du.d", int_loongarch_lsx_vmaddwev_q_du_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VMADDWOD_H_BU_B : LSX_3R<0b01110000101111100>, + LSX_3R_4R_DESC_BASE<"vmaddwod.h.bu.b", int_loongarch_lsx_vmaddwod_h_bu_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; + +def VMADDWOD_W_HU_H : LSX_3R<0b01110000101111101>, + LSX_3R_4R_DESC_BASE<"vmaddwod.w.hu.h", int_loongarch_lsx_vmaddwod_w_hu_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VMADDWOD_D_WU_W : LSX_3R<0b01110000101111110>, + LSX_3R_4R_DESC_BASE<"vmaddwod.d.wu.w", int_loongarch_lsx_vmaddwod_d_wu_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; + +def VMADDWOD_Q_DU_D : LSX_3R<0b01110000101111111>, + LSX_3R_4R_DESC_BASE<"vmaddwod.q.du.d", int_loongarch_lsx_vmaddwod_q_du_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VSRLN_B_H : LSX_3R<0b01110000111101001>, + LSX_3R_DESC_BASE<"vsrln.b.h", 
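+// vsrln/vsran and the vssrl*n/vssra*n families below shift each
+// double-width source element right by the amount in $vk and narrow the
+// result (l = logical, a = arithmetic, r = rounding, ss = saturating).
+// Assumed IR signature for the first of them, inferred from the operand
+// classes:
+//   declare <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16>, <8 x i16>)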
int_loongarch_lsx_vsrln_b_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VSRLN_H_W : LSX_3R<0b01110000111101010>, + LSX_3R_DESC_BASE<"vsrln.h.w", int_loongarch_lsx_vsrln_h_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; + +def VSRLN_W_D : LSX_3R<0b01110000111101011>, + LSX_3R_DESC_BASE<"vsrln.w.d", int_loongarch_lsx_vsrln_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VSRAN_B_H : LSX_3R<0b01110000111101101>, + LSX_3R_DESC_BASE<"vsran.b.h", int_loongarch_lsx_vsran_b_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VSRAN_H_W : LSX_3R<0b01110000111101110>, + LSX_3R_DESC_BASE<"vsran.h.w", int_loongarch_lsx_vsran_h_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; + +def VSRAN_W_D : LSX_3R<0b01110000111101111>, + LSX_3R_DESC_BASE<"vsran.w.d", int_loongarch_lsx_vsran_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VSRLRN_B_H : LSX_3R<0b01110000111110001>, + LSX_3R_DESC_BASE<"vsrlrn.b.h", int_loongarch_lsx_vsrlrn_b_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VSRLRN_H_W : LSX_3R<0b01110000111110010>, + LSX_3R_DESC_BASE<"vsrlrn.h.w", int_loongarch_lsx_vsrlrn_h_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; + +def VSRLRN_W_D : LSX_3R<0b01110000111110011>, + LSX_3R_DESC_BASE<"vsrlrn.w.d", int_loongarch_lsx_vsrlrn_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VSRARN_B_H : LSX_3R<0b01110000111110101>, + LSX_3R_DESC_BASE<"vsrarn.b.h", int_loongarch_lsx_vsrarn_b_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VSRARN_H_W : LSX_3R<0b01110000111110110>, + LSX_3R_DESC_BASE<"vsrarn.h.w", int_loongarch_lsx_vsrarn_h_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; + +def VSRARN_W_D : LSX_3R<0b01110000111110111>, + LSX_3R_DESC_BASE<"vsrarn.w.d", int_loongarch_lsx_vsrarn_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VSSRLN_B_H : LSX_3R<0b01110000111111001>, + LSX_3R_DESC_BASE<"vssrln.b.h", int_loongarch_lsx_vssrln_b_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VSSRLN_H_W : LSX_3R<0b01110000111111010>, + LSX_3R_DESC_BASE<"vssrln.h.w", int_loongarch_lsx_vssrln_h_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; + +def VSSRLN_W_D : LSX_3R<0b01110000111111011>, + LSX_3R_DESC_BASE<"vssrln.w.d", int_loongarch_lsx_vssrln_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VSSRAN_B_H : LSX_3R<0b01110000111111101>, + LSX_3R_DESC_BASE<"vssran.b.h", int_loongarch_lsx_vssran_b_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VSSRAN_H_W : LSX_3R<0b01110000111111110>, + LSX_3R_DESC_BASE<"vssran.h.w", int_loongarch_lsx_vssran_h_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; + +def VSSRAN_W_D : LSX_3R<0b01110000111111111>, + LSX_3R_DESC_BASE<"vssran.w.d", int_loongarch_lsx_vssran_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VSSRLRN_B_H : LSX_3R<0b01110001000000001>, + LSX_3R_DESC_BASE<"vssrlrn.b.h", int_loongarch_lsx_vssrlrn_b_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VSSRLRN_H_W : LSX_3R<0b01110001000000010>, + LSX_3R_DESC_BASE<"vssrlrn.h.w", int_loongarch_lsx_vssrlrn_h_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; + +def VSSRLRN_W_D : LSX_3R<0b01110001000000011>, + LSX_3R_DESC_BASE<"vssrlrn.w.d", int_loongarch_lsx_vssrlrn_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VSSRARN_B_H : LSX_3R<0b01110001000000101>, + LSX_3R_DESC_BASE<"vssrarn.b.h", int_loongarch_lsx_vssrarn_b_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VSSRARN_H_W : LSX_3R<0b01110001000000110>, + LSX_3R_DESC_BASE<"vssrarn.h.w", int_loongarch_lsx_vssrarn_h_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; + +def VSSRARN_W_D : LSX_3R<0b01110001000000111>, + LSX_3R_DESC_BASE<"vssrarn.w.d", int_loongarch_lsx_vssrarn_w_d, LSX128WOpnd, 
LSX128DOpnd, LSX128DOpnd>;
+
+
+def VSSRLN_BU_H : LSX_3R<0b01110001000001001>,
+                  LSX_3R_DESC_BASE<"vssrln.bu.h", int_loongarch_lsx_vssrln_bu_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>;
+
+def VSSRLN_HU_W : LSX_3R<0b01110001000001010>,
+                  LSX_3R_DESC_BASE<"vssrln.hu.w", int_loongarch_lsx_vssrln_hu_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>;
+
+def VSSRLN_WU_D : LSX_3R<0b01110001000001011>,
+                  LSX_3R_DESC_BASE<"vssrln.wu.d", int_loongarch_lsx_vssrln_wu_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>;
+
+
+def VSSRAN_BU_H : LSX_3R<0b01110001000001101>,
+                  LSX_3R_DESC_BASE<"vssran.bu.h", int_loongarch_lsx_vssran_bu_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>;
+
+def VSSRAN_HU_W : LSX_3R<0b01110001000001110>,
+                  LSX_3R_DESC_BASE<"vssran.hu.w", int_loongarch_lsx_vssran_hu_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>;
+
+def VSSRAN_WU_D : LSX_3R<0b01110001000001111>,
+                  LSX_3R_DESC_BASE<"vssran.wu.d", int_loongarch_lsx_vssran_wu_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>;
+
+
+def VSSRLRN_BU_H : LSX_3R<0b01110001000010001>,
+                   LSX_3R_DESC_BASE<"vssrlrn.bu.h", int_loongarch_lsx_vssrlrn_bu_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>;
+
+def VSSRLRN_HU_W : LSX_3R<0b01110001000010010>,
+                   LSX_3R_DESC_BASE<"vssrlrn.hu.w", int_loongarch_lsx_vssrlrn_hu_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>;
+
+def VSSRLRN_WU_D : LSX_3R<0b01110001000010011>,
+                   LSX_3R_DESC_BASE<"vssrlrn.wu.d", int_loongarch_lsx_vssrlrn_wu_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>;
+
+
+def VSSRARN_BU_H : LSX_3R<0b01110001000010101>,
+                   LSX_3R_DESC_BASE<"vssrarn.bu.h", int_loongarch_lsx_vssrarn_bu_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>;
+
+def VSSRARN_HU_W : LSX_3R<0b01110001000010110>,
+                   LSX_3R_DESC_BASE<"vssrarn.hu.w", int_loongarch_lsx_vssrarn_hu_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>;
+
+def VSSRARN_WU_D : LSX_3R<0b01110001000010111>,
+                   LSX_3R_DESC_BASE<"vssrarn.wu.d", int_loongarch_lsx_vssrarn_wu_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>;
+
+
+def VANDN_V : LSX_3R<0b01110001001010000>,
+              LSX_3R_DESC_BASE<"vandn.v", int_loongarch_lsx_vandn_v, LSX128BOpnd>;
+
+
+class LSX_VANDN_PSEUDO_BASE<RegisterOperand RO> :
+  LSXPseudo<(outs RO:$vd), (ins RO:$vj, RO:$vk),
+            []>,
+  PseudoInstExpansion<(VANDN_V LSX128BOpnd:$vd,
+                       LSX128BOpnd:$vj,
+                       LSX128BOpnd:$vk)>;
+
+def VANDN_H_PSEUDO : LSX_VANDN_PSEUDO_BASE<LSX128HOpnd>;
+def VANDN_W_PSEUDO : LSX_VANDN_PSEUDO_BASE<LSX128WOpnd>;
+def VANDN_D_PSEUDO : LSX_VANDN_PSEUDO_BASE<LSX128DOpnd>;
+
+
+def VORN_V : LSX_3R<0b01110001001010001>,
+             LSX_3R_DESC_BASE<"vorn.v", int_loongarch_lsx_vorn_v, LSX128BOpnd>;
+
+
+class LSX_VORN_PSEUDO_BASE<RegisterOperand RO> :
+  LSXPseudo<(outs RO:$vd), (ins RO:$vj, RO:$vk),
+            []>,
+  PseudoInstExpansion<(VORN_V LSX128BOpnd:$vd,
+                       LSX128BOpnd:$vj,
+                       LSX128BOpnd:$vk)>;
+
+def VORN_H_PSEUDO : LSX_VORN_PSEUDO_BASE<LSX128HOpnd>;
+def VORN_W_PSEUDO : LSX_VORN_PSEUDO_BASE<LSX128WOpnd>;
+def VORN_D_PSEUDO : LSX_VORN_PSEUDO_BASE<LSX128DOpnd>;
+
+
+def VFRSTP_B : LSX_3R<0b01110001001010110>,
+               LSX_3R_4R_DESC_BASE<"vfrstp.b", int_loongarch_lsx_vfrstp_b, LSX128BOpnd>;
+
+def VFRSTP_H : LSX_3R<0b01110001001010111>,
+               LSX_3R_4R_DESC_BASE<"vfrstp.h", int_loongarch_lsx_vfrstp_h, LSX128HOpnd>;
+
+
+def VADD_Q : LSX_3R<0b01110001001011010>, IsCommutable,
+             LSX_3R_DESC_BASE<"vadd.q", int_loongarch_lsx_vadd_q, LSX128DOpnd>;
+
+def VSUB_Q : LSX_3R<0b01110001001011011>,
+             LSX_3R_DESC_BASE<"vsub.q", int_loongarch_lsx_vsub_q, LSX128DOpnd>;
+
+
+def VSIGNCOV_B : LSX_3R<0b01110001001011100>,
+                 LSX_3R_DESC_BASE<"vsigncov.b", int_loongarch_lsx_vsigncov_b, LSX128BOpnd>;
+
+def VSIGNCOV_H : LSX_3R<0b01110001001011101>,
+                 LSX_3R_DESC_BASE<"vsigncov.h", int_loongarch_lsx_vsigncov_h, LSX128HOpnd>;
+
+def VSIGNCOV_W :
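+// vsigncov.{b,h,w,d} ("sign cover"): per element, the result is assumed to
+// be -vk, 0 or vk according to whether the matching vj element is negative,
+// zero or positive; only the intrinsic form is modelled here, with no
+// generic DAG pattern attached.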
LSX_3R<0b01110001001011110>, + LSX_3R_DESC_BASE<"vsigncov.w", int_loongarch_lsx_vsigncov_w, LSX128WOpnd>; + +def VSIGNCOV_D : LSX_3R<0b01110001001011111>, + LSX_3R_DESC_BASE<"vsigncov.d", int_loongarch_lsx_vsigncov_d, LSX128DOpnd>; + + +def VFCVT_H_S : LSX_3R<0b01110001010001100>, + LSX_3RF_DESC_BASE<"vfcvt.h.s", int_loongarch_lsx_vfcvt_h_s, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; + +def VFCVT_S_D : LSX_3R<0b01110001010001101>, + LSX_3RF_DESC_BASE1<"vfcvt.s.d", int_loongarch_lsx_vfcvt_s_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VFFINT_S_L : LSX_3R<0b01110001010010000>, + LSX_3RF_DESC_BASE<"vffint.s.l", int_loongarch_lsx_vffint_s_l, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; + +def VFTINT_W_D : LSX_3R<0b01110001010010011>, + LSX_3RF_DESC_BASE<"vftint.w.d", int_loongarch_lsx_vftint_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VFTINTRZ_W_D : LSX_3R<0b01110001010010110>, + LSX_3RF_DESC_BASE<"vftintrz.w.d", int_loongarch_lsx_vftintrz_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; + +def VFTINTRP_W_D : LSX_3R<0b01110001010010101>, + LSX_3RF_DESC_BASE<"vftintrp.w.d", int_loongarch_lsx_vftintrp_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; + +def VFTINTRM_W_D : LSX_3R<0b01110001010010100>, + LSX_3RF_DESC_BASE<"vftintrm.w.d", int_loongarch_lsx_vftintrm_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; + +def VFTINTRNE_W_D : LSX_3R<0b01110001010010111>, + LSX_3RF_DESC_BASE<"vftintrne.w.d", int_loongarch_lsx_vftintrne_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VBSRL_V : LSX_I5_U<0b01110010100011101>, + LSX_U5_DESC_BASE<"vbsrl.v", int_loongarch_lsx_vbsrl_v, LSX128BOpnd>; + +def VBSLL_V : LSX_I5_U<0b01110010100011100>, + LSX_U5_DESC_BASE<"vbsll.v", int_loongarch_lsx_vbsll_v, LSX128BOpnd>; + + +def VFRSTPI_B : LSX_I5_U<0b01110010100110100>, + LSX_U5_4R_DESC_BASE<"vfrstpi.b", int_loongarch_lsx_vfrstpi_b, LSX128BOpnd>; + +def VFRSTPI_H : LSX_I5_U<0b01110010100110101>, + LSX_U5_4R_DESC_BASE<"vfrstpi.h", int_loongarch_lsx_vfrstpi_h, LSX128HOpnd>; + + +def VNEG_B : LSX_2R<0b0111001010011100001100>, + LSX_2R_DESC_BASE<"vneg.b", int_loongarch_lsx_vneg_b, LSX128BOpnd>; + +def VNEG_H : LSX_2R<0b0111001010011100001101>, + LSX_2R_DESC_BASE<"vneg.h", int_loongarch_lsx_vneg_h, LSX128HOpnd>; + +def VNEG_W : LSX_2R<0b0111001010011100001110>, + LSX_2R_DESC_BASE<"vneg.w", int_loongarch_lsx_vneg_w, LSX128WOpnd>; + +def VNEG_D : LSX_2R<0b0111001010011100001111>, + LSX_2R_DESC_BASE<"vneg.d", int_loongarch_lsx_vneg_d, LSX128DOpnd>; + + +def VMSKGEZ_B : LSX_2R<0b0111001010011100010100>, + LSX_2R_DESC_BASE<"vmskgez.b", int_loongarch_lsx_vmskgez_b, LSX128BOpnd>; + +def VMSKNZ_B : LSX_2R<0b0111001010011100011000>, + LSX_2R_DESC_BASE<"vmsknz.b", int_loongarch_lsx_vmsknz_b, LSX128BOpnd>; + + +def VFRINTRM_S : LSX_2R<0b0111001010011101010001>, + LSX_2RF_DESC_BASE<"vfrintrm.s", int_loongarch_lsx_vfrintrm_s, LSX128WOpnd>; + +def VFRINTRM_D : LSX_2R<0b0111001010011101010010>, + LSX_2RF_DESC_BASE<"vfrintrm.d", int_loongarch_lsx_vfrintrm_d, LSX128DOpnd>; + + +def VFRINTRP_S : LSX_2R<0b0111001010011101010101>, + LSX_2RF_DESC_BASE<"vfrintrp.s", int_loongarch_lsx_vfrintrp_s, LSX128WOpnd>; + +def VFRINTRP_D : LSX_2R<0b0111001010011101010110>, + LSX_2RF_DESC_BASE<"vfrintrp.d", int_loongarch_lsx_vfrintrp_d, LSX128DOpnd>; + + +def VFRINTRZ_S : LSX_2R<0b0111001010011101011001>, + LSX_2RF_DESC_BASE<"vfrintrz.s", int_loongarch_lsx_vfrintrz_s, LSX128WOpnd>; + +def VFRINTRZ_D : LSX_2R<0b0111001010011101011010>, + LSX_2RF_DESC_BASE<"vfrintrz.d", int_loongarch_lsx_vfrintrz_d, LSX128DOpnd>; + + +def VFRINTRNE_S : 
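+// The vfrint{rm,rp,rz,rne} pairs round each lane to an integral FP value
+// with a fixed rounding mode: rm = toward -inf (floor), rp = toward +inf
+// (ceil), rz = toward zero (trunc), rne = to nearest, ties to even.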
LSX_2R<0b0111001010011101011101>, + LSX_2RF_DESC_BASE<"vfrintrne.s", int_loongarch_lsx_vfrintrne_s, LSX128WOpnd>; + +def VFRINTRNE_D : LSX_2R<0b0111001010011101011110>, + LSX_2RF_DESC_BASE<"vfrintrne.d", int_loongarch_lsx_vfrintrne_d, LSX128DOpnd>; + + +def VFFINTL_D_W : LSX_2R<0b0111001010011110000100>, + LSX_2RF_DESC_BASE<"vffintl.d.w", int_loongarch_lsx_vffintl_d_w, LSX128DOpnd, LSX128WOpnd>; + +def VFFINTH_D_W : LSX_2R<0b0111001010011110000101>, + LSX_2RF_DESC_BASE<"vffinth.d.w", int_loongarch_lsx_vffinth_d_w, LSX128DOpnd, LSX128WOpnd>; + + +def VFTINTRM_W_S : LSX_2R<0b0111001010011110001110>, + LSX_2RF_DESC_BASE<"vftintrm.w.s", int_loongarch_lsx_vftintrm_w_s, LSX128WOpnd>; + +def VFTINTRM_L_D : LSX_2R<0b0111001010011110001111>, + LSX_2RF_DESC_BASE<"vftintrm.l.d", int_loongarch_lsx_vftintrm_l_d, LSX128DOpnd>; + + +def VFTINTRP_W_S : LSX_2R<0b0111001010011110010000>, + LSX_2RF_DESC_BASE<"vftintrp.w.s", int_loongarch_lsx_vftintrp_w_s, LSX128WOpnd>; + +def VFTINTRP_L_D : LSX_2R<0b0111001010011110010001>, + LSX_2RF_DESC_BASE<"vftintrp.l.d", int_loongarch_lsx_vftintrp_l_d, LSX128DOpnd>; + + +def VFTINTRZ_W_S : LSX_2R<0b0111001010011110010010>, + LSX_2RF_DESC_BASE<"vftintrz.w.s", fp_to_sint, LSX128WOpnd>; + +def VFTINTRZ_L_D : LSX_2R<0b0111001010011110010011>, + LSX_2RF_DESC_BASE<"vftintrz.l.d", fp_to_sint, LSX128DOpnd>; + + +def VFTINTRNE_W_S : LSX_2R<0b0111001010011110010100>, + LSX_2RF_DESC_BASE<"vftintrne.w.s", int_loongarch_lsx_vftintrne_w_s, LSX128WOpnd>; + +def VFTINTRNE_L_D : LSX_2R<0b0111001010011110010101>, + LSX_2RF_DESC_BASE<"vftintrne.l.d", int_loongarch_lsx_vftintrne_l_d, LSX128DOpnd>; + + +def VFTINTL_L_S : LSX_2R<0b0111001010011110100000>, + LSX_2RF_DESC_BASE<"vftintl.l.s", int_loongarch_lsx_vftintl_l_s, LSX128DOpnd, LSX128WOpnd>; + +def VFTINTH_L_S : LSX_2R<0b0111001010011110100001>, + LSX_2RF_DESC_BASE<"vftinth.l.s", int_loongarch_lsx_vftinth_l_s, LSX128DOpnd, LSX128WOpnd>; + + +def VFTINTRML_L_S : LSX_2R<0b0111001010011110100010>, + LSX_2RF_DESC_BASE<"vftintrml.l.s", int_loongarch_lsx_vftintrml_l_s, LSX128DOpnd, LSX128WOpnd>; + +def VFTINTRMH_L_S : LSX_2R<0b0111001010011110100011>, + LSX_2RF_DESC_BASE<"vftintrmh.l.s", int_loongarch_lsx_vftintrmh_l_s, LSX128DOpnd, LSX128WOpnd>; + + +def VFTINTRPL_L_S : LSX_2R<0b0111001010011110100100>, + LSX_2RF_DESC_BASE<"vftintrpl.l.s", int_loongarch_lsx_vftintrpl_l_s, LSX128DOpnd, LSX128WOpnd>; + +def VFTINTRPH_L_S : LSX_2R<0b0111001010011110100101>, + LSX_2RF_DESC_BASE<"vftintrph.l.s", int_loongarch_lsx_vftintrph_l_s, LSX128DOpnd, LSX128WOpnd>; + + +def VFTINTRZL_L_S : LSX_2R<0b0111001010011110100110>, + LSX_2RF_DESC_BASE<"vftintrzl.l.s", int_loongarch_lsx_vftintrzl_l_s, LSX128DOpnd, LSX128WOpnd>; + +def VFTINTRZH_L_S : LSX_2R<0b0111001010011110100111>, + LSX_2RF_DESC_BASE<"vftintrzh.l.s", int_loongarch_lsx_vftintrzh_l_s, LSX128DOpnd, LSX128WOpnd>; + + +def VFTINTRNEL_L_S : LSX_2R<0b0111001010011110101000>, + LSX_2RF_DESC_BASE<"vftintrnel.l.s", int_loongarch_lsx_vftintrnel_l_s, LSX128DOpnd, LSX128WOpnd>; + +def VFTINTRNEH_L_S : LSX_2R<0b0111001010011110101001>, + LSX_2RF_DESC_BASE<"vftintrneh.l.s", int_loongarch_lsx_vftintrneh_l_s, LSX128DOpnd, LSX128WOpnd>; + + +def VEXTH_H_B : LSX_2R<0b0111001010011110111000>, + LSX_2R_DESC_BASE<"vexth.h.b", int_loongarch_lsx_vexth_h_b, LSX128HOpnd, LSX128BOpnd>; + +def VEXTH_W_H : LSX_2R<0b0111001010011110111001>, + LSX_2R_DESC_BASE<"vexth.w.h", int_loongarch_lsx_vexth_w_h, LSX128WOpnd, LSX128HOpnd>; + +def VEXTH_D_W : LSX_2R<0b0111001010011110111010>, + LSX_2R_DESC_BASE<"vexth.d.w", 
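+// vexth.* widens the elements of the high half of the source vector to the
+// next wider element type, sign-extending here and zero-extending in the
+// vexth.*u.*u group below.  Assumed IR signature, inferred from the operand
+// classes:
+//   declare <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8>)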
int_loongarch_lsx_vexth_d_w, LSX128DOpnd, LSX128WOpnd> ; + +def VEXTH_Q_D : LSX_2R<0b0111001010011110111011>, + LSX_2R_DESC_BASE<"vexth.q.d", int_loongarch_lsx_vexth_q_d, LSX128DOpnd, LSX128DOpnd>; + + +def VEXTH_HU_BU : LSX_2R<0b0111001010011110111100>, + LSX_2R_DESC_BASE<"vexth.hu.bu", int_loongarch_lsx_vexth_hu_bu, LSX128HOpnd, LSX128BOpnd>; + +def VEXTH_WU_HU : LSX_2R<0b0111001010011110111101>, + LSX_2R_DESC_BASE<"vexth.wu.hu", int_loongarch_lsx_vexth_wu_hu, LSX128WOpnd, LSX128HOpnd>; + +def VEXTH_DU_WU : LSX_2R<0b0111001010011110111110>, + LSX_2R_DESC_BASE<"vexth.du.wu", int_loongarch_lsx_vexth_du_wu, LSX128DOpnd, LSX128WOpnd> ; + +def VEXTH_QU_DU : LSX_2R<0b0111001010011110111111>, + LSX_2R_DESC_BASE<"vexth.qu.du", int_loongarch_lsx_vexth_qu_du, LSX128DOpnd, LSX128DOpnd>; + + +def VSLLWIL_H_B : LSX_I3_U<0b0111001100001000001>, + LSX_2R_U3_DESC_BASE<"vsllwil.h.b", int_loongarch_lsx_vsllwil_h_b, LSX128HOpnd, LSX128BOpnd>; + +def VSLLWIL_W_H : LSX_I4_U<0b011100110000100001>, + LSX_2R_U4_DESC_BASE<"vsllwil.w.h", int_loongarch_lsx_vsllwil_w_h, LSX128WOpnd, LSX128HOpnd>; + +def VSLLWIL_D_W : LSX_I5_U<0b01110011000010001>, + LSX_2R_U5_DESC_BASE<"vsllwil.d.w", int_loongarch_lsx_vsllwil_d_w, LSX128DOpnd, LSX128WOpnd> ; + + +def VEXTL_Q_D : LSX_2R<0b0111001100001001000000>, + LSX_2R_DESC_BASE<"vextl.q.d", int_loongarch_lsx_vextl_q_d, LSX128DOpnd, LSX128DOpnd>; + + +def VSLLWIL_HU_BU : LSX_I3_U<0b0111001100001100001>, + LSX_2R_U3_DESC_BASE<"vsllwil.hu.bu", int_loongarch_lsx_vsllwil_hu_bu, LSX128HOpnd, LSX128BOpnd>; + +def VSLLWIL_WU_HU : LSX_I4_U<0b011100110000110001>, + LSX_2R_U4_DESC_BASE<"vsllwil.wu.hu", int_loongarch_lsx_vsllwil_wu_hu, LSX128WOpnd, LSX128HOpnd>; + +def VSLLWIL_DU_WU : LSX_I5_U<0b01110011000011001>, + LSX_2R_U5_DESC_BASE<"vsllwil.du.wu", int_loongarch_lsx_vsllwil_du_wu, LSX128DOpnd, LSX128WOpnd> ; + + +def VEXTL_QU_DU : LSX_2R<0b0111001100001101000000>, + LSX_2R_DESC_BASE<"vextl.qu.du", int_loongarch_lsx_vextl_qu_du, LSX128DOpnd, LSX128DOpnd>; + + +def VBITCLRI_B : LSX_I3_U<0b0111001100010000001>, + LSX_2R_U3_DESC_BASE<"vbitclri.b", int_loongarch_lsx_vbitclri_b, LSX128BOpnd, LSX128BOpnd>; + +def VBITCLRI_H : LSX_I4_U<0b011100110001000001>, + LSX_2R_U4_DESC_BASE<"vbitclri.h", int_loongarch_lsx_vbitclri_h, LSX128HOpnd, LSX128HOpnd>; + +def VBITCLRI_W : LSX_I5_U<0b01110011000100001>, + LSX_2R_U5_DESC_BASE<"vbitclri.w", int_loongarch_lsx_vbitclri_w, LSX128WOpnd, LSX128WOpnd>; + +def VBITCLRI_D : LSX_I6_U<0b0111001100010001>, + LSX_2R_U6_DESC_BASE<"vbitclri.d", int_loongarch_lsx_vbitclri_d, LSX128DOpnd, LSX128DOpnd>; + + +def VBITSETI_B : LSX_I3_U<0b0111001100010100001>, + LSX_2R_U3_DESC_BASE<"vbitseti.b", int_loongarch_lsx_vbitseti_b, LSX128BOpnd, LSX128BOpnd>; + +def VBITSETI_H : LSX_I4_U<0b011100110001010001>, + LSX_2R_U4_DESC_BASE<"vbitseti.h", int_loongarch_lsx_vbitseti_h, LSX128HOpnd, LSX128HOpnd>; + +def VBITSETI_W : LSX_I5_U<0b01110011000101001>, + LSX_2R_U5_DESC_BASE<"vbitseti.w", int_loongarch_lsx_vbitseti_w, LSX128WOpnd, LSX128WOpnd>; + +def VBITSETI_D : LSX_I6_U<0b0111001100010101>, + LSX_2R_U6_DESC_BASE<"vbitseti.d", int_loongarch_lsx_vbitseti_d, LSX128DOpnd, LSX128DOpnd>; + + +def VBITREVI_B : LSX_I3_U<0b0111001100011000001>, + LSX_2R_U3_DESC_BASE<"vbitrevi.b", int_loongarch_lsx_vbitrevi_b, LSX128BOpnd, LSX128BOpnd>; + +def VBITREVI_H : LSX_I4_U<0b011100110001100001>, + LSX_2R_U4_DESC_BASE<"vbitrevi.h", int_loongarch_lsx_vbitrevi_h, LSX128HOpnd, LSX128HOpnd>; + +def VBITREVI_W : LSX_I5_U<0b01110011000110001>, + LSX_2R_U5_DESC_BASE<"vbitrevi.w", 
int_loongarch_lsx_vbitrevi_w, LSX128WOpnd, LSX128WOpnd>; + +def VBITREVI_D : LSX_I6_U<0b0111001100011001>, + LSX_2R_U6_DESC_BASE<"vbitrevi.d", int_loongarch_lsx_vbitrevi_d, LSX128DOpnd, LSX128DOpnd>; + + +def VSSRLRNI_B_H : LSX_I4_U<0b011100110101000001>, + LSX_2R_3R_U4_DESC_BASE<"vssrlrni.b.h", int_loongarch_lsx_vssrlrni_b_h, LSX128BOpnd, LSX128BOpnd>; + +def VSSRLRNI_H_W : LSX_I5_U<0b01110011010100001>, + LSX_2R_3R_U5_DESC_BASE<"vssrlrni.h.w", int_loongarch_lsx_vssrlrni_h_w, LSX128HOpnd, LSX128HOpnd>; + +def VSSRLRNI_W_D : LSX_I6_U<0b0111001101010001>, + LSX_2R_3R_U6_DESC_BASE<"vssrlrni.w.d", int_loongarch_lsx_vssrlrni_w_d, LSX128WOpnd, LSX128WOpnd>; + +def VSSRLRNI_D_Q : LSX_I7_U<0b011100110101001>, + LSX_2R_3R_U7_DESC_BASE<"vssrlrni.d.q", int_loongarch_lsx_vssrlrni_d_q, LSX128DOpnd, LSX128DOpnd>; + + +def VSRANI_B_H : LSX_I4_U<0b011100110101100001>, + LSX_2R_3R_U4_DESC_BASE<"vsrani.b.h", int_loongarch_lsx_vsrani_b_h, LSX128BOpnd, LSX128BOpnd>; + +def VSRANI_H_W : LSX_I5_U<0b01110011010110001>, + LSX_2R_3R_U5_DESC_BASE<"vsrani.h.w", int_loongarch_lsx_vsrani_h_w, LSX128HOpnd, LSX128HOpnd>; + +def VSRANI_W_D : LSX_I6_U<0b0111001101011001>, + LSX_2R_3R_U6_DESC_BASE<"vsrani.w.d", int_loongarch_lsx_vsrani_w_d, LSX128WOpnd, LSX128WOpnd>; + +def VSRANI_D_Q : LSX_I7_U<0b011100110101101>, + LSX_2R_3R_U7_DESC_BASE<"vsrani.d.q", int_loongarch_lsx_vsrani_d_q, LSX128DOpnd, LSX128DOpnd>; + + +def VEXTRINS_B : LSX_I8_U<0b01110011100011>, + LSX_2R_3R_U8_DESC_BASE<"vextrins.b", int_loongarch_lsx_vextrins_b, LSX128BOpnd, LSX128BOpnd>; + +def VEXTRINS_H : LSX_I8_U<0b01110011100010>, + LSX_2R_3R_U8_DESC_BASE<"vextrins.h", int_loongarch_lsx_vextrins_h, LSX128HOpnd, LSX128HOpnd>; + +def VEXTRINS_W : LSX_I8_U<0b01110011100001>, + LSX_2R_3R_U8_DESC_BASE<"vextrins.w", int_loongarch_lsx_vextrins_w, LSX128WOpnd, LSX128WOpnd>; + +def VEXTRINS_D : LSX_I8_U<0b01110011100000>, + LSX_2R_3R_U8_DESC_BASE<"vextrins.d", int_loongarch_lsx_vextrins_d, LSX128DOpnd, LSX128DOpnd>; + + +def VBITSELI_B : LSX_I8_U<0b01110011110001>, + LSX_2R_3R_U8_DESC_BASE<"vbitseli.b", int_loongarch_lsx_vbitseli_b, LSX128BOpnd, LSX128BOpnd>; + + +def VANDI_B : LSX_I8_U<0b01110011110100>, + LSX_2R_U8_DESC_BASE<"vandi.b", int_loongarch_lsx_vandi_b, LSX128BOpnd, LSX128BOpnd>; + + +def VORI_B : LSX_I8_U<0b01110011110101>, + LSX_2R_U8_DESC_BASE<"vori.b", int_loongarch_lsx_vori_b, LSX128BOpnd, LSX128BOpnd>; + + +def VXORI_B : LSX_I8_U<0b01110011110110>, + LSX_2R_U8_DESC_BASE<"vxori.b", int_loongarch_lsx_vxori_b, LSX128BOpnd, LSX128BOpnd>; + + +def VNORI_B : LSX_I8_U<0b01110011110111>, + LSX_2R_U8_DESC_BASE<"vnori.b", int_loongarch_lsx_vnori_b, LSX128BOpnd, LSX128BOpnd>; + + +def VLDI : LSX_1R_I13<0b01110011111000>, + LSX_I13_DESC_BASE<"vldi", int_loongarch_lsx_vldi, i32, simm13Op, LSX128DOpnd>; + +def VLDI_B : LSX_1R_I13_I10<0b01110011111000000>, + LSX_I13_DESC_BASE_10<"vldi", LSX128BOpnd>; + +def VLDI_H : LSX_1R_I13_I10<0b01110011111000001>, + LSX_I13_DESC_BASE_10<"vldi", LSX128HOpnd>; + +def VLDI_W : LSX_1R_I13_I10<0b01110011111000010>, + LSX_I13_DESC_BASE_10<"vldi", LSX128WOpnd>; + +def VLDI_D : LSX_1R_I13_I10<0b01110011111000011>, + LSX_I13_DESC_BASE_10<"vldi", LSX128DOpnd>; + +def VPERMI_W : LSX_I8_U<0b01110011111001>, + LSX_2R_3R_U8_DESC_BASE<"vpermi.w", int_loongarch_lsx_vpermi_w, LSX128WOpnd, LSX128WOpnd>; + + +def VSEQ_B : LSX_3R<0b01110000000000000>, IsCommutable, + LSX_3R_DESC_BASE<"vseq.b", vseteq_v16i8, LSX128BOpnd>; + +def VSEQ_H : LSX_3R<0b01110000000000001>, IsCommutable, + LSX_3R_DESC_BASE<"vseq.h", vseteq_v8i16, LSX128HOpnd>; + +def 
VSEQ_W : LSX_3R<0b01110000000000010>, IsCommutable, + LSX_3R_DESC_BASE<"vseq.w", vseteq_v4i32, LSX128WOpnd> ; + +def VSEQ_D : LSX_3R<0b01110000000000011>, IsCommutable, + LSX_3R_DESC_BASE<"vseq.d", vseteq_v2i64, LSX128DOpnd>; + + +def VSLE_B : LSX_3R<0b01110000000000100>, + LSX_3R_DESC_BASE<"vsle.b", vsetle_v16i8, LSX128BOpnd>; + +def VSLE_H : LSX_3R<0b01110000000000101>, + LSX_3R_DESC_BASE<"vsle.h", vsetle_v8i16, LSX128HOpnd>; + +def VSLE_W : LSX_3R<0b01110000000000110>, + LSX_3R_DESC_BASE<"vsle.w", vsetle_v4i32, LSX128WOpnd>; + +def VSLE_D : LSX_3R<0b01110000000000111>, + LSX_3R_DESC_BASE<"vsle.d", vsetle_v2i64, LSX128DOpnd>; + + +def VSLE_BU : LSX_3R<0b01110000000001000>, + LSX_3R_DESC_BASE<"vsle.bu", vsetule_v16i8, LSX128BOpnd>; + +def VSLE_HU : LSX_3R<0b01110000000001001>, + LSX_3R_DESC_BASE<"vsle.hu", vsetule_v8i16, LSX128HOpnd>; + +def VSLE_WU : LSX_3R<0b01110000000001010>, + LSX_3R_DESC_BASE<"vsle.wu", vsetule_v4i32, LSX128WOpnd>; + +def VSLE_DU : LSX_3R<0b01110000000001011>, + LSX_3R_DESC_BASE<"vsle.du", vsetule_v2i64, LSX128DOpnd>; + + +def VSLT_B : LSX_3R<0b01110000000001100>, + LSX_3R_DESC_BASE<"vslt.b", vsetlt_v16i8, LSX128BOpnd>; + +def VSLT_H : LSX_3R<0b01110000000001101>, + LSX_3R_DESC_BASE<"vslt.h", vsetlt_v8i16, LSX128HOpnd>; + +def VSLT_W : LSX_3R<0b01110000000001110>, + LSX_3R_DESC_BASE<"vslt.w", vsetlt_v4i32, LSX128WOpnd>; + +def VSLT_D : LSX_3R<0b01110000000001111>, + LSX_3R_DESC_BASE<"vslt.d", vsetlt_v2i64, LSX128DOpnd>; + + +def VSLT_BU : LSX_3R<0b01110000000010000>, + LSX_3R_DESC_BASE<"vslt.bu", vsetult_v16i8, LSX128BOpnd>; + +def VSLT_HU : LSX_3R<0b01110000000010001>, + LSX_3R_DESC_BASE<"vslt.hu", vsetult_v8i16, LSX128HOpnd>; + +def VSLT_WU : LSX_3R<0b01110000000010010>, + LSX_3R_DESC_BASE<"vslt.wu", vsetult_v4i32, LSX128WOpnd>; + +def VSLT_DU : LSX_3R<0b01110000000010011>, + LSX_3R_DESC_BASE<"vslt.du", vsetult_v2i64, LSX128DOpnd>; + + +def VADD_B : LSX_3R<0b01110000000010100>, IsCommutable, + LSX_3R_DESC_BASE<"vadd.b", add, LSX128BOpnd>; + +def VADD_H : LSX_3R<0b01110000000010101>, IsCommutable, + LSX_3R_DESC_BASE<"vadd.h", add, LSX128HOpnd>; + +def VADD_W : LSX_3R<0b01110000000010110>, IsCommutable, + LSX_3R_DESC_BASE<"vadd.w", add, LSX128WOpnd>; + +def VADD_D : LSX_3R<0b01110000000010111>, IsCommutable, + LSX_3R_DESC_BASE<"vadd.d", add, LSX128DOpnd>; + + +def VSUB_B : LSX_3R<0b01110000000011000>, + LSX_3R_DESC_BASE<"vsub.b", sub, LSX128BOpnd>; + +def VSUB_H : LSX_3R<0b01110000000011001>, + LSX_3R_DESC_BASE<"vsub.h", sub, LSX128HOpnd>; + +def VSUB_W : LSX_3R<0b01110000000011010>, + LSX_3R_DESC_BASE<"vsub.w", sub, LSX128WOpnd>; + +def VSUB_D : LSX_3R<0b01110000000011011>, + LSX_3R_DESC_BASE<"vsub.d", sub, LSX128DOpnd>; + + + +//Pat +class LSXBitconvertPat preds = [HasLSX]> : + LSXPat<(DstVT (bitconvert SrcVT:$src)), + (COPY_TO_REGCLASS SrcVT:$src, DstRC), preds>; + +def : LSXBitconvertPat; +def : LSXBitconvertPat; +def : LSXBitconvertPat; +def : LSXBitconvertPat; + +def : LSXBitconvertPat; +def : LSXBitconvertPat; +def : LSXBitconvertPat; +def : LSXBitconvertPat; +def : LSXBitconvertPat; + +def : LSXBitconvertPat; +def : LSXBitconvertPat; +def : LSXBitconvertPat; +def : LSXBitconvertPat; +def : LSXBitconvertPat; + +def : LSXBitconvertPat; +def : LSXBitconvertPat; +def : LSXBitconvertPat; +def : LSXBitconvertPat; + +def : LSXBitconvertPat; +def : LSXBitconvertPat; +def : LSXBitconvertPat; +def : LSXBitconvertPat; + +def : LSXBitconvertPat; +def : LSXBitconvertPat; +def : LSXBitconvertPat; +def : LSXBitconvertPat; + +def : LSXBitconvertPat; +def : LSXBitconvertPat; 
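+// LSXBitconvertPat maps a no-op IR bitcast between two 128-bit vector types
+// onto a bare register-class copy, so no instruction is emitted for the
+// cast itself.  A representative instantiation would look like (assumed
+// arguments, following the class parameters above):
+//   def : LSXBitconvertPat<v8i16, v16i8, LSX128H>;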
+def : LSXBitconvertPat; +def : LSXBitconvertPat; + + + + +def : LSXPat<(i32 (vextract_sext_i8 v16i8:$vj, i32:$idx)), + (SRAI_W (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (VREPLVE_B v16i8:$vj, + i32:$idx), + sub_lo)), + GPR32), (i32 24))>; +def : LSXPat<(i32 (vextract_sext_i16 v8i16:$vj, i32:$idx)), + (SRAI_W (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (VREPLVE_H v8i16:$vj, + i32:$idx), + sub_lo)), + GPR32), (i32 16))>; +def : LSXPat<(i32 (vextract_sext_i32 v4i32:$vj, i32:$idx)), + (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (VREPLVE_W v4i32:$vj, + i32:$idx), + sub_lo)), + GPR32)>; +def : LSXPat<(i64 (vextract_sext_i64 v2i64:$vj, i32:$idx)), + (COPY_TO_REGCLASS (i64 (EXTRACT_SUBREG (VREPLVE_D v2i64:$vj, + i32:$idx), + sub_64)), + GPR64)>; + +def : LSXPat<(i32 (vextract_zext_i8 v16i8:$vj, i32:$idx)), + (SRLI_W (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (VREPLVE_B v16i8:$vj, + i32:$idx), + sub_lo)), + GPR32), (i32 24))>; +def : LSXPat<(i32 (vextract_zext_i16 v8i16:$vj, i32:$idx)), + (SRLI_W (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (VREPLVE_H v8i16:$vj, + i32:$idx), + sub_lo)), + GPR32), (i32 16))>; +def : LSXPat<(i32 (vextract_zext_i32 v4i32:$vj, i32:$idx)), + (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (VREPLVE_W v4i32:$vj, + i32:$idx), + sub_lo)), + GPR32)>; + +def : LSXPat<(i64 (vextract_zext_i64 v2i64:$vj, i32:$idx)), + (COPY_TO_REGCLASS (i64 (EXTRACT_SUBREG (VREPLVE_D v2i64:$vj, + i32:$idx), + sub_64)), + GPR64)>; + +def : LSXPat<(f32 (vector_extract v4f32:$vj, i32:$idx)), + (f32 (EXTRACT_SUBREG (VREPLVE_W v4f32:$vj, + i32:$idx), + sub_lo))>; +def : LSXPat<(f64 (vector_extract v2f64:$vj, i32:$idx)), + (f64 (EXTRACT_SUBREG (VREPLVE_D v2f64:$vj, + i32:$idx), + sub_64))>; + +def : LSXPat< + (i32 (vextract_sext_i8 v16i8:$vj, i64:$idx)), + (SRAI_W (COPY_TO_REGCLASS + (i32 (EXTRACT_SUBREG + (VREPLVE_B v16i8:$vj, + (COPY_TO_REGCLASS + (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), + sub_lo)), + GPR32), + (i32 24))>; +def : LSXPat< + (i32 (vextract_sext_i16 v8i16:$vj, i64:$idx)), + (SRAI_W (COPY_TO_REGCLASS + (i32 (EXTRACT_SUBREG + (VREPLVE_H v8i16:$vj, + (COPY_TO_REGCLASS + (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), + sub_lo)), + GPR32), + (i32 16))>; + +def : LSXPat< + (i32 (vextract_sext_i32 v4i32:$vj, i64:$idx)), + (COPY_TO_REGCLASS + (i32 (EXTRACT_SUBREG + (VREPLVE_W v4i32:$vj, + (COPY_TO_REGCLASS + (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), + sub_lo)), + GPR32)>; + +def : LSXPat< + (i64 (vextract_sext_i64 v2i64:$vj, i64:$idx)), + (COPY_TO_REGCLASS + (i64 (EXTRACT_SUBREG + (VREPLVE_D v2i64:$vj, + (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), + sub_64)), + GPR64)>; + +def : LSXPat< + (i32 (vextract_zext_i8 v16i8:$vj, i64:$idx)), + (SRLI_W (COPY_TO_REGCLASS + (i32 (EXTRACT_SUBREG + (VREPLVE_B v16i8:$vj, + (COPY_TO_REGCLASS + (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), + sub_lo)), + GPR32), + (i32 24))>; +def : LSXPat< + (i32 (vextract_zext_i16 v8i16:$vj, i64:$idx)), + (SRLI_W (COPY_TO_REGCLASS + (i32 (EXTRACT_SUBREG + (VREPLVE_H v8i16:$vj, + (COPY_TO_REGCLASS + (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), + sub_lo)), + GPR32), + (i32 16))>; +def : LSXPat< + (i32 (vextract_zext_i32 v4i32:$vj, i64:$idx)), + (COPY_TO_REGCLASS + (i32 (EXTRACT_SUBREG + (VREPLVE_W v4i32:$vj, + (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), + sub_lo)), + GPR32)>; +def : LSXPat< + (i64 (vextract_zext_i64 v2i64:$vj, i64:$idx)), + (COPY_TO_REGCLASS + (i64 (EXTRACT_SUBREG + (VREPLVE_D v2i64:$vj, + (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), + 
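+// The vextract_{sext,zext}_* patterns here implement variable-index element
+// extraction without a dedicated instruction: vreplve.* splats the selected
+// element to every lane, EXTRACT_SUBREG then reads lane 0 into a scalar
+// register, and for sub-word elements a trailing srai.w/srli.w by 24 or 16
+// performs the sign or zero extension.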
sub_64)),
+                   GPR64)>;
+
+def : LSXPat<
+  (f32 (vector_extract v4f32:$vj, i64:$idx)),
+  (f32 (EXTRACT_SUBREG
+         (VREPLVE_W v4f32:$vj,
+           (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)),
+         sub_lo))>;
+def : LSXPat<
+  (f64 (vector_extract v2f64:$vj, i64:$idx)),
+  (f64 (EXTRACT_SUBREG
+         (VREPLVE_D v2f64:$vj,
+           (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)),
+         sub_64))>;
+
+
+def : LSXPat<(vfseteq_v4f32 LSX128WOpnd:$a, LSX128WOpnd:$b),
+             (VFCMP_CEQ_S LSX128WOpnd:$a, LSX128WOpnd:$b)>;
+
+def : LSXPat<(vfseteq_v2f64 LSX128DOpnd:$a, LSX128DOpnd:$b),
+             (VFCMP_CEQ_D LSX128DOpnd:$a, LSX128DOpnd:$b)>;
+
+def : LSXPat<(vfsetle_v4f32 LSX128WOpnd:$a, LSX128WOpnd:$b),
+             (VFCMP_CLE_S LSX128WOpnd:$a, LSX128WOpnd:$b)>;
+
+def : LSXPat<(vfsetle_v2f64 LSX128DOpnd:$a, LSX128DOpnd:$b),
+             (VFCMP_CLE_D LSX128DOpnd:$a, LSX128DOpnd:$b)>;
+
+def : LSXPat<(vfsetlt_v4f32 LSX128WOpnd:$a, LSX128WOpnd:$b),
+             (VFCMP_CLT_S LSX128WOpnd:$a, LSX128WOpnd:$b)>;
+
+def : LSXPat<(vfsetlt_v2f64 LSX128DOpnd:$a, LSX128DOpnd:$b),
+             (VFCMP_CLT_D LSX128DOpnd:$a, LSX128DOpnd:$b)>;
+
+def : LSXPat<(vfsetne_v4f32 LSX128WOpnd:$a, LSX128WOpnd:$b),
+             (VFCMP_CNE_S LSX128WOpnd:$a, LSX128WOpnd:$b)>;
+
+def : LSXPat<(vfsetne_v2f64 LSX128DOpnd:$a, LSX128DOpnd:$b),
+             (VFCMP_CNE_D LSX128DOpnd:$a, LSX128DOpnd:$b)>;
+
+
+class LSX_INSERT_PSEUDO_BASE<SDPatternOperator OpNode, ValueType Ty,
+                             Operand ImmOp, ImmLeaf Imm,
+                             RegisterOperand ROVD, RegisterOperand ROFS> :
+  LSXPseudo<(outs ROVD:$vd), (ins ROVD:$vd_in, ImmOp:$n, ROFS:$fs),
+            [(set ROVD:$vd, (OpNode (Ty ROVD:$vd_in), ROFS:$fs, Imm:$n))]> {
+  bit usesCustomInserter = 1;
+  string Constraints = "$vd = $vd_in";
+}
+
+
+class INSERT_FW_PSEUDO_DESC : LSX_INSERT_PSEUDO_BASE;
+class INSERT_FD_PSEUDO_DESC : LSX_INSERT_PSEUDO_BASE;
+
+def INSERT_FW_PSEUDO : INSERT_FW_PSEUDO_DESC;
+def INSERT_FD_PSEUDO : INSERT_FD_PSEUDO_DESC;
+
+
+class LSX_INSERT_VIDX_PSEUDO_BASE<SDPatternOperator OpNode, ValueType Ty,
+                                  RegisterOperand ROVD, RegisterOperand ROFS,
+                                  RegisterOperand ROIdx> :
+  LSXPseudo<(outs ROVD:$vd), (ins ROVD:$vd_in, ROIdx:$n, ROFS:$fs),
+            [(set ROVD:$vd, (OpNode (Ty ROVD:$vd_in), ROFS:$fs,
+                             ROIdx:$n))]> {
+  bit usesCustomInserter = 1;
+  string Constraints = "$vd = $vd_in";
+}
+
+class INSERT_H_VIDX64_PSEUDO_DESC :
+  LSX_INSERT_VIDX_PSEUDO_BASE;
+def INSERT_H_VIDX64_PSEUDO : INSERT_H_VIDX64_PSEUDO_DESC;
+
+class INSERTPostRA<RegisterClass RC, RegisterOperand RD, RegisterOperand RE> :
+  LoongArchPseudo<(outs RC:$xd), (ins RC:$xd_in, RD:$n, RE:$fs), []> {
+  let mayLoad = 1;
+  let mayStore = 1;
+}
+
+def INSERT_H_VIDX64_PSEUDO_POSTRA : INSERTPostRA;
+
+class LSX_COPY_PSEUDO_BASE<SDPatternOperator OpNode, ValueType VecTy,
+                           Operand ImmOp, ImmLeaf Imm,
+                           RegisterClass RCD, RegisterClass RCVS> :
+  LSXPseudo<(outs RCD:$vd), (ins RCVS:$vj, ImmOp:$n),
+            [(set RCD:$vd, (OpNode (VecTy RCVS:$vj), Imm:$n))]> {
+  bit usesCustomInserter = 1;
+}
+
+
+class COPY_FW_PSEUDO_DESC : LSX_COPY_PSEUDO_BASE;
+class COPY_FD_PSEUDO_DESC : LSX_COPY_PSEUDO_BASE;
+def COPY_FW_PSEUDO : COPY_FW_PSEUDO_DESC;
+def COPY_FD_PSEUDO : COPY_FD_PSEUDO_DESC;
+
+
+let isCodeGenOnly = 1 in {
+
+def VST_H : LSX_I12_S<0b0010110001>,
+            ST_DESC_BASE<"vst", store, v8i16, LSX128HOpnd, mem_simm12>;
+def VST_W : LSX_I12_S<0b0010110001>,
+            ST_DESC_BASE<"vst", store, v4i32, LSX128WOpnd, mem_simm12>;
+def VST_D : LSX_I12_S<0b0010110001>,
+            ST_DESC_BASE<"vst", store, v2i64, LSX128DOpnd, mem_simm12>;
+
+
+def VLD_H : LSX_I12_S<0b0010110000>,
+            LD_DESC_BASE<"vld", load, v8i16, LSX128HOpnd, mem_simm12>;
+def VLD_W : LSX_I12_S<0b0010110000>,
+            LD_DESC_BASE<"vld", load, v4i32, LSX128WOpnd, mem_simm12>;
+def VLD_D : LSX_I12_S<0b0010110000>,
+            LD_DESC_BASE<"vld", load, v2i64, LSX128DOpnd, mem_simm12>;
+
+
+
+def VANDI_B_N : LSX_I8_U<0b01110011110100>,
+                LSX_BIT_U8_VREPLVE_DESC_BASE<"vandi.b", and, vsplati8_uimm8, LSX128BOpnd>;
+
+
+def VXORI_B_N : LSX_I8_U<0b01110011110110>,
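+// The *_N defs in this isCodeGenOnly block reuse encodings defined earlier
+// but attach generic DAG nodes (and/xor/sra/umax/smin/... and the vset*
+// condition fragments) instead of intrinsics, so ordinary vector IR reaches
+// the same instructions without going through the builtins.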
LSX_BIT_U8_VREPLVE_DESC_BASE<"vxori.b", xor, vsplati8_uimm8, LSX128BOpnd>; + + +def VSRAI_B_N : LSX_I3_U<0b0111001100110100001>, + LSX_BIT_U3_VREPLVE_DESC_BASE<"vsrai.b", sra, vsplati8_uimm3, LSX128BOpnd>; + +def VSRAI_H_N : LSX_I4_U<0b011100110011010001>, + LSX_BIT_U4_VREPLVE_DESC_BASE<"vsrai.h", sra, vsplati16_uimm4, LSX128HOpnd>; + +def VSRAI_W_N : LSX_I5_U<0b01110011001101001>, + LSX_BIT_U5_VREPLVE_DESC_BASE<"vsrai.w", sra, vsplati32_uimm5, LSX128WOpnd>; + +def VSRAI_D_N : LSX_I6_U<0b0111001100110101>, + LSX_BIT_U6_VREPLVE_DESC_BASE<"vsrai.d", sra, vsplati64_uimm6, LSX128DOpnd>; + + +def VMAXI_BU_N : LSX_I5_U<0b01110010100101000>, + LSX_I5_U_DESC_BASE<"vmaxi.bu", umax, vsplati8_uimm5, LSX128BOpnd>; + +def VMAXI_HU_N : LSX_I5_U<0b01110010100101001>, + LSX_I5_U_DESC_BASE<"vmaxi.hu", umax, vsplati16_uimm5, LSX128HOpnd>; + +def VMAXI_WU_N : LSX_I5_U<0b01110010100101010>, + LSX_I5_U_DESC_BASE<"vmaxi.wu", umax, vsplati32_uimm5, LSX128WOpnd>; + +def VMAXI_DU_N : LSX_I5_U<0b01110010100101011>, + LSX_I5_U_DESC_BASE<"vmaxi.du", umax, vsplati64_uimm5, LSX128DOpnd>; + + +def VMINI_B_N : LSX_I5<0b01110010100100100>, + LSX_I5_DESC_BASE<"vmini.b", smin, vsplati8_simm5, LSX128BOpnd>; + +def VMINI_H_N : LSX_I5<0b01110010100100101>, + LSX_I5_DESC_BASE<"vmini.h", smin, vsplati16_simm5, LSX128HOpnd>; + +def VMINI_W_N : LSX_I5<0b01110010100100110>, + LSX_I5_DESC_BASE<"vmini.w", smin, vsplati32_simm5, LSX128WOpnd>; + +def VMINI_D_N : LSX_I5<0b01110010100100111>, + LSX_I5_DESC_BASE<"vmini.d", smin, vsplati64_simm5, LSX128DOpnd>; + + +def VMAXI_B_N : LSX_I5<0b01110010100100000>, + LSX_I5_DESC_BASE<"vmaxi.b", smax, vsplati8_simm5, LSX128BOpnd>; + +def VMAXI_H_N : LSX_I5<0b01110010100100001>, + LSX_I5_DESC_BASE<"vmaxi.h", smax, vsplati16_simm5, LSX128HOpnd>; + +def VMAXI_W_N : LSX_I5<0b01110010100100010>, + LSX_I5_DESC_BASE<"vmaxi.w", smax, vsplati32_simm5, LSX128WOpnd>; + +def VMAXI_D_N : LSX_I5<0b01110010100100011>, + LSX_I5_DESC_BASE<"vmaxi.d", smax, vsplati64_simm5, LSX128DOpnd>; + + +def VSEQI_B_N : LSX_I5<0b01110010100000000>, + LSX_I5_DESC_BASE<"vseqi.b", vseteq_v16i8, vsplati8_simm5, LSX128BOpnd>; + +def VSEQI_H_N : LSX_I5<0b01110010100000001>, + LSX_I5_DESC_BASE<"vseqi.h", vseteq_v8i16, vsplati16_simm5, LSX128HOpnd>; + +def VSEQI_W_N : LSX_I5<0b01110010100000010>, + LSX_I5_DESC_BASE<"vseqi.w", vseteq_v4i32, vsplati32_simm5, LSX128WOpnd>; + +def VSEQI_D_N : LSX_I5<0b01110010100000011>, + LSX_I5_DESC_BASE<"vseqi.d", vseteq_v2i64, vsplati64_simm5, LSX128DOpnd>; + + +def VSLEI_B_N : LSX_I5<0b01110010100000100>, + LSX_I5_DESC_BASE<"vslei.b", vsetle_v16i8, vsplati8_simm5, LSX128BOpnd>; + +def VSLEI_H_N : LSX_I5<0b01110010100000101>, + LSX_I5_DESC_BASE<"vslei.h", vsetle_v8i16, vsplati16_simm5, LSX128HOpnd>; + +def VSLEI_W_N : LSX_I5<0b01110010100000110>, + LSX_I5_DESC_BASE<"vslei.w", vsetle_v4i32, vsplati32_simm5, LSX128WOpnd>; + +def VSLEI_D_N : LSX_I5<0b01110010100000111>, + LSX_I5_DESC_BASE<"vslei.d", vsetle_v2i64, vsplati64_simm5, LSX128DOpnd>; + +def VSLEI_BU_N : LSX_I5_U<0b01110010100001000>, + LSX_I5_U_DESC_BASE<"vslei.bu", vsetule_v16i8, vsplati8_uimm5, LSX128BOpnd>; + +def VSLEI_HU_N : LSX_I5_U<0b01110010100001001>, + LSX_I5_U_DESC_BASE<"vslei.hu", vsetule_v8i16, vsplati16_uimm5, LSX128HOpnd>; + +def VSLEI_WU_N : LSX_I5_U<0b01110010100001010>, + LSX_I5_U_DESC_BASE<"vslei.wu", vsetule_v4i32, vsplati32_uimm5, LSX128WOpnd>; + +def VSLEI_DU_N : LSX_I5_U<0b01110010100001011>, + LSX_I5_U_DESC_BASE<"vslei.du", vsetule_v2i64, vsplati64_uimm5, LSX128DOpnd>; + + +def VSLTI_B_N : LSX_I5<0b01110010100001100>, + 
LSX_I5_DESC_BASE<"vslti.b", vsetlt_v16i8, vsplati8_simm5, LSX128BOpnd>; + +def VSLTI_H_N : LSX_I5<0b01110010100001101>, + LSX_I5_DESC_BASE<"vslti.h", vsetlt_v8i16, vsplati16_simm5, LSX128HOpnd>; + +def VSLTI_W_N : LSX_I5<0b01110010100001110>, + LSX_I5_DESC_BASE<"vslti.w", vsetlt_v4i32, vsplati32_simm5, LSX128WOpnd>; + +def VSLTI_D_N : LSX_I5<0b01110010100001111>, + LSX_I5_DESC_BASE<"vslti.d", vsetlt_v2i64, vsplati64_simm5, LSX128DOpnd>; + + +def VSLTI_BU_N : LSX_I5_U<0b01110010100010000>, + LSX_I5_U_DESC_BASE<"vslti.bu", vsetult_v16i8, vsplati8_uimm5, LSX128BOpnd>; + +def VSLTI_HU_N : LSX_I5_U<0b01110010100010001>, + LSX_I5_U_DESC_BASE<"vslti.hu", vsetult_v8i16, vsplati16_uimm5, LSX128HOpnd>; + +def VSLTI_WU_N : LSX_I5_U<0b01110010100010010>, + LSX_I5_U_DESC_BASE<"vslti.wu", vsetult_v4i32, vsplati32_uimm5, LSX128WOpnd>; + +def VSLTI_DU_N : LSX_I5_U<0b01110010100010011>, + LSX_I5_U_DESC_BASE<"vslti.du", vsetult_v2i64, vsplati64_uimm5, LSX128DOpnd>; + + +def VBITSELI_B_N : LSX_I8_U<0b01110011110001>, + LSX_2R_3R_SELECT<"vbitseli.b", vselect, LSX128BOpnd, LSX128BOpnd>; + +} + + +def : LSXPat<(v4f32 (load addrimm12:$addr)), (VLD_W addrimm12:$addr)>; +def : LSXPat<(v2f64 (load addrimm12:$addr)), (VLD_D addrimm12:$addr)>; + +def VST_FW : LSXPat<(store (v4f32 LSX128W:$vj), addrimm12:$addr), + (VST_W LSX128W:$vj, addrimm12:$addr)>; +def VST_FD : LSXPat<(store (v2f64 LSX128D:$vj), addrimm12:$addr), + (VST_D LSX128D:$vj, addrimm12:$addr)>; + +def VNEG_FW : LSXPat<(fneg (v4f32 LSX128W:$vj)), + (VBITREVI_W LSX128W:$vj, 31)>; +def VNEG_FD : LSXPat<(fneg (v2f64 LSX128D:$vj)), + (VBITREVI_D LSX128D:$vj, 63)>; + + +def : LSXPat<(v2i64 (LoongArchVABSD v2i64:$vj, v2i64:$vk, (i32 0))), + (v2i64 (VABSD_D $vj, $vk))>; + +def : LSXPat<(v4i32 (LoongArchVABSD v4i32:$vj, v4i32:$vk, (i32 0))), + (v4i32 (VABSD_W $vj, $vk))>; + +def : LSXPat<(v8i16 (LoongArchVABSD v8i16:$vj, v8i16:$vk, (i32 0))), + (v8i16 (VABSD_H $vj, $vk))>; + +def : LSXPat<(v16i8 (LoongArchVABSD v16i8:$vj, v16i8:$vk, (i32 0))), + (v16i8 (VABSD_B $vj, $vk))>; + +def : LSXPat<(v2i64 (LoongArchUVABSD v2i64:$vj, v2i64:$vk, (i32 0))), + (v2i64 (VABSD_DU $vj, $vk))>; + +def : LSXPat<(v4i32 (LoongArchUVABSD v4i32:$vj, v4i32:$vk, (i32 0))), + (v4i32 (VABSD_WU $vj, $vk))>; + +def : LSXPat<(v8i16 (LoongArchUVABSD v8i16:$vj, v8i16:$vk, (i32 0))), + (v8i16 (VABSD_HU $vj, $vk))>; + +def : LSXPat<(v16i8 (LoongArchUVABSD v16i8:$vj, v16i8:$vk, (i32 0))), + (v16i8 (VABSD_BU $vj, $vk))>; + + +def : LSXPat<(or v16i8:$vj, (shl vsplat_imm_eq_1, v16i8:$vk)), + (VBITSET_B v16i8:$vj, v16i8:$vk)>; +def : LSXPat<(or v8i16:$vj, (shl vsplat_imm_eq_1, v8i16:$vk)), + (VBITSET_H v8i16:$vj, v8i16:$vk)>; +def : LSXPat<(or v4i32:$vj, (shl vsplat_imm_eq_1, v4i32:$vk)), + (VBITSET_W v4i32:$vj, v4i32:$vk)>; +def : LSXPat<(or v2i64:$vj, (shl vsplat_imm_eq_1, v2i64:$vk)), + (VBITSET_D v2i64:$vj, v2i64:$vk)>; + +def : LSXPat<(xor v16i8:$vj, (shl vsplat_imm_eq_1, v16i8:$vk)), + (VBITREV_B v16i8:$vj, v16i8:$vk)>; +def : LSXPat<(xor v8i16:$vj, (shl vsplat_imm_eq_1, v8i16:$vk)), + (VBITREV_H v8i16:$vj, v8i16:$vk)>; +def : LSXPat<(xor v4i32:$vj, (shl vsplat_imm_eq_1, v4i32:$vk)), + (VBITREV_W v4i32:$vj, v4i32:$vk)>; +def : LSXPat<(xor v2i64:$vj, (shl (v2i64 vsplati64_imm_eq_1), v2i64:$vk)), + (VBITREV_D v2i64:$vj, v2i64:$vk)>; + +def : LSXPat<(and v16i8:$vj, (xor (shl vsplat_imm_eq_1, v16i8:$vk), immAllOnesV)), + (VBITCLR_B v16i8:$vj, v16i8:$vk)>; +def : LSXPat<(and v8i16:$vj, (xor (shl vsplat_imm_eq_1, v8i16:$vk), immAllOnesV)), + (VBITCLR_H v8i16:$vj, v8i16:$vk)>; +def : LSXPat<(and 
v4i32:$vj, (xor (shl vsplat_imm_eq_1, v4i32:$vk), immAllOnesV)),
+             (VBITCLR_W v4i32:$vj, v4i32:$vk)>;
+def : LSXPat<(and v2i64:$vj, (xor (shl (v2i64 vsplati64_imm_eq_1), v2i64:$vk), (bitconvert (v4i32 immAllOnesV)))),
+             (VBITCLR_D v2i64:$vj, v2i64:$vk)>;
+
+def vsplati64_imm_eq_63 : PatLeaf<(bitconvert (v4i32 (build_vector))), [{
+  APInt Imm;
+  SDNode *BV = N->getOperand(0).getNode();
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+
+  return selectVSplat(BV, Imm, EltTy.getSizeInBits()) &&
+         Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 63;
+}]>;
+
+def immi32Cst7  : ImmLeaf<i32, [{return isUInt<32>(Imm) && Imm == 7;}]>;
+def immi32Cst15 : ImmLeaf<i32, [{return isUInt<32>(Imm) && Imm == 15;}]>;
+def immi32Cst31 : ImmLeaf<i32, [{return isUInt<32>(Imm) && Imm == 31;}]>;
+
+def vsplati8imm7   : PatFrag<(ops node:$vt),
+                             (and node:$vt, (vsplati8 immi32Cst7))>;
+def vsplati16imm15 : PatFrag<(ops node:$vt),
+                             (and node:$vt, (vsplati16 immi32Cst15))>;
+def vsplati32imm31 : PatFrag<(ops node:$vt),
+                             (and node:$vt, (vsplati32 immi32Cst31))>;
+def vsplati64imm63 : PatFrag<(ops node:$vt),
+                             (and node:$vt, vsplati64_imm_eq_63)>;
+
+class LSXShiftPat<SDNode Node, ValueType VT, LSXInst Insn, dag Vec> :
+  LSXPat<(VT (Node VT:$vs, (VT (and VT:$vt, Vec)))),
+         (VT (Insn VT:$vs, VT:$vt))>;
+
+class LSXBitPat<SDNode Node, ValueType VT, LSXInst Insn, PatFrag Frag> :
+  LSXPat<(VT (Node VT:$vs, (shl vsplat_imm_eq_1, (Frag VT:$vt)))),
+         (VT (Insn VT:$vs, VT:$vt))>;
+
+multiclass LSXShiftPats<SDNode Node, string Insn> {
+  def : LSXShiftPat<Node, v16i8, !cast<LSXInst>(Insn#_B),
+                    (vsplati8 immi32Cst7)>;
+  def : LSXShiftPat<Node, v8i16, !cast<LSXInst>(Insn#_H),
+                    (vsplati16 immi32Cst15)>;
+  def : LSXShiftPat<Node, v4i32, !cast<LSXInst>(Insn#_W),
+                    (vsplati32 immi32Cst31)>;
+  def : LSXPat<(v2i64 (Node v2i64:$vs, (v2i64 (and v2i64:$vt,
+                                               vsplati64_imm_eq_63)))),
+               (v2i64 (!cast<LSXInst>(Insn#_D) v2i64:$vs, v2i64:$vt))>;
+}
+
+multiclass LSXBitPats<SDNode Node, string Insn> {
+  def : LSXBitPat<Node, v16i8, !cast<LSXInst>(Insn#_B), vsplati8imm7>;
+  def : LSXBitPat<Node, v8i16, !cast<LSXInst>(Insn#_H), vsplati16imm15>;
+  def : LSXBitPat<Node, v4i32, !cast<LSXInst>(Insn#_W), vsplati32imm31>;
+  def : LSXPat<(Node v2i64:$vs, (shl (v2i64 vsplati64_imm_eq_1),
+                                     (vsplati64imm63 v2i64:$vt))),
+               (v2i64 (!cast<LSXInst>(Insn#_D) v2i64:$vs, v2i64:$vt))>;
+}
+
+defm : LSXShiftPats<shl, "VSLL">;
+defm : LSXShiftPats<srl, "VSRL">;
+defm : LSXShiftPats<sra, "VSRA">;
+defm : LSXBitPats<xor, "VBITREV">;
+defm : LSXBitPats<or, "VBITSET">;
+
+def : LSXPat<(and v16i8:$vs, (xor (shl vsplat_imm_eq_1,
+                                       (vsplati8imm7 v16i8:$vt)),
+                                  immAllOnesV)),
+             (v16i8 (VBITCLR_B v16i8:$vs, v16i8:$vt))>;
+def : LSXPat<(and v8i16:$vs, (xor (shl vsplat_imm_eq_1,
+                                       (vsplati16imm15 v8i16:$vt)),
+                                  immAllOnesV)),
+             (v8i16 (VBITCLR_H v8i16:$vs, v8i16:$vt))>;
+def : LSXPat<(and v4i32:$vs, (xor (shl vsplat_imm_eq_1,
+                                       (vsplati32imm31 v4i32:$vt)),
+                                  immAllOnesV)),
+             (v4i32 (VBITCLR_W v4i32:$vs, v4i32:$vt))>;
+def : LSXPat<(and v2i64:$vs, (xor (shl (v2i64 vsplati64_imm_eq_1),
+                                       (vsplati64imm63 v2i64:$vt)),
+                                  (bitconvert (v4i32 immAllOnesV)))),
+             (v2i64 (VBITCLR_D v2i64:$vs, v2i64:$vt))>;
+
+
+def : LSXPat<(fdiv (v4f32 (build_vector (f32 fpimm1), (f32 fpimm1), (f32 fpimm1), (f32 fpimm1))), v4f32:$v),
+             (VFRECIP_S v4f32:$v)>;
+
+def : LSXPat<(fdiv (v2f64 (build_vector (f64 fpimm1), (f64 fpimm1))), v2f64:$v),
+             (VFRECIP_D v2f64:$v)>;
+
+def : LSXPat<(fdiv (v4f32 fpimm1), v4f32:$v),
+             (VFRECIP_S v4f32:$v)>;
+
+def : LSXPat<(fdiv (v2f64 fpimm1), v2f64:$v),
+             (VFRECIP_D v2f64:$v)>;
+
+
+def : LSXPat<(fdiv (v4f32 (build_vector (f32 fpimm1), (f32 fpimm1), (f32 fpimm1), (f32 fpimm1))), (fsqrt v4f32:$v)),
+             (VFRSQRT_S v4f32:$v)>;
+
+def : LSXPat<(fdiv (v2f64 (build_vector (f64 fpimm1), (f64 fpimm1))), (fsqrt v2f64:$v)),
+             (VFRSQRT_D v2f64:$v)>;
+
+def : LSXPat<(fdiv (v4f32 fpimm1), (fsqrt v4f32:$v)),
+             (VFRSQRT_S v4f32:$v)>;
+
+def : LSXPat<(fdiv (v2f64 fpimm1), (fsqrt v2f64:$v)),
+             (VFRSQRT_D v2f64:$v)>;
+
+
+def : LSXPat<(abs v2i64:$v),
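+// The pats that follow map further generic nodes onto LSX arithmetic:
+// |x| is selected as vmax(x, vneg(x)); 0 - x as vneg.*; the long
+// build_vector pats below match open-coded averages, (a + b) >> 1 as
+// vavg.* and (a + b + 1) >> 1 as vavgr.* (sra forms for signed, srl for
+// unsigned); and mulhs/mulhu select to vmuh.* and vmuh.*u.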
(VMAX_D v2i64:$v, (VNEG_D v2i64:$v))>; + +def : LSXPat<(abs v4i32:$v), + (VMAX_W v4i32:$v, (VNEG_W v4i32:$v))>; + +def : LSXPat<(abs v8i16:$v), + (VMAX_H v8i16:$v, (VNEG_H v8i16:$v))>; + +def : LSXPat<(abs v16i8:$v), + (VMAX_B v16i8:$v, (VNEG_B v16i8:$v))>; + + +def : LSXPat<(sub (v16i8 immAllZerosV), v16i8:$v), + (VNEG_B v16i8:$v)>; + +def : LSXPat<(sub (v8i16 immAllZerosV), v8i16:$v), + (VNEG_H v8i16:$v)>; + +def : LSXPat<(sub (v4i32 immAllZerosV), v4i32:$v), + (VNEG_W v4i32:$v)>; + +def : LSXPat<(sub (v2i64 immAllZerosV), v2i64:$v), + (VNEG_D v2i64:$v)>; + + +def : LSXPat<(sra + (v16i8 (add + (v16i8 (add LSX128B:$a, LSX128B:$b)), + (v16i8 (srl + (v16i8 (add LSX128B:$a, LSX128B:$b)), + (v16i8 (build_vector (i32 7),(i32 7),(i32 7),(i32 7), + (i32 7),(i32 7),(i32 7),(i32 7), + (i32 7),(i32 7),(i32 7),(i32 7), + (i32 7),(i32 7),(i32 7),(i32 7)) + ) + ) + ) + ) + ), + (v16i8 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1) + ))), + (VAVG_B (v16i8 LSX128B:$a), (v16i8 LSX128B:$b))>; + +def : LSXPat<(sra + (v8i16 (add + (v8i16 (add LSX128H:$a, LSX128H:$b)), + (v8i16 (srl + (v8i16 (add LSX128H:$a, LSX128H:$b)), + (v8i16 (build_vector (i32 15),(i32 15),(i32 15),(i32 15), + (i32 15),(i32 15),(i32 15),(i32 15)) + ) + ) + ) + ) + ), + (v8i16 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1) + ))), + (VAVG_H (v8i16 LSX128H:$a), (v8i16 LSX128H:$b))>; + +def : LSXPat<(sra + (v4i32 (add + (v4i32 (add LSX128W:$a, LSX128W:$b)), + (v4i32 (srl + (v4i32 (add LSX128W:$a, LSX128W:$b)), + (v4i32 (build_vector (i32 31),(i32 31),(i32 31),(i32 31)) + ) + ) + ) + ) + ), + (v4i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1)))), + (VAVG_W (v4i32 LSX128W:$a), (v4i32 LSX128W:$b))>; + +def : LSXPat<(sra + (v2i64 (add + (v2i64 (add LSX128D:$a, LSX128D:$b)), + (v2i64 (srl + (v2i64 (add LSX128D:$a, LSX128D:$b)), + (v2i64 (build_vector (i64 63),(i64 63))) + ) + ) + ) + ), + (v2i64 (build_vector (i64 1),(i64 1)))), + (VAVG_D (v2i64 LSX128D:$a), (v2i64 LSX128D:$b))>; + + + +def : LSXPat<(srl + (v16i8 (add LSX128B:$a, LSX128B:$b)), + (v16i8 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1)) + ) + ), + (VAVG_BU (v16i8 LSX128B:$a), (v16i8 LSX128B:$b))>; + +def : LSXPat<(srl + (v8i16 (add LSX128H:$a, LSX128H:$b)), + (v8i16 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1)) + ) + ), + (VAVG_HU (v8i16 LSX128H:$a), (v8i16 LSX128H:$b))>; + +def : LSXPat<(srl + (v4i32 (add LSX128W:$a, LSX128W:$b)), + (v4i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1)) + ) + ), + (VAVG_WU (v4i32 LSX128W:$a), (v4i32 LSX128W:$b))>; + +def : LSXPat<(srl + (v2i64 (add LSX128D:$a, LSX128D:$b)), + (v2i64 (build_vector (i64 1),(i64 1)) + ) + ), + (VAVG_DU (v2i64 LSX128D:$a), (v2i64 LSX128D:$b))>; + + + + +def : LSXPat<(sra + (v16i8 (add + (v16i8 (add (v16i8 ( + build_vector (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1) + )), + (v16i8 (add LSX128B:$a, LSX128B:$b)) + )), + (v16i8 (srl + (v16i8 ( add (v16i8( + build_vector (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1) + )), + (v16i8 (add LSX128B:$a, LSX128B:$b)) + )), + (v16i8 (build_vector (i32 7),(i32 7),(i32 7),(i32 7), + (i32 7),(i32 7),(i32 7),(i32 7), + (i32 
7),(i32 7),(i32 7),(i32 7), + (i32 7),(i32 7),(i32 7),(i32 7)) + ) + ) + ) + ) + ), + (v16i8 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1) + ))), + (VAVGR_B (v16i8 LSX128B:$a), (v16i8 LSX128B:$b))>; + +def : LSXPat<(sra + (v8i16 (add + (v8i16 (add (v8i16 ( + build_vector (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1) + )), + (v8i16 (add LSX128H:$a, LSX128H:$b)) + )), + (v8i16 (srl + (v8i16 (add (v8i16 ( + build_vector (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1) + )), + (v8i16 (add LSX128H:$a, LSX128H:$b)) + )), + (v8i16 (build_vector (i32 15),(i32 15),(i32 15),(i32 15), + (i32 15),(i32 15),(i32 15),(i32 15)) + ) + ) + ) + ) + ), + (v8i16 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1) + ))), + (VAVGR_H (v8i16 LSX128H:$a), (v8i16 LSX128H:$b))>; + +def : LSXPat<(sra + (v4i32 (add + (v4i32 (add (v4i32 ( + build_vector (i32 1),(i32 1),(i32 1),(i32 1) + )), + (v4i32 (add LSX128W:$a, LSX128W:$b)) + )), + (v4i32 (srl + (v4i32 (add (v4i32 ( + build_vector (i32 1),(i32 1),(i32 1),(i32 1) + )), + (v4i32 (add LSX128W:$a, LSX128W:$b)) + )), + (v4i32 (build_vector (i32 31),(i32 31),(i32 31),(i32 31)) + ) + ) + ) + ) + ), + (v4i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1)))), + (VAVGR_W (v4i32 LSX128W:$a), (v4i32 LSX128W:$b))>; + +def : LSXPat<(sra + (v2i64 (add + (v2i64 (add (v2i64 ( + build_vector (i64 1),(i64 1) + )), + (v2i64 (add LSX128D:$a, LSX128D:$b)) + )), + (v2i64 (srl + (v2i64 (add (v2i64 ( + build_vector (i64 1),(i64 1) + )), + (v2i64 (add LSX128D:$a, LSX128D:$b)) + )), + (v2i64 (build_vector (i64 63),(i64 63))) + ) + ) + ) + ), + (v2i64 (build_vector (i64 1),(i64 1)))), + (VAVGR_D (v2i64 LSX128D:$a), (v2i64 LSX128D:$b))>; + + + + +def : LSXPat<(srl + (v16i8 (add (v16i8 ( + build_vector (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1) + )), + (v16i8 (add LSX128B:$a, LSX128B:$b)) + )), + (v16i8 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1)) + ) + ), + (VAVGR_BU (v16i8 LSX128B:$a), (v16i8 LSX128B:$b))>; + +def : LSXPat<(srl + (v8i16 (add (v8i16 ( + build_vector (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1) + )), + (v8i16 (add LSX128H:$a, LSX128H:$b)) + )), + (v8i16 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1)) + ) + ), + (VAVGR_HU (v8i16 LSX128H:$a), (v8i16 LSX128H:$b))>; + +def : LSXPat<(srl + (v4i32 (add (v4i32 ( + build_vector (i32 1),(i32 1),(i32 1),(i32 1) + )), + (v4i32 (add LSX128W:$a, LSX128W:$b)) + )), + (v4i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1)) + ) + ), + (VAVGR_WU (v4i32 LSX128W:$a), (v4i32 LSX128W:$b))>; + +def : LSXPat<(srl + (v2i64 (add (v2i64 ( + build_vector (i64 1),(i64 1) + )), + (v2i64 (add LSX128D:$a, LSX128D:$b)) + )), + (v2i64 (build_vector (i64 1),(i64 1)) + ) + ), + (VAVGR_DU (v2i64 LSX128D:$a), (v2i64 LSX128D:$b))>; + + +def : LSXPat<(mulhs LSX128D:$a, LSX128D:$b), + (VMUH_D LSX128D:$a, LSX128D:$b)>; + +def : LSXPat<(mulhs LSX128W:$a, LSX128W:$b), + (VMUH_W LSX128W:$a, LSX128W:$b)>; + +def : LSXPat<(mulhs LSX128H:$a, LSX128H:$b), + (VMUH_H LSX128H:$a, LSX128H:$b)>; + +def : LSXPat<(mulhs LSX128B:$a, LSX128B:$b), + (VMUH_B LSX128B:$a, LSX128B:$b)>; + + +def : LSXPat<(mulhu LSX128D:$a, LSX128D:$b), + (VMUH_DU LSX128D:$a, 
LSX128D:$b)>; + +def : LSXPat<(mulhu LSX128W:$a, LSX128W:$b), + (VMUH_WU LSX128W:$a, LSX128W:$b)>; + +def : LSXPat<(mulhu LSX128H:$a, LSX128H:$b), + (VMUH_HU LSX128H:$a, LSX128H:$b)>; + +def : LSXPat<(mulhu LSX128B:$a, LSX128B:$b), + (VMUH_BU LSX128B:$a, LSX128B:$b)>; + + + +//===----------------------------------------------------------------------===// +// Intrinsics +//===----------------------------------------------------------------------===// + +def : LSXPat<(int_loongarch_lsx_vseq_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), + (VSEQ_B LSX128B:$vj, LSX128B:$vk)>; +def : LSXPat<(int_loongarch_lsx_vseq_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), + (VSEQ_H LSX128H:$vj, LSX128H:$vk)>; +def : LSXPat<(int_loongarch_lsx_vseq_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), + (VSEQ_W LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vseq_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), + (VSEQ_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vsle_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), + (VSLE_B LSX128B:$vj, LSX128B:$vk)>; +def : LSXPat<(int_loongarch_lsx_vsle_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), + (VSLE_H LSX128H:$vj, LSX128H:$vk)>; +def : LSXPat<(int_loongarch_lsx_vsle_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), + (VSLE_W LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vsle_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), + (VSLE_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vsle_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), + (VSLE_BU LSX128B:$vj, LSX128B:$vk)>; +def : LSXPat<(int_loongarch_lsx_vsle_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), + (VSLE_HU LSX128H:$vj, LSX128H:$vk)>; +def : LSXPat<(int_loongarch_lsx_vsle_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), + (VSLE_WU LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vsle_du (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), + (VSLE_DU LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vslt_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), + (VSLT_B LSX128B:$vj, LSX128B:$vk)>; +def : LSXPat<(int_loongarch_lsx_vslt_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), + (VSLT_H LSX128H:$vj, LSX128H:$vk)>; +def : LSXPat<(int_loongarch_lsx_vslt_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), + (VSLT_W LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vslt_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), + (VSLT_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vslt_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), + (VSLT_BU LSX128B:$vj, LSX128B:$vk)>; +def : LSXPat<(int_loongarch_lsx_vslt_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), + (VSLT_HU LSX128H:$vj, LSX128H:$vk)>; +def : LSXPat<(int_loongarch_lsx_vslt_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), + (VSLT_WU LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vslt_du (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), + (VSLT_DU LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vadd_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), + (VADD_B LSX128B:$vj, LSX128B:$vk)>; +def : LSXPat<(int_loongarch_lsx_vadd_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), + (VADD_H LSX128H:$vj, LSX128H:$vk)>; +def : LSXPat<(int_loongarch_lsx_vadd_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), + (VADD_W LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vadd_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), + (VADD_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vsub_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), + (VSUB_B LSX128B:$vj, LSX128B:$vk)>; +def : LSXPat<(int_loongarch_lsx_vsub_h 
(v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), + (VSUB_H LSX128H:$vj, LSX128H:$vk)>; +def : LSXPat<(int_loongarch_lsx_vsub_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), + (VSUB_W LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vsub_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), + (VSUB_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vsadd_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), + (VSADD_B LSX128B:$vj, LSX128B:$vk)>; +def : LSXPat<(int_loongarch_lsx_vsadd_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), + (VSADD_H LSX128H:$vj, LSX128H:$vk)>; +def : LSXPat<(int_loongarch_lsx_vsadd_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), + (VSADD_W LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vsadd_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), + (VSADD_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vssub_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), + (VSSUB_B LSX128B:$vj, LSX128B:$vk)>; +def : LSXPat<(int_loongarch_lsx_vssub_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), + (VSSUB_H LSX128H:$vj, LSX128H:$vk)>; +def : LSXPat<(int_loongarch_lsx_vssub_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), + (VSSUB_W LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vssub_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), + (VSSUB_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vsadd_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), + (VSADD_BU LSX128B:$vj, LSX128B:$vk)>; +def : LSXPat<(int_loongarch_lsx_vsadd_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), + (VSADD_HU LSX128H:$vj, LSX128H:$vk)>; +def : LSXPat<(int_loongarch_lsx_vsadd_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), + (VSADD_WU LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vsadd_du (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), + (VSADD_DU LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vssub_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), + (VSSUB_BU LSX128B:$vj, LSX128B:$vk)>; +def : LSXPat<(int_loongarch_lsx_vssub_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), + (VSSUB_HU LSX128H:$vj, LSX128H:$vk)>; +def : LSXPat<(int_loongarch_lsx_vssub_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), + (VSSUB_WU LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vssub_du (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), + (VSSUB_DU LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vhaddw_h_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), + (VHADDW_H_B LSX128B:$vj, LSX128B:$vk)>; +def : LSXPat<(int_loongarch_lsx_vhaddw_w_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), + (VHADDW_W_H LSX128H:$vj, LSX128H:$vk)>; +def : LSXPat<(int_loongarch_lsx_vhaddw_d_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), + (VHADDW_D_W LSX128W:$vj, LSX128W:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vhsubw_h_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), + (VHSUBW_H_B LSX128B:$vj, LSX128B:$vk)>; +def : LSXPat<(int_loongarch_lsx_vhsubw_w_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), + (VHSUBW_W_H LSX128H:$vj, LSX128H:$vk)>; +def : LSXPat<(int_loongarch_lsx_vhsubw_d_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), + (VHSUBW_D_W LSX128W:$vj, LSX128W:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vhaddw_hu_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), + (VHADDW_HU_BU LSX128B:$vj, LSX128B:$vk)>; +def : LSXPat<(int_loongarch_lsx_vhaddw_wu_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), + (VHADDW_WU_HU LSX128H:$vj, LSX128H:$vk)>; +def : LSXPat<(int_loongarch_lsx_vhaddw_du_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), + (VHADDW_DU_WU LSX128W:$vj, LSX128W:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vhsubw_hu_bu (v16i8 
LSX128B:$vj), (v16i8 LSX128B:$vk)), + (VHSUBW_HU_BU LSX128B:$vj, LSX128B:$vk)>; +def : LSXPat<(int_loongarch_lsx_vhsubw_wu_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), + (VHSUBW_WU_HU LSX128H:$vj, LSX128H:$vk)>; +def : LSXPat<(int_loongarch_lsx_vhsubw_du_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), + (VHSUBW_DU_WU LSX128W:$vj, LSX128W:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vadda_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), + (VADDA_B LSX128B:$vj, LSX128B:$vk)>; +def : LSXPat<(int_loongarch_lsx_vadda_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), + (VADDA_H LSX128H:$vj, LSX128H:$vk)>; +def : LSXPat<(int_loongarch_lsx_vadda_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), + (VADDA_W LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vadda_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), + (VADDA_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vabsd_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), + (VABSD_B LSX128B:$vj, LSX128B:$vk)>; +def : LSXPat<(int_loongarch_lsx_vabsd_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), + (VABSD_H LSX128H:$vj, LSX128H:$vk)>; +def : LSXPat<(int_loongarch_lsx_vabsd_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), + (VABSD_W LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vabsd_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), + (VABSD_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vabsd_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), + (VABSD_BU LSX128B:$vj, LSX128B:$vk)>; +def : LSXPat<(int_loongarch_lsx_vabsd_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), + (VABSD_HU LSX128H:$vj, LSX128H:$vk)>; +def : LSXPat<(int_loongarch_lsx_vabsd_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), + (VABSD_WU LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vabsd_du (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), + (VABSD_DU LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vavg_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), + (VAVG_B LSX128B:$vj, LSX128B:$vk)>; +def : LSXPat<(int_loongarch_lsx_vavg_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), + (VAVG_H LSX128H:$vj, LSX128H:$vk)>; +def : LSXPat<(int_loongarch_lsx_vavg_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), + (VAVG_W LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vavg_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), + (VAVG_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vavg_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), + (VAVG_BU LSX128B:$vj, LSX128B:$vk)>; +def : LSXPat<(int_loongarch_lsx_vavg_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), + (VAVG_HU LSX128H:$vj, LSX128H:$vk)>; +def : LSXPat<(int_loongarch_lsx_vavg_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), + (VAVG_WU LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vavg_du (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), + (VAVG_DU LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vavgr_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), + (VAVGR_B LSX128B:$vj, LSX128B:$vk)>; +def : LSXPat<(int_loongarch_lsx_vavgr_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), + (VAVGR_H LSX128H:$vj, LSX128H:$vk)>; +def : LSXPat<(int_loongarch_lsx_vavgr_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), + (VAVGR_W LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vavgr_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), + (VAVGR_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vavgr_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), + (VAVGR_BU LSX128B:$vj, LSX128B:$vk)>; +def : LSXPat<(int_loongarch_lsx_vavgr_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), + (VAVGR_HU LSX128H:$vj, 
LSX128H:$vk)>; +def : LSXPat<(int_loongarch_lsx_vavgr_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), + (VAVGR_WU LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vavgr_du (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), + (VAVGR_DU LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vsrlr_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), + (VSRLR_B LSX128B:$vj, LSX128B:$vk)>; +def : LSXPat<(int_loongarch_lsx_vsrlr_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), + (VSRLR_H LSX128H:$vj, LSX128H:$vk)>; +def : LSXPat<(int_loongarch_lsx_vsrlr_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), + (VSRLR_W LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vsrlr_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), + (VSRLR_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vsrar_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), + (VSRAR_B LSX128B:$vj, LSX128B:$vk)>; +def : LSXPat<(int_loongarch_lsx_vsrar_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), + (VSRAR_H LSX128H:$vj, LSX128H:$vk)>; +def : LSXPat<(int_loongarch_lsx_vsrar_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), + (VSRAR_W LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vsrar_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), + (VSRAR_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vbitset_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), + (VBITSET_B LSX128B:$vj, LSX128B:$vk)>; +def : LSXPat<(int_loongarch_lsx_vbitset_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), + (VBITSET_H LSX128H:$vj, LSX128H:$vk)>; +def : LSXPat<(int_loongarch_lsx_vbitset_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), + (VBITSET_W LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vbitset_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), + (VBITSET_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vbitrev_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), + (VBITREV_B LSX128B:$vj, LSX128B:$vk)>; +def : LSXPat<(int_loongarch_lsx_vbitrev_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), + (VBITREV_H LSX128H:$vj, LSX128H:$vk)>; +def : LSXPat<(int_loongarch_lsx_vbitrev_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), + (VBITREV_W LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vbitrev_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), + (VBITREV_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vfadd_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), + (VFADD_S LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vfadd_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), + (VFADD_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vfsub_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), + (VFSUB_S LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vfsub_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), + (VFSUB_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vfmax_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), + (VFMAX_S LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vfmax_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), + (VFMAX_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vfmin_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), + (VFMIN_S LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vfmin_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), + (VFMIN_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vfmaxa_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), + (VFMAXA_S LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vfmaxa_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), + (VFMAXA_D LSX128D:$vj, LSX128D:$vk)>; + +def : 
LSXPat<(int_loongarch_lsx_vfmina_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), + (VFMINA_S LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vfmina_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), + (VFMINA_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vclo_b (v16i8 LSX128B:$vj)), + (VCLO_B LSX128B:$vj)>; +def : LSXPat<(int_loongarch_lsx_vclo_h (v8i16 LSX128H:$vj)), + (VCLO_H LSX128H:$vj)>; +def : LSXPat<(int_loongarch_lsx_vclo_w (v4i32 LSX128W:$vj)), + (VCLO_W LSX128W:$vj)>; +def : LSXPat<(int_loongarch_lsx_vclo_d (v2i64 LSX128D:$vj)), + (VCLO_D LSX128D:$vj)>; + +def : LSXPat<(int_loongarch_lsx_vflogb_s (v4f32 LSX128W:$vj)), + (VFLOGB_S LSX128W:$vj)>; +def : LSXPat<(int_loongarch_lsx_vflogb_d (v2f64 LSX128D:$vj)), + (VFLOGB_D LSX128D:$vj)>; + +def : LSXPat<(int_loongarch_lsx_vfclass_s (v4f32 LSX128W:$vj)), + (VFCLASS_S LSX128W:$vj)>; +def : LSXPat<(int_loongarch_lsx_vfclass_d (v2f64 LSX128D:$vj)), + (VFCLASS_D LSX128D:$vj)>; + +def : LSXPat<(int_loongarch_lsx_vfrecip_s (v4f32 LSX128W:$vj)), + (VFRECIP_S LSX128W:$vj)>; +def : LSXPat<(int_loongarch_lsx_vfrecip_d (v2f64 LSX128D:$vj)), + (VFRECIP_D LSX128D:$vj)>; + +def : LSXPat<(int_loongarch_lsx_vfrsqrt_s (v4f32 LSX128W:$vj)), + (VFRSQRT_S LSX128W:$vj)>; +def : LSXPat<(int_loongarch_lsx_vfrsqrt_d (v2f64 LSX128D:$vj)), + (VFRSQRT_D LSX128D:$vj)>; + +def : LSXPat<(int_loongarch_lsx_vfcvtl_s_h (v8i16 LSX128H:$vk)), + (VFCVTL_S_H LSX128H:$vk)>; +def : LSXPat<(int_loongarch_lsx_vfcvth_s_h (v8i16 LSX128H:$vk)), + (VFCVTH_S_H LSX128H:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vfcvtl_d_s (v4f32 LSX128W:$vj)), + (VFCVTL_D_S LSX128W:$vj)>; +def : LSXPat<(int_loongarch_lsx_vfcvth_d_s (v4f32 LSX128W:$vj)), + (VFCVTH_D_S LSX128W:$vj)>; + +def : LSXPat<(int_loongarch_lsx_vftint_w_s (v4f32 LSX128W:$vj)), + (VFTINT_W_S LSX128W:$vj)>; +def : LSXPat<(int_loongarch_lsx_vftint_l_d (v2f64 LSX128D:$vj)), + (VFTINT_L_D LSX128D:$vj)>; + +def : LSXPat<(int_loongarch_lsx_vftint_wu_s (v4f32 LSX128W:$vj)), + (VFTINT_WU_S LSX128W:$vj)>; +def : LSXPat<(int_loongarch_lsx_vftint_lu_d (v2f64 LSX128D:$vj)), + (VFTINT_LU_D LSX128D:$vj)>; + +def : LSXPat<(int_loongarch_lsx_vreplgr2vr_b GPR32Opnd:$rj), + (VREPLGR2VR_B GPR32Opnd:$rj)>; +def : LSXPat<(int_loongarch_lsx_vreplgr2vr_h GPR32Opnd:$rj), + (VREPLGR2VR_H GPR32Opnd:$rj)>; +def : LSXPat<(int_loongarch_lsx_vreplgr2vr_w GPR32Opnd:$rj), + (VREPLGR2VR_W GPR32Opnd:$rj)>; +def : LSXPat<(int_loongarch_lsx_vreplgr2vr_d GPR64Opnd:$rj), + (VREPLGR2VR_D GPR64Opnd:$rj)>; + +def : LSXPat<(int_loongarch_lsx_vsrlri_b (v16i8 LSX128B:$vj), (immZExt3:$ui3)), + (VSRLRI_B LSX128B:$vj, uimm3:$ui3)>; +def : LSXPat<(int_loongarch_lsx_vsrlri_h (v8i16 LSX128H:$vj), (immZExt4:$ui4)), + (VSRLRI_H LSX128H:$vj, uimm4i:$ui4)>; +def : LSXPat<(int_loongarch_lsx_vsrlri_w (v4i32 LSX128W:$vj), (immZExt5:$ui5)), + (VSRLRI_W LSX128W:$vj, uimm5:$ui5)>; +def : LSXPat<(int_loongarch_lsx_vsrlri_d (v2i64 LSX128D:$vj), (immZExt6:$ui6)), + (VSRLRI_D LSX128D:$vj, uimm6:$ui6)>; + +def : LSXPat<(int_loongarch_lsx_vsrari_b (v16i8 LSX128B:$vj), (immZExt3:$ui3)), + (VSRARI_B LSX128B:$vj, uimm3:$ui3)>; +def : LSXPat<(int_loongarch_lsx_vsrari_h (v8i16 LSX128H:$vj), (immZExt4:$ui4)), + (VSRARI_H LSX128H:$vj, uimm4i:$ui4)>; +def : LSXPat<(int_loongarch_lsx_vsrari_w (v4i32 LSX128W:$vj), (immZExt5:$ui5)), + (VSRARI_W LSX128W:$vj, uimm5:$ui5)>; +def : LSXPat<(int_loongarch_lsx_vsrari_d (v2i64 LSX128D:$vj), (immZExt6:$ui6)), + (VSRARI_D LSX128D:$vj, uimm6:$ui6)>; + +def : LSXPat<(int_loongarch_lsx_vinsgr2vr_b (v16i8 LSX128B:$vj), GPR32Opnd:$rj, 
(immZExt4:$ui4)), + (VINSGR2VR_B LSX128B:$vj, GPR32Opnd:$rj, (uimm4i:$ui4))>; +def : LSXPat<(int_loongarch_lsx_vinsgr2vr_h (v8i16 LSX128H:$vj), GPR32Opnd:$rj, (immZExt3:$ui3)), + (VINSGR2VR_H LSX128H:$vj, GPR32Opnd:$rj, uimm3:$ui3)>; +def : LSXPat<(int_loongarch_lsx_vinsgr2vr_w (v4i32 LSX128W:$vj), GPR32Opnd:$rj, (immZExt2:$ui2)), + (VINSGR2VR_W LSX128W:$vj, GPR32Opnd:$rj, uimm2:$ui2)>; +def : LSXPat<(int_loongarch_lsx_vinsgr2vr_d (v2i64 LSX128D:$vj), GPR64Opnd:$rj, (immZExt1:$ui1)), + (VINSGR2VR_D LSX128D:$vj, GPR64Opnd:$rj, uimm1i:$ui1)>; + +def : LSXPat<(int_loongarch_lsx_vpickve2gr_b (v16i8 LSX128B:$vj), (immZExt4:$ui4)), + (VPICKVE2GR_B LSX128B:$vj, (uimm4i:$ui4))>; +def : LSXPat<(int_loongarch_lsx_vpickve2gr_h (v8i16 LSX128H:$vj), (immZExt3:$ui3)), + (VPICKVE2GR_H LSX128H:$vj, uimm3:$ui3)>; +def : LSXPat<(int_loongarch_lsx_vpickve2gr_w (v4i32 LSX128W:$vj), (immZExt2:$ui2)), + (VPICKVE2GR_W LSX128W:$vj, uimm2:$ui2)>; +def : LSXPat<(int_loongarch_lsx_vpickve2gr_d (v2i64 LSX128D:$vj), (immZExt1:$ui1)), + (VPICKVE2GR_D LSX128D:$vj, uimm1i:$ui1)>; + +def : LSXPat<(int_loongarch_lsx_vpickve2gr_bu (v16i8 LSX128B:$vj), (immZExt4:$ui4)), + (VPICKVE2GR_BU LSX128B:$vj, (uimm4i:$ui4))>; +def : LSXPat<(int_loongarch_lsx_vpickve2gr_hu (v8i16 LSX128H:$vj), (immZExt3:$ui3)), + (VPICKVE2GR_HU LSX128H:$vj, uimm3:$ui3)>; +def : LSXPat<(int_loongarch_lsx_vpickve2gr_wu (v4i32 LSX128W:$vj), (immZExt2:$ui2)), + (VPICKVE2GR_WU LSX128W:$vj, uimm2:$ui2)>; + +def : LSXPat<(int_loongarch_lsx_vsat_b (v16i8 LSX128B:$vj), (immZExt3:$ui3)), + (VSAT_B LSX128B:$vj, uimm3:$ui3)>; +def : LSXPat<(int_loongarch_lsx_vsat_h (v8i16 LSX128H:$vj), (immZExt4:$ui4)), + (VSAT_H LSX128H:$vj, uimm4i:$ui4)>; +def : LSXPat<(int_loongarch_lsx_vsat_w (v4i32 LSX128W:$vj), (immZExt5:$ui5)), + (VSAT_W LSX128W:$vj, uimm5:$ui5)>; +def : LSXPat<(int_loongarch_lsx_vsat_d (v2i64 LSX128D:$vj), (immZExt6:$ui6)), + (VSAT_D LSX128D:$vj, uimm6:$ui6)>; + +def : LSXPat<(int_loongarch_lsx_vsat_bu (v16i8 LSX128B:$vj), (immZExt3:$ui3)), + (VSAT_BU LSX128B:$vj, uimm3:$ui3)>; +def : LSXPat<(int_loongarch_lsx_vsat_hu (v8i16 LSX128H:$vj), (immZExt4:$ui4)), + (VSAT_HU LSX128H:$vj, uimm4i:$ui4)>; +def : LSXPat<(int_loongarch_lsx_vsat_wu (v4i32 LSX128W:$vj), (immZExt5:$ui5)), + (VSAT_WU LSX128W:$vj, uimm5:$ui5)>; +def : LSXPat<(int_loongarch_lsx_vsat_du (v2i64 LSX128D:$vj), (immZExt6:$ui6)), + (VSAT_DU LSX128D:$vj, uimm6:$ui6)>; + +def : LSXPat<(int_loongarch_lsx_vmskltz_b (v16i8 LSX128B:$vj)), + (VMSKLTZ_B LSX128B:$vj)>; +def : LSXPat<(int_loongarch_lsx_vmskltz_h (v8i16 LSX128H:$vj)), + (VMSKLTZ_H LSX128H:$vj)>; +def : LSXPat<(int_loongarch_lsx_vmskltz_w (v4i32 LSX128W:$vj)), + (VMSKLTZ_W LSX128W:$vj)>; +def : LSXPat<(int_loongarch_lsx_vmskltz_d (v2i64 LSX128D:$vj)), + (VMSKLTZ_D LSX128D:$vj)>; + +def : LSXPat<(int_loongarch_lsx_vsrlni_b_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), + (VSRLNI_B_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; +def : LSXPat<(int_loongarch_lsx_vsrlni_h_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), + (VSRLNI_H_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; +def : LSXPat<(int_loongarch_lsx_vsrlni_w_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), + (VSRLNI_W_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; +def : LSXPat<(int_loongarch_lsx_vsrlni_d_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), + (VSRLNI_D_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; + +def : LSXPat<(int_loongarch_lsx_vssrlni_b_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), + 
(VSSRLNI_B_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; +def : LSXPat<(int_loongarch_lsx_vssrlni_h_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), + (VSSRLNI_H_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; +def : LSXPat<(int_loongarch_lsx_vssrlni_w_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), + (VSSRLNI_W_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; +def : LSXPat<(int_loongarch_lsx_vssrlni_d_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), + (VSSRLNI_D_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; + +def : LSXPat<(int_loongarch_lsx_vssrlni_bu_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), + (VSSRLNI_BU_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; +def : LSXPat<(int_loongarch_lsx_vssrlni_hu_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), + (VSSRLNI_HU_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; +def : LSXPat<(int_loongarch_lsx_vssrlni_wu_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), + (VSSRLNI_WU_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; +def : LSXPat<(int_loongarch_lsx_vssrlni_du_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), + (VSSRLNI_DU_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; + +def : LSXPat<(int_loongarch_lsx_vssrlrni_bu_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), + (VSSRLRNI_BU_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; +def : LSXPat<(int_loongarch_lsx_vssrlrni_hu_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), + (VSSRLRNI_HU_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; +def : LSXPat<(int_loongarch_lsx_vssrlrni_wu_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), + (VSSRLRNI_WU_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; +def : LSXPat<(int_loongarch_lsx_vssrlrni_du_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), + (VSSRLRNI_DU_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; + +def : LSXPat<(int_loongarch_lsx_vsrarni_b_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), + (VSRARNI_B_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; +def : LSXPat<(int_loongarch_lsx_vsrarni_h_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), + (VSRARNI_H_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; +def : LSXPat<(int_loongarch_lsx_vsrarni_w_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), + (VSRARNI_W_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; +def : LSXPat<(int_loongarch_lsx_vsrarni_d_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), + (VSRARNI_D_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; + +def : LSXPat<(int_loongarch_lsx_vssrani_b_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), + (VSSRANI_B_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; +def : LSXPat<(int_loongarch_lsx_vssrani_h_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), + (VSSRANI_H_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; +def : LSXPat<(int_loongarch_lsx_vssrani_w_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), + (VSSRANI_W_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; +def : LSXPat<(int_loongarch_lsx_vssrani_d_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), + (VSSRANI_D_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; + +def : LSXPat<(int_loongarch_lsx_vssrani_bu_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), + (VSSRANI_BU_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; +def : LSXPat<(int_loongarch_lsx_vssrani_hu_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), + (VSSRANI_HU_W 
LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; +def : LSXPat<(int_loongarch_lsx_vssrani_wu_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), + (VSSRANI_WU_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; +def : LSXPat<(int_loongarch_lsx_vssrani_du_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), + (VSSRANI_DU_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; + +def : LSXPat<(int_loongarch_lsx_vssrarni_b_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), + (VSSRARNI_B_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; +def : LSXPat<(int_loongarch_lsx_vssrarni_h_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), + (VSSRARNI_H_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; +def : LSXPat<(int_loongarch_lsx_vssrarni_w_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), + (VSSRARNI_W_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; +def : LSXPat<(int_loongarch_lsx_vssrarni_d_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), + (VSSRARNI_D_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; + +def : LSXPat<(int_loongarch_lsx_vssrarni_bu_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), + (VSSRARNI_BU_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; +def : LSXPat<(int_loongarch_lsx_vssrarni_hu_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), + (VSSRARNI_HU_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; +def : LSXPat<(int_loongarch_lsx_vssrarni_wu_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), + (VSSRARNI_WU_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; +def : LSXPat<(int_loongarch_lsx_vssrarni_du_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), + (VSSRARNI_DU_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; + +def : LSXPat<(load (add iPTR:$vj, GPR64Opnd:$vk)), + (VLDX PtrRC:$vj, GPR64Opnd:$vk)>; + +def : LSXPat<(store (v16i8 LSX128B:$vd), (add iPTR:$vj, GPR64Opnd:$vk)), + (VSTX LSX128B:$vd, PtrRC:$vj, GPR64Opnd:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vshuf_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk), (v16i8 LSX128B:$va)), + (VSHUF_B LSX128B:$vj, LSX128B:$vk, LSX128B:$va)>; + +def : LSXPat<(int_loongarch_lsx_vfcmp_ceq_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), + (VFCMP_CEQ_S LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vfcmp_ceq_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), + (VFCMP_CEQ_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vfcmp_cor_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), + (VFCMP_COR_S LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vfcmp_cor_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), + (VFCMP_COR_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vfcmp_cun_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), + (VFCMP_CUN_S LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vfcmp_cun_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), + (VFCMP_CUN_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vfcmp_cune_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), + (VFCMP_CUNE_S LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vfcmp_cune_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), + (VFCMP_CUNE_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vfcmp_cueq_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), + (VFCMP_CUEQ_S LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vfcmp_cueq_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), + (VFCMP_CUEQ_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vfcmp_cne_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), + (VFCMP_CNE_S 
LSX128W:$vj, LSX128W:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vfcmp_cne_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)),
+             (VFCMP_CNE_D LSX128D:$vj, LSX128D:$vk)>;
+
+def : LSXPat<(int_loongarch_lsx_vfcmp_clt_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)),
+             (VFCMP_CLT_S LSX128W:$vj, LSX128W:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vfcmp_clt_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)),
+             (VFCMP_CLT_D LSX128D:$vj, LSX128D:$vk)>;
+
+def : LSXPat<(int_loongarch_lsx_vfcmp_cult_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)),
+             (VFCMP_CULT_S LSX128W:$vj, LSX128W:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vfcmp_cult_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)),
+             (VFCMP_CULT_D LSX128D:$vj, LSX128D:$vk)>;
+
+def : LSXPat<(int_loongarch_lsx_vfcmp_cle_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)),
+             (VFCMP_CLE_S LSX128W:$vj, LSX128W:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vfcmp_cle_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)),
+             (VFCMP_CLE_D LSX128D:$vj, LSX128D:$vk)>;
+
+def : LSXPat<(int_loongarch_lsx_vfcmp_cule_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)),
+             (VFCMP_CULE_S LSX128W:$vj, LSX128W:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vfcmp_cule_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)),
+             (VFCMP_CULE_D LSX128D:$vj, LSX128D:$vk)>;
+
+def : LSXPat<(int_loongarch_lsx_vftintrz_w_s (v4f32 LSX128W:$vj)),
+             (VFTINTRZ_W_S LSX128W:$vj)>;
+def : LSXPat<(int_loongarch_lsx_vftintrz_l_d (v2f64 LSX128D:$vj)),
+             (VFTINTRZ_L_D LSX128D:$vj)>;
+
+
+def imm_mask : ImmLeaf<i32, [{return isInt<32>(Imm) && Imm == -1;}]>;
+def imm_mask_64 : ImmLeaf<i64, [{return isInt<64>(Imm) && Imm == -1;}]>;
+
+
+def : LSXPat<(xor (v8i16 LSX128H:$vj), (vsplati16 imm_mask)),
+             (NOR_V_H_PSEUDO (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vj))>;
+
+def : LSXPat<(xor (v4i32 LSX128W:$vj), (vsplati32 imm_mask)),
+             (NOR_V_W_PSEUDO (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vj))>;
+
+def : LSXPat<(xor (v2i64 LSX128D:$vj), (vsplati64 imm_mask_64)),
+             (NOR_V_D_PSEUDO (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vj))>;
+
+
+def : LSXPat<(and
+              (v16i8 (xor (v16i8 LSX128B:$vj),(vsplati8 imm_mask))),
+              (v16i8 LSX128B:$vk)
+             ),
+             (VANDN_V (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk))>;
+
+def : LSXPat<(and
+              (v8i16 (xor (v8i16 LSX128H:$vj), (vsplati16 imm_mask))),
+              (v8i16 LSX128H:$vk)
+             ),
+             (VANDN_H_PSEUDO (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk))>;
+
+def : LSXPat<(and
+              (v4i32 (xor (v4i32 LSX128W:$vj), (vsplati32 imm_mask))),
+              (v4i32 LSX128W:$vk)
+             ),
+             (VANDN_W_PSEUDO (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk))>;
+
+def : LSXPat<(and
+              (v2i64 (xor (v2i64 LSX128D:$vj), (vsplati64 imm_mask_64))),
+              (v2i64 LSX128D:$vk)
+             ),
+             (VANDN_D_PSEUDO (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk))>;
+
+
+def : LSXPat<(or
+              (v16i8 LSX128B:$vj),
+              (v16i8 (xor (v16i8 LSX128B:$vk), (vsplati8 imm_mask)))
+             ),
+             (VORN_V (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk))>;
+
+def : LSXPat<(or
+              (v8i16 LSX128H:$vj),
+              (v8i16 (xor (v8i16 LSX128H:$vk), (vsplati16 imm_mask)))
+             ),
+             (VORN_H_PSEUDO (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk))>;
+
+def : LSXPat<(or
+              (v4i32 LSX128W:$vj),
+              (v4i32 (xor (v4i32 LSX128W:$vk), (vsplati32 imm_mask)))
+             ),
+             (VORN_W_PSEUDO (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk))>;
+
+def : LSXPat<(or
+              (v2i64 LSX128D:$vj),
+              (v2i64 (xor (v2i64 LSX128D:$vk), (vsplati64 imm_mask_64)))
+             ),
+             (VORN_D_PSEUDO (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk))>;
+
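+
+// A sum of element-wise absolute values folds to a single vadda.{b,h,w,d}.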
+def : LSXPat<(add (v2i64 (abs LSX128D:$a)), (v2i64 (abs LSX128D:$b))),
+             (VADDA_D (v2i64 LSX128D:$a),(v2i64 LSX128D:$b))>;
+
+def : LSXPat<(add (v4i32 (abs LSX128W:$a)), (v4i32 (abs LSX128W:$b))),
+             (VADDA_W (v4i32 LSX128W:$a),(v4i32 LSX128W:$b))>;
+
+def : LSXPat<(add (v8i16 (abs LSX128H:$a)), (v8i16 (abs LSX128H:$b))),
+             (VADDA_H (v8i16 LSX128H:$a),(v8i16 LSX128H:$b))>;
+
+def : LSXPat<(add (v16i8 (abs LSX128B:$a)), (v16i8 (abs LSX128B:$b))),
+             (VADDA_B (v16i8 LSX128B:$a),(v16i8 LSX128B:$b))>;
+
+
+def : LSXPat<(and v16i8:$vj, (xor (shl vsplat_imm_eq_1, v16i8:$vk),
+                                  (vsplati8 imm_mask))),
+             (VBITCLR_B v16i8:$vj, v16i8:$vk)>;
+
+def : LSXPat<(and v8i16:$vj, (xor (shl vsplat_imm_eq_1, v8i16:$vk),
+                                  (vsplati16 imm_mask))),
+             (VBITCLR_H v8i16:$vj, v8i16:$vk)>;
+
+def : LSXPat<(and v4i32:$vj, (xor (shl vsplat_imm_eq_1, v4i32:$vk),
+                                  (vsplati32 imm_mask))),
+             (VBITCLR_W v4i32:$vj, v4i32:$vk)>;
+
+def : LSXPat<(and v2i64:$vj, (xor (shl vsplat_imm_eq_1, v2i64:$vk),
+                                  (vsplati64 imm_mask_64))),
+             (VBITCLR_D v2i64:$vj, v2i64:$vk)>;
diff --git a/lib/Target/LoongArch/LoongArchMCInstLower.cpp b/lib/Target/LoongArch/LoongArchMCInstLower.cpp
new file mode 100644
index 00000000..bf70b09d
--- /dev/null
+++ b/lib/Target/LoongArch/LoongArchMCInstLower.cpp
@@ -0,0 +1,342 @@
+//===- LoongArchMCInstLower.cpp - Convert LoongArch MachineInstr to MCInst ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower LoongArch MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchMCInstLower.h"
+#include "MCTargetDesc/LoongArchBaseInfo.h"
+#include "MCTargetDesc/LoongArchMCExpr.h"
+#include "LoongArchAsmPrinter.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+
+using namespace llvm;
+
+LoongArchMCInstLower::LoongArchMCInstLower(LoongArchAsmPrinter &asmprinter)
+    : AsmPrinter(asmprinter) {}
+
+void LoongArchMCInstLower::Initialize(MCContext *C) {
+  Ctx = C;
+}
+
+MCOperand LoongArchMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
+                                                   MachineOperandType MOTy,
+                                                   unsigned Offset) const {
+  MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None;
+  LoongArchMCExpr::LoongArchExprKind TargetKind = LoongArchMCExpr::MEK_None;
+  const MCSymbol *Symbol;
+
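+  // Translate the operand's target flag into the matching LoongArchMCExpr
+  // relocation kind; an unknown flag here is a lowering bug.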
+  switch(MO.getTargetFlags()) {
+  default:
+    llvm_unreachable("Invalid target flag!");
+  case LoongArchII::MO_NO_FLAG:
+    break;
+  case LoongArchII::MO_GOT_HI:
+    TargetKind = LoongArchMCExpr::MEK_GOT_HI;
+    break;
+  case LoongArchII::MO_GOT_LO:
+    TargetKind = LoongArchMCExpr::MEK_GOT_LO;
+    break;
+  case LoongArchII::MO_GOT_RRHI:
+    TargetKind = LoongArchMCExpr::MEK_GOT_RRHI;
+    break;
+  case LoongArchII::MO_GOT_RRHIGHER:
+    TargetKind = LoongArchMCExpr::MEK_GOT_RRHIGHER;
+    break;
+  case LoongArchII::MO_GOT_RRHIGHEST:
+    TargetKind = LoongArchMCExpr::MEK_GOT_RRHIGHEST;
+    break;
+  case LoongArchII::MO_GOT_RRLO:
+    TargetKind = LoongArchMCExpr::MEK_GOT_RRLO;
+    break;
+  case LoongArchII::MO_PCREL_HI:
+    TargetKind = LoongArchMCExpr::MEK_PCREL_HI;
+    break;
+  case LoongArchII::MO_PCREL_LO:
+    TargetKind = LoongArchMCExpr::MEK_PCREL_LO;
+    break;
+  case LoongArchII::MO_PCREL_RRHI:
+    TargetKind = LoongArchMCExpr::MEK_PCREL_RRHI;
+    break;
+  case LoongArchII::MO_PCREL_RRHIGHER:
+    TargetKind = LoongArchMCExpr::MEK_PCREL_RRHIGHER;
+    break;
+  case LoongArchII::MO_PCREL_RRHIGHEST:
+    TargetKind = LoongArchMCExpr::MEK_PCREL_RRHIGHEST;
+    break;
+  case LoongArchII::MO_PCREL_RRLO:
+    TargetKind = LoongArchMCExpr::MEK_PCREL_RRLO;
+    break;
+  case LoongArchII::MO_TLSIE_HI:
+    TargetKind = LoongArchMCExpr::MEK_TLSIE_HI;
+    break;
+  case LoongArchII::MO_TLSIE_LO:
+    TargetKind = LoongArchMCExpr::MEK_TLSIE_LO;
+    break;
+  case LoongArchII::MO_TLSIE_RRHI:
+    TargetKind = LoongArchMCExpr::MEK_TLSIE_RRHI;
+    break;
+  case LoongArchII::MO_TLSIE_RRHIGHER:
+    TargetKind = LoongArchMCExpr::MEK_TLSIE_RRHIGHER;
+    break;
+  case LoongArchII::MO_TLSIE_RRHIGHEST:
+    TargetKind = LoongArchMCExpr::MEK_TLSIE_RRHIGHEST;
+    break;
+  case LoongArchII::MO_TLSIE_RRLO:
+    TargetKind = LoongArchMCExpr::MEK_TLSIE_RRLO;
+    break;
+  case LoongArchII::MO_TLSLE_HI:
+    TargetKind = LoongArchMCExpr::MEK_TLSLE_HI;
+    break;
+  case LoongArchII::MO_TLSLE_HIGHER:
+    TargetKind = LoongArchMCExpr::MEK_TLSLE_HIGHER;
+    break;
+  case LoongArchII::MO_TLSLE_HIGHEST:
+    TargetKind = LoongArchMCExpr::MEK_TLSLE_HIGHEST;
+    break;
+  case LoongArchII::MO_TLSLE_LO:
+    TargetKind = LoongArchMCExpr::MEK_TLSLE_LO;
+    break;
+  case LoongArchII::MO_TLSGD_HI:
+    TargetKind = LoongArchMCExpr::MEK_TLSGD_HI;
+    break;
+  case LoongArchII::MO_TLSGD_LO:
+    TargetKind = LoongArchMCExpr::MEK_TLSGD_LO;
+    break;
+  case LoongArchII::MO_TLSGD_RRHI:
+    TargetKind = LoongArchMCExpr::MEK_TLSGD_RRHI;
+    break;
+  case LoongArchII::MO_TLSGD_RRHIGHER:
+    TargetKind = LoongArchMCExpr::MEK_TLSGD_RRHIGHER;
+    break;
+  case LoongArchII::MO_TLSGD_RRHIGHEST:
+    TargetKind = LoongArchMCExpr::MEK_TLSGD_RRHIGHEST;
+    break;
+  case LoongArchII::MO_TLSGD_RRLO:
+    TargetKind = LoongArchMCExpr::MEK_TLSGD_RRLO;
+    break;
+  case LoongArchII::MO_ABS_HI:
+    TargetKind = LoongArchMCExpr::MEK_ABS_HI;
+    break;
+  case LoongArchII::MO_ABS_HIGHER:
+    TargetKind = LoongArchMCExpr::MEK_ABS_HIGHER;
+    break;
+  case LoongArchII::MO_ABS_HIGHEST:
+    TargetKind = LoongArchMCExpr::MEK_ABS_HIGHEST;
+    break;
+  case LoongArchII::MO_ABS_LO:
+    TargetKind = LoongArchMCExpr::MEK_ABS_LO;
+    break;
+  case LoongArchII::MO_CALL_HI:
+    TargetKind = LoongArchMCExpr::MEK_CALL_HI;
+    break;
+  case LoongArchII::MO_CALL_LO:
+    TargetKind = LoongArchMCExpr::MEK_CALL_LO;
+    break;
+  }
+
+  switch (MOTy) {
+  case MachineOperand::MO_MachineBasicBlock:
+    Symbol = MO.getMBB()->getSymbol();
+    break;
+
+  case MachineOperand::MO_GlobalAddress:
+    Symbol = AsmPrinter.getSymbol(MO.getGlobal());
+    Offset += MO.getOffset();
+    break;
+
+  case MachineOperand::MO_BlockAddress:
+    Symbol = AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress());
+    Offset += MO.getOffset();
+    break;
+
+  case MachineOperand::MO_ExternalSymbol:
+    Symbol = AsmPrinter.GetExternalSymbolSymbol(MO.getSymbolName());
+    Offset += MO.getOffset();
+    break;
+
+  case MachineOperand::MO_MCSymbol:
+    Symbol = MO.getMCSymbol();
+    Offset += MO.getOffset();
+    break;
+
+  case MachineOperand::MO_JumpTableIndex:
+    Symbol = AsmPrinter.GetJTISymbol(MO.getIndex());
+    break;
+
+  case MachineOperand::MO_ConstantPoolIndex:
+    Symbol = AsmPrinter.GetCPISymbol(MO.getIndex());
+    Offset += MO.getOffset();
+    break;
+
+  default:
+    llvm_unreachable("<unknown operand type>");
+  }
+
+  const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, Kind, *Ctx);
+
+  if (Offset) {
+    // Assume offset is never negative.
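+    // The addend is folded into the symbol expression as Symbol + Offset.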
+    assert(Offset > 0);
+
+    Expr = MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Offset, *Ctx),
+                                   *Ctx);
+  }
+
+  if (TargetKind != LoongArchMCExpr::MEK_None)
+    Expr = LoongArchMCExpr::create(TargetKind, Expr, *Ctx);
+
+  return MCOperand::createExpr(Expr);
+}
+
+MCOperand LoongArchMCInstLower::LowerOperand(const MachineOperand &MO,
+                                             unsigned offset) const {
+  MachineOperandType MOTy = MO.getType();
+
+  switch (MOTy) {
+  default: llvm_unreachable("unknown operand type");
+  case MachineOperand::MO_Register:
+    // Ignore all implicit register operands.
+    if (MO.isImplicit()) break;
+    return MCOperand::createReg(MO.getReg());
+  case MachineOperand::MO_Immediate:
+    return MCOperand::createImm(MO.getImm() + offset);
+  case MachineOperand::MO_MachineBasicBlock:
+  case MachineOperand::MO_GlobalAddress:
+  case MachineOperand::MO_ExternalSymbol:
+  case MachineOperand::MO_MCSymbol:
+  case MachineOperand::MO_JumpTableIndex:
+  case MachineOperand::MO_ConstantPoolIndex:
+  case MachineOperand::MO_BlockAddress:
+    return LowerSymbolOperand(MO, MOTy, offset);
+  case MachineOperand::MO_RegisterMask:
+    break;
+  }
+
+  return MCOperand();
+}
+
+MCOperand LoongArchMCInstLower::createSub(MachineBasicBlock *BB1,
+                                          MachineBasicBlock *BB2,
+                                          LoongArchMCExpr::LoongArchExprKind Kind) const {
+  const MCSymbolRefExpr *Sym1 = MCSymbolRefExpr::create(BB1->getSymbol(), *Ctx);
+  const MCSymbolRefExpr *Sym2 = MCSymbolRefExpr::create(BB2->getSymbol(), *Ctx);
+  const MCBinaryExpr *Sub = MCBinaryExpr::createSub(Sym1, Sym2, *Ctx);
+
+  return MCOperand::createExpr(LoongArchMCExpr::create(Kind, Sub, *Ctx));
+}
+
+void LoongArchMCInstLower::lowerLongBranchADDI(const MachineInstr *MI,
+                                               MCInst &OutMI, int Opcode) const {
+  OutMI.setOpcode(Opcode);
+
+  LoongArchMCExpr::LoongArchExprKind Kind;
+  unsigned TargetFlags = MI->getOperand(2).getTargetFlags();
+  switch (TargetFlags) {
+  case LoongArchII::MO_ABS_HIGHEST:
+    Kind = LoongArchMCExpr::MEK_ABS_HIGHEST;
+    break;
+  case LoongArchII::MO_ABS_HIGHER:
+    Kind = LoongArchMCExpr::MEK_ABS_HIGHER;
+    break;
+  case LoongArchII::MO_ABS_HI:
+    Kind = LoongArchMCExpr::MEK_ABS_HI;
+    break;
+  case LoongArchII::MO_ABS_LO:
+    Kind = LoongArchMCExpr::MEK_ABS_LO;
+    break;
+  default:
+    report_fatal_error("Unexpected flags for lowerLongBranchADDI");
+  }
+
+  // Lower two register operands.
+  for (unsigned I = 0, E = 2; I != E; ++I) {
+    const MachineOperand &MO = MI->getOperand(I);
+    OutMI.addOperand(LowerOperand(MO));
+  }
+
+  if (MI->getNumOperands() == 3) {
+    // Lower the branch target as a symbol operand.
+    const MCExpr *Expr =
+        MCSymbolRefExpr::create(MI->getOperand(2).getMBB()->getSymbol(), *Ctx);
+    const LoongArchMCExpr *LoongArchExpr = LoongArchMCExpr::create(Kind, Expr, *Ctx);
+    OutMI.addOperand(MCOperand::createExpr(LoongArchExpr));
+  } else if (MI->getNumOperands() == 4) {
+    // Create %lo($tgt-$baltgt) or %hi($tgt-$baltgt).
+    OutMI.addOperand(createSub(MI->getOperand(2).getMBB(),
+                               MI->getOperand(3).getMBB(), Kind));
+  }
+}
+
+void LoongArchMCInstLower::lowerLongBranchPCADDU12I(const MachineInstr *MI,
+                                                    MCInst &OutMI, int Opcode) const {
+  OutMI.setOpcode(Opcode);
+
+  LoongArchMCExpr::LoongArchExprKind Kind;
+  unsigned TargetFlags = MI->getOperand(1).getTargetFlags();
+  switch (TargetFlags) {
+  case LoongArchII::MO_PCREL_HI:
+    Kind = LoongArchMCExpr::MEK_PCREL_HI;
+    break;
+  case LoongArchII::MO_PCREL_LO:
+    Kind = LoongArchMCExpr::MEK_PCREL_LO;
+    break;
+  default:
+    report_fatal_error("Unexpected flags for lowerLongBranchPCADDU12I");
+  }
+
+  // Lower the register operand.
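+  // (The PCADDU12I pseudo carries only a destination register and the
+  // PC-relative target symbol lowered below.)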
+  const MachineOperand &MO = MI->getOperand(0);
+  OutMI.addOperand(LowerOperand(MO));
+
+  const MCExpr *Expr =
+      MCSymbolRefExpr::create(MI->getOperand(1).getMBB()->getSymbol(), *Ctx);
+  const LoongArchMCExpr *LoongArchExpr = LoongArchMCExpr::create(Kind, Expr, *Ctx);
+  OutMI.addOperand(MCOperand::createExpr(LoongArchExpr));
+}
+
+bool LoongArchMCInstLower::lowerLongBranch(const MachineInstr *MI,
+                                           MCInst &OutMI) const {
+  switch (MI->getOpcode()) {
+  default:
+    return false;
+  case LoongArch::LONG_BRANCH_ADDIW:
+  case LoongArch::LONG_BRANCH_ADDIW2Op:
+    lowerLongBranchADDI(MI, OutMI, LoongArch::ADDI_W);
+    return true;
+  case LoongArch::LONG_BRANCH_ADDID:
+  case LoongArch::LONG_BRANCH_ADDID2Op:
+    lowerLongBranchADDI(MI, OutMI, LoongArch::ADDI_D);
+    return true;
+  case LoongArch::LONG_BRANCH_PCADDU12I:
+    lowerLongBranchPCADDU12I(MI, OutMI, LoongArch::PCADDU12I);
+    return true;
+  }
+}
+
+void LoongArchMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
+  if (lowerLongBranch(MI, OutMI))
+    return;
+
+  OutMI.setOpcode(MI->getOpcode());
+
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    MCOperand MCOp = LowerOperand(MO);
+
+    if (MCOp.isValid())
+      OutMI.addOperand(MCOp);
+  }
+}
diff --git a/lib/Target/LoongArch/LoongArchMCInstLower.h b/lib/Target/LoongArch/LoongArchMCInstLower.h
new file mode 100644
index 00000000..6463a7b6
--- /dev/null
+++ b/lib/Target/LoongArch/LoongArchMCInstLower.h
@@ -0,0 +1,55 @@
+//===- LoongArchMCInstLower.h - Lower MachineInstr to MCInst --------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMCINSTLOWER_H
+#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMCINSTLOWER_H
+
+#include "MCTargetDesc/LoongArchMCExpr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+class MachineBasicBlock;
+class MachineInstr;
+class MCContext;
+class MCInst;
+class MCOperand;
+class LoongArchAsmPrinter;
+
+/// LoongArchMCInstLower - This class is used to lower a MachineInstr into an
+/// MCInst.
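+/// The lowering itself is stateless apart from the MCContext installed via
+/// Initialize(), which owns every symbol and expression created here.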
+class LLVM_LIBRARY_VISIBILITY LoongArchMCInstLower {
+  using MachineOperandType = MachineOperand::MachineOperandType;
+
+  MCContext *Ctx;
+  LoongArchAsmPrinter &AsmPrinter;
+
+public:
+  LoongArchMCInstLower(LoongArchAsmPrinter &asmprinter);
+
+  void Initialize(MCContext *C);
+  void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+  MCOperand LowerOperand(const MachineOperand& MO, unsigned offset = 0) const;
+
+private:
+  MCOperand LowerSymbolOperand(const MachineOperand &MO,
+                               MachineOperandType MOTy, unsigned Offset) const;
+  MCOperand createSub(MachineBasicBlock *BB1, MachineBasicBlock *BB2,
+                      LoongArchMCExpr::LoongArchExprKind Kind) const;
+  void lowerLongBranchLUi(const MachineInstr *MI, MCInst &OutMI) const;
+  void lowerLongBranchADDI(const MachineInstr *MI, MCInst &OutMI,
+                           int Opcode) const;
+  void lowerLongBranchPCADDU12I(const MachineInstr *MI, MCInst &OutMI,
+                                int Opcode) const;
+  bool lowerLongBranch(const MachineInstr *MI, MCInst &OutMI) const;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMCINSTLOWER_H
diff --git a/lib/Target/LoongArch/LoongArchMachineFunction.cpp b/lib/Target/LoongArch/LoongArchMachineFunction.cpp
new file mode 100644
index 00000000..90baa8fd
--- /dev/null
+++ b/lib/Target/LoongArch/LoongArchMachineFunction.cpp
@@ -0,0 +1,51 @@
+//===-- LoongArchMachineFunctionInfo.cpp - Private data used for LoongArch ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchMachineFunction.h"
+#include "MCTargetDesc/LoongArchABIInfo.h"
+#include "LoongArchSubtarget.h"
+#include "LoongArchTargetMachine.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+LoongArchFunctionInfo::~LoongArchFunctionInfo() = default;
+
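+// Reserve one spill slot per exception-handling data register. The slot size
+// and alignment follow the ABI's GPR spill class (GPR64 under LP64).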
+void LoongArchFunctionInfo::createEhDataRegsFI() {
+  const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+  for (int I = 0; I < 4; ++I) {
+    const TargetRegisterClass &RC =
+        static_cast<const LoongArchTargetMachine &>(MF.getTarget())
+                .getABI()
+                .IsLP64()
+            ? LoongArch::GPR64RegClass
+            : LoongArch::GPR32RegClass;
+
+    EhDataRegFI[I] = MF.getFrameInfo().CreateStackObject(TRI.getSpillSize(RC),
+                                                         TRI.getSpillAlign(RC), false);
+  }
+}
+
+bool LoongArchFunctionInfo::isEhDataRegFI(int FI) const {
+  return CallsEhReturn && (FI == EhDataRegFI[0] || FI == EhDataRegFI[1]
+                        || FI == EhDataRegFI[2] || FI == EhDataRegFI[3]);
+}
+
+MachinePointerInfo LoongArchFunctionInfo::callPtrInfo(const char *ES) {
+  return MachinePointerInfo(MF.getPSVManager().getExternalSymbolCallEntry(ES));
+}
+
+MachinePointerInfo LoongArchFunctionInfo::callPtrInfo(const GlobalValue *GV) {
+  return MachinePointerInfo(MF.getPSVManager().getGlobalValueCallEntry(GV));
+}
+
+void LoongArchFunctionInfo::anchor() {}
diff --git a/lib/Target/LoongArch/LoongArchMachineFunction.h b/lib/Target/LoongArch/LoongArchMachineFunction.h
new file mode 100644
index 00000000..b1c805c0
--- /dev/null
+++ b/lib/Target/LoongArch/LoongArchMachineFunction.h
@@ -0,0 +1,98 @@
+//===- LoongArchMachineFunctionInfo.h - Private data used for LoongArch ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the LoongArch specific subclass of MachineFunctionInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMACHINEFUNCTION_H
+#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMACHINEFUNCTION_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include <map>
+
+namespace llvm {
+
+/// LoongArchFunctionInfo - This class is derived from MachineFunctionInfo and
+/// contains private LoongArch target-specific information for each
+/// MachineFunction.
+class LoongArchFunctionInfo : public MachineFunctionInfo {
+public:
+  LoongArchFunctionInfo(MachineFunction &MF) : MF(MF) {}
+
+  ~LoongArchFunctionInfo() override;
+
+  unsigned getSRetReturnReg() const { return SRetReturnReg; }
+  void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
+
+  int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+  void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
+
+  unsigned getVarArgsSaveSize() const { return VarArgsSaveSize; }
+  void setVarArgsSaveSize(int Size) { VarArgsSaveSize = Size; }
+
+  bool hasByvalArg() const { return HasByvalArg; }
+  void setFormalArgInfo(unsigned Size, bool HasByval) {
+    IncomingArgSize = Size;
+    HasByvalArg = HasByval;
+  }
+
+  unsigned getIncomingArgSize() const { return IncomingArgSize; }
+
+  bool callsEhReturn() const { return CallsEhReturn; }
+  void setCallsEhReturn() { CallsEhReturn = true; }
+
+  void createEhDataRegsFI();
+  int getEhDataRegFI(unsigned Reg) const { return EhDataRegFI[Reg]; }
+  bool isEhDataRegFI(int FI) const;
+
+  /// Create a MachinePointerInfo that has an ExternalSymbolPseudoSourceValue
+  /// object representing a GOT entry for an external function.
+  MachinePointerInfo callPtrInfo(const char *ES);
+
+  /// Create a MachinePointerInfo that has a GlobalValuePseudoSourceValue object
+  /// representing a GOT entry for a global function.
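+  /// The pseudo-source values let later passes tell these GOT loads apart
+  /// from ordinary stack or global memory accesses.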
+  MachinePointerInfo callPtrInfo(const GlobalValue *GV);
+
+  void setSaveS2() { SaveS2 = true; }
+  bool hasSaveS2() const { return SaveS2; }
+
+private:
+  virtual void anchor();
+
+  MachineFunction &MF;
+
+  /// SRetReturnReg - Some subtargets require that sret lowering includes
+  /// returning the value of the returned struct in a register. This field
+  /// holds the virtual register into which the sret argument is passed.
+  unsigned SRetReturnReg = 0;
+
+  /// VarArgsFrameIndex - FrameIndex for start of varargs area.
+  int VarArgsFrameIndex = 0;
+  int VarArgsSaveSize = 0;
+
+  /// True if function has a byval argument.
+  bool HasByvalArg;
+
+  /// Size of incoming argument area.
+  unsigned IncomingArgSize;
+
+  /// CallsEhReturn - Whether the function calls llvm.eh.return.
+  bool CallsEhReturn = false;
+
+  /// Frame objects for spilling eh data registers.
+  int EhDataRegFI[4];
+
+  /// Whether register S2 needs to be saved.
+  bool SaveS2 = false;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMACHINEFUNCTION_H
diff --git a/lib/Target/LoongArch/LoongArchModuleISelDAGToDAG.cpp b/lib/Target/LoongArch/LoongArchModuleISelDAGToDAG.cpp
new file mode 100644
index 00000000..8dbf30f2
--- /dev/null
+++ b/lib/Target/LoongArch/LoongArchModuleISelDAGToDAG.cpp
@@ -0,0 +1,53 @@
+//===----------------------------------------------------------------------===//
+// Instruction Selector Subtarget Control
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// This file defines a pass used to change the subtarget for the
+// LoongArch Instruction selector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArch.h"
+#include "LoongArchTargetMachine.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "loongarch-isel"
+
+namespace {
+  class LoongArchModuleDAGToDAGISel : public MachineFunctionPass {
+  public:
+    static char ID;
+
+    LoongArchModuleDAGToDAGISel() : MachineFunctionPass(ID) {}
+
+    // Pass Name
+    StringRef getPassName() const override {
+      return "LoongArch DAG->DAG Pattern Instruction Selection";
+    }
+
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
+      AU.addRequired<TargetPassConfig>();
+      AU.addPreserved<StackProtector>();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+    bool runOnMachineFunction(MachineFunction &MF) override;
+  };
+
+  char LoongArchModuleDAGToDAGISel::ID = 0;
+}
+
+bool LoongArchModuleDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
+  LLVM_DEBUG(errs() << "In LoongArchModuleDAGToDAGISel::runMachineFunction\n");
+  return false;
+}
+
+llvm::FunctionPass *llvm::createLoongArchModuleISelDagPass() {
+  return new LoongArchModuleDAGToDAGISel();
+}
diff --git a/lib/Target/LoongArch/LoongArchRegisterInfo.cpp b/lib/Target/LoongArch/LoongArchRegisterInfo.cpp
new file mode 100644
index 00000000..4fea3a79
--- /dev/null
+++ b/lib/Target/LoongArch/LoongArchRegisterInfo.cpp
@@ -0,0 +1,355 @@
+//===- LoongArchRegisterInfo.cpp - LoongArch Register Information -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the LoongArch implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchRegisterInfo.h"
+#include "MCTargetDesc/LoongArchABIInfo.h"
+#include "LoongArch.h"
+#include "LoongArchMachineFunction.h"
+#include "LoongArchSubtarget.h"
+#include "LoongArchTargetMachine.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdint>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "loongarch-reg-info"
+
+#define GET_REGINFO_TARGET_DESC
+#include "LoongArchGenRegisterInfo.inc"
+
+LoongArchRegisterInfo::LoongArchRegisterInfo()
+    : LoongArchGenRegisterInfo(LoongArch::RA) {}
+
+unsigned LoongArchRegisterInfo::getPICCallReg() { return LoongArch::T8; }
+
+const TargetRegisterClass *
+LoongArchRegisterInfo::getPointerRegClass(const MachineFunction &MF,
+                                          unsigned Kind) const {
+  LoongArchABIInfo ABI = MF.getSubtarget<LoongArchSubtarget>().getABI();
+  LoongArchPtrClass PtrClassKind = static_cast<LoongArchPtrClass>(Kind);
+
+  switch (PtrClassKind) {
+  case LoongArchPtrClass::Default:
+    return ABI.ArePtrs64bit() ? &LoongArch::GPR64RegClass
+                              : &LoongArch::GPR32RegClass;
+  case LoongArchPtrClass::StackPointer:
+    return ABI.ArePtrs64bit() ? &LoongArch::SP64RegClass
+                              : &LoongArch::SP32RegClass;
+  }
+
+  llvm_unreachable("Unknown pointer kind");
+}
+
+unsigned
+LoongArchRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
+                                           MachineFunction &MF) const {
+  switch (RC->getID()) {
+  default:
+    return 0;
+  case LoongArch::GPR32RegClassID:
+  case LoongArch::GPR64RegClassID: {
+    const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+    return 28 - TFI->hasFP(MF);
+  }
+  case LoongArch::FGR32RegClassID:
+    return 32;
+  case LoongArch::FGR64RegClassID:
+    return 32;
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// Callee Saved Registers methods
+//===----------------------------------------------------------------------===//
+
+/// LoongArch Callee Saved Registers
+const MCPhysReg *
+LoongArchRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+  const LoongArchSubtarget &Subtarget = MF->getSubtarget<LoongArchSubtarget>();
+
+  if ((Subtarget.hasBasicF() && !Subtarget.hasBasicD()))
+    return CSR_SingleFloatOnly_SaveList;
+
+  if (Subtarget.isABI_LP64())
+    return CSR_LP64_SaveList;
+
+  return CSR_ILP32_SaveList;
+}
+
+const uint32_t *
+LoongArchRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+                                            CallingConv::ID) const {
+  const LoongArchSubtarget &Subtarget = MF.getSubtarget<LoongArchSubtarget>();
+
+  if ((Subtarget.hasBasicF() && !Subtarget.hasBasicD()))
+    return CSR_SingleFloatOnly_RegMask;
+
+  if (Subtarget.isABI_LP64())
+    return CSR_LP64_RegMask;
+
+  return CSR_ILP32_RegMask;
+}
+
+BitVector LoongArchRegisterInfo::
+getReservedRegs(const MachineFunction &MF) const {
+  static const MCPhysReg ReservedGPR32[] = {
+    LoongArch::ZERO, LoongArch::SP, LoongArch::TP, LoongArch::T9
+  };
+
+  static const MCPhysReg ReservedGPR64[] = {
+    LoongArch::ZERO_64, LoongArch::SP_64, LoongArch::TP_64, LoongArch::T9_64
+  };
+
+  BitVector Reserved(getNumRegs());
+  const LoongArchSubtarget &Subtarget = MF.getSubtarget<LoongArchSubtarget>();
+
+  for (unsigned I = 0; I < array_lengthof(ReservedGPR32); ++I)
+    Reserved.set(ReservedGPR32[I]);
+
+  for (unsigned I = 0; I < array_lengthof(ReservedGPR64); ++I)
+    Reserved.set(ReservedGPR64[I]);
+
+  // Reserve FP if this function should have a dedicated frame pointer register.
+  if (Subtarget.getFrameLowering()->hasFP(MF)) {
+    Reserved.set(LoongArch::FP);
+    Reserved.set(LoongArch::FP_64);
+
+    // Reserve the base register if we need to both realign the stack and
+    // allocate variable-sized objects at runtime. This should test the
+    // same conditions as LoongArchFrameLowering::hasBP().
+    if (needsStackRealignment(MF) && MF.getFrameInfo().hasVarSizedObjects()) {
+      Reserved.set(LoongArch::S7);
+      Reserved.set(LoongArch::S7_64);
+    }
+  }
+
+  return Reserved;
+}
+
+bool
+LoongArchRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const {
+  return true;
+}
+
+bool LoongArchRegisterInfo::
+requiresFrameIndexScavenging(const MachineFunction &MF) const {
+  return true;
+}
+
+bool
+LoongArchRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
+  return true;
+}
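A quick sanity check of the ranges implied by the two helpers that follow (a
sketch using isInt/isAligned from llvm/Support, not patch code):

    // ldptr/stptr/ll/sc: 14-bit signed immediate shifted left by 2, i.e.
    // byte offsets in [-32768, 32764] that are multiples of 4.
    assert(isInt<16>(32764) && isAligned(Align(4), 32764)); // encodable
    assert(!isAligned(Align(4), 32766)); // fits 16 bits, breaks the scaling
    // Every other load/store: plain 12-bit signed byte offset.
    assert(isInt<12>(2047) && !isInt<12>(2048));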
+/// Get the size of the offset supported by the given load/store/inline asm.
+/// The result includes the effects of any scale factors applied to the
+/// instruction immediate.
+static inline unsigned getLoadStoreOffsetSizeInBits(const unsigned Opcode,
+                                                    MachineOperand MO) {
+  switch (Opcode) {
+  case LoongArch::LDPTR_W:
+  case LoongArch::LDPTR_W32:
+  case LoongArch::LDPTR_D:
+  case LoongArch::STPTR_W:
+  case LoongArch::STPTR_W32:
+  case LoongArch::STPTR_D:
+  case LoongArch::LL_W:
+  case LoongArch::LL_D:
+  case LoongArch::SC_W:
+  case LoongArch::SC_D:
+    return 14 + 2 /* scale factor */;
+  case LoongArch::INLINEASM: {
+    unsigned ConstraintID = InlineAsm::getMemoryConstraintID(MO.getImm());
+    switch (ConstraintID) {
+    case InlineAsm::Constraint_ZC: {
+      return 14 + 2 /* scale factor */;
+    }
+    default:
+      return 12;
+    }
+  }
+  default:
+    return 12;
+  }
+}
+
+/// Get the scale factor applied to the immediate in the given load/store.
+static inline unsigned getLoadStoreOffsetAlign(const unsigned Opcode) {
+  switch (Opcode) {
+  case LoongArch::LDPTR_W:
+  case LoongArch::LDPTR_W32:
+  case LoongArch::LDPTR_D:
+  case LoongArch::STPTR_W:
+  case LoongArch::STPTR_W32:
+  case LoongArch::STPTR_D:
+  case LoongArch::LL_W:
+  case LoongArch::LL_D:
+  case LoongArch::SC_W:
+  case LoongArch::SC_D:
+    return 4;
+  default:
+    return 1;
+  }
+}
+
+// FrameIndexes represent objects inside an abstract stack. We must replace
+// them with a direct stack or frame pointer reference.
+void LoongArchRegisterInfo::
+eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+                    unsigned FIOperandNum, RegScavenger *RS) const {
+  MachineInstr &MI = *II;
+  MachineFunction &MF = *MI.getParent()->getParent();
+  const LoongArchFrameLowering *TFI = getFrameLowering(MF);
+
+  LLVM_DEBUG(errs() << "\nFunction : " << MF.getName() << "\n";
+             errs() << "<--------->\n"
+                    << MI);
+
+  int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
+  uint64_t stackSize = MF.getFrameInfo().getStackSize();
+  int64_t spOffset = MF.getFrameInfo().getObjectOffset(FrameIndex);
+
+  LLVM_DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n"
+                    << "spOffset : " << spOffset << "\n"
+                    << "stackSize : " << stackSize << "\n"
+                    << "SPAdj : " << SPAdj << "\n"
+                    << "alignment : "
+                    << DebugStr(MF.getFrameInfo().getObjectAlign(FrameIndex))
+                    << "\n");
+
+  LoongArchABIInfo ABI =
+      static_cast<const LoongArchTargetMachine &>(MF.getTarget()).getABI();
+
+  // Everything else is referenced relative to whatever register
+  // getFrameIndexReference() returns.
+  Register FrameReg;
+  StackOffset Offset =
+      TFI->getFrameIndexReference(MF, FrameIndex, FrameReg) +
+      StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm());
+
+  LLVM_DEBUG(errs() << "Location : "
+                    << "FrameReg<" << FrameReg << "> + " << Offset.getFixed()
+                    << "\n<--------->\n");
+
+  MachineBasicBlock &MBB = *MI.getParent();
+  DebugLoc DL = II->getDebugLoc();
+  bool IsKill = false;
+
+  if (!MI.isDebugValue()) {
+    // Make sure Offset fits within the field available.
+    // For ldptr/stptr/ll/sc instructions, this is a 14-bit signed immediate
+    // scaled by 4; otherwise it is a 12-bit signed immediate.
+    unsigned OffsetBitSize = getLoadStoreOffsetSizeInBits(
+        MI.getOpcode(), MI.getOperand(FIOperandNum - 1));
+    const Align OffsetAlign(getLoadStoreOffsetAlign(MI.getOpcode()));
+
+    if (OffsetBitSize == 16 && isInt<12>(Offset.getFixed()) &&
+        !isAligned(OffsetAlign, Offset.getFixed())) {
+      // The offset fits the scaled 16-bit field numerically but is not
+      // suitably aligned. Since it also fits a plain 12-bit immediate,
+      // fold it into a scratch register with an ADDI.
+      const TargetRegisterClass *PtrRC = ABI.ArePtrs64bit()
+                                             ? &LoongArch::GPR64RegClass
+                                             : &LoongArch::GPR32RegClass;
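In instruction terms, the rewrite this branch performs looks roughly like the
following (register choice illustrative, shown as a C++ comment sketch):

    // Before: ldptr.w $a0, <frame-object + 2>   -- 2 breaks the x4 scaling
    // After:  addi.d  $t0, $fp, 2               -- fold the unaligned part
    //         ldptr.w $a0, $t0, 0               -- offset 0 encodes directly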
+      MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
+      unsigned Reg = RegInfo.createVirtualRegister(PtrRC);
+      const LoongArchInstrInfo &TII = *static_cast<const LoongArchInstrInfo *>(
+          MBB.getParent()->getSubtarget().getInstrInfo());
+      BuildMI(MBB, II, DL, TII.get(ABI.GetPtrAddiOp()), Reg)
+          .addReg(FrameReg)
+          .addImm(Offset.getFixed());
+
+      FrameReg = Reg;
+      Offset = StackOffset::getFixed(0);
+      IsKill = true;
+    } else if (!isInt<12>(Offset.getFixed())) {
+      // Otherwise split the offset into several pieces and add it in multiple
+      // instructions.
+      const LoongArchInstrInfo &TII = *static_cast<const LoongArchInstrInfo *>(
+          MBB.getParent()->getSubtarget().getInstrInfo());
+      unsigned Reg = TII.loadImmediate(Offset.getFixed(), MBB, II, DL);
+      BuildMI(MBB, II, DL, TII.get(ABI.GetPtrAddOp()), Reg)
+          .addReg(FrameReg)
+          .addReg(Reg, RegState::Kill);
+
+      FrameReg = Reg;
+      Offset = StackOffset::getFixed(0);
+      IsKill = true;
+    }
+  }
+
+  MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false, false, IsKill);
+  MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed());
+}
+
+Register LoongArchRegisterInfo::
+getFrameRegister(const MachineFunction &MF) const {
+  const LoongArchSubtarget &Subtarget = MF.getSubtarget<LoongArchSubtarget>();
+  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
+  bool IsLP64 = static_cast<const LoongArchTargetMachine &>(MF.getTarget())
+                    .getABI()
+                    .IsLP64();
+
+  return TFI->hasFP(MF) ? (IsLP64 ? LoongArch::FP_64 : LoongArch::FP)
+                        : (IsLP64 ? LoongArch::SP_64 : LoongArch::SP);
+}
+
+const TargetRegisterClass *
+LoongArchRegisterInfo::intRegClass(unsigned Size) const {
+  if (Size == 4)
+    return &LoongArch::GPR32RegClass;
+
+  assert(Size == 8);
+  return &LoongArch::GPR64RegClass;
+}
+
+bool LoongArchRegisterInfo::canRealignStack(const MachineFunction &MF) const {
+  // Avoid realigning functions that explicitly do not want to be realigned.
+  // Normally, we should report an error when a function should be dynamically
+  // realigned but also has the attribute no-realign-stack. Unfortunately,
+  // with this attribute, MachineFrameInfo clamps each new object's alignment
+  // to that of the stack's alignment as specified by the ABI. As a result,
+  // the information of whether we have objects with larger alignment
+  // requirement than the stack's alignment is already lost at this point.
+  if (!TargetRegisterInfo::canRealignStack(MF))
+    return false;
+
+  const LoongArchSubtarget &Subtarget = MF.getSubtarget<LoongArchSubtarget>();
+  unsigned FP = Subtarget.is64Bit() ? LoongArch::FP_64 : LoongArch::FP;
+  unsigned BP = Subtarget.is64Bit() ? LoongArch::S7_64 : LoongArch::S7;
+
+  // We can't perform dynamic stack realignment if we can't reserve the
+  // frame pointer register.
+  if (!MF.getRegInfo().canReserveReg(FP))
+    return false;
+
+  // We can realign the stack if we know the maximum call frame size and we
+  // don't have variable sized objects.
+  if (Subtarget.getFrameLowering()->hasReservedCallFrame(MF))
+    return true;
+
+  // We have to reserve the base pointer register in the presence of variable
+  // sized objects.
+  return MF.getRegInfo().canReserveReg(BP);
+}
diff --git a/lib/Target/LoongArch/LoongArchRegisterInfo.h b/lib/Target/LoongArch/LoongArchRegisterInfo.h
new file mode 100644
index 00000000..dd3be916
--- /dev/null
+++ b/lib/Target/LoongArch/LoongArchRegisterInfo.h
@@ -0,0 +1,80 @@
+//===- LoongArchRegisterInfo.h - LoongArch Register Information Impl ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the LoongArch implementation of the TargetRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHREGISTERINFO_H +#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHREGISTERINFO_H + +#include "LoongArch.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include + +#define GET_REGINFO_HEADER +#include "LoongArchGenRegisterInfo.inc" + +namespace llvm { + +class TargetRegisterClass; + +class LoongArchRegisterInfo : public LoongArchGenRegisterInfo { +public: + enum class LoongArchPtrClass { + /// The default register class for integer values. + Default = 0, + /// The stack pointer only. + StackPointer = 1, + }; + + LoongArchRegisterInfo(); + + /// Get PIC indirect call register + static unsigned getPICCallReg(); + + /// Code Generation virtual methods... + const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF, + unsigned Kind) const override; + + unsigned getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const override; + const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; + const uint32_t *getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID) const override; + BitVector getReservedRegs(const MachineFunction &MF) const override; + + bool requiresRegisterScavenging(const MachineFunction &MF) const override; + + bool requiresFrameIndexScavenging(const MachineFunction &MF) const override; + + bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override; + + /// Stack Frame Processing Methods + void eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS = nullptr) const override; + + // Stack realignment queries. + bool canRealignStack(const MachineFunction &MF) const override; + + /// Debug information queries. + Register getFrameRegister(const MachineFunction &MF) const override; + + /// Return GPR register class. + const TargetRegisterClass *intRegClass(unsigned Size) const; + +private: + void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, + int FrameIndex, uint64_t StackSize, + int SPAdj, int64_t SPOffset) const; +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHREGISTERINFO_H diff --git a/lib/Target/LoongArch/LoongArchRegisterInfo.td b/lib/Target/LoongArch/LoongArchRegisterInfo.td new file mode 100644 index 00000000..96569e07 --- /dev/null +++ b/lib/Target/LoongArch/LoongArchRegisterInfo.td @@ -0,0 +1,373 @@ +//===-- LoongArchRegisterInfo.td - LoongArch Register defs -----------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the LoongArch register file
+//===----------------------------------------------------------------------===//
+let Namespace = "LoongArch" in {
+def sub_32 : SubRegIndex<32>;
+def sub_64 : SubRegIndex<64>;
+def sub_128 : SubRegIndex<128>;
+def sub_fcsr1 : SubRegIndex<5>;
+def sub_fcsr2 : SubRegIndex<13, 16>;
+def sub_fcsr3 : SubRegIndex<2, 8>;
+def sub_lo : SubRegIndex<32>;
+def sub_hi : SubRegIndex<32, 32>;
+def PC : Register<"pc">;
+}
+
+class Unallocatable {
+  bit isAllocatable = 0;
+}
+
+/// We have banks of 32 registers each.
+class LoongArchReg<bits<16> Enc, string n> : Register<n> {
+  let HWEncoding = Enc;
+  let Namespace = "LoongArch";
+}
+
+class LoongArchRegWithSubRegs<bits<16> Enc, string n, list<Register> subregs>
+    : RegisterWithSubRegs<n, subregs> {
+  let HWEncoding = Enc;
+  let Namespace = "LoongArch";
+}
+
+/// LoongArch 32-bit CPU Registers.
+class LoongArch32GPR<bits<16> Enc, string n> : LoongArchReg<Enc, n>;
+
+/// LoongArch 64-bit CPU Registers.
+class LoongArch64GPR<bits<16> Enc, string n, list<Register> subregs>
+    : LoongArchRegWithSubRegs<Enc, n, subregs> {
+  let SubRegIndices = [sub_32];
+}
+
+/// LoongArch 64-bit Floating-point Registers
+class FGR32<bits<16> Enc, string n> : LoongArchReg<Enc, n>;
+class FGR64<bits<16> Enc, string n, list<Register> subregs>
+    : LoongArchRegWithSubRegs<Enc, n, subregs> {
+  let SubRegIndices = [sub_lo];
+}
+
+// LoongArch 128-bit (aliased) LSX Registers
+class LSX128<bits<16> Enc, string n, list<Register> subregs>
+    : LoongArchRegWithSubRegs<Enc, n, subregs> {
+  let SubRegIndices = [sub_64];
+}
+
+// LoongArch 256-bit (aliased) LASX Registers
+class LASX256<bits<16> Enc, string n, list<Register> subregs>
+    : LoongArchRegWithSubRegs<Enc, n, subregs> {
+  let SubRegIndices = [sub_128];
+}
+
+//===----------------------------------------------------------------------===//
+// Registers
+//===----------------------------------------------------------------------===//
+
+/// General Purpose 32-bit Registers
+def ZERO : LoongArch32GPR<0, "zero">, DwarfRegNum<[0]>;
+def RA : LoongArch32GPR<1, "ra">, DwarfRegNum<[1]>;
+def TP : LoongArch32GPR<2, "tp">, DwarfRegNum<[2]>;
+def SP : LoongArch32GPR<3, "sp">, DwarfRegNum<[3]>;
+def A0 : LoongArch32GPR<4, "r4">, DwarfRegNum<[4]>;
+def A1 : LoongArch32GPR<5, "r5">, DwarfRegNum<[5]>;
+def A2 : LoongArch32GPR<6, "r6">, DwarfRegNum<[6]>;
+def A3 : LoongArch32GPR<7, "r7">, DwarfRegNum<[7]>;
+def A4 : LoongArch32GPR<8, "r8">, DwarfRegNum<[8]>;
+def A5 : LoongArch32GPR<9, "r9">, DwarfRegNum<[9]>;
+def A6 : LoongArch32GPR<10, "r10">, DwarfRegNum<[10]>;
+def A7 : LoongArch32GPR<11, "r11">, DwarfRegNum<[11]>;
+def T0 : LoongArch32GPR<12, "r12">, DwarfRegNum<[12]>;
+def T1 : LoongArch32GPR<13, "r13">, DwarfRegNum<[13]>;
+def T2 : LoongArch32GPR<14, "r14">, DwarfRegNum<[14]>;
+def T3 : LoongArch32GPR<15, "r15">, DwarfRegNum<[15]>;
+def T4 : LoongArch32GPR<16, "r16">, DwarfRegNum<[16]>;
+def T5 : LoongArch32GPR<17, "r17">, DwarfRegNum<[17]>;
+def T6 : LoongArch32GPR<18, "r18">, DwarfRegNum<[18]>;
+def T7 : LoongArch32GPR<19, "r19">, DwarfRegNum<[19]>;
+def T8 : LoongArch32GPR<20, "r20">, DwarfRegNum<[20]>;
+def T9 : LoongArch32GPR<21, "r21">, DwarfRegNum<[21]>;
+def FP : LoongArch32GPR<22, "r22">, DwarfRegNum<[22]>;
+def S0 : LoongArch32GPR<23, "r23">, DwarfRegNum<[23]>;
+def S1 : LoongArch32GPR<24, "r24">, DwarfRegNum<[24]>;
+def S2 : LoongArch32GPR<25, "r25">, DwarfRegNum<[25]>;
+def S3 : LoongArch32GPR<26, "r26">, DwarfRegNum<[26]>;
+def S4 : LoongArch32GPR<27, "r27">, DwarfRegNum<[27]>;
+def S5 : LoongArch32GPR<28, "r28">, DwarfRegNum<[28]>;
+def S6 : LoongArch32GPR<29, "r29">, DwarfRegNum<[29]>;
+def S7 : LoongArch32GPR<30, "r30">, DwarfRegNum<[30]>;
+def S8 : LoongArch32GPR<31, "r31">, DwarfRegNum<[31]>;
+
+let SubRegIndices = [sub_32] in {
+def V0 : LoongArchRegWithSubRegs<4, "r4", [A0]>, DwarfRegNum<[4]>;
+def V1 : LoongArchRegWithSubRegs<5, "r5", [A1]>, DwarfRegNum<[5]>;
+}
+
+/// General Purpose 64-bit Registers
+def ZERO_64 : LoongArch64GPR<0, "zero", [ZERO]>, DwarfRegNum<[0]>;
+def RA_64 : LoongArch64GPR<1, "ra", [RA]>, DwarfRegNum<[1]>;
+def TP_64 : LoongArch64GPR<2, "tp", [TP]>, DwarfRegNum<[2]>;
+def SP_64 : LoongArch64GPR<3, "sp", [SP]>, DwarfRegNum<[3]>;
+def A0_64 : LoongArch64GPR<4, "r4", [A0]>, DwarfRegNum<[4]>;
+def A1_64 : LoongArch64GPR<5, "r5", [A1]>, DwarfRegNum<[5]>;
+def A2_64 : LoongArch64GPR<6, "r6", [A2]>, DwarfRegNum<[6]>;
+def A3_64 : LoongArch64GPR<7, "r7", [A3]>, DwarfRegNum<[7]>;
+def A4_64 : LoongArch64GPR<8, "r8", [A4]>, DwarfRegNum<[8]>;
+def A5_64 : LoongArch64GPR<9, "r9", [A5]>, DwarfRegNum<[9]>;
+def A6_64 : LoongArch64GPR<10, "r10", [A6]>, DwarfRegNum<[10]>;
+def A7_64 : LoongArch64GPR<11, "r11", [A7]>, DwarfRegNum<[11]>;
+def T0_64 : LoongArch64GPR<12, "r12", [T0]>, DwarfRegNum<[12]>;
+def T1_64 : LoongArch64GPR<13, "r13", [T1]>, DwarfRegNum<[13]>;
+def T2_64 : LoongArch64GPR<14, "r14", [T2]>, DwarfRegNum<[14]>;
+def T3_64 : LoongArch64GPR<15, "r15", [T3]>, DwarfRegNum<[15]>;
+def T4_64 : LoongArch64GPR<16, "r16", [T4]>, DwarfRegNum<[16]>;
+def T5_64 : LoongArch64GPR<17, "r17", [T5]>, DwarfRegNum<[17]>;
+def T6_64 : LoongArch64GPR<18, "r18", [T6]>, DwarfRegNum<[18]>;
+def T7_64 : LoongArch64GPR<19, "r19", [T7]>, DwarfRegNum<[19]>;
+def T8_64 : LoongArch64GPR<20, "r20", [T8]>, DwarfRegNum<[20]>;
+def T9_64 : LoongArch64GPR<21, "r21", [T9]>, DwarfRegNum<[21]>;
+def FP_64 : LoongArch64GPR<22, "r22", [FP]>, DwarfRegNum<[22]>;
+def S0_64 : LoongArch64GPR<23, "r23", [S0]>, DwarfRegNum<[23]>;
+def S1_64 : LoongArch64GPR<24, "r24", [S1]>, DwarfRegNum<[24]>;
+def S2_64 : LoongArch64GPR<25, "r25", [S2]>, DwarfRegNum<[25]>;
+def S3_64 : LoongArch64GPR<26, "r26", [S3]>, DwarfRegNum<[26]>;
+def S4_64 : LoongArch64GPR<27, "r27", [S4]>, DwarfRegNum<[27]>;
+def S5_64 : LoongArch64GPR<28, "r28", [S5]>, DwarfRegNum<[28]>;
+def S6_64 : LoongArch64GPR<29, "r29", [S6]>, DwarfRegNum<[29]>;
+def S7_64 : LoongArch64GPR<30, "r30", [S7]>, DwarfRegNum<[30]>;
+def S8_64 : LoongArch64GPR<31, "r31", [S8]>, DwarfRegNum<[31]>;
+
+let SubRegIndices = [sub_64] in {
+def V0_64 : LoongArch64GPR<4, "r4", [A0_64]>, DwarfRegNum<[4]>;
+def V1_64 : LoongArch64GPR<5, "r5", [A1_64]>, DwarfRegNum<[5]>;
+}
+
+/// FP registers
+foreach I = 0-31 in
+def F#I : FGR32<I, "f"#I>, DwarfRegNum<[!add(I, 32)]>;
+
+foreach I = 0-31 in
+def F#I#_64 : FGR64<I, "f"#I, [!cast<FGR32>("F"#I)]>, DwarfRegNum<[!add(I, 32)]>;
+
+/// FP Condition Flag 0~7
+foreach I = 0-7 in
+def FCC#I : LoongArchReg<I, "fcc"#I>;
+
+/// FP Control and Status Registers, FCSR 1~3
+foreach I = 1-3 in
+def FCSR#I : LoongArchReg<I, "fcsr"#I>;
+
+class FCSRReg<bits<16> Enc, string n, list<Register> subregs> :
+    RegisterWithSubRegs<n, subregs> {
+//  field bits<2> chan_encoding = 0;
+  let Namespace = "LoongArch";
+  let SubRegIndices = [sub_fcsr1, sub_fcsr2, sub_fcsr3];
+//  let HWEncoding{8-0} = encoding{8-0};
+//  let HWEncoding{10-9} = chan_encoding;
+}
+
+def FCSR0 : FCSRReg<0, "fcsr0", [FCSR1, FCSR2, FCSR3]>;
+
+/// PC register
+//let NameSpace = "LoongArch" in
+//def PC : Register<"pc">;
+
+//===----------------------------------------------------------------------===//
+// Register Classes
+//===----------------------------------------------------------------------===//
+
+def GPR32 : RegisterClass<"LoongArch", [i32], 32, (add
+  // Reserved
+  ZERO,
+  // Return Values and Arguments
+  A0, A1, A2, A3, A4, A5, A6, A7,
+  // Not preserved across procedure calls
+  T0, T1, T2, T3, T4, T5, T6, T7, T8,
+  // Callee save
+  S0, S1, S2, S3, S4, S5, S6, S7, S8,
+  // Reserved
+  RA, TP, SP,
+  // Reserved
+  T9, FP)>;
+
+def GPR64 : RegisterClass<"LoongArch", [i64], 64, (add
+  // Reserved
+  ZERO_64,
+  // Return Values and Arguments
+  A0_64, A1_64, A2_64, A3_64, A4_64, A5_64, A6_64, A7_64,
+  // Not preserved across procedure calls
+  T0_64, T1_64, T2_64, T3_64, T4_64, T5_64, T6_64, T7_64, T8_64,
+  // Callee save
+  S0_64, S1_64, S2_64, S3_64, S4_64, S5_64, S6_64, S7_64, S8_64,
+  // Reserved
+  RA_64, TP_64, SP_64,
+  // Reserved
+  T9_64, FP_64)>;
+
+def GPRTC64 : RegisterClass<"LoongArch", [i64], 64, (add
+  // Return Values and Arguments
+  A0_64, A1_64, A2_64, A3_64, A4_64, A5_64, A6_64, A7_64,
+  // Not preserved across procedure calls
+  T0_64, T1_64, T2_64, T3_64, T4_64, T5_64, T6_64, T7_64, T8_64)>;
+
+/// FP Registers.
+def FGR64 : RegisterClass<"LoongArch", [f64], 64, (sequence "F%u_64", 0, 31)>;
+def FGR32 : RegisterClass<"LoongArch", [f32], 64, (sequence "F%u", 0, 31)>;
+
+/// FP condition Flag registers.
+def FCFR : RegisterClass<"LoongArch", [i32], 32, (sequence "FCC%u", 0, 7)>,
+           Unallocatable;
+
+def SP32 : RegisterClass<"LoongArch", [i32], 32, (add SP)>, Unallocatable;
+def SP64 : RegisterClass<"LoongArch", [i64], 64, (add SP_64)>, Unallocatable;
+def TP32 : RegisterClass<"LoongArch", [i32], 32, (add TP)>, Unallocatable;
+def TP64 : RegisterClass<"LoongArch", [i64], 64, (add TP_64)>, Unallocatable;
+
+/// FP control and Status registers.
+def FCSR : RegisterClass<"LoongArch", [i32], 4, (sequence "FCSR%u", 0, 3)>,
+           Unallocatable;
+
+//LSX
+foreach I = 0-31 in
+def VR#I : LSX128<I, "vr"#I, [!cast<FGR64>("F"#I#"_64")]>,
+           DwarfRegNum<[!add(I, 32)]>;
+
+//LASX
+foreach I = 0-31 in
+def XR#I : LASX256<I, "xr"#I, [!cast<LSX128>("VR"#I)]>,
+           DwarfRegNum<[!add(I, 32)]>;
+
+def LSX128B: RegisterClass<"LoongArch", [v16i8], 128,
+                           (sequence "VR%u", 0, 31)>;
+
+def LSX128H: RegisterClass<"LoongArch", [v8i16], 128,
+                           (sequence "VR%u", 0, 31)>;
+
+def LSX128W: RegisterClass<"LoongArch", [v4i32, v4f32], 128,
+                           (sequence "VR%u", 0, 31)>;
+
+def LSX128D: RegisterClass<"LoongArch", [v2i64, v2f64], 128,
+                           (sequence "VR%u", 0, 31)>;
+
+def LASX256B: RegisterClass<"LoongArch", [v32i8], 256,
+                            (sequence "XR%u", 0, 31)>;
+def LASX256H: RegisterClass<"LoongArch", [v16i16], 256,
+                            (sequence "XR%u", 0, 31)>;
+def LASX256W: RegisterClass<"LoongArch", [v8i32, v8f32], 256,
+                            (sequence "XR%u", 0, 31)>;
+def LASX256D: RegisterClass<"LoongArch", [v4i64, v4f64], 256,
+                            (sequence "XR%u", 0, 31)>;
+
+//===----------------------------------------------------------------------===//
+// Register Operands.
+//===----------------------------------------------------------------------===//
+
+class LoongArchAsmRegOperand : AsmOperandClass {
+  let ParserMethod = "parseAnyRegister";
+}
+
+def GPR32AsmOperand : LoongArchAsmRegOperand {
+  let Name = "GPR32AsmReg";
+  let PredicateMethod = "isGPRAsmReg";
+}
+
+def GPR64AsmOperand : LoongArchAsmRegOperand {
+  let Name = "GPR64AsmReg";
+  let PredicateMethod = "isGPRAsmReg";
+}
+
+def FGR32AsmOperand : LoongArchAsmRegOperand {
+  let Name = "FGR32AsmReg";
+  let PredicateMethod = "isFGRAsmReg";
+}
+
+def FGR64AsmOperand : LoongArchAsmRegOperand {
+  let Name = "FGR64AsmReg";
+  let PredicateMethod = "isFGRAsmReg";
+}
+
+def FCSRAsmOperand : LoongArchAsmRegOperand {
+  let Name = "FCSRAsmReg";
+}
+
+def FCFRAsmOperand : LoongArchAsmRegOperand {
+  let Name = "FCFRAsmReg";
+}
+
+//LSX
+def LSX128AsmOperand : LoongArchAsmRegOperand {
+  let Name = "LSX128AsmReg";
+}
+
+//LASX
+def LASX256AsmOperand : LoongArchAsmRegOperand {
+  let Name = "LASX256AsmReg";
+}
+
+def GPR32Opnd : RegisterOperand<GPR32> {
+  let ParserMatchClass = GPR32AsmOperand;
+}
+
+def GPR64Opnd : RegisterOperand<GPR64> {
+  let ParserMatchClass = GPR64AsmOperand;
+}
+
+def GPRTC64Opnd : RegisterOperand<GPRTC64> {
+  let ParserMatchClass = GPR64AsmOperand;
+}
+
+def FGR32Opnd : RegisterOperand<FGR32> {
+  let ParserMatchClass = FGR32AsmOperand;
+}
+
+def FGR64Opnd : RegisterOperand<FGR64> {
+  let ParserMatchClass = FGR64AsmOperand;
+}
+
+def FCSROpnd : RegisterOperand<FCSR> {
+  let ParserMatchClass = FCSRAsmOperand;
+}
+
+def FCFROpnd : RegisterOperand<FCFR> {
+  let ParserMatchClass = FCFRAsmOperand;
+}
+
+//LSX
+def LSX128BOpnd : RegisterOperand<LSX128B> {
+  let ParserMatchClass = LSX128AsmOperand;
+}
+
+def LSX128HOpnd : RegisterOperand<LSX128H> {
+  let ParserMatchClass = LSX128AsmOperand;
+}
+
+def LSX128WOpnd : RegisterOperand<LSX128W> {
+  let ParserMatchClass = LSX128AsmOperand;
+}
+
+def LSX128DOpnd : RegisterOperand<LSX128D> {
+  let ParserMatchClass = LSX128AsmOperand;
+}
+
+//LASX
+def LASX256BOpnd : RegisterOperand<LASX256B> {
+  let ParserMatchClass = LASX256AsmOperand;
+}
+
+def LASX256HOpnd : RegisterOperand<LASX256H> {
+  let ParserMatchClass = LASX256AsmOperand;
+}
+
+def LASX256WOpnd : RegisterOperand<LASX256W> {
+  let ParserMatchClass = LASX256AsmOperand;
+}
+
+def LASX256DOpnd : RegisterOperand<LASX256D> {
+  let ParserMatchClass = LASX256AsmOperand;
+}
diff --git a/lib/Target/LoongArch/LoongArchSubtarget.cpp b/lib/Target/LoongArch/LoongArchSubtarget.cpp
new file mode 100644
index 00000000..2c2e89bb
--- /dev/null
+++ b/lib/Target/LoongArch/LoongArchSubtarget.cpp
@@ -0,0 +1,112 @@
+//===-- LoongArchSubtarget.cpp - LoongArch Subtarget Information --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LoongArch specific subclass of TargetSubtargetInfo.
+// +//===----------------------------------------------------------------------===// + +#include "LoongArchSubtarget.h" +#include "LoongArch.h" +#include "LoongArchMachineFunction.h" +#include "LoongArchRegisterInfo.h" +#include "LoongArchTargetMachine.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "loongarch-subtarget" + +#define GET_SUBTARGETINFO_TARGET_DESC +#define GET_SUBTARGETINFO_CTOR +#include "LoongArchGenSubtargetInfo.inc" + +void LoongArchSubtarget::anchor() {} + +LoongArchSubtarget::LoongArchSubtarget(const Triple &TT, StringRef CPU, + StringRef FS, + const LoongArchTargetMachine &TM, + MaybeAlign StackAlignOverride) + : LoongArchGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), HasLA64(false), + HasBasicF(false), HasBasicD(false), HasLSX(false), HasLASX(false), + UnalignedAccess(false), StackAlignOverride(StackAlignOverride), TM(TM), + TargetTriple(TT), TSInfo(), + InstrInfo(initializeSubtargetDependencies(CPU, FS, TM)), + FrameLowering(*this), TLInfo(TM, *this) { + + // Check if Architecture and ABI are compatible. + assert(((!is64Bit() && isABI_ILP32()) || (is64Bit() && isABI_LP64())) && + "Invalid Arch & ABI pair."); + + if (hasLSX() && !hasBasicD()) + report_fatal_error("LSX requires 64-bit floating point register." + "See -mattr=+d.", + false); + +} + +bool LoongArchSubtarget::isPositionIndependent() const { + return TM.isPositionIndependent(); +} + +/// This overrides the PostRAScheduler bit in the SchedModel for any CPU. +bool LoongArchSubtarget::enablePostRAScheduler() const { return true; } + +void LoongArchSubtarget::getCriticalPathRCs(RegClassVector &CriticalPathRCs) const { + CriticalPathRCs.clear(); + CriticalPathRCs.push_back(is64Bit() ? &LoongArch::GPR64RegClass + : &LoongArch::GPR32RegClass); +} + +CodeGenOpt::Level LoongArchSubtarget::getOptLevelToEnablePostRAScheduler() const { + return CodeGenOpt::Aggressive; +} + +LoongArchSubtarget & +LoongArchSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS, + const TargetMachine &TM) { + StringRef CPUName = LoongArch_MC::selectLoongArchCPU(TM.getTargetTriple(), CPU); + + // Parse features string. + ParseSubtargetFeatures(CPUName, /*TuneCPU*/ CPUName, FS); + // Initialize scheduling itinerary for the specified CPU. 
+ InstrItins = getInstrItineraryForCPU(CPUName); + + if (StackAlignOverride) + stackAlignment = *StackAlignOverride; + else if (isABI_LP64()) + stackAlignment = Align(16); + else { + assert(isABI_ILP32() && "Unknown ABI for stack alignment!"); + stackAlignment = Align(8); + } + + return *this; +} + +Reloc::Model LoongArchSubtarget::getRelocationModel() const { + return TM.getRelocationModel(); +} + +bool LoongArchSubtarget::isABI_LP64D() const { return getABI().IsLP64D(); } +bool LoongArchSubtarget::isABI_LP64S() const { return getABI().IsLP64S(); } +bool LoongArchSubtarget::isABI_LP64F() const { return getABI().IsLP64F(); } +bool LoongArchSubtarget::isABI_LP64() const { + return isABI_LP64D() || isABI_LP64S() || isABI_LP64F(); +} +bool LoongArchSubtarget::isABI_ILP32D() const { return getABI().IsILP32D(); } +bool LoongArchSubtarget::isABI_ILP32F() const { return getABI().IsILP32F(); } +bool LoongArchSubtarget::isABI_ILP32S() const { return getABI().IsILP32S(); } +bool LoongArchSubtarget::isABI_ILP32() const { + return isABI_ILP32D() || isABI_ILP32F() || isABI_ILP32S(); +} +const LoongArchABIInfo &LoongArchSubtarget::getABI() const { return TM.getABI(); } diff --git a/lib/Target/LoongArch/LoongArchSubtarget.h b/lib/Target/LoongArch/LoongArchSubtarget.h new file mode 100644 index 00000000..70059dd1 --- /dev/null +++ b/lib/Target/LoongArch/LoongArchSubtarget.h @@ -0,0 +1,145 @@ +//===-- LoongArchSubtarget.h - Define Subtarget for the LoongArch ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the LoongArch specific subclass of TargetSubtargetInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHSUBTARGET_H +#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHSUBTARGET_H + +#include "MCTargetDesc/LoongArchABIInfo.h" +#include "LoongArchFrameLowering.h" +#include "LoongArchISelLowering.h" +#include "LoongArchInstrInfo.h" +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Support/ErrorHandling.h" +#include + +#define GET_SUBTARGETINFO_HEADER +#include "LoongArchGenSubtargetInfo.inc" + +namespace llvm { +class StringRef; + +class LoongArchTargetMachine; + +class LoongArchSubtarget : public LoongArchGenSubtargetInfo { + virtual void anchor(); + + // HasLA64 - The target processor has LA64 ISA support. + bool HasLA64; + + // HasBasicF - The target restricts the use of hardware floating-point + // instructions to 32-bit operations. + bool HasBasicF; + + // HasBasicD - The target allows hardware floating-point instructions to + // cover both 32-bit and 64-bit operations. + bool HasBasicD; + + /// Features related to the presence of specific instructions. + + // HasLSX - Supports LSX. + bool HasLSX; + + // HasLASX - Supports LASX. + bool HasLASX; + + /// The minimum alignment known to hold of the stack frame on + /// entry to the function and which must be maintained by every function. + Align stackAlignment; + + // Allow unaligned memory accesses. + bool UnalignedAccess; + + /// The overridden stack alignment. 
+ MaybeAlign StackAlignOverride; + + InstrItineraryData InstrItins; + + const LoongArchTargetMachine &TM; + + Triple TargetTriple; + + const SelectionDAGTargetInfo TSInfo; + const LoongArchInstrInfo InstrInfo; + const LoongArchFrameLowering FrameLowering; + const LoongArchTargetLowering TLInfo; + +public: + bool isPositionIndependent() const; + /// This overrides the PostRAScheduler bit in the SchedModel for each CPU. + bool enablePostRAScheduler() const override; + void getCriticalPathRCs(RegClassVector &CriticalPathRCs) const override; + CodeGenOpt::Level getOptLevelToEnablePostRAScheduler() const override; + + bool isABI_LP64() const; + bool isABI_LP64D() const; + bool isABI_LP64S() const; + bool isABI_LP64F() const; + bool isABI_ILP32() const; + bool isABI_ILP32D() const; + bool isABI_ILP32F() const; + bool isABI_ILP32S() const; + const LoongArchABIInfo &getABI() const; + + /// This constructor initializes the data members to match that + /// of the specified triple. + LoongArchSubtarget(const Triple &TT, StringRef CPU, StringRef FS, + const LoongArchTargetMachine &TM, MaybeAlign StackAlignOverride); + + /// ParseSubtargetFeatures - Parses features string setting specified + /// subtarget options. Definition of function is auto generated by tblgen. + void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); + + bool is64Bit() const { return HasLA64; } + bool hasBasicD() const { return HasBasicD; } + unsigned getGPRSizeInBytes() const { return is64Bit() ? 8 : 4; } + bool hasLSX() const { return HasLSX; } + bool hasLASX() const { return HasLASX; } + bool hasBasicF() const { return HasBasicF; } + bool useSoftFloat() const { return (!HasBasicD && !HasBasicF); } + + bool allowUnalignedAccess() const { return UnalignedAccess; } + + bool isXRaySupported() const override { return true; } + + Align getStackAlignment() const { return stackAlignment; } + + // Grab relocation model + Reloc::Model getRelocationModel() const; + + LoongArchSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS, + const TargetMachine &TM); + + const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { + return &TSInfo; + } + const LoongArchInstrInfo *getInstrInfo() const override { + return &InstrInfo; + } + const TargetFrameLowering *getFrameLowering() const override { + return &FrameLowering; + } + const LoongArchRegisterInfo *getRegisterInfo() const override { + return &InstrInfo.getRegisterInfo(); + } + const LoongArchTargetLowering *getTargetLowering() const override { + return &TLInfo; + } + const InstrItineraryData *getInstrItineraryData() const override { + return &InstrItins; + } +}; +} // End llvm namespace + +#endif diff --git a/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/lib/Target/LoongArch/LoongArchTargetMachine.cpp new file mode 100644 index 00000000..a9dd79a6 --- /dev/null +++ b/lib/Target/LoongArch/LoongArchTargetMachine.cpp @@ -0,0 +1,190 @@ +//===-- LoongArchTargetMachine.cpp - Define TargetMachine for LoongArch -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implements the info about LoongArch target spec. 
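The feature accessors declared above are what the rest of the backend keys on;
a minimal usage sketch (MF is an assumed MachineFunction, not patch code):

    const auto &ST = MF.getSubtarget<LoongArchSubtarget>();
    unsigned VecBits = ST.hasLASX() ? 256 : ST.hasLSX() ? 128 : 0;
    unsigned GPRBytes = ST.getGPRSizeInBytes(); // 8 on LA64, 4 on LA32
    bool SoftFloat = ST.useSoftFloat();         // neither BasicF nor BasicD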
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchTargetMachine.h"
+#include "LoongArch.h"
+#include "LoongArchISelDAGToDAG.h"
+#include "LoongArchSubtarget.h"
+#include "LoongArchTargetObjectFile.h"
+#include "LoongArchTargetTransformInfo.h"
+#include "MCTargetDesc/LoongArchABIInfo.h"
+#include "MCTargetDesc/LoongArchMCTargetDesc.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
+#include <memory>
+#include <string>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "loongarch"
+
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchTarget() {
+  // Register the target.
+  RegisterTargetMachine<LoongArchTargetMachine> X(getTheLoongArch32Target());
+  RegisterTargetMachine<LoongArchTargetMachine> A(getTheLoongArch64Target());
+}
+
+static std::string computeDataLayout(const Triple &TT, StringRef CPU,
+                                     const TargetOptions &Options) {
+  std::string Ret;
+  LoongArchABIInfo ABI =
+      LoongArchABIInfo::computeTargetABI(TT, CPU, Options.MCOptions);
+
+  Ret += "e";
+
+  if (ABI.IsILP32())
+    // TODO
+    llvm_unreachable("Unimplemented ABI");
+  else
+    Ret += "-m:e";
+
+  // Pointers are 32 bit on some ABIs.
+  if (!ABI.IsLP64())
+    Ret += "-p:32:32";
+
+  // 8 and 16 bit integers only need to have natural alignment, but try to
+  // align them to 32 bits. 64 bit integers have natural alignment.
+  Ret += "-i8:8:32-i16:16:32-i64:64";
+
+  // 32 bit registers are always available and the stack is at least 64 bit
+  // aligned. On LP64 64 bit registers are also available and the stack is
+  // 128 bit aligned.
+  if (ABI.IsLP64())
+    Ret += "-n32:64-S128";
+  else
+    Ret += "-n32-S64";
+
+  return Ret;
+}
+
+static Reloc::Model getEffectiveRelocModel(bool JIT,
+                                           Optional<Reloc::Model> RM) {
+  if (!RM.hasValue()) {
+    if (JIT)
+      return Reloc::PIC_;
+    return Reloc::Static;
+  }
+  return *RM;
+}
+
+// On function prologue, the stack is created by decrementing its pointer.
+// Once decremented, all references are done with positive offset from the
+// stack/frame pointer, so StackGrowsUp makes the handling easier.
+// Using CodeModel::Large enables different CALL behavior.
+LoongArchTargetMachine::LoongArchTargetMachine(
+    const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
+    const TargetOptions &Options, Optional<Reloc::Model> RM,
+    Optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT)
+    : LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options), TT, CPU, FS,
+                        Options, getEffectiveRelocModel(JIT, RM),
+                        getEffectiveCodeModel(CM, CodeModel::Small), OL),
+      TLOF(std::make_unique<LoongArchTargetObjectFile>()),
+      ABI(LoongArchABIInfo::computeTargetABI(TT, CPU, Options.MCOptions)) {
+  initAsmInfo();
+}
+
+LoongArchTargetMachine::~LoongArchTargetMachine() = default;
+
+const LoongArchSubtarget *
+LoongArchTargetMachine::getSubtargetImpl(const Function &F) const {
+  Attribute CPUAttr = F.getFnAttribute("target-cpu");
+  Attribute FSAttr = F.getFnAttribute("target-features");
+
+  std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
+                        ? CPUAttr.getValueAsString().str()
+                        : TargetCPU;
+  std::string FS = !FSAttr.hasAttribute(Attribute::None)
+                       ? FSAttr.getValueAsString().str()
+                       : TargetFS;
+
+  auto &I = SubtargetMap[CPU + FS];
+  if (!I) {
+    // This needs to be done before we create a new subtarget since any
+    // creation will depend on the TM and the code generation flags on the
+    // function that reside in TargetOptions.
+    resetTargetOptions(F);
+    I = std::make_unique<LoongArchSubtarget>(
+        TargetTriple, CPU, FS, *this,
+        MaybeAlign(Options.StackAlignmentOverride));
+  }
+  return I.get();
+}
+
+namespace {
+
+/// LoongArch Code Generator Pass Configuration Options.
+class LoongArchPassConfig : public TargetPassConfig {
+public:
+  LoongArchPassConfig(LoongArchTargetMachine &TM, PassManagerBase &PM)
+      : TargetPassConfig(TM, PM) {}
+
+  LoongArchTargetMachine &getLoongArchTargetMachine() const {
+    return getTM<LoongArchTargetMachine>();
+  }
+
+  void addIRPasses() override;
+  bool addInstSelector() override;
+  void addPreEmitPass() override;
+};
+
+} // end anonymous namespace
+
+TargetPassConfig *LoongArchTargetMachine::createPassConfig(PassManagerBase &PM) {
+  return new LoongArchPassConfig(*this, PM);
+}
+
+void LoongArchPassConfig::addIRPasses() {
+  TargetPassConfig::addIRPasses();
+  addPass(createAtomicExpandPass());
+}
+
+// Install an instruction selector pass using
+// the ISelDag to gen LoongArch code.
+bool LoongArchPassConfig::addInstSelector() {
+  addPass(createLoongArchModuleISelDagPass());
+  addPass(createLoongArchISelDag(getLoongArchTargetMachine(), getOptLevel()));
+  return false;
+}
+
+TargetTransformInfo
+LoongArchTargetMachine::getTargetTransformInfo(const Function &F) {
+  LLVM_DEBUG(errs() << "Target Transform Info Pass Added\n");
+  return TargetTransformInfo(BasicTTIImpl(this, F));
+}
+
+// Implemented by targets that want to run passes immediately before
+// machine code is emitted. return true if -print-machineinstrs should
+// print out the code after the passes.
+void LoongArchPassConfig::addPreEmitPass() {
+  // Expand pseudo instructions that are sensitive to register allocation.
+  addPass(createLoongArchExpandPseudoPass());
+
+  // Relax conditional branch instructions if they're otherwise out of
+  // range of their destination. This pass must be run after any pseudo
+  // instruction expansion.
+  addPass(&BranchRelaxationPassID);
+}
diff --git a/lib/Target/LoongArch/LoongArchTargetMachine.h b/lib/Target/LoongArch/LoongArchTargetMachine.h
new file mode 100644
index 00000000..ae09adf7
--- /dev/null
+++ b/lib/Target/LoongArch/LoongArchTargetMachine.h
@@ -0,0 +1,68 @@
+//===- LoongArchTargetMachine.h - Define TargetMachine for LoongArch ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the LoongArch specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
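For the LP64 ABI, computeDataLayout() above concatenates to the string checked
below; a sketch of verifying it against an assumed LoongArchTargetMachine
instance TM, not patch code:

    // "e" little endian; "-m:e" ELF mangling; no "-p:..." component since
    // pointers default to 64 bit on LP64.
    assert(TM.createDataLayout().getStringRepresentation() ==
           "e-m:e-i8:8:32-i16:16:32-i64:64-n32:64-S128");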
+#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETMACHINE_H
+#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETMACHINE_H
+
+#include "MCTargetDesc/LoongArchABIInfo.h"
+#include "LoongArchSubtarget.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Target/TargetMachine.h"
+#include <memory>
+
+namespace llvm {
+
+class LoongArchTargetMachine : public LLVMTargetMachine {
+  std::unique_ptr<TargetLoweringObjectFile> TLOF;
+  // Selected ABI
+  LoongArchABIInfo ABI;
+
+  mutable StringMap<std::unique_ptr<LoongArchSubtarget>> SubtargetMap;
+
+public:
+  LoongArchTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+                         StringRef FS, const TargetOptions &Options,
+                         Optional<Reloc::Model> RM,
+                         Optional<CodeModel::Model> CM,
+                         CodeGenOpt::Level OL, bool JIT);
+  ~LoongArchTargetMachine() override;
+
+  TargetTransformInfo getTargetTransformInfo(const Function &F) override;
+  const LoongArchSubtarget *getSubtargetImpl(const Function &F) const override;
+
+  // Pass Pipeline Configuration
+  TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+
+  TargetLoweringObjectFile *getObjFileLowering() const override {
+    return TLOF.get();
+  }
+
+  /// Returns true if a cast between SrcAS and DestAS is a noop.
+  bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
+    // LoongArch doesn't have any special address spaces so we just reserve
+    // the first 256 for software use (e.g. OpenCL) and treat casts
+    // between them as noops.
+    return SrcAS < 256 && DestAS < 256;
+  }
+
+  const LoongArchABIInfo &getABI() const { return ABI; }
+
+  bool isMachineVerifierClean() const override {
+    return false;
+  }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETMACHINE_H
diff --git a/lib/Target/LoongArch/LoongArchTargetObjectFile.cpp b/lib/Target/LoongArch/LoongArchTargetObjectFile.cpp
new file mode 100644
index 00000000..9c6250d2
--- /dev/null
+++ b/lib/Target/LoongArch/LoongArchTargetObjectFile.cpp
@@ -0,0 +1,26 @@
+//===-- LoongArchTargetObjectFile.cpp - LoongArch Object Files ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchTargetObjectFile.h"
+#include "LoongArchSubtarget.h"
+#include "LoongArchTargetMachine.h"
+#include "MCTargetDesc/LoongArchMCExpr.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+void LoongArchTargetObjectFile::Initialize(MCContext &Ctx,
+                                           const TargetMachine &TM) {
+  TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+  InitializeELF(TM.Options.UseInitArray);
+}
diff --git a/lib/Target/LoongArch/LoongArchTargetObjectFile.h b/lib/Target/LoongArch/LoongArchTargetObjectFile.h
new file mode 100644
index 00000000..a50c5717
--- /dev/null
+++ b/lib/Target/LoongArch/LoongArchTargetObjectFile.h
@@ -0,0 +1,24 @@
+//===-- llvm/Target/LoongArchTargetObjectFile.h - LoongArch Object Info ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETOBJECTFILE_H +#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETOBJECTFILE_H + +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" + +namespace llvm { +class LoongArchTargetMachine; + class LoongArchTargetObjectFile : public TargetLoweringObjectFileELF { + + public: + + void Initialize(MCContext &Ctx, const TargetMachine &TM) override; + }; +} // end namespace llvm + +#endif diff --git a/lib/Target/LoongArch/LoongArchTargetStreamer.h b/lib/Target/LoongArch/LoongArchTargetStreamer.h new file mode 100644 index 00000000..a9adc32d --- /dev/null +++ b/lib/Target/LoongArch/LoongArchTargetStreamer.h @@ -0,0 +1,130 @@ +//===-- LoongArchTargetStreamer.h - LoongArch Target Streamer ------------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETSTREAMER_H +#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETSTREAMER_H + +#include "MCTargetDesc/LoongArchABIInfo.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/MC/MCELFStreamer.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" + +namespace llvm { + +class formatted_raw_ostream; + +struct LoongArchFPABIInfo; + +class LoongArchTargetStreamer : public MCTargetStreamer { +public: + LoongArchTargetStreamer(MCStreamer &S); + + virtual void setPic(bool Value) {} + + virtual void emitDirectiveOptionPic0(); + virtual void emitDirectiveOptionPic2(); + + virtual void emitDirectiveSetArch(StringRef Arch); + virtual void emitDirectiveSetLoongArch32(); + virtual void emitDirectiveSetloongarch64(); + + void emitR(unsigned Opcode, unsigned Reg0, SMLoc IDLoc, + const MCSubtargetInfo *STI); + void emitII(unsigned Opcode, int16_t Imm1, int16_t Imm2, SMLoc IDLoc, + const MCSubtargetInfo *STI); + void emitRX(unsigned Opcode, unsigned Reg0, MCOperand Op1, SMLoc IDLoc, + const MCSubtargetInfo *STI); + void emitRI(unsigned Opcode, unsigned Reg0, int32_t Imm, SMLoc IDLoc, + const MCSubtargetInfo *STI); + void emitRR(unsigned Opcode, unsigned Reg0, unsigned Reg1, SMLoc IDLoc, + const MCSubtargetInfo *STI); + void emitRXX(unsigned Opcode, unsigned Reg0, MCOperand Op1, MCOperand Op2, + SMLoc IDLoc, const MCSubtargetInfo *STI); + void emitRRX(unsigned Opcode, unsigned Reg0, unsigned Reg1, MCOperand Op2, + SMLoc IDLoc, const MCSubtargetInfo *STI); + void emitRRR(unsigned Opcode, unsigned Reg0, unsigned Reg1, unsigned Reg2, + SMLoc IDLoc, const MCSubtargetInfo *STI); + void emitRRI(unsigned Opcode, unsigned Reg0, unsigned Reg1, int32_t Imm, + SMLoc IDLoc, const MCSubtargetInfo *STI); + void emitRRXX(unsigned Opcode, unsigned Reg0, unsigned Reg1, MCOperand Op2, + MCOperand Op3, SMLoc IDLoc, const MCSubtargetInfo *STI); + void emitRRIII(unsigned Opcode, unsigned Reg0, unsigned Reg1, int16_t Imm0, + int16_t Imm1, int16_t Imm2, SMLoc IDLoc, + const MCSubtargetInfo *STI); + void emitAdd(unsigned DstReg, unsigned SrcReg, unsigned TrgReg, bool Is64Bit, + const MCSubtargetInfo *STI); + void emitDSLL(unsigned DstReg, unsigned SrcReg, int16_t ShiftAmount, + SMLoc IDLoc, const 
MCSubtargetInfo *STI);
+  void emitNop(SMLoc IDLoc, const MCSubtargetInfo *STI);
+
+  void forbidModuleDirective() { ModuleDirectiveAllowed = false; }
+  void reallowModuleDirective() { ModuleDirectiveAllowed = true; }
+  bool isModuleDirectiveAllowed() { return ModuleDirectiveAllowed; }
+
+  template <class PredicateLibrary>
+  void updateABIInfo(const PredicateLibrary &P) {
+    ABI = P.getABI();
+  }
+
+  const LoongArchABIInfo &getABI() const {
+    assert(ABI.hasValue() && "ABI hasn't been set!");
+    return *ABI;
+  }
+
+protected:
+  llvm::Optional<LoongArchABIInfo> ABI;
+
+  bool GPRInfoSet;
+
+  bool FPRInfoSet;
+
+  bool FrameInfoSet;
+  int FrameOffset;
+  unsigned FrameReg;
+  unsigned ReturnReg;
+
+private:
+  bool ModuleDirectiveAllowed;
+};
+
+// This part is for ascii assembly output
+class LoongArchTargetAsmStreamer : public LoongArchTargetStreamer {
+  formatted_raw_ostream &OS;
+
+public:
+  LoongArchTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
+
+  void emitDirectiveOptionPic0() override;
+  void emitDirectiveOptionPic2() override;
+
+  void emitDirectiveSetArch(StringRef Arch) override;
+  void emitDirectiveSetLoongArch32() override;
+  void emitDirectiveSetloongarch64() override;
+};
+
+// This part is for ELF object output
+class LoongArchTargetELFStreamer : public LoongArchTargetStreamer {
+  const MCSubtargetInfo &STI;
+  bool Pic;
+
+public:
+  MCELFStreamer &getStreamer();
+  LoongArchTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI);
+
+  void setPic(bool Value) override { Pic = Value; }
+
+  void emitLabel(MCSymbol *Symbol) override;
+  void finish() override;
+
+  void emitDirectiveOptionPic0() override;
+  void emitDirectiveOptionPic2() override;
+};
+}
+#endif
diff --git a/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
new file mode 100644
index 00000000..480e2c68
--- /dev/null
+++ b/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
@@ -0,0 +1,330 @@
+//===-- LoongArchTargetTransformInfo.cpp - LoongArch specific TTI pass ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements a TargetTransformInfo analysis pass specific to the
+/// LoongArch target machine. It uses the target's detailed information to
+/// provide more precise answers to certain TTI queries, while letting the
+/// target independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchTargetTransformInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/CodeGen/CostTable.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "LoongArchtti"
+
+//===----------------------------------------------------------------------===//
+//
+// LoongArch cost model.
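How the cost tables below are consumed: the (ISD opcode, legalized MVT) pair
selects a per-instruction cost, which is then scaled by the number of registers
the original type legalizes to. A worked example against the LASX table,
assuming the pre-InstructionCost std::pair<int, MVT> form of
getTypeLegalizationCost used elsewhere in this file:

    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
    // <8 x i32> sdiv:  LT == {1, v8i32} -> 1 * 14 = 14
    // <16 x i32> sdiv: LT == {2, v8i32} -> 2 * 14 = 28 (split in two halves)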
+//
+//===----------------------------------------------------------------------===//
+
+bool LoongArchTTIImpl::areInlineCompatible(const Function *Caller,
+                                           const Function *Callee) const {
+  const TargetMachine &TM = getTLI()->getTargetMachine();
+
+  const FeatureBitset &CallerBits =
+      TM.getSubtargetImpl(*Caller)->getFeatureBits();
+  const FeatureBitset &CalleeBits =
+      TM.getSubtargetImpl(*Callee)->getFeatureBits();
+
+  // Inline a callee if its target-features are a subset of the caller's
+  // target-features.
+  return (CallerBits & CalleeBits) == CalleeBits;
+}
+
+TargetTransformInfo::PopcntSupportKind
+LoongArchTTIImpl::getPopcntSupport(unsigned TyWidth) {
+  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+  if (TyWidth == 32 || TyWidth == 64)
+    return TTI::PSK_FastHardware;
+  return TTI::PSK_Software;
+}
+
+unsigned LoongArchTTIImpl::getNumberOfRegisters(bool Vector) {
+  if (Vector && !ST->hasLSX())
+    return 0;
+
+  return 32;
+}
+
+unsigned LoongArchTTIImpl::getRegisterBitWidth(bool Vector) const {
+  if (Vector) {
+    if (ST->hasLASX())
+      return 256;
+
+    if (ST->hasLSX())
+      return 128;
+
+    return 0;
+  }
+  return 64;
+}
+
+unsigned LoongArchTTIImpl::getMaxInterleaveFactor(unsigned VF) {
+  if (VF == 1)
+    return 1;
+  return 2;
+}
+
+int LoongArchTTIImpl::getArithmeticInstrCost(
+    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
+    TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info,
+    TTI::OperandValueProperties Opd1PropInfo,
+    TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
+    const Instruction *CxtI) {
+
+  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+
+  int ISD = TLI->InstructionOpcodeToISD(Opcode);
+  assert(ISD && "Invalid opcode");
+
+  static const CostTblEntry LASXCostTable[] = {
+
+      {ISD::SHL, MVT::v32i8, 1},
+      {ISD::SHL, MVT::v16i16, 1},
+      {ISD::SHL, MVT::v8i32, 1},
+      {ISD::SHL, MVT::v4i64, 1},
+
+      {ISD::SRL, MVT::v32i8, 1},
+      {ISD::SRL, MVT::v16i16, 1},
+      {ISD::SRL, MVT::v8i32, 1},
+      {ISD::SRL, MVT::v4i64, 1},
+
+      {ISD::SRA, MVT::v32i8, 1},
+      {ISD::SRA, MVT::v16i16, 1},
+      {ISD::SRA, MVT::v8i32, 1},
+      {ISD::SRA, MVT::v4i64, 1},
+
+      {ISD::SUB, MVT::v32i8, 1},
+      {ISD::SUB, MVT::v16i16, 1},
+      {ISD::SUB, MVT::v8i32, 1},
+      {ISD::SUB, MVT::v4i64, 1},
+
+      {ISD::ADD, MVT::v32i8, 1},
+      {ISD::ADD, MVT::v16i16, 1},
+      {ISD::ADD, MVT::v8i32, 1},
+      {ISD::ADD, MVT::v4i64, 1},
+
+      {ISD::MUL, MVT::v32i8, 1},
+      {ISD::MUL, MVT::v16i16, 1},
+      {ISD::MUL, MVT::v8i32, 1},
+      {ISD::MUL, MVT::v4i64, 1},
+
+      {ISD::SDIV, MVT::v32i8, 29},
+      {ISD::SDIV, MVT::v16i16, 19},
+      {ISD::SDIV, MVT::v8i32, 14},
+      {ISD::SDIV, MVT::v4i64, 13},
+
+      {ISD::UDIV, MVT::v32i8, 29},
+      {ISD::UDIV, MVT::v16i16, 19},
+      {ISD::UDIV, MVT::v8i32, 14},
+      {ISD::UDIV, MVT::v4i64, 13},
+
+      {ISD::SREM, MVT::v32i8, 33},
+      {ISD::SREM, MVT::v16i16, 21},
+      {ISD::SREM, MVT::v8i32, 15},
+      {ISD::SREM, MVT::v4i64, 13},
+
+      {ISD::UREM, MVT::v32i8, 29},
+      {ISD::UREM, MVT::v16i16, 19},
+      {ISD::UREM, MVT::v8i32, 14},
+      {ISD::UREM, MVT::v4i64, 13},
+
+      {ISD::FADD, MVT::f64, 1},
+      {ISD::FADD, MVT::f32, 1},
+      {ISD::FADD, MVT::v4f64, 1},
+      {ISD::FADD, MVT::v8f32, 1},
+
+      {ISD::FSUB, MVT::f64, 1},
+      {ISD::FSUB, MVT::f32, 1},
+      {ISD::FSUB, MVT::v4f64, 1},
+      {ISD::FSUB, MVT::v8f32, 1},
+
+      {ISD::FMUL, MVT::f64, 1},
+      {ISD::FMUL, MVT::f32, 1},
+      {ISD::FMUL, MVT::v4f64, 1},
+      {ISD::FMUL, MVT::v8f32, 1},
+
+      {ISD::FDIV, MVT::f32, 12},
+      {ISD::FDIV, MVT::f64, 10},
+      {ISD::FDIV, MVT::v8f32, 12},
+      {ISD::FDIV, MVT::v4f64, 10}
+
+  };
+
+  if (ST->hasLASX())
+    if (const auto *Entry = CostTableLookup(LASXCostTable, ISD,
LT.second)) + return LT.first * Entry->Cost; + + static const CostTblEntry LSXCostTable[] = { + + {ISD::SHL, MVT::v16i8, 1}, + {ISD::SHL, MVT::v8i16, 1}, + {ISD::SHL, MVT::v4i32, 1}, + {ISD::SHL, MVT::v2i64, 1}, + + {ISD::SRL, MVT::v16i8, 1}, + {ISD::SRL, MVT::v8i16, 1}, + {ISD::SRL, MVT::v4i32, 1}, + {ISD::SRL, MVT::v2i64, 1}, + + {ISD::SRA, MVT::v16i8, 1}, + {ISD::SRA, MVT::v8i16, 1}, + {ISD::SRA, MVT::v4i32, 1}, + {ISD::SRA, MVT::v2i64, 1}, + + {ISD::SUB, MVT::v16i8, 1}, + {ISD::SUB, MVT::v8i16, 1}, + {ISD::SUB, MVT::v4i32, 1}, + {ISD::SUB, MVT::v2i64, 1}, + + {ISD::ADD, MVT::v16i8, 1}, + {ISD::ADD, MVT::v8i16, 1}, + {ISD::ADD, MVT::v4i32, 1}, + {ISD::ADD, MVT::v2i64, 1}, + + {ISD::MUL, MVT::v16i8, 1}, + {ISD::MUL, MVT::v8i16, 1}, + {ISD::MUL, MVT::v4i32, 1}, + {ISD::MUL, MVT::v2i64, 1}, + + {ISD::SDIV, MVT::v16i8, 29}, + {ISD::SDIV, MVT::v8i16, 19}, + {ISD::SDIV, MVT::v4i32, 14}, + {ISD::SDIV, MVT::v2i64, 13}, + + {ISD::UDIV, MVT::v16i8, 29}, + {ISD::UDIV, MVT::v8i16, 19}, + {ISD::UDIV, MVT::v4i32, 14}, + {ISD::UDIV, MVT::v2i64, 13}, + + {ISD::SREM, MVT::v16i8, 33}, + {ISD::SREM, MVT::v8i16, 21}, + {ISD::SREM, MVT::v4i32, 15}, + {ISD::SREM, MVT::v2i64, 13}, + + {ISD::UREM, MVT::v16i8, 29}, + {ISD::UREM, MVT::v8i16, 19}, + {ISD::UREM, MVT::v4i32, 14}, + {ISD::UREM, MVT::v2i64, 13}, + + {ISD::FADD, MVT::f64, 1}, + {ISD::FADD, MVT::f32, 1}, + {ISD::FADD, MVT::v2f64, 1}, + {ISD::FADD, MVT::v4f32, 1}, + + {ISD::FSUB, MVT::f64, 1}, + {ISD::FSUB, MVT::f32, 1}, + {ISD::FSUB, MVT::v2f64, 1}, + {ISD::FSUB, MVT::v4f32, 1}, + + {ISD::FMUL, MVT::f64, 1}, + {ISD::FMUL, MVT::f32, 1}, + {ISD::FMUL, MVT::v2f64, 1}, + {ISD::FMUL, MVT::v4f32, 1}, + + {ISD::FDIV, MVT::f32, 12}, + {ISD::FDIV, MVT::f64, 10}, + {ISD::FDIV, MVT::v4f32, 12}, + {ISD::FDIV, MVT::v2f64, 10} + + }; + + if (ST->hasLSX()) + if (const auto *Entry = CostTableLookup(LSXCostTable, ISD, LT.second)) + return LT.first * Entry->Cost; + + // Fallback to the default implementation. + return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info); +} + +int LoongArchTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) { + assert(Val->isVectorTy() && "This must be a vector type"); + + Type *ScalarType = Val->getScalarType(); + + if (Index != -1U) { + // Legalize the type. + std::pair LT = TLI->getTypeLegalizationCost(DL, Val); + + // This type is legalized to a scalar type. + if (!LT.second.isVector()) + return 0; + + // The type may be split. Normalize the index to the new type. + unsigned Width = LT.second.getVectorNumElements(); + Index = Index % Width; + + // The element at index zero is already inside the vector. + if (Index == 0) // if (ScalarType->isFloatingPointTy() && Index == 0) + return 0; + } + + // Add to the base cost if we know that the extracted element of a vector is + // destined to be moved to and used in the integer register file. 
+  int RegisterFileMoveCost = 0;
+  if (Opcode == Instruction::ExtractElement && ScalarType->isPointerTy())
+    RegisterFileMoveCost = 1;
+
+  int N = TLI->InstructionOpcodeToISD(Opcode);
+  if (N == ISD::INSERT_VECTOR_ELT || N == ISD::EXTRACT_VECTOR_ELT)
+    return 3 + BaseT::getVectorInstrCost(Opcode, Val, Index) +
+           RegisterFileMoveCost;
+
+  return BaseT::getVectorInstrCost(Opcode, Val, Index) + RegisterFileMoveCost;
+}
+
+unsigned LoongArchTTIImpl::getLoadStoreVecRegBitWidth(unsigned) const {
+  return getRegisterBitWidth(true);
+}
+
+int LoongArchTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
+                                       Type *Src,
+                                       TTI::CastContextHint CCH,
+                                       TTI::TargetCostKind CostKind,
+                                       const Instruction *I) {
+  int ISD = TLI->InstructionOpcodeToISD(Opcode);
+  assert(ISD && "Invalid opcode");
+
+  static const TypeConversionCostTblEntry LASXConversionTbl[] = {
+
+      // TODO: The cost requires more granular testing.
+      {ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 3},
+      {ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 3},
+      {ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 3},
+      {ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 3},
+      {ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 3},
+      {ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 3},
+
+  };
+
+  EVT SrcTy = TLI->getValueType(DL, Src);
+  EVT DstTy = TLI->getValueType(DL, Dst);
+
+  if (!SrcTy.isSimple() || !DstTy.isSimple())
+    return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
+
+  if (ST->hasLASX()) {
+    if (const auto *Entry = ConvertCostTableLookup(
+            LASXConversionTbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
+      return Entry->Cost;
+  }
+
+  return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
+}
diff --git a/lib/Target/LoongArch/LoongArchTargetTransformInfo.h b/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
new file mode 100644
index 00000000..706201b6
--- /dev/null
+++ b/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
@@ -0,0 +1,91 @@
+//===-- LoongArchTargetTransformInfo.h - LoongArch specific TTI -------------*-
+// C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// \file
+// This file declares a TargetTransformInfo::Concept conforming object
+// specific to the LoongArch target machine. It uses the target's detailed
+// information to provide more precise answers to certain TTI queries, while
+// letting the target independent and default TTI implementations handle the rest.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETTRANSFORMINFO_H
+
+#include "LoongArch.h"
+#include "LoongArchSubtarget.h"
+#include "LoongArchTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/CodeGen/TargetLowering.h"
+
+namespace llvm {
+
+class LoongArchTTIImpl : public BasicTTIImplBase<LoongArchTTIImpl> {
+  typedef BasicTTIImplBase<LoongArchTTIImpl> BaseT;
+  typedef TargetTransformInfo TTI;
+  friend BaseT;
+
+  const LoongArchSubtarget *ST;
+  const LoongArchTargetLowering *TLI;
+
+  const LoongArchSubtarget *getST() const { return ST; }
+  const LoongArchTargetLowering *getTLI() const { return TLI; }
+
+public:
+  explicit LoongArchTTIImpl(const LoongArchTargetMachine *TM, const Function &F)
+      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
+        TLI(ST->getTargetLowering()) {}
+
+  bool areInlineCompatible(const Function *Caller,
+                           const Function *Callee) const;
+
+  /// \name Scalar TTI Implementations
+  /// @{
+
+  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
+
+  /// @}
+
+  /// \name Vector TTI Implementations
+  /// @{
+
+  bool enableInterleavedAccessVectorization() { return true; }
+
+  unsigned getNumberOfRegisters(bool Vector);
+
+  unsigned getRegisterBitWidth(bool Vector) const;
+
+  unsigned getMaxInterleaveFactor(unsigned VF);
+
+  int getVectorInstrCost(unsigned Opcode, Type *Val,
+                         unsigned Index);
+
+  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                       TTI::CastContextHint CCH,
+                       TTI::TargetCostKind CostKind,
+                       const Instruction *I = nullptr);
+
+  unsigned getLoadStoreVecRegBitWidth(unsigned AS) const;
+
+  int getArithmeticInstrCost(
+      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
+      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
+      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
+      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
+      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
+      const Instruction *CxtI = nullptr);
+
+  /// @}
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/LoongArch/MCTargetDesc/CMakeLists.txt b/lib/Target/LoongArch/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 00000000..927fa7d5
--- /dev/null
+++ b/lib/Target/LoongArch/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,21 @@
+add_llvm_component_library(LLVMLoongArchDesc
+  LoongArchABIInfo.cpp
+  LoongArchAnalyzeImmediate.cpp
+  LoongArchAsmBackend.cpp
+  LoongArchELFObjectWriter.cpp
+  LoongArchELFStreamer.cpp
+  LoongArchInstPrinter.cpp
+  LoongArchMCAsmInfo.cpp
+  LoongArchMCCodeEmitter.cpp
+  LoongArchMCExpr.cpp
+  LoongArchMCTargetDesc.cpp
+  LoongArchTargetStreamer.cpp
+
+  LINK_COMPONENTS
+  MC
+  LoongArchInfo
+  Support
+
+  ADD_TO_COMPONENT
+  LoongArch
+  )
diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchABIInfo.cpp b/lib/Target/LoongArch/MCTargetDesc/LoongArchABIInfo.cpp
new file mode 100644
index 00000000..86aab1e3
--- /dev/null
+++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchABIInfo.cpp
@@ -0,0 +1,113 @@
+//===---- LoongArchABIInfo.cpp - Information about LoongArch ABIs ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchABIInfo.h"
+#include "LoongArchRegisterInfo.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/MC/MCTargetOptions.h"
+
+using namespace llvm;
+
+namespace {
+
+static const MCPhysReg LoongArch64IntRegs[8] = {
+    LoongArch::A0_64, LoongArch::A1_64, LoongArch::A2_64, LoongArch::A3_64,
+    LoongArch::A4_64, LoongArch::A5_64, LoongArch::A6_64, LoongArch::A7_64};
+} // end anonymous namespace
+
+ArrayRef<MCPhysReg> LoongArchABIInfo::GetByValArgRegs() const {
+  if (IsILP32())
+    // TODO
+    llvm_unreachable("Unimplemented ABI");
+  if (IsLP64())
+    return makeArrayRef(LoongArch64IntRegs);
+  llvm_unreachable("Unhandled ABI");
+}
+
+ArrayRef<MCPhysReg> LoongArchABIInfo::GetVarArgRegs() const {
+  if (IsILP32())
+    // TODO
+    llvm_unreachable("Unimplemented ABI");
+  if (IsLP64())
+    return makeArrayRef(LoongArch64IntRegs);
+  llvm_unreachable("Unhandled ABI");
+}
+
+LoongArchABIInfo LoongArchABIInfo::computeTargetABI(const Triple &TT, StringRef CPU,
+                                                    const MCTargetOptions &Options) {
+  if (Options.getABIName().startswith("ilp32d"))
+    return LoongArchABIInfo::ILP32D();
+  if (Options.getABIName().startswith("ilp32f"))
+    return LoongArchABIInfo::ILP32F();
+  if (Options.getABIName().startswith("ilp32s"))
+    return LoongArchABIInfo::ILP32S();
+  if (Options.getABIName().startswith("lp64d"))
+    return LoongArchABIInfo::LP64D();
+  if (Options.getABIName().startswith("lp64s"))
+    return LoongArchABIInfo::LP64S();
+  if (Options.getABIName().startswith("lp64f"))
+    return LoongArchABIInfo::LP64F();
+  assert(Options.getABIName().empty() && "Unknown ABI option for LoongArch");
+
+  if (TT.isLoongArch64())
+    return LoongArchABIInfo::LP64D();
+  return LoongArchABIInfo::ILP32D();
+}
+
+unsigned LoongArchABIInfo::GetStackPtr() const {
+  return ArePtrs64bit() ? LoongArch::SP_64 : LoongArch::SP;
+}
+
+unsigned LoongArchABIInfo::GetFramePtr() const {
+  return ArePtrs64bit() ? LoongArch::FP_64 : LoongArch::FP;
+}
+
+unsigned LoongArchABIInfo::GetBasePtr() const {
+  return ArePtrs64bit() ? LoongArch::S7_64 : LoongArch::S7;
+}
+
+unsigned LoongArchABIInfo::GetNullPtr() const {
+  return ArePtrs64bit() ? LoongArch::ZERO_64 : LoongArch::ZERO;
+}
+
+unsigned LoongArchABIInfo::GetZeroReg() const {
+  return AreGprs64bit() ? LoongArch::ZERO_64 : LoongArch::ZERO;
+}
+
+unsigned LoongArchABIInfo::GetPtrAddOp() const {
+  return ArePtrs64bit() ? LoongArch::ADD_D : LoongArch::ADD_W;
+}
+
+unsigned LoongArchABIInfo::GetPtrAddiOp() const {
+  return ArePtrs64bit() ? LoongArch::ADDI_D : LoongArch::ADDI_W;
+}
+
+unsigned LoongArchABIInfo::GetPtrSubOp() const {
+  return ArePtrs64bit() ? LoongArch::SUB_D : LoongArch::SUB_W;
+}
+
+unsigned LoongArchABIInfo::GetPtrAndOp() const {
+  return ArePtrs64bit() ? LoongArch::AND : LoongArch::AND32;
+}
+
+unsigned LoongArchABIInfo::GetGPRMoveOp() const {
+  return ArePtrs64bit() ? LoongArch::OR : LoongArch::OR32;
+}
+
+unsigned LoongArchABIInfo::GetEhDataReg(unsigned I) const {
+  static const unsigned EhDataReg[] = {
+    LoongArch::A0, LoongArch::A1, LoongArch::A2, LoongArch::A3
+  };
+  static const unsigned EhDataReg64[] = {
+    LoongArch::A0_64, LoongArch::A1_64, LoongArch::A2_64, LoongArch::A3_64
+  };
+
+  return IsLP64() ?
EhDataReg64[I] : EhDataReg[I];
+}
+
diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchABIInfo.h b/lib/Target/LoongArch/MCTargetDesc/LoongArchABIInfo.h
new file mode 100644
index 00000000..56d4ef91
--- /dev/null
+++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchABIInfo.h
@@ -0,0 +1,91 @@
+//===---- LoongArchABIInfo.h - Information about LoongArch ABIs --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHABIINFO_H
+#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHABIINFO_H
+
+#include "llvm/ADT/Triple.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/MC/MCRegisterInfo.h"
+
+namespace llvm {
+
+template <typename T> class ArrayRef;
+class MCTargetOptions;
+class StringRef;
+class TargetRegisterClass;
+
+class LoongArchABIInfo {
+public:
+  enum class ABI { Unknown, ILP32D, ILP32F, ILP32S, LP64D, LP64F, LP64S };
+
+protected:
+  ABI ThisABI;
+
+public:
+  LoongArchABIInfo(ABI ThisABI) : ThisABI(ThisABI) {}
+
+  static LoongArchABIInfo Unknown() { return LoongArchABIInfo(ABI::Unknown); }
+  static LoongArchABIInfo ILP32D() { return LoongArchABIInfo(ABI::ILP32D); }
+  static LoongArchABIInfo ILP32F() { return LoongArchABIInfo(ABI::ILP32F); }
+  static LoongArchABIInfo ILP32S() { return LoongArchABIInfo(ABI::ILP32S); }
+  static LoongArchABIInfo LP64D() { return LoongArchABIInfo(ABI::LP64D); }
+  static LoongArchABIInfo LP64S() { return LoongArchABIInfo(ABI::LP64S); }
+  static LoongArchABIInfo LP64F() { return LoongArchABIInfo(ABI::LP64F); }
+  static LoongArchABIInfo computeTargetABI(const Triple &TT, StringRef CPU,
+                                           const MCTargetOptions &Options);
+
+  bool IsKnown() const { return ThisABI != ABI::Unknown; }
+  bool IsILP32D() const { return ThisABI == ABI::ILP32D; }
+  bool IsILP32F() const { return ThisABI == ABI::ILP32F; }
+  bool IsILP32S() const { return ThisABI == ABI::ILP32S; }
+  bool IsILP32() const { return IsILP32D() || IsILP32F() || IsILP32S(); }
+  bool IsLP64D() const { return ThisABI == ABI::LP64D; }
+  bool IsLP64S() const { return ThisABI == ABI::LP64S; }
+  bool IsLP64F() const { return ThisABI == ABI::LP64F; }
+  bool IsLP64() const { return IsLP64D() || IsLP64S() || IsLP64F(); }
+  bool IsSoftFloat() const { return IsILP32S() || IsLP64S(); }
+  bool IsSingleFloat() const { return IsILP32F() || IsLP64F(); }
+  bool IsDoubleFloat() const { return IsILP32D() || IsLP64D(); }
+  ABI GetEnumValue() const { return ThisABI; }
+
+  /// The registers to use for byval arguments.
+  ArrayRef<MCPhysReg> GetByValArgRegs() const;
+
+  /// The registers to use for the variable argument list.
+  ArrayRef<MCPhysReg> GetVarArgRegs() const;
+
+  /// Ordering of ABIs.
+  /// LoongArchGenSubtargetInfo.inc will use this to resolve conflicts when given
+  /// multiple ABI options.
+ bool operator<(const LoongArchABIInfo Other) const { + return ThisABI < Other.GetEnumValue(); + } + + unsigned GetStackPtr() const; + unsigned GetFramePtr() const; + unsigned GetBasePtr() const; + unsigned GetNullPtr() const; + unsigned GetZeroReg() const; + unsigned GetPtrAddOp() const; + unsigned GetPtrAddiOp() const; + unsigned GetPtrSubOp() const; + unsigned GetPtrAndOp() const; + unsigned GetGPRMoveOp() const; + inline bool ArePtrs64bit() const { + return IsLP64D() || IsLP64S() || IsLP64F(); + } + inline bool AreGprs64bit() const { + return IsLP64D() || IsLP64S() || IsLP64F(); + } + + unsigned GetEhDataReg(unsigned I) const; +}; +} + +#endif diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchAnalyzeImmediate.cpp b/lib/Target/LoongArch/MCTargetDesc/LoongArchAnalyzeImmediate.cpp new file mode 100644 index 00000000..96e43b2d --- /dev/null +++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchAnalyzeImmediate.cpp @@ -0,0 +1,64 @@ +//===- LoongArchAnalyzeImmediate.cpp - Analyze Immediates -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "LoongArchAnalyzeImmediate.h" +#include "LoongArch.h" +#include "MCTargetDesc/LoongArchMCTargetDesc.h" +#include "llvm/Support/MathExtras.h" + +using namespace llvm; + +LoongArchAnalyzeImmediate::InstSeq +LoongArchAnalyzeImmediate::generateInstSeq(int64_t Val, bool Is64Bit) { + // Val: + // | hi32 | lo32 | + // +------------+------------------+------------------+-----------+ + // | Bits_52_63 | Bits_32_51 | Bits_12_31 | Bits_0_11 | + // +------------+------------------+------------------+-----------+ + // 63 52 51 32 31 12 11 0 + unsigned ORIOp = Is64Bit ? LoongArch::ORI : LoongArch::ORI32; + unsigned LU12IOp = Is64Bit ? LoongArch::LU12I_W : LoongArch::LU12I_W32; + unsigned ADDIOp = Is64Bit ? 
LoongArch::ADDI_W64 : LoongArch::ADDI_W;
+  unsigned LU32IOp = LoongArch::LU32I_D_R2;
+  unsigned LU52IOp = LoongArch::LU52I_D;
+
+  int64_t Bits_52_63 = Val >> 52 & 0xFFF;
+  int64_t Bits_32_51 = Val >> 32 & 0xFFFFF;
+  int64_t Bits_12_31 = Val >> 12 & 0xFFFFF;
+  int64_t Bits_0_11 = Val & 0xFFF;
+
+  InstSeq Insts;
+
+  if (isInt<12>(Val) && Is64Bit) {
+    Insts.push_back(Inst(LoongArch::ADDI_D, SignExtend64<12>(Bits_0_11)));
+    return Insts;
+  }
+
+  if (Bits_52_63 != 0 && SignExtend64<52>(Val) == 0) {
+    Insts.push_back(Inst(LU52IOp, SignExtend64<12>(Bits_52_63)));
+    return Insts;
+  }
+
+  if (Bits_12_31 == 0)
+    Insts.push_back(Inst(ORIOp, Bits_0_11));
+  else if (SignExtend32<1>(Bits_0_11 >> 11) == SignExtend32<20>(Bits_12_31))
+    Insts.push_back(Inst(ADDIOp, SignExtend64<12>(Bits_0_11)));
+  else {
+    Insts.push_back(Inst(LU12IOp, SignExtend64<20>(Bits_12_31)));
+    if (Bits_0_11 != 0)
+      Insts.push_back(Inst(ORIOp, Bits_0_11));
+  }
+
+  if (SignExtend32<1>(Bits_12_31 >> 19) != SignExtend32<20>(Bits_32_51))
+    Insts.push_back(Inst(LU32IOp, SignExtend64<20>(Bits_32_51)));
+
+  if (SignExtend32<1>(Bits_32_51 >> 19) != SignExtend32<12>(Bits_52_63))
+    Insts.push_back(Inst(LU52IOp, SignExtend64<12>(Bits_52_63)));
+
+  return Insts;
+}
diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchAnalyzeImmediate.h b/lib/Target/LoongArch/MCTargetDesc/LoongArchAnalyzeImmediate.h
new file mode 100644
index 00000000..3ff00f25
--- /dev/null
+++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchAnalyzeImmediate.h
@@ -0,0 +1,29 @@
+//===- LoongArchAnalyzeImmediate.h - Analyze Immediates --------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHANALYZEIMMEDIATE_H
+#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHANALYZEIMMEDIATE_H
+
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+namespace LoongArchAnalyzeImmediate {
+struct Inst {
+  unsigned Opc;
+  int64_t Imm;
+  Inst(unsigned Opc, int64_t Imm) : Opc(Opc), Imm(Imm) {}
+};
+using InstSeq = SmallVector<Inst, 4>;
+
+// Helper to generate an instruction sequence that will materialise the given
+// immediate value into a register.
+InstSeq generateInstSeq(int64_t Val, bool Is64Bit);
+} // end namespace LoongArchAnalyzeImmediate
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHANALYZEIMMEDIATE_H
diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
new file mode 100644
index 00000000..afa1ea2d
--- /dev/null
+++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
@@ -0,0 +1,324 @@
+//===-- LoongArchAsmBackend.cpp - LoongArch Asm Backend ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LoongArchAsmBackend class.
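The generateInstSeq logic just shown materializes a 64-bit constant with at most four instructions, one per 12- or 20-bit field. A standalone sketch of the same field split, printing mnemonic strings instead of using the backend's opcode enums, and omitting the sign-extension shortcuts the real code applies:

```cpp
#include <cstdint>
#include <cstdio>

// Print the (at most four) instructions that would materialize Val into a
// register, mirroring the field split in generateInstSeq above. The raw
// field values are printed unsigned; the real code sign-extends them.
void materialize(int64_t Val) {
  int64_t Lo12 = Val & 0xFFF;            // bits 0..11  -> ori
  int64_t Hi20 = (Val >> 12) & 0xFFFFF;  // bits 12..31 -> lu12i.w
  int64_t Mid20 = (Val >> 32) & 0xFFFFF; // bits 32..51 -> lu32i.d
  int64_t Top12 = (Val >> 52) & 0xFFF;   // bits 52..63 -> lu52i.d
  std::printf("lu12i.w $a0, 0x%llx\n", (unsigned long long)Hi20);
  if (Lo12)
    std::printf("ori     $a0, $a0, 0x%llx\n", (unsigned long long)Lo12);
  std::printf("lu32i.d $a0, 0x%llx\n", (unsigned long long)Mid20);
  std::printf("lu52i.d $a0, $a0, 0x%llx\n", (unsigned long long)Top12);
}

int main() { materialize(0x123456789ABCDEF0); return 0; }
```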
+//
+//===----------------------------------------------------------------------===//
+//
+
+#include "MCTargetDesc/LoongArchAsmBackend.h"
+#include "MCTargetDesc/LoongArchABIInfo.h"
+#include "MCTargetDesc/LoongArchFixupKinds.h"
+#include "MCTargetDesc/LoongArchMCExpr.h"
+#include "MCTargetDesc/LoongArchMCTargetDesc.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+std::unique_ptr<MCObjectTargetWriter>
+LoongArchAsmBackend::createObjectTargetWriter() const {
+  return createLoongArchELFObjectWriter(TheTriple);
+}
+
+static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
+                                 MCContext &Ctx) {
+  switch (Fixup.getTargetKind()) {
+  default:
+    llvm_unreachable("Unknown fixup kind");
+  case LoongArch::fixup_loongarch_got_pc_hi20:
+  case LoongArch::fixup_loongarch_got_pc_lo12:
+  case LoongArch::fixup_loongarch_got64_pc_lo20:
+  case LoongArch::fixup_loongarch_got64_pc_hi12:
+  case LoongArch::fixup_loongarch_got_hi20:
+  case LoongArch::fixup_loongarch_got_lo12:
+  case LoongArch::fixup_loongarch_got64_lo20:
+  case LoongArch::fixup_loongarch_got64_hi12:
+  case LoongArch::fixup_loongarch_tls_ld_pc_hi20:
+  case LoongArch::fixup_loongarch_tls_ld_hi20:
+  case LoongArch::fixup_loongarch_tls_gd_pc_hi20:
+  case LoongArch::fixup_loongarch_tls_gd_hi20:
+  case LoongArch::fixup_loongarch_tls_ie_pc_hi20:
+  case LoongArch::fixup_loongarch_tls_ie_pc_lo12:
+  case LoongArch::fixup_loongarch_tls_ie64_pc_lo20:
+  case LoongArch::fixup_loongarch_tls_ie64_pc_hi12:
+  case LoongArch::fixup_loongarch_tls_ie_hi20:
+  case LoongArch::fixup_loongarch_tls_ie_lo12:
+  case LoongArch::fixup_loongarch_tls_ie64_lo20:
+  case LoongArch::fixup_loongarch_tls_ie64_hi12:
+  // FIXME: Sometimes these fixup_*pcala* relocations can be evaluated
+  // directly; leave them to the linker for now.
+  case LoongArch::fixup_loongarch_pcala_hi20:
+  case LoongArch::fixup_loongarch_pcala_lo12:
+  case LoongArch::fixup_loongarch_pcala64_lo20:
+  case LoongArch::fixup_loongarch_pcala64_hi12:
+    llvm_unreachable("Relocation should be unconditionally forced");
+  case FK_Data_1:
+  case FK_Data_2:
+  case FK_Data_4:
+  case FK_Data_8:
+    return Value;
+  case LoongArch::fixup_loongarch_b16: {
+    if (!isInt<18>(Value))
+      Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
+    if (Value & 0x3)
+      Ctx.reportError(Fixup.getLoc(), "fixup value must be 4-byte aligned");
+    return (Value >> 2) & 0xffff;
+  }
+  case LoongArch::fixup_loongarch_b21: {
+    if (!isInt<23>(Value))
+      Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
+    if (Value & 0x3)
+      Ctx.reportError(Fixup.getLoc(), "fixup value must be 4-byte aligned");
+    return ((Value & 0x3fffc) << 8) | ((Value >> 18) & 0x1f);
+  }
+  case LoongArch::fixup_loongarch_b26: {
+    if (!isInt<28>(Value))
+      Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
+    if (Value & 0x3)
+      Ctx.reportError(Fixup.getLoc(), "fixup value must be 4-byte aligned");
+    return ((Value & 0x3fffc) << 8) | ((Value >> 18) & 0x3ff);
+  }
+  case LoongArch::fixup_loongarch_abs_hi20:
+  case LoongArch::fixup_loongarch_tls_le_hi20:
+    return (Value >> 12) & 0xfffff;
+  case LoongArch::fixup_loongarch_abs_lo12:
+  case LoongArch::fixup_loongarch_tls_le_lo12:
+    return Value & 0xfff;
+  case LoongArch::fixup_loongarch_abs64_lo20:
+  case LoongArch::fixup_loongarch_tls_le64_lo20:
+    return (Value >> 32) & 0xfffff;
+  case LoongArch::fixup_loongarch_abs64_hi12:
+  case LoongArch::fixup_loongarch_tls_le64_hi12:
+    return (Value >> 52) & 0xfff;
+  }
+}
+
+/// ApplyFixup - Apply the \p Value for the given \p Fixup into the provided
+/// data fragment, at the offset specified by the fixup and following the
+/// fixup kind as appropriate.
+void LoongArchAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+                                     const MCValue &Target,
+                                     MutableArrayRef<char> Data, uint64_t Value,
+                                     bool IsResolved,
+                                     const MCSubtargetInfo *STI) const {
+  MCFixupKind Kind = Fixup.getKind();
+  if (Kind >= FirstLiteralRelocationKind)
+    return;
+
+  MCContext &Ctx = Asm.getContext();
+  MCFixupKindInfo Info = getFixupKindInfo(Kind);
+  if (!Value)
+    return; // Doesn't change encoding.
+  // Apply any target-specific value adjustments.
+  Value = adjustFixupValue(Fixup, Value, Ctx);
+
+  // Shift the value into position.
+  Value <<= Info.TargetOffset;
+
+  unsigned Offset = Fixup.getOffset();
+  unsigned NumBytes = alignTo(Info.TargetSize + Info.TargetOffset, 8) / 8;
+
+  assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!");
+  // For each byte of the fragment that the fixup touches, mask in the
+  // bits from the fixup value.
+  for (unsigned I = 0; I != NumBytes; ++I) {
+    Data[Offset + I] |= uint8_t((Value >> (I * 8)) & 0xff);
+  }
+}
+
+Optional<MCFixupKind> LoongArchAsmBackend::getFixupKind(StringRef Name) const {
+  if (STI.getTargetTriple().isOSBinFormatELF()) {
+    auto Type = llvm::StringSwitch<unsigned>(Name)
+#define ELF_RELOC(X, Y) .Case(#X, Y)
+#include "llvm/BinaryFormat/ELFRelocs/LoongArch.def"
+#undef ELF_RELOC
+                    .Case("BFD_RELOC_NONE", ELF::R_LARCH_NONE)
+                    .Case("BFD_RELOC_32", ELF::R_LARCH_32)
+                    .Case("BFD_RELOC_64", ELF::R_LARCH_64)
+                    .Default(-1u);
+    if (Type != -1u)
+      return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
+  }
+  return None;
+}
+
+const MCFixupKindInfo &LoongArchAsmBackend::
+getFixupKindInfo(MCFixupKind Kind) const {
+  static const MCFixupKindInfo Infos[] = {
+      // This table *must* be in the same order as the fixup_* kinds in
+      // LoongArchFixupKinds.h.
+      //
+      // name                               offset bits  flags
+      {"fixup_LARCH_NONE", 0, 0, 0},
+      {"fixup_LARCH_32", 0, 0, 0},
+      {"fixup_LARCH_64", 0, 0, 0},
+      {"fixup_LARCH_RELATIVE", 0, 0, 0},
+      {"fixup_LARCH_COPY", 0, 0, 0},
+      {"fixup_LARCH_JUMP_SLOT", 0, 0, 0},
+      {"fixup_LARCH_TLS_DTPMOD32", 0, 0, 0},
+      {"fixup_LARCH_TLS_DTPMOD64", 0, 0, 0},
+      {"fixup_LARCH_TLS_DTPREL32", 0, 0, 0},
+      {"fixup_LARCH_TLS_DTPREL64", 0, 0, 0},
+      {"fixup_LARCH_TLS_TPREL32", 0, 0, 0},
+      {"fixup_LARCH_TLS_TPREL64", 0, 0, 0},
+      {"fixup_LARCH_IRELATIVE", 0, 0, 0},
+      {"fixup_LARCH_MARK_LA", 0, 0, 0},
+      {"fixup_LARCH_MARK_PCREL", 0, 0, 0},
+      {"fixup_LARCH_ADD8", 0, 0, 0},
+      {"fixup_LARCH_ADD16", 0, 0, 0},
+      {"fixup_LARCH_ADD24", 0, 0, 0},
+      {"fixup_LARCH_ADD32", 0, 0, 0},
+      {"fixup_LARCH_ADD64", 0, 0, 0},
+      {"fixup_LARCH_SUB8", 0, 0, 0},
+      {"fixup_LARCH_SUB16", 0, 0, 0},
+      {"fixup_LARCH_SUB24", 0, 0, 0},
+      {"fixup_LARCH_SUB32", 0, 0, 0},
+      {"fixup_LARCH_SUB64", 0, 0, 0},
+      {"fixup_LARCH_GNU_VTINHERIT", 0, 0, 0},
+      {"fixup_LARCH_GNU_VTENTRY", 0, 0, 0},
+      {"fixup_loongarch_b16", 10, 16, MCFixupKindInfo::FKF_IsPCRel},
+      {"fixup_loongarch_b21", 0, 26, MCFixupKindInfo::FKF_IsPCRel},
+      {"fixup_loongarch_b26", 0, 26, MCFixupKindInfo::FKF_IsPCRel},
+      {"fixup_loongarch_abs_hi20", 5, 20, 0},
+      {"fixup_loongarch_abs_lo12", 10, 12, 0},
+      {"fixup_loongarch_abs64_lo20", 5, 20, 0},
+      {"fixup_loongarch_abs64_hi12", 10, 12, 0},
+      {"fixup_loongarch_pcala_hi20", 5, 20, MCFixupKindInfo::FKF_IsPCRel},
+      {"fixup_loongarch_pcala_lo12", 10, 12, MCFixupKindInfo::FKF_IsPCRel},
+      {"fixup_loongarch_pcala64_lo20", 5, 20, MCFixupKindInfo::FKF_IsPCRel},
+      {"fixup_loongarch_pcala64_hi12", 10, 12, MCFixupKindInfo::FKF_IsPCRel},
+      {"fixup_loongarch_got_pc_hi20", 5, 20, MCFixupKindInfo::FKF_IsPCRel},
+      {"fixup_loongarch_got_pc_lo12", 10, 12, MCFixupKindInfo::FKF_IsPCRel},
+      {"fixup_loongarch_got64_pc_lo20", 5, 20, MCFixupKindInfo::FKF_IsPCRel},
+      {"fixup_loongarch_got64_pc_hi12", 10, 12, MCFixupKindInfo::FKF_IsPCRel},
+      {"fixup_loongarch_got_hi20", 5, 20, 0},
+      {"fixup_loongarch_got_lo12", 10, 12, 0},
+      {"fixup_loongarch_got64_lo20", 5, 20, 0},
+      {"fixup_loongarch_got64_hi12", 10, 12, 0},
+      {"fixup_loongarch_tls_le_hi20", 5, 20, 0},
+      {"fixup_loongarch_tls_le_lo12", 10, 12, 0},
+      {"fixup_loongarch_tls_le64_lo20", 5, 20, 0},
+      {"fixup_loongarch_tls_le64_hi12", 10, 12, 0},
+      {"fixup_loongarch_tls_ie_pc_hi20", 5, 20, MCFixupKindInfo::FKF_IsPCRel},
+      {"fixup_loongarch_tls_ie_pc_lo12", 10, 12, MCFixupKindInfo::FKF_IsPCRel},
+      {"fixup_loongarch_tls_ie64_pc_lo20", 5, 20, MCFixupKindInfo::FKF_IsPCRel},
+      {"fixup_loongarch_tls_ie64_pc_hi12", 10, 12,
+       MCFixupKindInfo::FKF_IsPCRel},
+      {"fixup_loongarch_tls_ie_hi20", 5, 20, 0},
+
{"fixup_loongarch_tls_ie_lo12", 10, 12, 0}, + {"fixup_loongarch_tls_ie64_lo20", 5, 20, 0}, + {"fixup_loongarch_tls_ie64_hi12", 10, 12, 0}, + {"fixup_loongarch_tls_ld_pc_hi20", 5, 20, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_loongarch_tls_ld_hi20", 5, 20, 0}, + {"fixup_loongarch_tls_gd_pc_hi20", 5, 20, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_loongarch_tls_gd_hi20", 5, 20, 0}, + }; + + static_assert((array_lengthof(Infos)) == LoongArch::NumTargetFixupKinds, + "Not all fixup kinds added to Infos array"); + + // Fixup kinds from .reloc directive are like R_LARCH_NONE. They + // do not require any extra processing. + if (Kind >= FirstLiteralRelocationKind) + return MCAsmBackend::getFixupKindInfo(FK_NONE); + + if (Kind < FirstTargetFixupKind) + return MCAsmBackend::getFixupKindInfo(Kind); + + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && + "Invalid kind!"); + + return Infos[Kind - FirstTargetFixupKind]; +} + +/// WriteNopData - Write an (optimal) nop sequence of Count bytes +/// to the given output. If the target cannot generate such a sequence, +/// it should return an error. +/// +/// \return - True on success. +bool LoongArchAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const { + // Check for a less than instruction size number of bytes + if ((Count % 4) != 0) + return false; + + // The nop on LoongArch is andi r0, r0, 0. + for (; Count >= 4; Count -= 4) + support::endian::write(OS, 0x03400000, support::little); + + return true; +} + +bool LoongArchAsmBackend::shouldForceRelocation(const MCAssembler &Asm, + const MCFixup &Fixup, + const MCValue &Target) { + if (Fixup.getKind() >= FirstLiteralRelocationKind) + return true; + const unsigned FixupKind = Fixup.getKind(); + switch (FixupKind) { + default: + return false; + // All these relocations require special processing + // at linking time. Delegate this work to a linker. + case FK_Data_1: + case FK_Data_2: + case FK_Data_4: + case FK_Data_8: + return !Target.isAbsolute(); + // These relocations require special processing at linking time. 
+ case LoongArch::fixup_loongarch_pcala_hi20: + case LoongArch::fixup_loongarch_pcala_lo12: + case LoongArch::fixup_loongarch_pcala64_lo20: + case LoongArch::fixup_loongarch_pcala64_hi12: + case LoongArch::fixup_loongarch_got_pc_hi20: + case LoongArch::fixup_loongarch_got_pc_lo12: + case LoongArch::fixup_loongarch_got64_pc_lo20: + case LoongArch::fixup_loongarch_got64_pc_hi12: + case LoongArch::fixup_loongarch_got_hi20: + case LoongArch::fixup_loongarch_got_lo12: + case LoongArch::fixup_loongarch_got64_lo20: + case LoongArch::fixup_loongarch_got64_hi12: + case LoongArch::fixup_loongarch_tls_ld_pc_hi20: + case LoongArch::fixup_loongarch_tls_ld_hi20: + case LoongArch::fixup_loongarch_tls_gd_pc_hi20: + case LoongArch::fixup_loongarch_tls_gd_hi20: + case LoongArch::fixup_loongarch_tls_ie_pc_hi20: + case LoongArch::fixup_loongarch_tls_ie_pc_lo12: + case LoongArch::fixup_loongarch_tls_ie64_pc_lo20: + case LoongArch::fixup_loongarch_tls_ie64_pc_hi12: + case LoongArch::fixup_loongarch_tls_ie_hi20: + case LoongArch::fixup_loongarch_tls_ie_lo12: + case LoongArch::fixup_loongarch_tls_ie64_lo20: + case LoongArch::fixup_loongarch_tls_ie64_hi12: + return true; + } +} + +MCAsmBackend *llvm::createLoongArchAsmBackend(const Target &T, + const MCSubtargetInfo &STI, + const MCRegisterInfo &MRI, + const MCTargetOptions &Options) { + LoongArchABIInfo ABI = LoongArchABIInfo::computeTargetABI( + STI.getTargetTriple(), STI.getCPU(), Options); + return new LoongArchAsmBackend(STI, T, MRI, STI.getTargetTriple(), + STI.getCPU()); +} diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h b/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h new file mode 100644 index 00000000..966c240a --- /dev/null +++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h @@ -0,0 +1,90 @@ +//===-- LoongArchAsmBackend.h - LoongArch Asm Backend ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the LoongArchAsmBackend class. 
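writeNopData, defined earlier in this file, pads with whole `andi $zero, $zero, 0` instructions (encoding 0x03400000, emitted little-endian) and refuses byte counts that are not a multiple of 4. A self-contained sketch of that padding loop:

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// Append Count bytes of nop padding (andi $zero, $zero, 0 == 0x03400000),
// little-endian, mirroring LoongArchAsmBackend::writeNopData.
bool writeNops(std::vector<uint8_t> &Out, uint64_t Count) {
  if (Count % 4 != 0)
    return false; // cannot pad with a partial instruction
  for (; Count >= 4; Count -= 4) {
    const uint32_t Nop = 0x03400000;
    for (unsigned I = 0; I != 4; ++I)
      Out.push_back((uint8_t)((Nop >> (I * 8)) & 0xff));
  }
  return true;
}

int main() {
  std::vector<uint8_t> Buf;
  if (writeNops(Buf, 8))
    std::printf("emitted %zu bytes of nops\n", Buf.size()); // prints 8
  return 0;
}
```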
+//
+//===----------------------------------------------------------------------===//
+//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHASMBACKEND_H
+#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHASMBACKEND_H
+
+#include "MCTargetDesc/LoongArchFixupKinds.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCAsmBackend.h"
+
+namespace llvm {
+
+class MCAssembler;
+struct MCFixupKindInfo;
+class MCObjectWriter;
+class MCRegisterInfo;
+class MCSymbolELF;
+class Target;
+
+class LoongArchAsmBackend : public MCAsmBackend {
+  const MCSubtargetInfo &STI;
+  Triple TheTriple;
+
+public:
+  LoongArchAsmBackend(const MCSubtargetInfo &STI, const Target &T,
+                      const MCRegisterInfo &MRI, const Triple &TT,
+                      StringRef CPU)
+      : MCAsmBackend(support::little), STI(STI), TheTriple(TT) {
+    assert(TT.isLittleEndian());
+  }
+
+  std::unique_ptr<MCObjectTargetWriter>
+  createObjectTargetWriter() const override;
+
+  void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+                  const MCValue &Target, MutableArrayRef<char> Data,
+                  uint64_t Value, bool IsResolved,
+                  const MCSubtargetInfo *STI) const override;
+
+  Optional<MCFixupKind> getFixupKind(StringRef Name) const override;
+  const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
+
+  unsigned getNumFixupKinds() const override {
+    return LoongArch::NumTargetFixupKinds;
+  }
+
+  /// @name Target Relaxation Interfaces
+  /// @{
+
+  /// mayNeedRelaxation - Check whether the given instruction may need
+  /// relaxation.
+  ///
+  /// \param Inst - The instruction to test.
+  bool mayNeedRelaxation(const MCInst &Inst,
+                         const MCSubtargetInfo &STI) const override {
+    return false;
+  }
+
+  /// fixupNeedsRelaxation - Target specific predicate for whether a given
+  /// fixup requires the associated instruction to be relaxed.
+  bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+                            const MCRelaxableFragment *DF,
+                            const MCAsmLayout &Layout) const override {
+    // FIXME.
+    llvm_unreachable("RelaxInstruction() unimplemented");
+    return false;
+  }
+
+  /// @}
+
+  bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
+
+  bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
+                             const MCValue &Target) override;
+
+}; // class LoongArchAsmBackend
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h b/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h
new file mode 100644
index 00000000..707333c1
--- /dev/null
+++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h
@@ -0,0 +1,128 @@
+//===-- LoongArchBaseInfo.h - Top level definitions for LoongArch MC ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the LoongArch target useful for the compiler back-end and the MC libraries.
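The MCFixupKindInfo table above records, for every fixup, the bit offset and width of the instruction field to patch; applyFixup shifts the adjusted value left by TargetOffset and ORs it in byte by byte. A self-contained illustration for a 16-bit field at bit 10, the shape of fixup_loongarch_b16:

```cpp
#include <cstdint>
#include <cstdio>

// Patch a Width-bit field at BitOffset into a little-endian 32-bit word,
// the way LoongArchAsmBackend::applyFixup masks a fixup value into place.
void patchField(uint8_t *Insn, uint64_t Value, unsigned BitOffset,
                unsigned Width) {
  uint64_t Mask = ((uint64_t)1 << Width) - 1;
  uint64_t Shifted = (Value & Mask) << BitOffset;
  for (unsigned I = 0; I != 4; ++I)
    Insn[I] |= (uint8_t)((Shifted >> (I * 8)) & 0xff);
}

int main() {
  uint8_t Word[4] = {0, 0, 0, 0};
  // A 16-bit branch offset, already shifted right by 2 as in adjustFixupValue.
  patchField(Word, 0x1234, /*BitOffset=*/10, /*Width=*/16);
  std::printf("%02x %02x %02x %02x\n", Word[0], Word[1], Word[2], Word[3]);
  return 0;
}
```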
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHBASEINFO_H
+#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHBASEINFO_H
+
+#include "LoongArchFixupKinds.h"
+#include "LoongArchMCTargetDesc.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+
+/// LoongArchII - This namespace holds all of the target-specific flags that
+/// instruction info tracks.
+///
+namespace LoongArchII {
+  /// Target Operand Flag enum.
+  enum TOF {
+    //===------------------------------------------------------------------===//
+    // LoongArch Specific MachineOperand flags.
+
+    MO_NO_FLAG,
+
+    /// MO_ABS_XXX - Represents the high or low part of an absolute symbol
+    /// address.
+    MO_ABS_HI,
+    MO_ABS_LO,
+    MO_ABS_HIGHER,
+    MO_ABS_HIGHEST,
+
+    /// MO_PCREL_XXX - Represents the high or low part of a PC-relative symbol
+    /// address.
+    MO_PCREL_HI,
+    MO_PCREL_LO,
+    // with tmp reg
+    MO_PCREL_RRHI,
+    MO_PCREL_RRLO,
+    MO_PCREL_RRHIGHER,
+    MO_PCREL_RRHIGHEST,
+
+    // LArch Tls gd and ld
+    MO_TLSGD_HI,
+    MO_TLSGD_LO,
+    // with tmp reg
+    MO_TLSGD_RRHI,
+    MO_TLSGD_RRLO,
+    MO_TLSGD_RRHIGHER,
+    MO_TLSGD_RRHIGHEST,
+
+    // LArch thread tprel (ie/le)
+    // LArch Tls ie
+    MO_TLSIE_HI,
+    MO_TLSIE_LO,
+    // with tmp reg
+    MO_TLSIE_RRHI,
+    MO_TLSIE_RRLO,
+    MO_TLSIE_RRHIGHER,
+    MO_TLSIE_RRHIGHEST,
+    // LArch Tls le
+    MO_TLSLE_HI,
+    MO_TLSLE_LO,
+    MO_TLSLE_HIGHER,
+    MO_TLSLE_HIGHEST,
+
+    // Loongarch got
+    MO_GOT_HI,
+    MO_GOT_LO,
+    // with tmp reg
+    MO_GOT_RRHI,
+    MO_GOT_RRLO,
+    MO_GOT_RRHIGHER,
+    MO_GOT_RRHIGHEST,
+
+    MO_CALL_HI,
+    MO_CALL_LO,
+  };
+
+  enum {
+    //===------------------------------------------------------------------===//
+    // Instruction encodings. These are the standard/most common forms for
+    // LoongArch instructions.
+    //
+
+    // Pseudo - This represents an instruction that is a pseudo instruction
+    // or one that has not been implemented yet. It is illegal to
+    // code-generate it, but tolerated for intermediate implementation stages.
+    Pseudo = 0,
+
+    /// FrmR - This form is for instructions of the format R.
+    FrmR = 1,
+    /// FrmI - This form is for instructions of the format I.
+    FrmI = 2,
+    /// FrmJ - This form is for instructions of the format J.
+    FrmJ = 3,
+    /// FrmFR - This form is for instructions of the format FR.
+    FrmFR = 4,
+    /// FrmFI - This form is for instructions of the format FI.
+    FrmFI = 5,
+    /// FrmOther - This form is for instructions that have no specific format.
+    FrmOther = 6,
+
+    FormMask = 15,
+    /// IsCTI - Instruction is a Control Transfer Instruction.
+    IsCTI = 1 << 4,
+    /// HasForbiddenSlot - Instruction has a forbidden slot.
+    HasForbiddenSlot = 1 << 5,
+    /// IsPCRelativeLoad - A Load instruction with implicit source register
+    /// ($pc) with explicit offset and destination register
+    IsPCRelativeLoad = 1 << 6,
+    /// HasFCCRegOperand - Instruction uses an $fcc register.
+    HasFCCRegOperand = 1 << 7
+
+  };
+} // end namespace LoongArchII
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp b/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp
new file mode 100644
index 00000000..cdce15e6
--- /dev/null
+++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp
@@ -0,0 +1,209 @@
+//===-- LoongArchELFObjectWriter.cpp - LoongArch ELF Writer -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/LoongArchFixupKinds.h"
+#include "MCTargetDesc/LoongArchMCExpr.h"
+#include "MCTargetDesc/LoongArchMCTargetDesc.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <utility>
+
+#define DEBUG_TYPE "loongarch-elf-object-writer"
+
+using namespace llvm;
+
+namespace {
+
+class LoongArchELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+  LoongArchELFObjectWriter(uint8_t OSABI, bool HasRelocationAddend, bool Is64);
+
+  ~LoongArchELFObjectWriter() override = default;
+
+  unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
+                        const MCFixup &Fixup, bool IsPCRel) const override;
+};
+
+} // end anonymous namespace
+
+LoongArchELFObjectWriter::LoongArchELFObjectWriter(uint8_t OSABI,
+    bool HasRelocationAddend, bool Is64)
+    : MCELFObjectTargetWriter(Is64, OSABI, ELF::EM_LOONGARCH, HasRelocationAddend) {}
+
+unsigned LoongArchELFObjectWriter::getRelocType(MCContext &Ctx,
+                                                const MCValue &Target,
+                                                const MCFixup &Fixup,
+                                                bool IsPCRel) const {
+  // Determine the type of the relocation. XXX: Reloc.
+  unsigned Kind = (unsigned)Fixup.getKind();
+  const MCExpr *Expr = Fixup.getValue();
+
+  if (Kind >= FirstLiteralRelocationKind)
+    return Kind - FirstLiteralRelocationKind;
+
+  switch (Kind) {
+  default:
+    return ELF::R_LARCH_NONE;
+    // llvm_unreachable("invalid fixup kind!");
+  case FK_Data_4:
+  case LoongArch::fixup_LARCH_32:
+    if (Expr->getKind() == MCExpr::Target &&
+        cast<LoongArchMCExpr>(Expr)->getKind() ==
+            LoongArchMCExpr::MEK_32_PCREL)
+      return ELF::R_LARCH_32_PCREL;
+    return IsPCRel ?
ELF::R_LARCH_32_PCREL : ELF::R_LARCH_32; + case FK_GPRel_4: + case FK_Data_8: + case LoongArch::fixup_LARCH_64: + return ELF::R_LARCH_64; + case LoongArch::fixup_LARCH_NONE: + return ELF::R_LARCH_NONE; + case LoongArch::fixup_LARCH_RELATIVE: + return ELF::R_LARCH_RELATIVE; + case LoongArch::fixup_LARCH_COPY: + return ELF::R_LARCH_COPY; + case LoongArch::fixup_LARCH_JUMP_SLOT: + return ELF::R_LARCH_JUMP_SLOT; + case LoongArch::fixup_LARCH_TLS_DTPMOD32: + return ELF::R_LARCH_TLS_DTPMOD32; + case LoongArch::fixup_LARCH_TLS_DTPMOD64: + return ELF::R_LARCH_TLS_DTPMOD64; + case LoongArch::fixup_LARCH_TLS_DTPREL32: + return ELF::R_LARCH_TLS_DTPREL32; + case LoongArch::fixup_LARCH_TLS_DTPREL64: + return ELF::R_LARCH_TLS_DTPREL64; + case LoongArch::fixup_LARCH_TLS_TPREL32: + return ELF::R_LARCH_TLS_TPREL32; + case LoongArch::fixup_LARCH_TLS_TPREL64: + return ELF::R_LARCH_TLS_TPREL64; + case LoongArch::fixup_LARCH_IRELATIVE: + return ELF::R_LARCH_IRELATIVE; + case LoongArch::fixup_LARCH_MARK_LA: + return ELF::R_LARCH_MARK_LA; + case LoongArch::fixup_LARCH_MARK_PCREL: + return ELF::R_LARCH_MARK_PCREL; + case LoongArch::fixup_LARCH_ADD8: + return ELF::R_LARCH_ADD8; + case LoongArch::fixup_LARCH_ADD16: + return ELF::R_LARCH_ADD16; + case LoongArch::fixup_LARCH_ADD32: + return ELF::R_LARCH_ADD32; + case LoongArch::fixup_LARCH_ADD64: + return ELF::R_LARCH_ADD64; + case LoongArch::fixup_LARCH_SUB8: + return ELF::R_LARCH_SUB8; + case LoongArch::fixup_LARCH_SUB16: + return ELF::R_LARCH_SUB16; + case LoongArch::fixup_LARCH_SUB24: + return ELF::R_LARCH_SUB24; + case LoongArch::fixup_LARCH_SUB32: + return ELF::R_LARCH_SUB32; + case LoongArch::fixup_LARCH_SUB64: + return ELF::R_LARCH_SUB64; + case LoongArch::fixup_LARCH_GNU_VTINHERIT: + return ELF::R_LARCH_GNU_VTINHERIT; + case LoongArch::fixup_LARCH_GNU_VTENTRY: + return ELF::R_LARCH_GNU_VTENTRY; + case LoongArch::fixup_loongarch_b16: + return ELF::R_LARCH_B16; + case LoongArch::fixup_loongarch_b21: + return ELF::R_LARCH_B21; + case LoongArch::fixup_loongarch_b26: + return ELF::R_LARCH_B26; + case LoongArch::fixup_loongarch_abs_hi20: + return ELF::R_LARCH_ABS_HI20; + case LoongArch::fixup_loongarch_abs_lo12: + return ELF::R_LARCH_ABS_LO12; + case LoongArch::fixup_loongarch_abs64_lo20: + return ELF::R_LARCH_ABS64_LO20; + case LoongArch::fixup_loongarch_abs64_hi12: + return ELF::R_LARCH_ABS64_HI12; + case LoongArch::fixup_loongarch_pcala_hi20: + return ELF::R_LARCH_PCALA_HI20; + case LoongArch::fixup_loongarch_pcala_lo12: + return ELF::R_LARCH_PCALA_LO12; + case LoongArch::fixup_loongarch_pcala64_lo20: + return ELF::R_LARCH_PCALA64_LO20; + case LoongArch::fixup_loongarch_pcala64_hi12: + return ELF::R_LARCH_PCALA64_HI12; + case LoongArch::fixup_loongarch_got_pc_hi20: + return ELF::R_LARCH_GOT_PC_HI20; + case LoongArch::fixup_loongarch_got_pc_lo12: + return ELF::R_LARCH_GOT_PC_LO12; + case LoongArch::fixup_loongarch_got64_pc_lo20: + return ELF::R_LARCH_GOT64_PC_LO20; + case LoongArch::fixup_loongarch_got64_pc_hi12: + return ELF::R_LARCH_GOT64_PC_HI12; + case LoongArch::fixup_loongarch_got_hi20: + return ELF::R_LARCH_GOT_HI20; + case LoongArch::fixup_loongarch_got_lo12: + return ELF::R_LARCH_GOT_LO12; + case LoongArch::fixup_loongarch_got64_lo20: + return ELF::R_LARCH_GOT64_LO20; + case LoongArch::fixup_loongarch_got64_hi12: + return ELF::R_LARCH_GOT64_HI12; + case LoongArch::fixup_loongarch_tls_le_hi20: + return ELF::R_LARCH_TLS_LE_HI20; + case LoongArch::fixup_loongarch_tls_le_lo12: + return ELF::R_LARCH_TLS_LE_LO12; + case 
LoongArch::fixup_loongarch_tls_le64_lo20:
+    return ELF::R_LARCH_TLS_LE64_LO20;
+  case LoongArch::fixup_loongarch_tls_le64_hi12:
+    return ELF::R_LARCH_TLS_LE64_HI12;
+  case LoongArch::fixup_loongarch_tls_ie_pc_hi20:
+    return ELF::R_LARCH_TLS_IE_PC_HI20;
+  case LoongArch::fixup_loongarch_tls_ie_pc_lo12:
+    return ELF::R_LARCH_TLS_IE_PC_LO12;
+  case LoongArch::fixup_loongarch_tls_ie64_pc_lo20:
+    return ELF::R_LARCH_TLS_IE64_PC_LO20;
+  case LoongArch::fixup_loongarch_tls_ie64_pc_hi12:
+    return ELF::R_LARCH_TLS_IE64_PC_HI12;
+  case LoongArch::fixup_loongarch_tls_ie_hi20:
+    return ELF::R_LARCH_TLS_IE_HI20;
+  case LoongArch::fixup_loongarch_tls_ie_lo12:
+    return ELF::R_LARCH_TLS_IE_LO12;
+  case LoongArch::fixup_loongarch_tls_ie64_lo20:
+    return ELF::R_LARCH_TLS_IE64_LO20;
+  case LoongArch::fixup_loongarch_tls_ie64_hi12:
+    return ELF::R_LARCH_TLS_IE64_HI12;
+  case LoongArch::fixup_loongarch_tls_ld_pc_hi20:
+    return ELF::R_LARCH_TLS_LD_PC_HI20;
+  case LoongArch::fixup_loongarch_tls_ld_hi20:
+    return ELF::R_LARCH_TLS_LD_HI20;
+  case LoongArch::fixup_loongarch_tls_gd_pc_hi20:
+    return ELF::R_LARCH_TLS_GD_PC_HI20;
+  case LoongArch::fixup_loongarch_tls_gd_hi20:
+    return ELF::R_LARCH_TLS_GD_HI20;
+  }
+}
+
+std::unique_ptr<MCObjectTargetWriter>
+llvm::createLoongArchELFObjectWriter(const Triple &TT) {
+  uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS());
+  bool IsLP64 = TT.isArch64Bit();
+  bool HasRelocationAddend = TT.isArch64Bit();
+  return std::make_unique<LoongArchELFObjectWriter>(OSABI, HasRelocationAddend,
+                                                    IsLP64);
+}
diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp b/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp
new file mode 100644
index 00000000..92357d91
--- /dev/null
+++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp
@@ -0,0 +1,138 @@
+//===-------- LoongArchELFStreamer.cpp - ELF Object Output ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
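A data directive whose value is a difference of two symbols (A - B) cannot be expressed with a single LoongArch relocation, so the streamer below lowers it into a paired ADDn/SUBn relocation chosen by size; getRelocPairForSize in this file performs that selection. A standalone sketch of the size-to-pair mapping, with relocation names as plain strings:

```cpp
#include <cstdio>
#include <utility>

// Mirror of getRelocPairForSize: pick the ADD/SUB relocation pair that
// applies a label difference of the given byte size.
std::pair<const char *, const char *> relocPairForSize(unsigned Size) {
  switch (Size) {
  case 1: return {"R_LARCH_ADD8", "R_LARCH_SUB8"};
  case 2: return {"R_LARCH_ADD16", "R_LARCH_SUB16"};
  case 4: return {"R_LARCH_ADD32", "R_LARCH_SUB32"};
  case 8: return {"R_LARCH_ADD64", "R_LARCH_SUB64"};
  default: return {nullptr, nullptr}; // unsupported fixup size
  }
}

int main() {
  auto P = relocPairForSize(4);
  std::printf(".4byte A - B  =>  %s(A) %s(B)\n", P.first, P.second);
  return 0;
}
```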
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "LoongArchELFStreamer.h" +#include "LoongArchFixupKinds.h" +#include "LoongArchTargetStreamer.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSymbolELF.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/Casting.h" + +using namespace llvm; + +static std::pair getRelocPairForSize(unsigned Size) { + switch (Size) { + default: + llvm_unreachable("unsupported fixup size"); + case 1: + return std::make_pair(LoongArch::fixup_LARCH_ADD8, + LoongArch::fixup_LARCH_SUB8); + case 2: + return std::make_pair(LoongArch::fixup_LARCH_ADD16, + LoongArch::fixup_LARCH_SUB16); + case 4: + return std::make_pair(LoongArch::fixup_LARCH_ADD32, + LoongArch::fixup_LARCH_SUB32); + case 8: + return std::make_pair(LoongArch::fixup_LARCH_ADD64, + LoongArch::fixup_LARCH_SUB64); + } +} + +static bool requiresFixups(MCContext &C, const MCExpr *Value, + const MCExpr *&LHS, const MCExpr *&RHS, + LoongArchELFStreamer *MCS) { + const auto *MBE = dyn_cast(Value); + if (MBE == nullptr) + return false; + + MCValue E; + if (!Value->evaluateAsRelocatable(E, nullptr, nullptr)) + return false; + if (E.getSymA() == nullptr || E.getSymB() == nullptr) + return false; + + const auto &A = E.getSymA()->getSymbol(); + const auto &B = E.getSymB()->getSymbol(); + + if (A.getName().empty() && B.getName().empty()) + return false; + + if (!A.isInSection() && !B.isInSection() && + !A.getName().empty() && !B.getName().empty()) + return false; + + LHS = + MCBinaryExpr::create(MCBinaryExpr::Add, MCSymbolRefExpr::create(&A, C), + MCConstantExpr::create(E.getConstant(), C), C); + RHS = E.getSymB(); + + bool isCheckInstr = + StringSwitch(MCS->getCurrentSectionOnly()->getName()) + .Case(".debug_aranges", true) + .Default(false); + + return (A.isInSection() + ? (isCheckInstr ? A.getSection().hasInstructions() : true) + : !A.getName().empty()) || + (B.isInSection() ? 
B.getSection().hasInstructions()
+                           : !B.getName().empty());
+}
+
+LoongArchELFStreamer::LoongArchELFStreamer(MCContext &Context,
+                                           std::unique_ptr<MCAsmBackend> MAB,
+                                           std::unique_ptr<MCObjectWriter> OW,
+                                           std::unique_ptr<MCCodeEmitter> Emitter)
+    : MCELFStreamer(Context, std::move(MAB), std::move(OW),
+                    std::move(Emitter)) {
+}
+
+void LoongArchELFStreamer::emitCFIStartProcImpl(MCDwarfFrameInfo &Frame) {
+  Frame.Begin = getContext().createTempSymbol();
+  MCELFStreamer::emitLabel(Frame.Begin);
+}
+
+MCSymbol *LoongArchELFStreamer::emitCFILabel() {
+  MCSymbol *Label = getContext().createTempSymbol("cfi", true);
+  MCELFStreamer::emitLabel(Label);
+  return Label;
+}
+
+void LoongArchELFStreamer::emitCFIEndProcImpl(MCDwarfFrameInfo &Frame) {
+  Frame.End = getContext().createTempSymbol();
+  MCELFStreamer::emitLabel(Frame.End);
+}
+
+void LoongArchELFStreamer::emitValueImpl(const MCExpr *Value, unsigned Size,
+                                         SMLoc Loc) {
+  const MCExpr *A, *B;
+  if (!requiresFixups(getContext(), Value, A, B, this))
+    return MCELFStreamer::emitValueImpl(Value, Size, Loc);
+
+  MCStreamer::emitValueImpl(Value, Size, Loc);
+
+  MCDataFragment *DF = getOrCreateDataFragment();
+  flushPendingLabels(DF, DF->getContents().size());
+  MCDwarfLineEntry::Make(this, getCurrentSectionOnly());
+
+  unsigned Add, Sub;
+  std::tie(Add, Sub) = getRelocPairForSize(Size);
+
+  DF->getFixups().push_back(MCFixup::create(
+      DF->getContents().size(), A, static_cast<MCFixupKind>(Add), Loc));
+  DF->getFixups().push_back(MCFixup::create(
+      DF->getContents().size(), B, static_cast<MCFixupKind>(Sub), Loc));
+
+  DF->getContents().resize(DF->getContents().size() + Size, 0);
+}
+
+MCELFStreamer *llvm::createLoongArchELFStreamer(
+    MCContext &Context, std::unique_ptr<MCAsmBackend> MAB,
+    std::unique_ptr<MCObjectWriter> OW, std::unique_ptr<MCCodeEmitter> Emitter,
+    bool RelaxAll) {
+  return new LoongArchELFStreamer(Context, std::move(MAB), std::move(OW),
+                                  std::move(Emitter));
+}
diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.h b/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.h
new file mode 100644
index 00000000..875cebcb
--- /dev/null
+++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.h
@@ -0,0 +1,53 @@
+//===- LoongArchELFStreamer.h - ELF Object Output --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a custom MCELFStreamer which allows us to insert some hooks before
+// emitting data into an actual object file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHELFSTREAMER_H
+#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHELFSTREAMER_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include <memory>
+
+namespace llvm {
+
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCSubtargetInfo;
+struct MCDwarfFrameInfo;
+
+class LoongArchELFStreamer : public MCELFStreamer {
+
+public:
+  LoongArchELFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> MAB,
+                       std::unique_ptr<MCObjectWriter> OW,
+                       std::unique_ptr<MCCodeEmitter> Emitter);
+
+  /// Overriding these functions allows us to dismiss all labels.
+  void emitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override;
+
+  // Overriding these functions allows us to avoid recording of these labels
+  // in emitLabel.
+ void emitCFIStartProcImpl(MCDwarfFrameInfo &Frame) override; + void emitCFIEndProcImpl(MCDwarfFrameInfo &Frame) override; + MCSymbol *emitCFILabel() override; +}; + +MCELFStreamer *createLoongArchELFStreamer(MCContext &Context, + std::unique_ptr MAB, + std::unique_ptr OW, + std::unique_ptr Emitter, + bool RelaxAll); +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHELFSTREAMER_H diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h b/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h new file mode 100644 index 00000000..5ee83c84 --- /dev/null +++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h @@ -0,0 +1,136 @@ +//===-- LoongArchFixupKinds.h - LoongArch Specific Fixup Entries ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHFIXUPKINDS_H +#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHFIXUPKINDS_H + +#include "llvm/MC/MCFixup.h" + +namespace llvm { +namespace LoongArch { +// Although most of the current fixup types reflect a unique relocation +// one can have multiple fixup types for a given relocation and thus need +// to be uniquely named. +// +// This table *must* be in the same order of +// MCFixupKindInfo Infos[LoongArch::NumTargetFixupKinds] +// in LoongArchAsmBackend.cpp. +// +enum Fixups { + // R_LARCH_NONE. + fixup_LARCH_NONE = FirstTargetFixupKind, + + // reloc_hint + // fixup methods + fixup_LARCH_32, + fixup_LARCH_64, + fixup_LARCH_RELATIVE, + fixup_LARCH_COPY, + fixup_LARCH_JUMP_SLOT, + fixup_LARCH_TLS_DTPMOD32, + fixup_LARCH_TLS_DTPMOD64, + fixup_LARCH_TLS_DTPREL32, + fixup_LARCH_TLS_DTPREL64, + fixup_LARCH_TLS_TPREL32, + fixup_LARCH_TLS_TPREL64, + fixup_LARCH_IRELATIVE, + fixup_LARCH_MARK_LA, + fixup_LARCH_MARK_PCREL, + fixup_LARCH_ADD8, + fixup_LARCH_ADD16, + fixup_LARCH_ADD24, + fixup_LARCH_ADD32, + fixup_LARCH_ADD64, + fixup_LARCH_SUB8, + fixup_LARCH_SUB16, + fixup_LARCH_SUB24, + fixup_LARCH_SUB32, + fixup_LARCH_SUB64, + fixup_LARCH_GNU_VTINHERIT, + fixup_LARCH_GNU_VTENTRY, + // 16-bit fixup corresponding to %b16(foo) for instructions like bne. + fixup_loongarch_b16, + // 21-bit fixup corresponding to %b21(foo) for instructions like bnez. + fixup_loongarch_b21, + // 26-bit fixup corresponding to %b26(foo)/%plt(foo) for instructions b/bl. + fixup_loongarch_b26, + // 20-bit fixup corresponding to %abs_hi20(foo) for instruction lu12i.w. + fixup_loongarch_abs_hi20, + // 12-bit fixup corresponding to %abs_lo12(foo) for instruction ori. + fixup_loongarch_abs_lo12, + // 20-bit fixup corresponding to %abs64_lo20(foo) for instruction lu32i.d. + fixup_loongarch_abs64_lo20, + // 12-bit fixup corresponding to %abs_hi12(foo) for instruction lu52i.d. + fixup_loongarch_abs64_hi12, + // 20-bit fixup corresponding to %pc_hi20(foo) for instruction pcalau12i. + fixup_loongarch_pcala_hi20, + // 12-bit fixup corresponding to %pc_lo12(foo) for instructions like addi.w/d. + fixup_loongarch_pcala_lo12, + // 20-bit fixup corresponding to %pc64_lo20(foo) for instruction lu32i.d. + fixup_loongarch_pcala64_lo20, + // 12-bit fixup corresponding to %pc64_hi12(foo) for instruction lu52i.d. + fixup_loongarch_pcala64_hi12, + // 20-bit fixup corresponding to %got_pc_hi20(foo) for instruction pcalau12i. 
+ fixup_loongarch_got_pc_hi20, + // 12-bit fixup corresponding to %got_pc_lo12(foo) for instructions + // ld.w/ld.d/add.d. + fixup_loongarch_got_pc_lo12, + // 20-bit fixup corresponding to %got64_pc_lo20(foo) for instruction lu32i.d. + fixup_loongarch_got64_pc_lo20, + // 12-bit fixup corresponding to %got64_pc_hi12(foo) for instruction lu52i.d. + fixup_loongarch_got64_pc_hi12, + // 20-bit fixup corresponding to %got_hi20(foo) for instruction lu12i.w. + fixup_loongarch_got_hi20, + // 12-bit fixup corresponding to %got_lo12(foo) for instruction ori. + fixup_loongarch_got_lo12, + // 20-bit fixup corresponding to %got64_lo20(foo) for instruction lu32i.d. + fixup_loongarch_got64_lo20, + // 12-bit fixup corresponding to %got64_hi12(foo) for instruction lu52i.d. + fixup_loongarch_got64_hi12, + // 20-bit fixup corresponding to %le_hi20(foo) for instruction lu12i.w. + fixup_loongarch_tls_le_hi20, + // 12-bit fixup corresponding to %le_lo12(foo) for instruction ori. + fixup_loongarch_tls_le_lo12, + // 20-bit fixup corresponding to %le64_lo20(foo) for instruction lu32i.d. + fixup_loongarch_tls_le64_lo20, + // 12-bit fixup corresponding to %le64_hi12(foo) for instruction lu52i.d. + fixup_loongarch_tls_le64_hi12, + // 20-bit fixup corresponding to %ie_pc_hi20(foo) for instruction pcalau12i. + fixup_loongarch_tls_ie_pc_hi20, + // 12-bit fixup corresponding to %ie_pc_lo12(foo) for instructions + // ld.w/ld.d/add.d. + fixup_loongarch_tls_ie_pc_lo12, + // 20-bit fixup corresponding to %ie64_pc_lo20(foo) for instruction lu32i.d. + fixup_loongarch_tls_ie64_pc_lo20, + // 12-bit fixup corresponding to %ie64_pc_hi12(foo) for instruction lu52i.d. + fixup_loongarch_tls_ie64_pc_hi12, + // 20-bit fixup corresponding to %ie_hi20(foo) for instruction lu12i.w. + fixup_loongarch_tls_ie_hi20, + // 12-bit fixup corresponding to %ie_lo12(foo) for instruction ori. + fixup_loongarch_tls_ie_lo12, + // 20-bit fixup corresponding to %ie64_lo20(foo) for instruction lu32i.d. + fixup_loongarch_tls_ie64_lo20, + // 12-bit fixup corresponding to %ie64_hi12(foo) for instruction lu52i.d. + fixup_loongarch_tls_ie64_hi12, + // 20-bit fixup corresponding to %ld_pc_hi20(foo) for instruction pcalau12i. + fixup_loongarch_tls_ld_pc_hi20, + // 20-bit fixup corresponding to %ld_hi20(foo) for instruction lu12i.w. + fixup_loongarch_tls_ld_hi20, + // 20-bit fixup corresponding to %gd_pc_hi20(foo) for instruction pcalau12i. + fixup_loongarch_tls_gd_pc_hi20, + // 20-bit fixup corresponding to %gd_hi20(foo) for instruction lu12i.w. + fixup_loongarch_tls_gd_hi20, + + // Marker + LastTargetFixupKind, + NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind +}; +} // namespace LoongArch +} // namespace llvm + +#endif diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp b/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp new file mode 100644 index 00000000..d684b059 --- /dev/null +++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp @@ -0,0 +1,246 @@ +//===-- LoongArchInstPrinter.cpp - Convert LoongArch MCInst to assembly syntax ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This class prints an LoongArch MCInst to a .s file. 
+// +//===----------------------------------------------------------------------===// + +#include "LoongArchInstPrinter.h" +#include "MCTargetDesc/LoongArchMCExpr.h" +#include "LoongArchInstrInfo.h" +#include "MCTargetDesc/LoongArchMCTargetDesc.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "asm-printer" + +#define PRINT_ALIAS_INSTR +#include "LoongArchGenAsmWriter.inc" + +template +static bool isReg(const MCInst &MI, unsigned OpNo) { + assert(MI.getOperand(OpNo).isReg() && "Register operand expected."); + return MI.getOperand(OpNo).getReg() == R; +} + +const char* LoongArch::LoongArchFCCToString(LoongArch::CondCode CC) { + switch (CC) { + case FCOND_T: + case FCOND_F: return "caf"; + case FCOND_OR: + case FCOND_UN: return "cun"; + case FCOND_UNE: + case FCOND_OEQ: return "ceq"; + case FCOND_ONE: + case FCOND_UEQ: return "cueq"; + case FCOND_UGE: + case FCOND_OLT: return "clt"; + case FCOND_OGE: + case FCOND_ULT: return "cult"; + case FCOND_UGT: + case FCOND_OLE: return "cle"; + case FCOND_OGT: + case FCOND_ULE: return "cule"; + case FCOND_ST: + case FCOND_SF: return "saf"; + case FCOND_GLE: + case FCOND_NGLE:return "sun"; + case FCOND_SEQ: return "seq"; + case FCOND_SNE: return "sne"; + case FCOND_GL: + case FCOND_NGL: return "sueq"; + case FCOND_NLT: + case FCOND_LT: return "slt"; + case FCOND_GE: + case FCOND_NGE: return "sult"; + case FCOND_NLE: + case FCOND_LE: return "sle"; + case FCOND_GT: + case FCOND_NGT: return "sule"; + case FCOND_CNE: return "cne"; + case FCOND_COR: return "cor"; + case FCOND_SOR: return "sor"; + case FCOND_CUNE: return "cune"; + case FCOND_SUNE: return "sune"; + } + llvm_unreachable("Impossible condition code!"); +} + +void LoongArchInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { + OS << '$' << StringRef(getRegisterName(RegNo)).lower(); +} + +void LoongArchInstPrinter::printInst(const MCInst *MI, uint64_t Address, + StringRef Annot, + const MCSubtargetInfo &STI, + raw_ostream &O) { + switch (MI->getOpcode()) { + default: + break; + case LoongArch::PCALAU12I_ri: + case LoongArch::LU12I_W_ri: + printLoadAddr(MI, O); + return; + case LoongArch::ADD_D_rrr: + case LoongArch::LDX_D_rrr: + case LoongArch::ADDI_D_rri: + case LoongArch::LD_D_rri: + case LoongArch::ORI_rri: + case LoongArch::LU32I_D_ri: + case LoongArch::LU52I_D_rri: + O << "\t# la expanded slot"; + return; + } + + // Try to print any aliases first. 
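+  // The TableGen'erated printAliasInstr is tried first, then the
+  // hand-written printAlias; only if both decline does the canonical
+  // printInstruction form get used.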
+  if (!printAliasInstr(MI, Address, O) && !printAlias(*MI, O))
+    printInstruction(MI, Address, O);
+  printAnnotation(O, Annot);
+}
+
+void LoongArchInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+                                        raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNo);
+  if (Op.isReg()) {
+    printRegName(O, Op.getReg());
+    return;
+  }
+
+  if (Op.isImm()) {
+    O << formatImm(Op.getImm());
+    return;
+  }
+
+  assert(Op.isExpr() && "unknown operand kind in printOperand");
+  Op.getExpr()->print(O, &MAI, true);
+}
+
+template <unsigned Bits, unsigned Offset>
+void LoongArchInstPrinter::printUImm(const MCInst *MI, int opNum, raw_ostream &O) {
+  const MCOperand &MO = MI->getOperand(opNum);
+  if (MO.isImm()) {
+    uint64_t Imm = MO.getImm();
+    Imm -= Offset;
+    Imm &= (1 << Bits) - 1;
+    Imm += Offset;
+    O << formatImm(Imm);
+    return;
+  }
+
+  printOperand(MI, opNum, O);
+}
+
+void LoongArchInstPrinter::
+printMemOperand(const MCInst *MI, int opNum, raw_ostream &O) {
+  // Load/Store memory operands -- $reg, imm
+  printOperand(MI, opNum, O);
+  O << ", ";
+  printOperand(MI, opNum+1, O);
+}
+
+void LoongArchInstPrinter::printAMemOperand(const MCInst *MI, int opNum,
+                                            raw_ostream &O) {
+  // AM* instruction memory operand: "rj, 0"
+  printRegName(O, MI->getOperand(opNum).getReg());
+  O << ", 0";
+}
+
+void LoongArchInstPrinter::
+printMemOperandEA(const MCInst *MI, int opNum, raw_ostream &O) {
+  // When stack locations are used by instructions other than loads/stores,
+  // print them the same way as for normal three-operand instructions.
+  printOperand(MI, opNum, O);
+  O << ", ";
+  printOperand(MI, opNum+1, O);
+}
+
+void LoongArchInstPrinter::
+printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O) {
+  const MCOperand &MO = MI->getOperand(opNum);
+  O << LoongArchFCCToString((LoongArch::CondCode)MO.getImm());
+}
+
+bool LoongArchInstPrinter::printAlias(const char *Str, const MCInst &MI,
+                                      unsigned OpNo, raw_ostream &OS) {
+  OS << "\t" << Str << "\t";
+  if (MI.getOpcode() == LoongArch::JIRL) {
+    printOperand(&MI, OpNo, OS);
+    OS << "@plt";
+  } else
+    printOperand(&MI, OpNo, OS);
+  return true;
+}
+
+bool LoongArchInstPrinter::printAlias(const char *Str, const MCInst &MI,
+                                      unsigned OpNo0, unsigned OpNo1,
+                                      raw_ostream &OS) {
+  printAlias(Str, MI, OpNo0, OS);
+  OS << ", ";
+  printOperand(&MI, OpNo1, OS);
+  return true;
+}
+
+bool LoongArchInstPrinter::printAlias(const MCInst &MI, raw_ostream &OS) {
+  switch (MI.getOpcode()) {
+  case LoongArch::OR:
+    // or $r0, $r1, $zero => move $r0, $r1
+    return isReg<LoongArch::ZERO>(MI, 2) && printAlias("move", MI, 0, 1, OS);
+  default: return false;
+  }
+}
+
+void LoongArchInstPrinter::
+printRegisterList(const MCInst *MI, int opNum, raw_ostream &O) {
+  // - 2 because the register list is always the first operand of the
+  // instruction and is always followed by a memory operand (base + offset).
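+  // E.g. for a hypothetical operand order (reglist..., base, offset), the
+  // loop below prints operands [opNum, getNumOperands() - 2).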
+  for (int i = opNum, e = MI->getNumOperands() - 2; i != e; ++i) {
+    if (i != opNum)
+      O << ", ";
+    printRegName(O, MI->getOperand(i).getReg());
+  }
+}
+
+void LoongArchInstPrinter::
+printLoadAddr(const MCInst *MI, raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(1);
+  const MCExpr *Expr = Op.getExpr();
+  const LoongArchMCExpr *LoongArchExpr = cast<LoongArchMCExpr>(Expr);
+  switch (LoongArchExpr->getKind()) {
+  default:
+    llvm_unreachable("unexpected LoongArchMCExpr kind in printLoadAddr!");
+  case LoongArchMCExpr::MEK_ABS_HI:
+    O << "\tla.abs\t";
+    break;
+  case LoongArchMCExpr::MEK_GOT_HI:
+    O << "\tla.got\t";
+    break;
+  case LoongArchMCExpr::MEK_PCREL_HI:
+    O << "\tla.pcrel\t";
+    break;
+  case LoongArchMCExpr::MEK_TLSGD_HI:
+    O << "\tla.tls.gd\t";
+    break;
+  case LoongArchMCExpr::MEK_TLSIE_HI:
+    O << "\tla.tls.ie\t";
+    break;
+  case LoongArchMCExpr::MEK_TLSLE_HI:
+    O << "\tla.tls.le\t";
+    break;
+  }
+  printRegName(O, MI->getOperand(0).getReg());
+  O << ", ";
+  Expr->print(O, nullptr);
+}
diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h b/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h
new file mode 100644
index 00000000..050dcc13
--- /dev/null
+++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h
@@ -0,0 +1,119 @@
+//=== LoongArchInstPrinter.h - Convert LoongArch MCInst to assembly syntax -*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a LoongArch MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_INSTPRINTER_LOONGARCHINSTPRINTER_H
+#define LLVM_LIB_TARGET_LOONGARCH_INSTPRINTER_LOONGARCHINSTPRINTER_H
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+
+namespace LoongArch {
+// LoongArch Branch Codes
+enum FPBranchCode {
+  BRANCH_F,
+  BRANCH_T,
+  BRANCH_INVALID
+};
+
+// LoongArch Condition Codes
+enum CondCode {
+  FCOND_F = 0x0,
+  FCOND_SF,
+  FCOND_OLT,
+  FCOND_LT,
+  FCOND_OEQ,
+  FCOND_SEQ,
+  FCOND_OLE,
+  FCOND_LE,
+  FCOND_UN,
+  FCOND_NGLE,
+  FCOND_ULT,
+  FCOND_NGE,
+  FCOND_UEQ,
+  FCOND_NGL,
+  FCOND_ULE,
+  FCOND_NGT,
+  FCOND_CNE,
+  FCOND_SNE,
+  FCOND_COR = 0x14,
+  FCOND_SOR = 0x15,
+  FCOND_CUNE = 0x18,
+  FCOND_SUNE = 0x19,
+
+  // To be used with a branch-on-false.
+  // These conditions have the same mnemonics as the ones above, but are
+  // used with a branch-on-false instead.
+  FCOND_T,
+  FCOND_UNE,
+  FCOND_ST,
+  FCOND_UGE,
+  FCOND_NLT,
+  FCOND_UGT,
+  FCOND_NLE,
+  FCOND_OR,
+  FCOND_GLE,
+  FCOND_OGE,
+  FCOND_GE,
+  FCOND_ONE,
+  FCOND_GL,
+  FCOND_OGT,
+  FCOND_GT
+};
+
+const char *LoongArchFCCToString(LoongArch::CondCode CC);
+} // end namespace LoongArch
+
+class LoongArchInstPrinter : public MCInstPrinter {
+public:
+  LoongArchInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+                       const MCRegisterInfo &MRI)
+      : MCInstPrinter(MAI, MII, MRI) {}
+
+  // Autogenerated by tblgen.
+  std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
+  void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
+  static const char *getRegisterName(unsigned RegNo);
+
+  void printRegName(raw_ostream &OS, unsigned RegNo) const override;
+  void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
+                 const MCSubtargetInfo &STI, raw_ostream &O) override;
+
+  bool printAliasInstr(const MCInst *MI, uint64_t Address, raw_ostream &OS);
+  void printCustomAliasOperand(const MCInst *MI, uint64_t Address,
+                               unsigned OpIdx, unsigned PrintMethodIdx,
+                               raw_ostream &O);
+
+private:
+  void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printOperand(const MCInst *MI, uint64_t /*Address*/, unsigned OpNum,
+                    raw_ostream &O) {
+    printOperand(MI, OpNum, O);
+  }
+  template <unsigned Bits, unsigned Offset = 0>
+  void printUImm(const MCInst *MI, int opNum, raw_ostream &O);
+  void printMemOperand(const MCInst *MI, int opNum, raw_ostream &O);
+  void printAMemOperand(const MCInst *MI, int opNum, raw_ostream &O);
+  void printMemOperandEA(const MCInst *MI, int opNum, raw_ostream &O);
+  void printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O);
+
+  bool printAlias(const char *Str, const MCInst &MI, unsigned OpNo,
+                  raw_ostream &OS);
+  bool printAlias(const char *Str, const MCInst &MI, unsigned OpNo0,
+                  unsigned OpNo1, raw_ostream &OS);
+  bool printAlias(const MCInst &MI, raw_ostream &OS);
+  void printSaveRestore(const MCInst *MI, raw_ostream &O);
+  void printRegisterList(const MCInst *MI, int opNum, raw_ostream &O);
+  void printLoadAddr(const MCInst *MI, raw_ostream &O);
+};
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp
new file mode 100644
index 00000000..81939927
--- /dev/null
+++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp
@@ -0,0 +1,59 @@
+//===-- LoongArchMCAsmInfo.cpp - LoongArch Asm Properties ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definitions of the LoongArchMCAsmInfo properties.
+// +//===----------------------------------------------------------------------===// + +#include "LoongArchMCAsmInfo.h" +#include "MCTargetDesc/LoongArchMCExpr.h" +#include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/MC/MCStreamer.h" + +using namespace llvm; + +void LoongArchMCAsmInfo::anchor() { } + +LoongArchMCAsmInfo::LoongArchMCAsmInfo(const Triple &TheTriple, + const MCTargetOptions &Options) { + + if (TheTriple.isLoongArch64() + && TheTriple.getEnvironment() != Triple::GNUABILPX32) + CodePointerSize = CalleeSaveStackSlotSize = 8; + + AlignmentIsInBytes = false; + Data16bitsDirective = "\t.half\t"; + Data32bitsDirective = "\t.word\t"; + Data64bitsDirective = "\t.dword\t"; + CommentString = "#"; + ZeroDirective = "\t.space\t"; + SupportsDebugInformation = true; + ExceptionsType = ExceptionHandling::DwarfCFI; + DwarfRegNumForCFI = true; + //HasLoongArchExpressions = true; + UseIntegratedAssembler = true; + UsesELFSectionDirectiveForBSS = true; +} + +const MCExpr * +LoongArchMCAsmInfo::getExprForFDESymbol(const MCSymbol *Sym, unsigned Encoding, + MCStreamer &Streamer) const { + if (!(Encoding & dwarf::DW_EH_PE_pcrel)) + return MCAsmInfo::getExprForFDESymbol(Sym, Encoding, Streamer); + + // The default symbol subtraction results in an ADD/SUB relocation pair. + // Processing this relocation pair is problematic when linker relaxation is + // enabled, so we follow binutils in using the R_LARCH_32_PCREL relocation + // for the FDE initial location. + MCContext &Ctx = Streamer.getContext(); + const MCExpr *ME = + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, Ctx); + assert(Encoding & dwarf::DW_EH_PE_sdata4 && "Unexpected encoding"); + return LoongArchMCExpr::create(LoongArchMCExpr::MEK_32_PCREL, ME, Ctx); +} diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.h b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.h new file mode 100644 index 00000000..f8ca6833 --- /dev/null +++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.h @@ -0,0 +1,34 @@ +//===-- LoongArchMCAsmInfo.h - LoongArch Asm Info ------------------------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the LoongArchMCAsmInfo class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCASMINFO_H +#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCASMINFO_H + +#include "llvm/MC/MCAsmInfoELF.h" + +namespace llvm { +class Triple; + +class LoongArchMCAsmInfo : public MCAsmInfoELF { + void anchor() override; + +public: + explicit LoongArchMCAsmInfo(const Triple &TheTriple, + const MCTargetOptions &Options); + + const MCExpr *getExprForFDESymbol(const MCSymbol *Sym, unsigned Encoding, + MCStreamer &Streamer) const override; +}; + +} // namespace llvm + +#endif diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp new file mode 100644 index 00000000..e32b6856 --- /dev/null +++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp @@ -0,0 +1,521 @@ +//===-- LoongArchMCCodeEmitter.cpp - Convert LoongArch Code to Machine Code ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the LoongArchMCCodeEmitter class. +// +//===----------------------------------------------------------------------===// + +#include "LoongArchMCCodeEmitter.h" +#include "MCTargetDesc/LoongArchFixupKinds.h" +#include "MCTargetDesc/LoongArchMCExpr.h" +#include "MCTargetDesc/LoongArchMCTargetDesc.h" +#include "MCTargetDesc/LoongArchInstPrinter.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "mccodeemitter" + +#define GET_INSTRMAP_INFO +#include "LoongArchGenInstrInfo.inc" +#undef GET_INSTRMAP_INFO + +namespace llvm { + +MCCodeEmitter *createLoongArchMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + MCContext &Ctx) { + return new LoongArchMCCodeEmitter(MCII, Ctx); +} + +} // end namespace llvm + +void LoongArchMCCodeEmitter::EmitByte(unsigned char C, raw_ostream &OS) const { + OS << (char)C; +} + +void LoongArchMCCodeEmitter::EmitInstruction(uint64_t Val, unsigned Size, + const MCSubtargetInfo &STI, + raw_ostream &OS) const { + for (unsigned i = 0; i < Size; ++i) { + unsigned Shift = i * 8; + EmitByte((Val >> Shift) & 0xff, OS); + } +} + +/// encodeInstruction - Emit the instruction. +/// Size the instruction with Desc.getSize(). 
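+/// LoongArch instructions are all 4 bytes wide, so Desc.getSize() should be
+/// 4; EmitInstruction() above writes the value least-significant byte first
+/// (little-endian).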
+void LoongArchMCCodeEmitter:: +encodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const +{ + MCInst TmpInst = MI; + + uint32_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI); + + const MCInstrDesc &Desc = MCII.get(TmpInst.getOpcode()); + + // Get byte count of instruction + unsigned Size = Desc.getSize(); + if (!Size) + llvm_unreachable("Desc.getSize() returns 0"); + + EmitInstruction(Binary, Size, STI, OS); +} + +/// getBranchTargetOpValue - Return binary encoding of the branch +/// target operand. If the machine operand requires relocation, +/// record the relocation and return zero. +unsigned LoongArchMCCodeEmitter:: +getBranchTargetOpValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand &MO = MI.getOperand(OpNo); + + // If the destination is an immediate, divide by 4. + if (MO.isImm()) return MO.getImm() >> 2; + + assert(MO.isExpr() && + "getBranchTargetOpValue expects only expressions or immediates"); + + // XXX: brtarget reloc EncoderMethod. + const MCExpr *Expr = MO.getExpr(); + switch (MI.getOpcode()) { + default: + llvm_unreachable("Unhandled reloc instruction!"); + break; + case LoongArch::BEQZ: + case LoongArch::BEQZ32: + case LoongArch::BNEZ: + case LoongArch::BNEZ32: + case LoongArch::BCEQZ: + case LoongArch::BCNEZ: + Fixups.push_back( + MCFixup::create(0, Expr, MCFixupKind(LoongArch::fixup_loongarch_b21))); + break; + case LoongArch::BEQ: + case LoongArch::BEQ32: + case LoongArch::BNE: + case LoongArch::BNE32: + case LoongArch::BLT: + case LoongArch::BLT32: + case LoongArch::BGE: + case LoongArch::BGE32: + case LoongArch::BLTU: + case LoongArch::BLTU32: + case LoongArch::BGEU: + case LoongArch::BGEU32: + Fixups.push_back( + MCFixup::create(0, Expr, MCFixupKind(LoongArch::fixup_loongarch_b16))); + break; + } + return 0; +} + +/// getJumpTargetOpValue - Return binary encoding of the jump +/// target operand. If the machine operand requires relocation, +/// record the relocation and return zero. +unsigned LoongArchMCCodeEmitter:: +getJumpTargetOpValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand &MO = MI.getOperand(OpNo); + // If the destination is an immediate, divide by 4. 
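+  // Offsets are measured in 4-byte instruction units: e.g. a hypothetical
+  // immediate jump target of +0x1000 bytes is encoded as 0x400.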
+ if (MO.isImm()) return MO.getImm()>>2; + + assert(MO.isExpr() && + "getJumpTargetOpValue expects only expressions or an immediate"); + + const MCExpr *Expr = MO.getExpr(); + Fixups.push_back( + MCFixup::create(0, Expr, MCFixupKind(LoongArch::fixup_loongarch_b26))); + return 0; +} + +unsigned LoongArchMCCodeEmitter:: +getSImm11Lsl1Encoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand &MO = MI.getOperand(OpNo); + if (MO.isImm()) { + unsigned Value = MO.getImm(); + return Value >> 1; + } + + return 0; +} + +unsigned LoongArchMCCodeEmitter:: +getSImm10Lsl2Encoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand &MO = MI.getOperand(OpNo); + if (MO.isImm()) { + unsigned Value = MO.getImm(); + return Value >> 2; + } + + return 0; +} + +unsigned LoongArchMCCodeEmitter:: +getSImm9Lsl3Encoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand &MO = MI.getOperand(OpNo); + if (MO.isImm()) { + unsigned Value = MO.getImm(); + return Value >> 3; + } + + return 0; +} + +unsigned LoongArchMCCodeEmitter:: +getSImm8Lsl1Encoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand &MO = MI.getOperand(OpNo); + if (MO.isImm()) { + unsigned Value = MO.getImm(); + return Value >> 1; + } + + return 0; +} + +unsigned LoongArchMCCodeEmitter:: +getSImm8Lsl2Encoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand &MO = MI.getOperand(OpNo); + if (MO.isImm()) { + unsigned Value = MO.getImm(); + return Value >> 2; + } + + return 0; +} + +unsigned LoongArchMCCodeEmitter:: +getSImm8Lsl3Encoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand &MO = MI.getOperand(OpNo); + if (MO.isImm()) { + unsigned Value = MO.getImm(); + return Value >> 3; + } + + return 0; +} + +unsigned LoongArchMCCodeEmitter:: +getExprOpValue(const MCInst &MI, const MCExpr *Expr, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + int64_t Res; + + if (Expr->evaluateAsAbsolute(Res)) + return Res; + + MCExpr::ExprKind Kind = Expr->getKind(); + if (Kind == MCExpr::Constant) { + return cast(Expr)->getValue(); + } + + if (Kind == MCExpr::Binary) { + unsigned Res = getExprOpValue(MI, cast(Expr)->getLHS(), Fixups, STI); + Res += getExprOpValue(MI, cast(Expr)->getRHS(), Fixups, STI); + return Res; + } + + if (Kind == MCExpr::Target) { + const LoongArchMCExpr *LoongArchExpr = cast(Expr); + + LoongArch::Fixups FixupKind = LoongArch::Fixups(0); + switch (LoongArchExpr->getKind()) { + case LoongArchMCExpr::MEK_32_PCREL: + case LoongArchMCExpr::MEK_None: + case LoongArchMCExpr::MEK_Special: + llvm_unreachable("Unhandled fixup kind!"); + Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); + break; + case LoongArchMCExpr::MEK_PLT: + FixupKind = LoongArch::fixup_loongarch_b26; + break; + case LoongArchMCExpr::MEK_GOT_HI: + case LoongArchMCExpr::MEK_GOT_RRHI: + FixupKind = LoongArch::fixup_loongarch_got_pc_hi20; + break; + case LoongArchMCExpr::MEK_GOT_LO: + case LoongArchMCExpr::MEK_GOT_RRLO: + case LoongArchMCExpr::MEK_TLSGD_LO: + case LoongArchMCExpr::MEK_TLSGD_RRLO: + FixupKind = LoongArch::fixup_loongarch_got_pc_lo12; + break; + case LoongArchMCExpr::MEK_GOT_RRHIGHER: + case LoongArchMCExpr::MEK_TLSGD_RRHIGHER: + FixupKind = 
LoongArch::fixup_loongarch_got64_pc_lo20; + break; + case LoongArchMCExpr::MEK_GOT_RRHIGHEST: + case LoongArchMCExpr::MEK_TLSGD_RRHIGHEST: + FixupKind = LoongArch::fixup_loongarch_got64_pc_hi12; + break; + case LoongArchMCExpr::MEK_ABS_HI: + FixupKind = LoongArch::fixup_loongarch_abs_hi20; + break; + case LoongArchMCExpr::MEK_ABS_LO: + FixupKind = LoongArch::fixup_loongarch_abs_lo12; + break; + case LoongArchMCExpr::MEK_ABS_HIGHER: + FixupKind = LoongArch::fixup_loongarch_abs64_lo20; + break; + case LoongArchMCExpr::MEK_ABS_HIGHEST: + FixupKind = LoongArch::fixup_loongarch_abs64_hi12; + break; + case LoongArchMCExpr::MEK_PCREL_HI: + case LoongArchMCExpr::MEK_PCREL_RRHI: + FixupKind = LoongArch::fixup_loongarch_pcala_hi20; + break; + case LoongArchMCExpr::MEK_PCREL_LO: + case LoongArchMCExpr::MEK_PCREL_RRLO: + FixupKind = LoongArch::fixup_loongarch_pcala_lo12; + break; + case LoongArchMCExpr::MEK_PCREL_RRHIGHER: + FixupKind = LoongArch::fixup_loongarch_pcala64_lo20; + break; + case LoongArchMCExpr::MEK_PCREL_RRHIGHEST: + FixupKind = LoongArch::fixup_loongarch_pcala64_hi12; + break; + case LoongArchMCExpr::MEK_TLSGD_HI: + case LoongArchMCExpr::MEK_TLSGD_RRHI: + FixupKind = LoongArch::fixup_loongarch_tls_gd_pc_hi20; + break; + case LoongArchMCExpr::MEK_TLSIE_HI: + case LoongArchMCExpr::MEK_TLSIE_RRHI: + FixupKind = LoongArch::fixup_loongarch_tls_ie_pc_hi20; + break; + case LoongArchMCExpr::MEK_TLSIE_LO: + case LoongArchMCExpr::MEK_TLSIE_RRLO: + FixupKind = LoongArch::fixup_loongarch_tls_ie_pc_lo12; + break; + case LoongArchMCExpr::MEK_TLSIE_RRHIGHER: + FixupKind = LoongArch::fixup_loongarch_tls_ie64_lo20; + break; + case LoongArchMCExpr::MEK_TLSIE_RRHIGHEST: + FixupKind = LoongArch::fixup_loongarch_tls_ie64_hi12; + break; + case LoongArchMCExpr::MEK_TLSLE_HI: + FixupKind = LoongArch::fixup_loongarch_tls_le_hi20; + break; + case LoongArchMCExpr::MEK_TLSLE_LO: + FixupKind = LoongArch::fixup_loongarch_tls_le_lo12; + break; + case LoongArchMCExpr::MEK_TLSLE_HIGHER: + FixupKind = LoongArch::fixup_loongarch_tls_le64_lo20; + break; + case LoongArchMCExpr::MEK_TLSLE_HIGHEST: + FixupKind = LoongArch::fixup_loongarch_tls_le64_hi12; + break; + } + Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); + return 0; + } + + if (Kind == MCExpr::SymbolRef) { + LoongArch::Fixups FixupKind = LoongArch::Fixups(0); + + switch(cast(Expr)->getKind()) { + default: llvm_unreachable("Unknown fixup kind!"); + break; + } + Fixups.push_back(MCFixup::create(0, Expr, MCFixupKind(FixupKind))); + return 0; + } + return 0; +} + +/// getMachineOpValue - Return binary encoding of operand. If the machine +/// operand requires relocation, record the relocation and return zero. +unsigned LoongArchMCCodeEmitter:: +getMachineOpValue(const MCInst &MI, const MCOperand &MO, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + if (MO.isReg()) { + unsigned Reg = MO.getReg(); + unsigned RegNo = Ctx.getRegisterInfo()->getEncodingValue(Reg); + return RegNo; + } else if (MO.isImm()) { + return static_cast(MO.getImm()); + } else if (MO.isFPImm()) { + return static_cast(APFloat(MO.getFPImm()) + .bitcastToAPInt().getHiBits(32).getLimitedValue()); + } + // MO must be an Expr. + assert(MO.isExpr()); + return getExprOpValue(MI, MO.getExpr(),Fixups, STI); +} + +/// Return binary encoding of memory related operand. +/// If the offset operand requires relocation, record the relocation. 
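+/// For example (hypothetical operands): with a base register whose encoding
+/// is 5 and a byte offset of 8, the result packs (8 >> ShiftAmount) into
+/// bits 11-0 and 5 into bits 16-12.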
+template <unsigned ShiftAmount>
+unsigned LoongArchMCCodeEmitter::getMemEncoding(const MCInst &MI, unsigned OpNo,
+                                                SmallVectorImpl<MCFixup> &Fixups,
+                                                const MCSubtargetInfo &STI) const {
+  // Base register is encoded in bits 16-12, offset is encoded in bits 11-0.
+  assert(MI.getOperand(OpNo).isReg());
+  unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI) << 12;
+  unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI);
+
+  // Apply the scale factor if there is one.
+  OffBits >>= ShiftAmount;
+
+  return (OffBits & 0xFFF) | RegBits;
+}
+
+/// Return binary encoding of AM* memory related operand.
+unsigned
+LoongArchMCCodeEmitter::getAMemEncoding(const MCInst &MI, unsigned OpNo,
+                                        SmallVectorImpl<MCFixup> &Fixups,
+                                        const MCSubtargetInfo &STI) const {
+  // Base register is encoded in bits 16-12, bits 11-0 are not used.
+  assert(MI.getOperand(OpNo).isReg());
+  unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI)
+                     << 12;
+  return RegBits;
+}
+
+unsigned LoongArchMCCodeEmitter::getMemEncoding10l2(const MCInst &MI, unsigned OpNo,
+                                                    SmallVectorImpl<MCFixup> &Fixups,
+                                                    const MCSubtargetInfo &STI) const {
+  // Base register is encoded in bits 14-10, offset is encoded in bits 9-0.
+  assert(MI.getOperand(OpNo).isReg());
+  unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI) << 10;
+  unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI);
+
+  // Apply the scale factor (the offset is in units of 4 bytes).
+  OffBits >>= 2;
+
+  return (OffBits & 0x3FF) | RegBits;
+}
+
+unsigned LoongArchMCCodeEmitter::getMemEncoding11l1(const MCInst &MI, unsigned OpNo,
+                                                    SmallVectorImpl<MCFixup> &Fixups,
+                                                    const MCSubtargetInfo &STI) const {
+  // Base register is encoded in bits 15-11, offset is encoded in bits 10-0.
+  assert(MI.getOperand(OpNo).isReg());
+  unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI) << 11;
+  unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI);
+
+  // Apply the scale factor (the offset is in units of 2 bytes).
+  OffBits >>= 1;
+
+  return (OffBits & 0x7FF) | RegBits;
+}
+
+unsigned LoongArchMCCodeEmitter::getMemEncoding9l3(const MCInst &MI, unsigned OpNo,
+                                                   SmallVectorImpl<MCFixup> &Fixups,
+                                                   const MCSubtargetInfo &STI) const {
+  // Base register is encoded in bits 13-9, offset is encoded in bits 8-0.
+  assert(MI.getOperand(OpNo).isReg());
+  unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI) << 9;
+  unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI);
+
+  // Apply the scale factor (the offset is in units of 8 bytes).
+  OffBits >>= 3;
+
+  return (OffBits & 0x1FF) | RegBits;
+}
+
+/// Return binary encoding of a simm14 memory related operand, as used by the
+/// LL/SC instructions. If the offset operand requires relocation, record the
+/// relocation.
+template <unsigned ShiftAmount>
+unsigned LoongArchMCCodeEmitter::getSimm14MemEncoding(const MCInst &MI, unsigned OpNo,
+                                                      SmallVectorImpl<MCFixup> &Fixups,
+                                                      const MCSubtargetInfo &STI) const {
+  // Base register is encoded in bits 18-14, offset is encoded in bits 13-0.
+  assert(MI.getOperand(OpNo).isReg());
+  unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI) << 14;
+  unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI);
+
+  // Apply the scale factor if there is one.
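+  // E.g. for a 4-byte-scaled LL/SC form (ShiftAmount == 2), a byte offset
+  // of 16 is stored as 4 in the 14-bit field.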
+ OffBits >>= ShiftAmount; + + return (OffBits & 0x3FFF) | RegBits; +} + +unsigned +LoongArchMCCodeEmitter::getFCMPEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand& MO = MI.getOperand(OpNo); + switch((LoongArch::CondCode)MO.getImm()){ + case LoongArch::FCOND_T: + return 0x0; + case LoongArch::FCOND_OR: + return 0x8; + case LoongArch::FCOND_UNE: + return 0x4; + case LoongArch::FCOND_ONE: + return 0xC; + case LoongArch::FCOND_UGE: + return 0x2; + case LoongArch::FCOND_OGE: + return 0xA; + case LoongArch::FCOND_UGT: + return 0x6; + case LoongArch::FCOND_OGT: + return 0xE; + case LoongArch::FCOND_ST: + return 0x1; + case LoongArch::FCOND_GLE: + return 0x9; + case LoongArch::FCOND_GL: + return 0xD; + case LoongArch::FCOND_NLT: + return 0x3; + case LoongArch::FCOND_GE: + return 0xB; + case LoongArch::FCOND_NLE: + return 0x7; + case LoongArch::FCOND_GT: + return 0xF; + default: + return MO.getImm(); + } +} + +template +unsigned +LoongArchMCCodeEmitter::getUImmWithOffsetEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + assert(MI.getOperand(OpNo).isImm()); + unsigned Value = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI); + Value -= Offset; + return Value; +} + +#include "LoongArchGenMCCodeEmitter.inc" diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.h b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.h new file mode 100644 index 00000000..01634015 --- /dev/null +++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.h @@ -0,0 +1,146 @@ +//===- LoongArchMCCodeEmitter.h - Convert LoongArch Code to Machine Code --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the LoongArchMCCodeEmitter class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCCODEEMITTER_H +#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCCODEEMITTER_H + +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/ADT/StringRef.h" +#include +#include + +namespace llvm { + +class MCContext; +class MCExpr; +class MCFixup; +class MCInst; +class MCInstrInfo; +class MCOperand; +class MCSubtargetInfo; +class raw_ostream; + +class LoongArchMCCodeEmitter : public MCCodeEmitter { + const MCInstrInfo &MCII; + MCContext &Ctx; + +public: + LoongArchMCCodeEmitter(const MCInstrInfo &mcii, MCContext &Ctx_) + : MCII(mcii), Ctx(Ctx_) {} + LoongArchMCCodeEmitter(const LoongArchMCCodeEmitter &) = delete; + LoongArchMCCodeEmitter &operator=(const LoongArchMCCodeEmitter &) = delete; + ~LoongArchMCCodeEmitter() override = default; + + void EmitByte(unsigned char C, raw_ostream &OS) const; + + void EmitInstruction(uint64_t Val, unsigned Size, const MCSubtargetInfo &STI, + raw_ostream &OS) const; + + void encodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const override; + + // getBinaryCodeForInstr - TableGen'erated function for getting the + // binary encoding for an instruction. 
+  uint64_t getBinaryCodeForInstr(const MCInst &MI,
+                                 SmallVectorImpl<MCFixup> &Fixups,
+                                 const MCSubtargetInfo &STI) const;
+
+  // getJumpTargetOpValue - Return binary encoding of the jump
+  // target operand. If the machine operand requires relocation,
+  // record the relocation and return zero.
+  unsigned getJumpTargetOpValue(const MCInst &MI, unsigned OpNo,
+                                SmallVectorImpl<MCFixup> &Fixups,
+                                const MCSubtargetInfo &STI) const;
+
+  // getBranchTargetOpValue - Return binary encoding of the branch
+  // target operand. If the machine operand requires relocation,
+  // record the relocation and return zero.
+  unsigned getBranchTargetOpValue(const MCInst &MI, unsigned OpNo,
+                                  SmallVectorImpl<MCFixup> &Fixups,
+                                  const MCSubtargetInfo &STI) const;
+
+  // getMachineOpValue - Return binary encoding of operand. If the machine
+  // operand requires relocation, record the relocation and return zero.
+  unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+                             SmallVectorImpl<MCFixup> &Fixups,
+                             const MCSubtargetInfo &STI) const;
+
+  template <unsigned ShiftAmount>
+  unsigned getMemEncoding(const MCInst &MI, unsigned OpNo,
+                          SmallVectorImpl<MCFixup> &Fixups,
+                          const MCSubtargetInfo &STI) const;
+
+  unsigned getAMemEncoding(const MCInst &MI, unsigned OpNo,
+                           SmallVectorImpl<MCFixup> &Fixups,
+                           const MCSubtargetInfo &STI) const;
+
+  unsigned getMemEncoding10l2(const MCInst &MI, unsigned OpNo,
+                              SmallVectorImpl<MCFixup> &Fixups,
+                              const MCSubtargetInfo &STI) const;
+
+  unsigned getMemEncoding11l1(const MCInst &MI, unsigned OpNo,
+                              SmallVectorImpl<MCFixup> &Fixups,
+                              const MCSubtargetInfo &STI) const;
+
+  unsigned getMemEncoding9l3(const MCInst &MI, unsigned OpNo,
+                             SmallVectorImpl<MCFixup> &Fixups,
+                             const MCSubtargetInfo &STI) const;
+
+  template <unsigned ShiftAmount>
+  unsigned getSimm14MemEncoding(const MCInst &MI, unsigned OpNo,
+                                SmallVectorImpl<MCFixup> &Fixups,
+                                const MCSubtargetInfo &STI) const;
+
+  unsigned getFCMPEncoding(const MCInst &MI, unsigned OpNo,
+                           SmallVectorImpl<MCFixup> &Fixups,
+                           const MCSubtargetInfo &STI) const;
+
+  /// Subtract Offset then encode as an N-bit unsigned integer.
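+  /// E.g. with Offset == 1, an assembly immediate of 4 is encoded as 3;
+  /// the printer's printUImm applies the inverse transformation.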
+  template <unsigned Bits, unsigned Offset = 0>
+  unsigned getUImmWithOffsetEncoding(const MCInst &MI, unsigned OpNo,
+                                     SmallVectorImpl<MCFixup> &Fixups,
+                                     const MCSubtargetInfo &STI) const;
+
+  unsigned getExprOpValue(const MCInst &MI, const MCExpr *Expr,
+                          SmallVectorImpl<MCFixup> &Fixups,
+                          const MCSubtargetInfo &STI) const;
+
+  unsigned getSImm11Lsl1Encoding(const MCInst &MI, unsigned OpNo,
+                                 SmallVectorImpl<MCFixup> &Fixups,
+                                 const MCSubtargetInfo &STI) const;
+
+  unsigned getSImm10Lsl2Encoding(const MCInst &MI, unsigned OpNo,
+                                 SmallVectorImpl<MCFixup> &Fixups,
+                                 const MCSubtargetInfo &STI) const;
+
+  unsigned getSImm9Lsl3Encoding(const MCInst &MI, unsigned OpNo,
+                                SmallVectorImpl<MCFixup> &Fixups,
+                                const MCSubtargetInfo &STI) const;
+
+  unsigned getSImm8Lsl1Encoding(const MCInst &MI, unsigned OpNo,
+                                SmallVectorImpl<MCFixup> &Fixups,
+                                const MCSubtargetInfo &STI) const;
+
+  unsigned getSImm8Lsl2Encoding(const MCInst &MI, unsigned OpNo,
+                                SmallVectorImpl<MCFixup> &Fixups,
+                                const MCSubtargetInfo &STI) const;
+
+  unsigned getSImm8Lsl3Encoding(const MCInst &MI, unsigned OpNo,
+                                SmallVectorImpl<MCFixup> &Fixups,
+                                const MCSubtargetInfo &STI) const;
+
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCCODEEMITTER_H
diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp
new file mode 100644
index 00000000..bb842538
--- /dev/null
+++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp
@@ -0,0 +1,134 @@
+//===-- LoongArchMCExpr.cpp - LoongArch specific MC expression classes --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchMCExpr.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdint>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "loongarchmcexpr"
+
+const LoongArchMCExpr *
+LoongArchMCExpr::create(LoongArchMCExpr::LoongArchExprKind Kind,
+                        const MCExpr *Expr, MCContext &Ctx) {
+  return new (Ctx) LoongArchMCExpr(Kind, Expr);
+}
+
+void LoongArchMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
+  int64_t AbsVal;
+  if (Expr->evaluateAsAbsolute(AbsVal))
+    OS << AbsVal;
+  else
+    Expr->print(OS, MAI, true);
+}
+
+bool
+LoongArchMCExpr::evaluateAsRelocatableImpl(MCValue &Res,
+                                           const MCAsmLayout *Layout,
+                                           const MCFixup *Fixup) const {
+  if (!getSubExpr()->evaluateAsRelocatable(Res, nullptr, nullptr))
+    return false;
+
+  Res =
+      MCValue::get(Res.getSymA(), Res.getSymB(), Res.getConstant(), getKind());
+  // Custom fixup types are not valid with symbol difference expressions.
+  return Res.getSymB() ? getKind() == MEK_None : true;
+}
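+
+// Note: the LoongArch expression kind is carried in MCValue's RefKind slot
+// above, so e.g. a hypothetical %pc_hi20(sym) evaluates to (sym, 0,
+// MEK_PCREL_HI) and the backend can pick the matching fixup.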
+
+void LoongArchMCExpr::visitUsedExpr(MCStreamer &Streamer) const {
+  Streamer.visitUsedExpr(*getSubExpr());
+}
+
+static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) {
+  switch (Expr->getKind()) {
+  case MCExpr::Target:
+    fixELFSymbolsInTLSFixupsImpl(cast<LoongArchMCExpr>(Expr)->getSubExpr(), Asm);
+    break;
+  case MCExpr::Constant:
+    break;
+  case MCExpr::Binary: {
+    const MCBinaryExpr *BE = cast<MCBinaryExpr>(Expr);
+    fixELFSymbolsInTLSFixupsImpl(BE->getLHS(), Asm);
+    fixELFSymbolsInTLSFixupsImpl(BE->getRHS(), Asm);
+    break;
+  }
+  case MCExpr::SymbolRef: {
+    // We're known to be under a TLS fixup, so any symbol should be
+    // modified. There should be only one.
+    const MCSymbolRefExpr &SymRef = *cast<MCSymbolRefExpr>(Expr);
+    cast<MCSymbolELF>(SymRef.getSymbol()).setType(ELF::STT_TLS);
+    break;
+  }
+  case MCExpr::Unary:
+    fixELFSymbolsInTLSFixupsImpl(cast<MCUnaryExpr>(Expr)->getSubExpr(), Asm);
+    break;
+  }
+}
+
+void LoongArchMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
+  switch (getKind()) {
+  default:
+    break;
+  case MEK_None:
+  case MEK_Special:
+    llvm_unreachable("MEK_None and MEK_Special are invalid");
+    break;
+  case MEK_CALL_HI:
+  case MEK_CALL_LO:
+  case MEK_GOT_HI:
+  case MEK_GOT_LO:
+  case MEK_GOT_RRHI:
+  case MEK_GOT_RRLO:
+  case MEK_GOT_RRHIGHER:
+  case MEK_GOT_RRHIGHEST:
+  case MEK_ABS_HI:
+  case MEK_ABS_LO:
+  case MEK_ABS_HIGHER:
+  case MEK_ABS_HIGHEST:
+  case MEK_PCREL_HI:
+  case MEK_PCREL_LO:
+  case MEK_PCREL_RRHI:
+  case MEK_PCREL_RRHIGHER:
+  case MEK_PCREL_RRHIGHEST:
+  case MEK_PCREL_RRLO:
+  case MEK_PLT:
+    // If we do have nested target-specific expressions, they will be in
+    // a consecutive chain.
+    if (const LoongArchMCExpr *E = dyn_cast<LoongArchMCExpr>(getSubExpr()))
+      E->fixELFSymbolsInTLSFixups(Asm);
+    break;
+  case MEK_TLSGD_HI:
+  case MEK_TLSGD_LO:
+  case MEK_TLSGD_RRHI:
+  case MEK_TLSGD_RRHIGHER:
+  case MEK_TLSGD_RRHIGHEST:
+  case MEK_TLSGD_RRLO:
+  case MEK_TLSLE_HI:
+  case MEK_TLSLE_HIGHER:
+  case MEK_TLSLE_HIGHEST:
+  case MEK_TLSLE_LO:
+  case MEK_TLSIE_HI:
+  case MEK_TLSIE_LO:
+  case MEK_TLSIE_RRHI:
+  case MEK_TLSIE_RRHIGHER:
+  case MEK_TLSIE_RRHIGHEST:
+  case MEK_TLSIE_RRLO:
+    fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm);
+    break;
+  }
+}
diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h
new file mode 100644
index 00000000..80592ead
--- /dev/null
+++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h
@@ -0,0 +1,98 @@
+//===- LoongArchMCExpr.h - LoongArch specific MC expression classes -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCEXPR_H +#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCEXPR_H + +#include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCValue.h" + +namespace llvm { + +class LoongArchMCExpr : public MCTargetExpr { +public: + enum LoongArchExprKind { + MEK_None, + MEK_CALL_HI, + MEK_CALL_LO, + MEK_GOT_HI, + MEK_GOT_LO, + MEK_GOT_RRHI, + MEK_GOT_RRHIGHER, + MEK_GOT_RRHIGHEST, + MEK_GOT_RRLO, + MEK_ABS_HI, + MEK_ABS_HIGHER, + MEK_ABS_HIGHEST, + MEK_ABS_LO, + MEK_PCREL_HI, + MEK_PCREL_LO, + MEK_PCREL_RRHI, + MEK_PCREL_RRHIGHER, + MEK_PCREL_RRHIGHEST, + MEK_PCREL_RRLO, + MEK_TLSLE_HI, + MEK_TLSLE_HIGHER, + MEK_TLSLE_HIGHEST, + MEK_TLSLE_LO, + MEK_TLSIE_HI, + MEK_TLSIE_LO, + MEK_TLSIE_RRHI, + MEK_TLSIE_RRHIGHER, + MEK_TLSIE_RRHIGHEST, + MEK_TLSIE_RRLO, + MEK_TLSGD_HI, + MEK_TLSGD_LO, + MEK_TLSGD_RRHI, + MEK_TLSGD_RRHIGHER, + MEK_TLSGD_RRHIGHEST, + MEK_TLSGD_RRLO, + MEK_PLT, + MEK_32_PCREL, + MEK_Special, + }; + +private: + const LoongArchExprKind Kind; + const MCExpr *Expr; + + explicit LoongArchMCExpr(LoongArchExprKind Kind, const MCExpr *Expr) + : Kind(Kind), Expr(Expr) {} + +public: + static const LoongArchMCExpr *create(LoongArchExprKind Kind, const MCExpr *Expr, + MCContext &Ctx); + static const LoongArchMCExpr *createGpOff(LoongArchExprKind Kind, const MCExpr *Expr, + MCContext &Ctx); + + /// Get the kind of this expression. + LoongArchExprKind getKind() const { return Kind; } + + /// Get the child of this expression. + const MCExpr *getSubExpr() const { return Expr; } + + void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override; + bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, + const MCFixup *Fixup) const override; + void visitUsedExpr(MCStreamer &Streamer) const override; + + MCFragment *findAssociatedFragment() const override { + return getSubExpr()->findAssociatedFragment(); + } + + void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override; + + static bool classof(const MCExpr *E) { + return E->getKind() == MCExpr::Target; + } +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCEXPR_H diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp new file mode 100644 index 00000000..7bf85b64 --- /dev/null +++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp @@ -0,0 +1,187 @@ +//===-- LoongArchMCTargetDesc.cpp - LoongArch Target Descriptions -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides LoongArch specific target descriptions. 
+// +//===----------------------------------------------------------------------===// + +#include "LoongArchMCTargetDesc.h" +#include "LoongArchTargetStreamer.h" +#include "MCTargetDesc/LoongArchAsmBackend.h" +#include "MCTargetDesc/LoongArchELFStreamer.h" +#include "MCTargetDesc/LoongArchInstPrinter.h" +#include "MCTargetDesc/LoongArchMCAsmInfo.h" +#include "TargetInfo/LoongArchTargetInfo.h" +#include "llvm/ADT/Triple.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCELFStreamer.h" +#include "llvm/MC/MCInstrAnalysis.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +#define GET_INSTRINFO_MC_DESC +#include "LoongArchGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "LoongArchGenSubtargetInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "LoongArchGenRegisterInfo.inc" + +/// Select the LoongArch CPU for the given triple and cpu name. +/// FIXME: Merge with the copy in LoongArchSubtarget.cpp +StringRef LoongArch_MC::selectLoongArchCPU(const Triple &TT, StringRef CPU) { + if (CPU.empty() || CPU == "generic") { + if (TT.isLoongArch32()) + CPU = "generic-la32"; + else + CPU = "la464"; + } + return CPU; +} + +static MCInstrInfo *createLoongArchMCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitLoongArchMCInstrInfo(X); + return X; +} + +static MCRegisterInfo *createLoongArchMCRegisterInfo(const Triple &TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitLoongArchMCRegisterInfo(X, LoongArch::RA); + return X; +} + +static MCSubtargetInfo *createLoongArchMCSubtargetInfo(const Triple &TT, + StringRef CPU, StringRef FS) { + CPU = LoongArch_MC::selectLoongArchCPU(TT, CPU); + return createLoongArchMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); +} + +static MCAsmInfo *createLoongArchMCAsmInfo(const MCRegisterInfo &MRI, + const Triple &TT, + const MCTargetOptions &Options) { + MCAsmInfo *MAI = new LoongArchMCAsmInfo(TT, Options); + + unsigned SP = MRI.getDwarfRegNum(LoongArch::SP, true); + MCCFIInstruction Inst = MCCFIInstruction::cfiDefCfa(nullptr, SP, 0); + MAI->addInitialFrameState(Inst); + + return MAI; +} + +static MCInstPrinter *createLoongArchMCInstPrinter(const Triple &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI) { + return new LoongArchInstPrinter(MAI, MII, MRI); +} + +static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context, + std::unique_ptr &&MAB, + std::unique_ptr &&OW, + std::unique_ptr &&Emitter, + bool RelaxAll) { + MCStreamer *S; + S = createLoongArchELFStreamer(Context, std::move(MAB), std::move(OW), + std::move(Emitter), RelaxAll); + return S; +} + +static MCTargetStreamer *createLoongArchAsmTargetStreamer(MCStreamer &S, + formatted_raw_ostream &OS, + MCInstPrinter *InstPrint, + bool isVerboseAsm) { + return new LoongArchTargetAsmStreamer(S, OS); +} + +static MCTargetStreamer *createLoongArchNullTargetStreamer(MCStreamer &S) { + return new LoongArchTargetStreamer(S); +} + +static MCTargetStreamer * +createLoongArchObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { + return new LoongArchTargetELFStreamer(S, STI); +} + +namespace { + +class LoongArchMCInstrAnalysis : public MCInstrAnalysis { +public: + LoongArchMCInstrAnalysis(const 
MCInstrInfo *Info) : MCInstrAnalysis(Info) {} + + bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, + uint64_t &Target) const override { + unsigned NumOps = Inst.getNumOperands(); + if (NumOps == 0) + return false; + if (Info->get(Inst.getOpcode()).isBranch() || Inst.getOpcode() == LoongArch::BL) { + // just not jirl + Target = Addr + Inst.getOperand(NumOps - 1).getImm(); + return true; + } else { + return false; + } + } +}; +} + +static MCInstrAnalysis *createLoongArchMCInstrAnalysis(const MCInstrInfo *Info) { + return new LoongArchMCInstrAnalysis(Info); +} + +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchTargetMC() { + for (Target *T : {&getTheLoongArch32Target(), &getTheLoongArch64Target()}) { + // Register the MC asm info. + RegisterMCAsmInfoFn X(*T, createLoongArchMCAsmInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(*T, createLoongArchMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(*T, createLoongArchMCRegisterInfo); + + // Register the elf streamer. + TargetRegistry::RegisterELFStreamer(*T, createMCStreamer); + + // Register the asm target streamer. + TargetRegistry::RegisterAsmTargetStreamer(*T, createLoongArchAsmTargetStreamer); + + TargetRegistry::RegisterNullTargetStreamer(*T, + createLoongArchNullTargetStreamer); + + // Register the MC subtarget info. + TargetRegistry::RegisterMCSubtargetInfo(*T, createLoongArchMCSubtargetInfo); + + // Register the MC instruction analyzer. + TargetRegistry::RegisterMCInstrAnalysis(*T, createLoongArchMCInstrAnalysis); + + // Register the MCInstPrinter. + TargetRegistry::RegisterMCInstPrinter(*T, createLoongArchMCInstPrinter); + + TargetRegistry::RegisterObjectTargetStreamer( + *T, createLoongArchObjectTargetStreamer); + + // Register the asm backend. + TargetRegistry::RegisterMCAsmBackend(*T, createLoongArchAsmBackend); + } + + // Register the MC Code Emitter + for (Target *T : {&getTheLoongArch32Target(), &getTheLoongArch64Target()}) + TargetRegistry::RegisterMCCodeEmitter(*T, createLoongArchMCCodeEmitter); +} diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h new file mode 100644 index 00000000..56949ef1 --- /dev/null +++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h @@ -0,0 +1,68 @@ +//===-- LoongArchMCTargetDesc.h - LoongArch Target Descriptions -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides LoongArch specific target descriptions. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCTARGETDESC_H +#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCTARGETDESC_H + +#include "llvm/Support/DataTypes.h" + +#include + +namespace llvm { +class MCAsmBackend; +class MCCodeEmitter; +class MCContext; +class MCInstrInfo; +class MCObjectTargetWriter; +class MCRegisterInfo; +class MCSubtargetInfo; +class MCTargetOptions; +class StringRef; +class Target; +class Triple; +class raw_ostream; +class raw_pwrite_stream; + +Target &getTheLoongArch32Target(); +Target &getTheLoongArch64Target(); + +MCCodeEmitter *createLoongArchMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + MCContext &Ctx); + +MCAsmBackend *createLoongArchAsmBackend(const Target &T, + const MCSubtargetInfo &STI, + const MCRegisterInfo &MRI, + const MCTargetOptions &Options); + +std::unique_ptr +createLoongArchELFObjectWriter(const Triple &TT); + +namespace LoongArch_MC { +StringRef selectLoongArchCPU(const Triple &TT, StringRef CPU); +} + +} // End llvm namespace + +// Defines symbolic names for LoongArch registers. This defines a mapping from +// register name to register number. +#define GET_REGINFO_ENUM +#include "LoongArchGenRegisterInfo.inc" + +// Defines symbolic names for the LoongArch instructions. +#define GET_INSTRINFO_ENUM +#include "LoongArchGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "LoongArchGenSubtargetInfo.inc" + +#endif diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchTargetStreamer.cpp b/lib/Target/LoongArch/MCTargetDesc/LoongArchTargetStreamer.cpp new file mode 100644 index 00000000..6a060b14 --- /dev/null +++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchTargetStreamer.cpp @@ -0,0 +1,322 @@ +//===-- LoongArchTargetStreamer.cpp - LoongArch Target Streamer Methods -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides LoongArch specific target streamer methods. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchABIInfo.h"
+#include "LoongArchELFStreamer.h"
+#include "LoongArchInstPrinter.h"
+#include "LoongArchMCExpr.h"
+#include "LoongArchMCTargetDesc.h"
+#include "LoongArchTargetObjectFile.h"
+#include "LoongArchTargetStreamer.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+
+using namespace llvm;
+
+namespace {
+static cl::opt<bool> RoundSectionSizes(
+    "loongarch-round-section-sizes", cl::init(false),
+    cl::desc("Round section sizes up to the section alignment"), cl::Hidden);
+} // end anonymous namespace
+
+LoongArchTargetStreamer::LoongArchTargetStreamer(MCStreamer &S)
+    : MCTargetStreamer(S), ModuleDirectiveAllowed(true) {
+  GPRInfoSet = FPRInfoSet = FrameInfoSet = false;
+}
+void LoongArchTargetStreamer::emitDirectiveOptionPic0() {}
+void LoongArchTargetStreamer::emitDirectiveOptionPic2() {}
+void LoongArchTargetStreamer::emitDirectiveSetArch(StringRef Arch) {
+  forbidModuleDirective();
+}
+void LoongArchTargetStreamer::emitDirectiveSetLoongArch32() { forbidModuleDirective(); }
+void LoongArchTargetStreamer::emitDirectiveSetloongarch64() { forbidModuleDirective(); }
+
+void LoongArchTargetStreamer::emitR(unsigned Opcode, unsigned Reg0, SMLoc IDLoc,
+                                    const MCSubtargetInfo *STI) {
+  MCInst TmpInst;
+  TmpInst.setOpcode(Opcode);
+  TmpInst.addOperand(MCOperand::createReg(Reg0));
+  TmpInst.setLoc(IDLoc);
+  getStreamer().emitInstruction(TmpInst, *STI);
+}
+
+void LoongArchTargetStreamer::emitRXX(unsigned Opcode, unsigned Reg0, MCOperand Op1,
+                                      MCOperand Op2, SMLoc IDLoc,
+                                      const MCSubtargetInfo *STI) {
+  MCInst TmpInst;
+  TmpInst.setOpcode(Opcode);
+  TmpInst.addOperand(MCOperand::createReg(Reg0));
+  TmpInst.addOperand(Op1);
+  TmpInst.addOperand(Op2);
+  TmpInst.setLoc(IDLoc);
+  getStreamer().emitInstruction(TmpInst, *STI);
+}
+
+void LoongArchTargetStreamer::emitRRXX(unsigned Opcode, unsigned Reg0, unsigned Reg1,
+                                       MCOperand Op2, MCOperand Op3, SMLoc IDLoc,
+                                       const MCSubtargetInfo *STI) {
+  MCInst TmpInst;
+  TmpInst.setOpcode(Opcode);
+  TmpInst.addOperand(MCOperand::createReg(Reg0));
+  TmpInst.addOperand(MCOperand::createReg(Reg1));
+  TmpInst.addOperand(Op2);
+  TmpInst.addOperand(Op3);
+  TmpInst.setLoc(IDLoc);
+  getStreamer().emitInstruction(TmpInst, *STI);
+}
+
+void LoongArchTargetStreamer::emitRX(unsigned Opcode, unsigned Reg0, MCOperand Op1,
+                                     SMLoc IDLoc, const MCSubtargetInfo *STI) {
+  MCInst TmpInst;
+  TmpInst.setOpcode(Opcode);
+  TmpInst.addOperand(MCOperand::createReg(Reg0));
+  TmpInst.addOperand(Op1);
+  TmpInst.setLoc(IDLoc);
+  getStreamer().emitInstruction(TmpInst, *STI);
+}
+
+void LoongArchTargetStreamer::emitRI(unsigned Opcode, unsigned Reg0, int32_t Imm,
+                                     SMLoc IDLoc, const MCSubtargetInfo *STI) {
+  emitRX(Opcode, Reg0, MCOperand::createImm(Imm), IDLoc, STI);
+}
+
+void LoongArchTargetStreamer::emitRR(unsigned Opcode, unsigned Reg0, unsigned Reg1,
+                                     SMLoc IDLoc, const MCSubtargetInfo *STI) {
+  emitRX(Opcode, Reg0, MCOperand::createReg(Reg1), IDLoc, STI);
+}
+
+void LoongArchTargetStreamer::emitII(unsigned Opcode, int16_t Imm1, int16_t Imm2,
+                                     SMLoc IDLoc, const MCSubtargetInfo *STI) {
+  MCInst TmpInst;
+  TmpInst.setOpcode(Opcode);
+  TmpInst.addOperand(MCOperand::createImm(Imm1));
+  TmpInst.addOperand(MCOperand::createImm(Imm2));
+ 
TmpInst.setLoc(IDLoc); + getStreamer().emitInstruction(TmpInst, *STI); +} + +void LoongArchTargetStreamer::emitRRX(unsigned Opcode, unsigned Reg0, unsigned Reg1, + MCOperand Op2, SMLoc IDLoc, + const MCSubtargetInfo *STI) { + MCInst TmpInst; + TmpInst.setOpcode(Opcode); + TmpInst.addOperand(MCOperand::createReg(Reg0)); + TmpInst.addOperand(MCOperand::createReg(Reg1)); + TmpInst.addOperand(Op2); + TmpInst.setLoc(IDLoc); + getStreamer().emitInstruction(TmpInst, *STI); +} + +void LoongArchTargetStreamer::emitRRR(unsigned Opcode, unsigned Reg0, unsigned Reg1, + unsigned Reg2, SMLoc IDLoc, + const MCSubtargetInfo *STI) { + emitRRX(Opcode, Reg0, Reg1, MCOperand::createReg(Reg2), IDLoc, STI); +} + +void LoongArchTargetStreamer::emitRRI(unsigned Opcode, unsigned Reg0, + unsigned Reg1, int32_t Imm, SMLoc IDLoc, + const MCSubtargetInfo *STI) { + emitRRX(Opcode, Reg0, Reg1, MCOperand::createImm(Imm), IDLoc, STI); +} + +void LoongArchTargetStreamer::emitRRIII(unsigned Opcode, unsigned Reg0, + unsigned Reg1, int16_t Imm0, int16_t Imm1, + int16_t Imm2, SMLoc IDLoc, + const MCSubtargetInfo *STI) { + MCInst TmpInst; + TmpInst.setOpcode(Opcode); + TmpInst.addOperand(MCOperand::createReg(Reg0)); + TmpInst.addOperand(MCOperand::createReg(Reg1)); + TmpInst.addOperand(MCOperand::createImm(Imm0)); + TmpInst.addOperand(MCOperand::createImm(Imm1)); + TmpInst.addOperand(MCOperand::createImm(Imm2)); + TmpInst.setLoc(IDLoc); + getStreamer().emitInstruction(TmpInst, *STI); +} + +void LoongArchTargetStreamer::emitAdd(unsigned DstReg, unsigned SrcReg, + unsigned TrgReg, bool Is64Bit, + const MCSubtargetInfo *STI) { + emitRRR(Is64Bit ? LoongArch::ADD_D : LoongArch::ADD_W, DstReg, SrcReg, TrgReg, SMLoc(), + STI); +} + +void LoongArchTargetStreamer::emitDSLL(unsigned DstReg, unsigned SrcReg, + int16_t ShiftAmount, SMLoc IDLoc, + const MCSubtargetInfo *STI) { + if (ShiftAmount >= 32) { + emitRRI(LoongArch::SLLI_D, DstReg, SrcReg, ShiftAmount - 32, IDLoc, STI); + return; + } + + emitRRI(LoongArch::SLLI_D, DstReg, SrcReg, ShiftAmount, IDLoc, STI); +} + +void LoongArchTargetStreamer::emitNop(SMLoc IDLoc, const MCSubtargetInfo *STI) { + emitRRI(LoongArch::ANDI, LoongArch::ZERO, LoongArch::ZERO, 0, IDLoc, STI); +} + +LoongArchTargetAsmStreamer::LoongArchTargetAsmStreamer(MCStreamer &S, + formatted_raw_ostream &OS) + : LoongArchTargetStreamer(S), OS(OS) {} + +void LoongArchTargetAsmStreamer::emitDirectiveOptionPic0() { + OS << "\t.option\tpic0\n"; +} + +void LoongArchTargetAsmStreamer::emitDirectiveOptionPic2() { + OS << "\t.option\tpic2\n"; +} + +void LoongArchTargetAsmStreamer::emitDirectiveSetArch(StringRef Arch) { + OS << "\t.set arch=" << Arch << "\n"; + LoongArchTargetStreamer::emitDirectiveSetArch(Arch); +} + +void LoongArchTargetAsmStreamer::emitDirectiveSetLoongArch32() { + //OS << "\t.set\tloongarch32\n"; + LoongArchTargetStreamer::emitDirectiveSetLoongArch32(); +} + +void LoongArchTargetAsmStreamer::emitDirectiveSetloongarch64() { + //OS << "\t.set\tloongarch64\n"; + LoongArchTargetStreamer::emitDirectiveSetloongarch64(); +} + +// This part is for ELF object output. +LoongArchTargetELFStreamer::LoongArchTargetELFStreamer(MCStreamer &S, + const MCSubtargetInfo &STI) + : LoongArchTargetStreamer(S), STI(STI) { + MCAssembler &MCA = getStreamer().getAssembler(); + + // It's possible that MCObjectFileInfo isn't fully initialized at this point + // due to an initialization order problem where LLVMTargetMachine creates the + // target streamer before TargetLoweringObjectFile calls + // InitializeMCObjectFileInfo. 
There doesn't seem to be a single place that
+  // covers all cases, so this statement covers most cases and direct object
+  // emission must call setPic() once MCObjectFileInfo has been initialized.
+  // The cases not handled here are covered by LoongArchAsmPrinter.
+  Pic = MCA.getContext().getObjectFileInfo()->isPositionIndependent();
+
+  // FIXME: Fix a dependency issue by instantiating the ABI object to some
+  // default based off the triple. The triple doesn't describe the target
+  // fully, but any external user of the API that uses the MCTargetStreamer
+  // would otherwise crash on an assertion failure.
+  ABI = LoongArchABIInfo(
+      STI.getTargetTriple().getArch() == Triple::ArchType::loongarch32
+          ? LoongArchABIInfo::ILP32D()
+          : LoongArchABIInfo::LP64D());
+}
+
+void LoongArchTargetELFStreamer::emitLabel(MCSymbol *S) {
+  auto *Symbol = cast<MCSymbolELF>(S);
+  getStreamer().getAssembler().registerSymbol(*Symbol);
+  uint8_t Type = Symbol->getType();
+  if (Type != ELF::STT_FUNC)
+    return;
+}
+
+void LoongArchTargetELFStreamer::finish() {
+  MCAssembler &MCA = getStreamer().getAssembler();
+  const MCObjectFileInfo &OFI = *MCA.getContext().getObjectFileInfo();
+
+  // .bss, .text and .data are always at least 16-byte aligned.
+  MCSection &TextSection = *OFI.getTextSection();
+  MCA.registerSection(TextSection);
+  MCSection &DataSection = *OFI.getDataSection();
+  MCA.registerSection(DataSection);
+  MCSection &BSSSection = *OFI.getBSSSection();
+  MCA.registerSection(BSSSection);
+
+  TextSection.setAlignment(Align(std::max(16u, TextSection.getAlignment())));
+  DataSection.setAlignment(Align(std::max(16u, DataSection.getAlignment())));
+  BSSSection.setAlignment(Align(std::max(16u, BSSSection.getAlignment())));
+
+  if (RoundSectionSizes) {
+    // Make section sizes a multiple of the alignment. This is useful for
+    // verifying the output of IAS against the output of other assemblers,
+    // but it is not necessary to produce a correct object and it increases
+    // section size.
+    MCStreamer &OS = getStreamer();
+    for (MCSection &S : MCA) {
+      MCSectionELF &Section = static_cast<MCSectionELF &>(S);
+
+      unsigned Alignment = Section.getAlignment();
+      if (Alignment) {
+        OS.SwitchSection(&Section);
+        if (Section.UseCodeAlign())
+          OS.emitCodeAlignment(Alignment, Alignment);
+        else
+          OS.emitValueToAlignment(Alignment, 0, 1, Alignment);
+      }
+    }
+  }
+
+  // Update the e_flags in the ELF header. See the FIXME and the comment in
+  // the constructor above for a full rundown on this.
+  unsigned EFlags = MCA.getELFHeaderEFlags();
+  // Figure out the e_flags.
+  //
+  // Bitness is already represented with the EI_CLASS byte in the current spec,
+  // so here we only record the base ABI modifier. Also set the object file ABI
+  // version to v1, as upstream LLVM has never handled the earlier
+  // stack-machine-based relocations.
+  //
+  // Refer to LoongArch ELF psABI v2.01 for details.
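+  //
+  // For reference, the relevant e_flags values (as defined in
+  // llvm/BinaryFormat/ELF.h) are: EF_LOONGARCH_ABI_SOFT_FLOAT = 0x1,
+  // EF_LOONGARCH_ABI_SINGLE_FLOAT = 0x2, EF_LOONGARCH_ABI_DOUBLE_FLOAT = 0x3,
+  // and EF_LOONGARCH_OBJABI_V1 = 0x40.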
+  EFlags |= ELF::EF_LOONGARCH_OBJABI_V1;
+  if (getABI().IsSoftFloat())
+    EFlags |= ELF::EF_LOONGARCH_ABI_SOFT_FLOAT;
+  else if (getABI().IsSingleFloat())
+    EFlags |= ELF::EF_LOONGARCH_ABI_SINGLE_FLOAT;
+  else if (getABI().IsDoubleFloat())
+    EFlags |= ELF::EF_LOONGARCH_ABI_DOUBLE_FLOAT;
+  else
+    llvm_unreachable("Improperly initialized target ABI");
+
+  MCA.setELFHeaderEFlags(EFlags);
+}
+
+MCELFStreamer &LoongArchTargetELFStreamer::getStreamer() {
+  return static_cast<MCELFStreamer &>(Streamer);
+}
+
+void LoongArchTargetELFStreamer::emitDirectiveOptionPic0() {
+  MCAssembler &MCA = getStreamer().getAssembler();
+  unsigned Flags = MCA.getELFHeaderEFlags();
+  // This option overrides other PIC options like -KPIC.
+  Pic = false;
+  // XXX: LoongArch relocations do not define this flag.
+  //Flags &= ~ELF::EF_LOONGARCH_PIC;
+  MCA.setELFHeaderEFlags(Flags);
+}
+
+void LoongArchTargetELFStreamer::emitDirectiveOptionPic2() {
+  MCAssembler &MCA = getStreamer().getAssembler();
+  unsigned Flags = MCA.getELFHeaderEFlags();
+  Pic = true;
+  // NOTE: We are following the GAS behaviour here, which means the directive
+  // 'pic2' also sets the CPIC bit in the ELF header. This is different from
+  // what is stated in the SYSV ABI, which considers the bits EF_LOONGARCH_PIC
+  // and EF_LOONGARCH_CPIC to be mutually exclusive.
+  // XXX: LoongArch relocations do not define these flags.
+  //Flags |= ELF::EF_LOONGARCH_PIC | ELF::EF_LOONGARCH_CPIC;
+  MCA.setELFHeaderEFlags(Flags);
+}
diff --git a/lib/Target/LoongArch/TargetInfo/CMakeLists.txt b/lib/Target/LoongArch/TargetInfo/CMakeLists.txt
new file mode 100644
index 00000000..f53ddba4
--- /dev/null
+++ b/lib/Target/LoongArch/TargetInfo/CMakeLists.txt
@@ -0,0 +1,9 @@
+add_llvm_component_library(LLVMLoongArchInfo
+  LoongArchTargetInfo.cpp
+
+  LINK_COMPONENTS
+  Support
+
+  ADD_TO_COMPONENT
+  LoongArch
+  )
diff --git a/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.cpp b/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.cpp
new file mode 100644
index 00000000..5d3ec9ea
--- /dev/null
+++ b/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.cpp
@@ -0,0 +1,34 @@
+//===-- LoongArchTargetInfo.cpp - LoongArch Target Implementation --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "TargetInfo/LoongArchTargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target &llvm::getTheLoongArch32Target() {
+  static Target TheLoongArch32Target;
+  return TheLoongArch32Target;
+}
+
+Target &llvm::getTheLoongArch64Target() {
+  static Target TheLoongArch64Target;
+  return TheLoongArch64Target;
+}
+
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchTargetInfo() {
+#if 0
+  // TODO: support it in the future.
+  RegisterTarget<Triple::loongarch32>
+      X(getTheLoongArch32Target(), "loongarch32", "LoongArch (32-bit)", "LoongArch");
+#endif
+  RegisterTarget<Triple::loongarch64>
+      A(getTheLoongArch64Target(), "loongarch64", "LoongArch (64-bit)",
+        "LoongArch");
+}
diff --git a/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.h b/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.h
new file mode 100644
index 00000000..7dce2497
--- /dev/null
+++ b/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.h
@@ -0,0 +1,21 @@
+//===-- LoongArchTargetInfo.h - LoongArch Target Implementation -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_TARGETINFO_LOONGARCHTARGETINFO_H
+#define LLVM_LIB_TARGET_LOONGARCH_TARGETINFO_LOONGARCHTARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+Target &getTheLoongArch32Target();
+Target &getTheLoongArch64Target();
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_LOONGARCH_TARGETINFO_LOONGARCHTARGETINFO_H
diff --git a/test/CodeGen/LoongArch/atomic-operand-imm0.ll b/test/CodeGen/LoongArch/atomic-operand-imm0.ll
new file mode 100644
index 00000000..d1d0c0bc
--- /dev/null
+++ b/test/CodeGen/LoongArch/atomic-operand-imm0.ll
@@ -0,0 +1,17 @@
+; Test that the last immediate 0 operand of an atomic instruction is printed.
+
+; RUN: llc -march=loongarch64 -o - %s | FileCheck %s
+
+define void @test_i32(i32* %dst, i32 %val) {
+; CHECK: ammax_db.wu $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]], $r[[REG3:[0-9]+]], 0
+entry:
+  %a = atomicrmw umax i32* %dst, i32 %val monotonic
+  ret void
+}
+
+define void @test_i64(i64* %dst, i64 %val) {
+; CHECK: ammax_db.du $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]], $r[[REG3:[0-9]+]], 0
+entry:
+  %a = atomicrmw umax i64* %dst, i64 %val monotonic
+  ret void
+}
diff --git a/test/CodeGen/LoongArch/atomic_16_8.ll b/test/CodeGen/LoongArch/atomic_16_8.ll
new file mode 100644
index 00000000..d5c3e0da
--- /dev/null
+++ b/test/CodeGen/LoongArch/atomic_16_8.ll
@@ -0,0 +1,809 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=loongarch64 -o - %s | FileCheck %s
+
+
+define void @umax_8(i8* %ptr) {
+; CHECK-LABEL: umax_8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r5, $zero, 100
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r4, $r4, 3
+; CHECK-NEXT: ori $r7, $zero, 255
+; CHECK-NEXT: sll.w $r7, $r7, $r4
+; CHECK-NEXT: nor $r8, $zero, $r7
+; CHECK-NEXT: sll.w $r5, $r5, $r4
+; CHECK-NEXT: dbar 0
+; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: and $r14, $r10, $r7
+; CHECK-NEXT: and $r5, $r5, $r7
+; CHECK-NEXT: sltu $r13, $r14, $r5
+; 
CHECK-NEXT: masknez $r11, $r14, $r13 +; CHECK-NEXT: maskeqz $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r8 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB0_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r9, $r10, $r7 +; CHECK-NEXT: srl.w $r9, $r9, $r4 +; CHECK-NEXT: ext.w.b $r9, $r9 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %ret = atomicrmw umax i8* %ptr, i8 100 seq_cst + ret void +} + +define void @umax_16(i16* %ptr) { +; CHECK-LABEL: umax_16: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 100 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r4, $r4, 3 +; CHECK-NEXT: lu12i.w $r7, 15 +; CHECK-NEXT: ori $r7, $r7, 4095 +; CHECK-NEXT: sll.w $r7, $r7, $r4 +; CHECK-NEXT: nor $r8, $zero, $r7 +; CHECK-NEXT: sll.w $r5, $r5, $r4 +; CHECK-NEXT: dbar 0 +; CHECK-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r7 +; CHECK-NEXT: and $r5, $r5, $r7 +; CHECK-NEXT: sltu $r13, $r14, $r5 +; CHECK-NEXT: masknez $r11, $r14, $r13 +; CHECK-NEXT: maskeqz $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r8 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB1_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r9, $r10, $r7 +; CHECK-NEXT: srl.w $r9, $r9, $r4 +; CHECK-NEXT: ext.w.h $r9, $r9 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %ret = atomicrmw umax i16* %ptr, i16 100 seq_cst + ret void +} + +define void @max_8(i8* %ptr) { +; CHECK-LABEL: max_8: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 100 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r4, $r4, 3 +; CHECK-NEXT: ori $r7, $zero, 255 +; CHECK-NEXT: sll.w $r7, $r7, $r4 +; CHECK-NEXT: nor $r8, $zero, $r7 +; CHECK-NEXT: sll.w $r5, $r5, $r4 +; CHECK-NEXT: dbar 0 +; CHECK-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r7 +; CHECK-NEXT: and $r5, $r5, $r7 +; CHECK-NEXT: slt $r13, $r14, $r5 +; CHECK-NEXT: masknez $r11, $r14, $r13 +; CHECK-NEXT: maskeqz $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r8 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB2_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r9, $r10, $r7 +; CHECK-NEXT: srl.w $r9, $r9, $r4 +; CHECK-NEXT: ext.w.b $r9, $r9 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %ret = atomicrmw max i8* %ptr, i8 100 seq_cst + ret void +} + +define void @max_16(i16* %ptr) { +; CHECK-LABEL: max_16: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 100 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r4, $r4, 3 +; CHECK-NEXT: lu12i.w $r7, 15 +; CHECK-NEXT: ori $r7, $r7, 4095 +; CHECK-NEXT: sll.w $r7, $r7, $r4 +; CHECK-NEXT: nor $r8, $zero, $r7 +; CHECK-NEXT: sll.w $r5, $r5, $r4 +; CHECK-NEXT: dbar 0 +; CHECK-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r7 +; CHECK-NEXT: and $r5, $r5, $r7 +; CHECK-NEXT: slt $r13, $r14, $r5 +; CHECK-NEXT: masknez $r11, $r14, $r13 +; CHECK-NEXT: maskeqz $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r8 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: 
sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB3_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r9, $r10, $r7 +; CHECK-NEXT: srl.w $r9, $r9, $r4 +; CHECK-NEXT: ext.w.h $r9, $r9 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %ret = atomicrmw max i16* %ptr, i16 100 seq_cst + ret void +} + + +define void @umin_8(i8* %ptr) { +; CHECK-LABEL: umin_8: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 100 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r4, $r4, 3 +; CHECK-NEXT: ori $r7, $zero, 255 +; CHECK-NEXT: sll.w $r7, $r7, $r4 +; CHECK-NEXT: nor $r8, $zero, $r7 +; CHECK-NEXT: sll.w $r5, $r5, $r4 +; CHECK-NEXT: dbar 0 +; CHECK-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r7 +; CHECK-NEXT: and $r5, $r5, $r7 +; CHECK-NEXT: sltu $r13, $r14, $r5 +; CHECK-NEXT: maskeqz $r11, $r14, $r13 +; CHECK-NEXT: masknez $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r8 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB4_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r9, $r10, $r7 +; CHECK-NEXT: srl.w $r9, $r9, $r4 +; CHECK-NEXT: ext.w.b $r9, $r9 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %ret = atomicrmw umin i8* %ptr, i8 100 seq_cst + ret void +} + +define void @umin_16(i16* %ptr) { +; CHECK-LABEL: umin_16: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 100 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r4, $r4, 3 +; CHECK-NEXT: lu12i.w $r7, 15 +; CHECK-NEXT: ori $r7, $r7, 4095 +; CHECK-NEXT: sll.w $r7, $r7, $r4 +; CHECK-NEXT: nor $r8, $zero, $r7 +; CHECK-NEXT: sll.w $r5, $r5, $r4 +; CHECK-NEXT: dbar 0 +; CHECK-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r7 +; CHECK-NEXT: and $r5, $r5, $r7 +; CHECK-NEXT: sltu $r13, $r14, $r5 +; CHECK-NEXT: maskeqz $r11, $r14, $r13 +; CHECK-NEXT: masknez $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r8 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB5_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r9, $r10, $r7 +; CHECK-NEXT: srl.w $r9, $r9, $r4 +; CHECK-NEXT: ext.w.h $r9, $r9 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %ret = atomicrmw umin i16* %ptr, i16 100 seq_cst + ret void +} + +define void @min_8(i8* %ptr) { +; CHECK-LABEL: min_8: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 100 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r4, $r4, 3 +; CHECK-NEXT: ori $r7, $zero, 255 +; CHECK-NEXT: sll.w $r7, $r7, $r4 +; CHECK-NEXT: nor $r8, $zero, $r7 +; CHECK-NEXT: sll.w $r5, $r5, $r4 +; CHECK-NEXT: dbar 0 +; CHECK-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r7 +; CHECK-NEXT: and $r5, $r5, $r7 +; CHECK-NEXT: slt $r13, $r14, $r5 +; CHECK-NEXT: maskeqz $r11, $r14, $r13 +; CHECK-NEXT: masknez $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r8 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB6_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r9, $r10, $r7 +; CHECK-NEXT: srl.w $r9, $r9, $r4 +; CHECK-NEXT: ext.w.b $r9, $r9 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: 
jr $ra + %ret = atomicrmw min i8* %ptr, i8 100 seq_cst + ret void +} + +define void @min_16(i16* %ptr) { +; CHECK-LABEL: min_16: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 100 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r4, $r4, 3 +; CHECK-NEXT: lu12i.w $r7, 15 +; CHECK-NEXT: ori $r7, $r7, 4095 +; CHECK-NEXT: sll.w $r7, $r7, $r4 +; CHECK-NEXT: nor $r8, $zero, $r7 +; CHECK-NEXT: sll.w $r5, $r5, $r4 +; CHECK-NEXT: dbar 0 +; CHECK-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r7 +; CHECK-NEXT: and $r5, $r5, $r7 +; CHECK-NEXT: slt $r13, $r14, $r5 +; CHECK-NEXT: maskeqz $r11, $r14, $r13 +; CHECK-NEXT: masknez $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r8 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB7_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r9, $r10, $r7 +; CHECK-NEXT: srl.w $r9, $r9, $r4 +; CHECK-NEXT: ext.w.h $r9, $r9 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %ret = atomicrmw min i16* %ptr, i16 100 seq_cst + ret void +} + + +define void @or_8(i8* %ptr) { +; CHECK-LABEL: or_8: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 100 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r4, $r4, 3 +; CHECK-NEXT: ori $r7, $zero, 255 +; CHECK-NEXT: sll.w $r7, $r7, $r4 +; CHECK-NEXT: nor $r8, $zero, $r7 +; CHECK-NEXT: sll.w $r5, $r5, $r4 +; CHECK-NEXT: dbar 0 +; CHECK-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: or $r11, $r10, $r5 +; CHECK-NEXT: and $r11, $r11, $r7 +; CHECK-NEXT: and $r12, $r10, $r8 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB8_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r9, $r10, $r7 +; CHECK-NEXT: srl.w $r9, $r9, $r4 +; CHECK-NEXT: ext.w.b $r9, $r9 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %ret = atomicrmw or i8* %ptr, i8 100 seq_cst + ret void +} + +define void @or_16(i16* %ptr) { +; CHECK-LABEL: or_16: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 100 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r4, $r4, 3 +; CHECK-NEXT: lu12i.w $r7, 15 +; CHECK-NEXT: ori $r7, $r7, 4095 +; CHECK-NEXT: sll.w $r7, $r7, $r4 +; CHECK-NEXT: nor $r8, $zero, $r7 +; CHECK-NEXT: sll.w $r5, $r5, $r4 +; CHECK-NEXT: dbar 0 +; CHECK-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: or $r11, $r10, $r5 +; CHECK-NEXT: and $r11, $r11, $r7 +; CHECK-NEXT: and $r12, $r10, $r8 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB9_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r9, $r10, $r7 +; CHECK-NEXT: srl.w $r9, $r9, $r4 +; CHECK-NEXT: ext.w.h $r9, $r9 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %ret = atomicrmw or i16* %ptr, i16 100 seq_cst + ret void +} + + +define void @add_8(i8* %ptr) { +; CHECK-LABEL: add_8: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 100 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r4, $r4, 3 +; CHECK-NEXT: ori $r7, $zero, 255 +; CHECK-NEXT: sll.w $r7, $r7, $r4 +; CHECK-NEXT: nor $r8, $zero, $r7 +; CHECK-NEXT: sll.w $r5, $r5, $r4 +; CHECK-NEXT: dbar 0 +; CHECK-NEXT: 
.LBB10_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: add.w $r11, $r10, $r5 +; CHECK-NEXT: and $r11, $r11, $r7 +; CHECK-NEXT: and $r12, $r10, $r8 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB10_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r9, $r10, $r7 +; CHECK-NEXT: srl.w $r9, $r9, $r4 +; CHECK-NEXT: ext.w.b $r9, $r9 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %ret = atomicrmw add i8* %ptr, i8 100 seq_cst + ret void +} + +define void @add_16(i16* %ptr) { +; CHECK-LABEL: add_16: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 100 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r4, $r4, 3 +; CHECK-NEXT: lu12i.w $r7, 15 +; CHECK-NEXT: ori $r7, $r7, 4095 +; CHECK-NEXT: sll.w $r7, $r7, $r4 +; CHECK-NEXT: nor $r8, $zero, $r7 +; CHECK-NEXT: sll.w $r5, $r5, $r4 +; CHECK-NEXT: dbar 0 +; CHECK-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: add.w $r11, $r10, $r5 +; CHECK-NEXT: and $r11, $r11, $r7 +; CHECK-NEXT: and $r12, $r10, $r8 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB11_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r9, $r10, $r7 +; CHECK-NEXT: srl.w $r9, $r9, $r4 +; CHECK-NEXT: ext.w.h $r9, $r9 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %ret = atomicrmw add i16* %ptr, i16 100 seq_cst + ret void +} + + +define void @sub_8(i8* %ptr) { +; CHECK-LABEL: sub_8: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 100 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r4, $r4, 3 +; CHECK-NEXT: ori $r7, $zero, 255 +; CHECK-NEXT: sll.w $r7, $r7, $r4 +; CHECK-NEXT: nor $r8, $zero, $r7 +; CHECK-NEXT: sll.w $r5, $r5, $r4 +; CHECK-NEXT: dbar 0 +; CHECK-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: sub.w $r11, $r10, $r5 +; CHECK-NEXT: and $r11, $r11, $r7 +; CHECK-NEXT: and $r12, $r10, $r8 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB12_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r9, $r10, $r7 +; CHECK-NEXT: srl.w $r9, $r9, $r4 +; CHECK-NEXT: ext.w.b $r9, $r9 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %ret = atomicrmw sub i8* %ptr, i8 100 seq_cst + ret void +} + +define void @sub_16(i16* %ptr) { +; CHECK-LABEL: sub_16: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 100 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r4, $r4, 3 +; CHECK-NEXT: lu12i.w $r7, 15 +; CHECK-NEXT: ori $r7, $r7, 4095 +; CHECK-NEXT: sll.w $r7, $r7, $r4 +; CHECK-NEXT: nor $r8, $zero, $r7 +; CHECK-NEXT: sll.w $r5, $r5, $r4 +; CHECK-NEXT: dbar 0 +; CHECK-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: sub.w $r11, $r10, $r5 +; CHECK-NEXT: and $r11, $r11, $r7 +; CHECK-NEXT: and $r12, $r10, $r8 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB13_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r9, $r10, $r7 +; CHECK-NEXT: srl.w $r9, $r9, $r4 +; CHECK-NEXT: ext.w.h $r9, $r9 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %ret = atomicrmw sub i16* %ptr, i16 100 seq_cst + ret void +} + + +define void @and_8(i8* %ptr) { +; CHECK-LABEL: and_8: +; CHECK: # %bb.0: +; CHECK-NEXT: ori 
$r5, $zero, 100 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r4, $r4, 3 +; CHECK-NEXT: ori $r7, $zero, 255 +; CHECK-NEXT: sll.w $r7, $r7, $r4 +; CHECK-NEXT: nor $r8, $zero, $r7 +; CHECK-NEXT: sll.w $r5, $r5, $r4 +; CHECK-NEXT: dbar 0 +; CHECK-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r11, $r10, $r5 +; CHECK-NEXT: and $r11, $r11, $r7 +; CHECK-NEXT: and $r12, $r10, $r8 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB14_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r9, $r10, $r7 +; CHECK-NEXT: srl.w $r9, $r9, $r4 +; CHECK-NEXT: ext.w.b $r9, $r9 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %ret = atomicrmw and i8* %ptr, i8 100 seq_cst + ret void +} + +define void @and_16(i16* %ptr) { +; CHECK-LABEL: and_16: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 100 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r4, $r4, 3 +; CHECK-NEXT: lu12i.w $r7, 15 +; CHECK-NEXT: ori $r7, $r7, 4095 +; CHECK-NEXT: sll.w $r7, $r7, $r4 +; CHECK-NEXT: nor $r8, $zero, $r7 +; CHECK-NEXT: sll.w $r5, $r5, $r4 +; CHECK-NEXT: dbar 0 +; CHECK-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r11, $r10, $r5 +; CHECK-NEXT: and $r11, $r11, $r7 +; CHECK-NEXT: and $r12, $r10, $r8 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB15_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r9, $r10, $r7 +; CHECK-NEXT: srl.w $r9, $r9, $r4 +; CHECK-NEXT: ext.w.h $r9, $r9 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %ret = atomicrmw and i16* %ptr, i16 100 seq_cst + ret void +} + + +define void @nand_8(i8* %ptr) { +; CHECK-LABEL: nand_8: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 100 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r4, $r4, 3 +; CHECK-NEXT: ori $r7, $zero, 255 +; CHECK-NEXT: sll.w $r7, $r7, $r4 +; CHECK-NEXT: nor $r8, $zero, $r7 +; CHECK-NEXT: sll.w $r5, $r5, $r4 +; CHECK-NEXT: dbar 0 +; CHECK-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r11, $r10, $r5 +; CHECK-NEXT: nor $r11, $zero, $r11 +; CHECK-NEXT: and $r11, $r11, $r7 +; CHECK-NEXT: and $r12, $r10, $r8 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB16_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r9, $r10, $r7 +; CHECK-NEXT: srl.w $r9, $r9, $r4 +; CHECK-NEXT: ext.w.b $r9, $r9 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %ret = atomicrmw nand i8* %ptr, i8 100 seq_cst + ret void +} + +define void @nand_16(i16* %ptr) { +; CHECK-LABEL: nand_16: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 100 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r4, $r4, 3 +; CHECK-NEXT: lu12i.w $r7, 15 +; CHECK-NEXT: ori $r7, $r7, 4095 +; CHECK-NEXT: sll.w $r7, $r7, $r4 +; CHECK-NEXT: nor $r8, $zero, $r7 +; CHECK-NEXT: sll.w $r5, $r5, $r4 +; CHECK-NEXT: dbar 0 +; CHECK-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r11, $r10, $r5 +; CHECK-NEXT: nor $r11, $zero, $r11 +; CHECK-NEXT: and $r11, $r11, $r7 +; CHECK-NEXT: and $r12, $r10, $r8 +; CHECK-NEXT: or $r12, $r12, $r11 +; 
CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB17_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r9, $r10, $r7 +; CHECK-NEXT: srl.w $r9, $r9, $r4 +; CHECK-NEXT: ext.w.h $r9, $r9 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %ret = atomicrmw nand i16* %ptr, i16 100 seq_cst + ret void +} + + +define void @xor_8(i8* %ptr) { +; CHECK-LABEL: xor_8: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 100 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r4, $r4, 3 +; CHECK-NEXT: ori $r7, $zero, 255 +; CHECK-NEXT: sll.w $r7, $r7, $r4 +; CHECK-NEXT: nor $r8, $zero, $r7 +; CHECK-NEXT: sll.w $r5, $r5, $r4 +; CHECK-NEXT: dbar 0 +; CHECK-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: xor $r11, $r10, $r5 +; CHECK-NEXT: and $r11, $r11, $r7 +; CHECK-NEXT: and $r12, $r10, $r8 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB18_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r9, $r10, $r7 +; CHECK-NEXT: srl.w $r9, $r9, $r4 +; CHECK-NEXT: ext.w.b $r9, $r9 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %ret = atomicrmw xor i8* %ptr, i8 100 seq_cst + ret void +} + +define void @xor_16(i16* %ptr) { +; CHECK-LABEL: xor_16: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 100 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r4, $r4, 3 +; CHECK-NEXT: lu12i.w $r7, 15 +; CHECK-NEXT: ori $r7, $r7, 4095 +; CHECK-NEXT: sll.w $r7, $r7, $r4 +; CHECK-NEXT: nor $r8, $zero, $r7 +; CHECK-NEXT: sll.w $r5, $r5, $r4 +; CHECK-NEXT: dbar 0 +; CHECK-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: xor $r11, $r10, $r5 +; CHECK-NEXT: and $r11, $r11, $r7 +; CHECK-NEXT: and $r12, $r10, $r8 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB19_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r9, $r10, $r7 +; CHECK-NEXT: srl.w $r9, $r9, $r4 +; CHECK-NEXT: ext.w.h $r9, $r9 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %ret = atomicrmw xor i16* %ptr, i16 100 seq_cst + ret void +} + + +define void @xchg_8(i8* %ptr) { +; CHECK-LABEL: xchg_8: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 100 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r4, $r4, 3 +; CHECK-NEXT: ori $r7, $zero, 255 +; CHECK-NEXT: sll.w $r7, $r7, $r4 +; CHECK-NEXT: nor $r8, $zero, $r7 +; CHECK-NEXT: sll.w $r5, $r5, $r4 +; CHECK-NEXT: dbar 0 +; CHECK-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r11, $r5, $r7 +; CHECK-NEXT: and $r12, $r10, $r8 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB20_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r9, $r10, $r7 +; CHECK-NEXT: srl.w $r9, $r9, $r4 +; CHECK-NEXT: ext.w.b $r9, $r9 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %ret = atomicrmw xchg i8* %ptr, i8 100 seq_cst + ret void +} + +define void @xchg_16(i16* %ptr) { +; CHECK-LABEL: xchg_16: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 100 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r4, $r4, 3 +; CHECK-NEXT: lu12i.w $r7, 15 +; CHECK-NEXT: ori $r7, $r7, 4095 +; CHECK-NEXT: sll.w $r7, $r7, $r4 +; CHECK-NEXT: nor $r8, $zero, $r7 +; 
CHECK-NEXT: sll.w $r5, $r5, $r4 +; CHECK-NEXT: dbar 0 +; CHECK-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r11, $r5, $r7 +; CHECK-NEXT: and $r12, $r10, $r8 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB21_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r9, $r10, $r7 +; CHECK-NEXT: srl.w $r9, $r9, $r4 +; CHECK-NEXT: ext.w.h $r9, $r9 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %ret = atomicrmw xchg i16* %ptr, i16 100 seq_cst + ret void +} + +define void @cmpxchg_8(i8* %ptr) { +; CHECK-LABEL: cmpxchg_8: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 1 +; CHECK-NEXT: ori $r6, $zero, 100 +; CHECK-NEXT: addi.d $r7, $zero, -4 +; CHECK-NEXT: and $r7, $r4, $r7 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r4, $r4, 3 +; CHECK-NEXT: ori $r8, $zero, 255 +; CHECK-NEXT: sll.w $r8, $r8, $r4 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: andi $r6, $r6, 255 +; CHECK-NEXT: sll.w $r6, $r6, $r4 +; CHECK-NEXT: andi $r5, $r5, 255 +; CHECK-NEXT: sll.w $r5, $r5, $r4 +; CHECK-NEXT: dbar 0 +; CHECK-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r11, $r7, 0 +; CHECK-NEXT: and $r12, $r11, $r8 +; CHECK-NEXT: bne $r12, $r6, .LBB22_3 +; CHECK-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 +; CHECK-NEXT: and $r11, $r11, $r9 +; CHECK-NEXT: or $r11, $r11, $r5 +; CHECK-NEXT: sc.w $r11, $r7, 0 +; CHECK-NEXT: beq $r11, $zero, .LBB22_1 +; CHECK-NEXT: .LBB22_3: +; CHECK-NEXT: dbar 1792 +; CHECK-NEXT: srl.w $r10, $r12, $r4 +; CHECK-NEXT: ext.w.b $r10, $r10 +; CHECK-NEXT: # %bb.4: +; CHECK-NEXT: jr $ra + %ret = cmpxchg i8* %ptr, i8 100, i8 1 seq_cst seq_cst + ret void +} + +define void @cmpxchg_16(i16* %ptr) { +; CHECK-LABEL: cmpxchg_16: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 1 +; CHECK-NEXT: ori $r6, $zero, 100 +; CHECK-NEXT: addi.d $r7, $zero, -4 +; CHECK-NEXT: and $r7, $r4, $r7 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r4, $r4, 3 +; CHECK-NEXT: lu12i.w $r8, 15 +; CHECK-NEXT: ori $r8, $r8, 4095 +; CHECK-NEXT: sll.w $r9, $r8, $r4 +; CHECK-NEXT: nor $r10, $zero, $r9 +; CHECK-NEXT: and $r6, $r6, $r8 +; CHECK-NEXT: sll.w $r6, $r6, $r4 +; CHECK-NEXT: and $r5, $r5, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r4 +; CHECK-NEXT: dbar 0 +; CHECK-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r11, $r7, 0 +; CHECK-NEXT: and $r12, $r11, $r9 +; CHECK-NEXT: bne $r12, $r6, .LBB23_3 +; CHECK-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 +; CHECK-NEXT: and $r11, $r11, $r10 +; CHECK-NEXT: or $r11, $r11, $r5 +; CHECK-NEXT: sc.w $r11, $r7, 0 +; CHECK-NEXT: beq $r11, $zero, .LBB23_1 +; CHECK-NEXT: .LBB23_3: +; CHECK-NEXT: dbar 1792 +; CHECK-NEXT: srl.w $r8, $r12, $r4 +; CHECK-NEXT: ext.w.h $r8, $r8 +; CHECK-NEXT: # %bb.4: +; CHECK-NEXT: jr $ra + %ret = cmpxchg i16* %ptr, i16 100, i16 1 seq_cst seq_cst + ret void +} diff --git a/test/CodeGen/LoongArch/atomic_64_32.ll b/test/CodeGen/LoongArch/atomic_64_32.ll new file mode 100644 index 00000000..ce400fd4 --- /dev/null +++ b/test/CodeGen/LoongArch/atomic_64_32.ll @@ -0,0 +1,327 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=loongarch64 -o - %s | FileCheck %s + + +define void @umax_32(i32* %ptr) { +; CHECK-LABEL: umax_32: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 100 +; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ammax_db.wu $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: jr $ra + %ret = atomicrmw umax 
i32* %ptr, i32 100 seq_cst + ret void +} + +define void @umax_64(i64* %ptr) { +; CHECK-LABEL: umax_64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $r5, $zero, 100 +; CHECK-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ammax_db.du $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: jr $ra + %ret = atomicrmw umax i64* %ptr, i64 100 seq_cst + ret void +} + +define void @max_32(i32* %ptr) { +; CHECK-LABEL: max_32: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 100 +; CHECK-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ammax_db.w $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: jr $ra + %ret = atomicrmw max i32* %ptr, i32 100 seq_cst + ret void +} + +define void @max_64(i64* %ptr) { +; CHECK-LABEL: max_64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $r5, $zero, 100 +; CHECK-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ammax_db.d $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: jr $ra + %ret = atomicrmw max i64* %ptr, i64 100 seq_cst + ret void +} + + +define void @umin_32(i32* %ptr) { +; CHECK-LABEL: umin_32: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 100 +; CHECK-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ammin_db.wu $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: jr $ra + %ret = atomicrmw umin i32* %ptr, i32 100 seq_cst + ret void +} + +define void @umin_64(i64* %ptr) { +; CHECK-LABEL: umin_64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $r5, $zero, 100 +; CHECK-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ammin_db.du $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: jr $ra + %ret = atomicrmw umin i64* %ptr, i64 100 seq_cst + ret void +} + +define void @min_32(i32* %ptr) { +; CHECK-LABEL: min_32: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 100 +; CHECK-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ammin_db.w $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: jr $ra + %ret = atomicrmw min i32* %ptr, i32 100 seq_cst + ret void +} + +define void @min_64(i64* %ptr) { +; CHECK-LABEL: min_64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $r5, $zero, 100 +; CHECK-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ammin_db.d $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: jr $ra + %ret = atomicrmw min i64* %ptr, i64 100 seq_cst + ret void +} + + +define void @or_32(i32* %ptr) { +; CHECK-LABEL: or_32: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 100 +; CHECK-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: amor_db.w $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: jr $ra + %ret = atomicrmw or i32* %ptr, i32 100 seq_cst + ret void +} + +define void @or_64(i64* %ptr) { +; CHECK-LABEL: or_64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $r5, $zero, 100 +; CHECK-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: amor_db.d $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: jr $ra + %ret = atomicrmw or i64* %ptr, i64 100 seq_cst + ret void +} + + +define void @add_32(i32* %ptr) { +; CHECK-LABEL: add_32: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 100 +; CHECK-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: amadd_db.w $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: jr $ra + %ret = atomicrmw add i32* %ptr, i32 100 seq_cst + ret void +} + +define void @add_64(i64* %ptr) { +; CHECK-LABEL: add_64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $r5, $zero, 100 +; CHECK-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: amadd_db.d $r6, $r5, 
$r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: jr $ra + %ret = atomicrmw add i64* %ptr, i64 100 seq_cst + ret void +} + + +define void @sub_32(i32* %ptr) { +; CHECK-LABEL: sub_32: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 100 +; CHECK-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: sub.w $r7, $zero, $r5 +; CHECK-NEXT: amadd_db.w $r6, $r7, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: jr $ra + %ret = atomicrmw sub i32* %ptr, i32 100 seq_cst + ret void +} + +define void @sub_64(i64* %ptr) { +; CHECK-LABEL: sub_64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $r5, $zero, 100 +; CHECK-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: sub.d $r7, $zero, $r5 +; CHECK-NEXT: amadd_db.d $r6, $r7, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: jr $ra + %ret = atomicrmw sub i64* %ptr, i64 100 seq_cst + ret void +} + + +define void @and_32(i32* %ptr) { +; CHECK-LABEL: and_32: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 100 +; CHECK-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: amand_db.w $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: jr $ra + %ret = atomicrmw and i32* %ptr, i32 100 seq_cst + ret void +} + +define void @and_64(i64* %ptr) { +; CHECK-LABEL: and_64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $r5, $zero, 100 +; CHECK-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: amand_db.d $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: jr $ra + %ret = atomicrmw and i64* %ptr, i64 100 seq_cst + ret void +} + + +define void @nand_32(i32* %ptr) { +; CHECK-LABEL: nand_32: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 100 +; CHECK-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r6, $r4, 0 +; CHECK-NEXT: and $r7, $r6, $r5 +; CHECK-NEXT: nor $r7, $zero, $r7 +; CHECK-NEXT: sc.w $r7, $r4, 0 +; CHECK-NEXT: beq $r7, $zero, .LBB16_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: dbar 1792 +; CHECK-NEXT: jr $ra + %ret = atomicrmw nand i32* %ptr, i32 100 seq_cst + ret void +} + +define void @nand_64(i64* %ptr) { +; CHECK-LABEL: nand_64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $r5, $zero, 100 +; CHECK-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.d $r6, $r4, 0 +; CHECK-NEXT: and $r7, $r6, $r5 +; CHECK-NEXT: nor $r7, $zero, $r7 +; CHECK-NEXT: sc.d $r7, $r4, 0 +; CHECK-NEXT: beq $r7, $zero, .LBB17_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: dbar 1792 +; CHECK-NEXT: jr $ra + %ret = atomicrmw nand i64* %ptr, i64 100 seq_cst + ret void +} + + +define void @xor_32(i32* %ptr) { +; CHECK-LABEL: xor_32: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 100 +; CHECK-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: amxor_db.w $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: jr $ra + %ret = atomicrmw xor i32* %ptr, i32 100 seq_cst + ret void +} + +define void @xor_64(i64* %ptr) { +; CHECK-LABEL: xor_64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $r5, $zero, 100 +; CHECK-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: amxor_db.d $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: jr $ra + %ret = atomicrmw xor i64* %ptr, i64 100 seq_cst + ret void +} + + +define void @xchg_32(i32* %ptr) { +; CHECK-LABEL: xchg_32: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 100 +; CHECK-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: amswap_db.w $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: jr $ra + %ret = atomicrmw xchg i32* %ptr, i32 100 seq_cst + ret void +} + +define void @xchg_64(i64* %ptr) { +; CHECK-LABEL: xchg_64: +; CHECK: 
# %bb.0: +; CHECK-NEXT: addi.d $r5, $zero, 100 +; CHECK-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: amswap_db.d $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: jr $ra + %ret = atomicrmw xchg i64* %ptr, i64 100 seq_cst + ret void +} + +define void @cmpxchg_32(i32* %ptr) { +; CHECK-LABEL: cmpxchg_32: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 1 +; CHECK-NEXT: ori $r6, $zero, 100 +; CHECK-NEXT: dbar 0 +; CHECK-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r7, $r4, 0 +; CHECK-NEXT: bne $r7, $r6, .LBB22_3 +; CHECK-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 +; CHECK-NEXT: move $r8, $r5 +; CHECK-NEXT: sc.w $r8, $r4, 0 +; CHECK-NEXT: beq $r8, $zero, .LBB22_1 +; CHECK-NEXT: .LBB22_3: +; CHECK-NEXT: dbar 1792 +; CHECK-NEXT: jr $ra + %ret = cmpxchg i32* %ptr, i32 100, i32 1 seq_cst seq_cst + ret void +} + +define void @cmpxchg_64(i64* %ptr) { +; CHECK-LABEL: cmpxchg_64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $r5, $zero, 1 +; CHECK-NEXT: addi.d $r6, $zero, 100 +; CHECK-NEXT: dbar 0 +; CHECK-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.d $r7, $r4, 0 +; CHECK-NEXT: bne $r7, $r6, .LBB23_3 +; CHECK-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 +; CHECK-NEXT: move $r8, $r5 +; CHECK-NEXT: sc.d $r8, $r4, 0 +; CHECK-NEXT: beq $r8, $zero, .LBB23_1 +; CHECK-NEXT: .LBB23_3: +; CHECK-NEXT: dbar 1792 +; CHECK-NEXT: jr $ra + %ret = cmpxchg i64* %ptr, i64 100, i64 1 seq_cst seq_cst + ret void +} diff --git a/test/CodeGen/LoongArch/bss.ll b/test/CodeGen/LoongArch/bss.ll new file mode 100644 index 00000000..cfc30b3a --- /dev/null +++ b/test/CodeGen/LoongArch/bss.ll @@ -0,0 +1,5 @@ +; RUN: llc -march=loongarch64 -o - %s | FileCheck %s + +; CHECK: .section .bss,"aw",@nobits +; CHECK: .globl a +@a = global i32 0, align 4 diff --git a/test/CodeGen/LoongArch/bstrins_d.ll b/test/CodeGen/LoongArch/bstrins_d.ll new file mode 100644 index 00000000..819bfdbb --- /dev/null +++ b/test/CodeGen/LoongArch/bstrins_d.ll @@ -0,0 +1,53 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=loongarch64 -o - %s | FileCheck %s + +define void @bstrinsd_63_27(i64* nocapture %d) nounwind { +; CHECK-LABEL: bstrinsd_63_27: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ld.d $r5, $r4, 0 +; CHECK-NEXT: addi.d $r6, $zero, 123 +; CHECK-NEXT: bstrins.d $r5, $r6, 63, 27 +; CHECK-NEXT: st.d $r5, $r4, 0 +; CHECK-NEXT: jr $ra +entry: + %tmp = load i64, i64* %d, align 8 + %and5 = and i64 %tmp, 134217727 + %or = or i64 %and5, 16508780544 + store i64 %or, i64* %d, align 8 + ret void +} + +define void @bstrinsd_33_28(i64* nocapture %d) nounwind { +; CHECK-LABEL: bstrinsd_33_28: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ld.d $r5, $r4, 0 +; CHECK-NEXT: addi.d $r6, $zero, 4 +; CHECK-NEXT: bstrins.d $r5, $r6, 33, 28 +; CHECK-NEXT: st.d $r5, $r4, 0 +; CHECK-NEXT: jr $ra +entry: + %tmp = load i64, i64* %d, align 8 + %and5 = and i64 %tmp, -16911433729 + %or = or i64 %and5, 1073741824 + store i64 %or, i64* %d, align 8 + ret void +} + +define void @bstrinsd_49_34(i64* nocapture %d) nounwind { +; CHECK-LABEL: bstrinsd_49_34: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ld.d $r5, $r4, 0 +; CHECK-NEXT: srli.d $r6, $r5, 50 +; CHECK-NEXT: bstrins.d $r5, $r6, 49, 34 +; CHECK-NEXT: st.d $r5, $r4, 0 +; CHECK-NEXT: jr $ra +entry: + %tmp0 = load i64, i64* %d, align 8 + %lshr = lshr i64 %tmp0, 50 + %tmp1 = load i64, i64* %d, align 8 + %shl = shl nuw nsw i64 %lshr, 34 + %and = and i64 %tmp1, -1125882726973441 + %or = or i64 %and, %shl + 
store i64 %or, i64* %d, align 8 + ret void +} diff --git a/test/CodeGen/LoongArch/bstrins_w.ll b/test/CodeGen/LoongArch/bstrins_w.ll new file mode 100644 index 00000000..3b62a760 --- /dev/null +++ b/test/CodeGen/LoongArch/bstrins_w.ll @@ -0,0 +1,28 @@ +; RUN: llc -march=loongarch64 -o - %s | FileCheck %s + +define void @bstrins_w(i32 %s, i32* nocapture %d) nounwind { +; CHECK-LABEL: bstrins_w: +; CHECK: bstrins.w $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]], 13, 5 +entry: + %and = shl i32 %s, 5 + %shl = and i32 %and, 16352 + %tmp3 = load i32, i32* %d, align 4 + %and5 = and i32 %tmp3, -16353 + %or = or i32 %and5, %shl + store i32 %or, i32* %d, align 4 + ret void +} + +define i32 @no_bstrinsw(i32* nocapture %d) { +; CHECK-LABEL: no_bstrinsw: +; CHECK: addi.w $r[[REG2:[0-9]+]], $zero, -4 +; CHECK: and $r[[REG1:[0-9]+]], $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]] +; CHECK: ori $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]], 8 +; CHECK-NOT: bstrins.w {{[[:space:]].*}} +entry: + %tmp = load volatile i32, i32* %d, align 4 + %and = and i32 %tmp, -4 + %or = or i32 %and, 8 + store volatile i32 %or, i32* %d, align 4 + ret i32 %and +} diff --git a/test/CodeGen/LoongArch/bstrpick_d.ll b/test/CodeGen/LoongArch/bstrpick_d.ll new file mode 100644 index 00000000..e1169cb2 --- /dev/null +++ b/test/CodeGen/LoongArch/bstrpick_d.ll @@ -0,0 +1,64 @@ +; RUN: llc -march=loongarch64 -o - %s | FileCheck %s + +define i64 @bstrpickd_add_zext(i32 signext %n) { +entry: + %add = add i32 %n, 1 + %res = zext i32 %add to i64 + ret i64 %res + +; CHECK-LABEL: bstrpickd_add_zext: +; CHECK: bstrpick.d $r[[REG:[0-9]+]], $r[[REG:[0-9]+]], 31, 0 + +} + +define i64 @bstrpickd_and12(i64 zeroext %a) { +entry: + %and = and i64 %a, 4095 + ret i64 %and + +; CHECK-LABEL: bstrpickd_and12: +; CHECK: andi $r[[REG:[0-9]+]], $r[[REG:[0-9]+]], 4095 + +} + +define i64 @bstrpickd_and13(i64 zeroext %a) { +entry: + %and = and i64 %a, 8191 + ret i64 %and + +; CHECK-LABEL: bstrpickd_and13: +; CHECK: bstrpick.d $r[[REG:[0-9]+]], $r[[REG:[0-9]+]], 12, 0 + +} + +define i64 @bstrpickd_lsr_and8(i64 zeroext %a) { +entry: + %shr = lshr i64 %a, 40 + %and = and i64 %shr, 255 + ret i64 %and + +; CHECK-LABEL: bstrpickd_lsr_and8: +; CHECK: bstrpick.d $r[[REG:[0-9]+]], $r[[REG:[0-9]+]], 47, 40 + +} + +define i64 @bstrpickd_zext(i32 signext %a) { +entry: + %conv = zext i32 %a to i64 + ret i64 %conv + +; CHECK-LABEL: bstrpickd_zext: +; CHECK: bstrpick.d $r[[REG:[0-9]+]], $r[[REG:[0-9]+]], 31, 0 + +} + +define i64 @bstrpickd_and_lsr(i64 zeroext %n) { +entry: + %and = lshr i64 %n, 8 + %shr = and i64 %and, 4095 + ret i64 %shr + +; CHECK-LABEL: bstrpickd_and_lsr: +; CHECK: bstrpick.d $r[[REG:[0-9]+]], $r[[REG:[0-9]+]], 19, 8 + +} diff --git a/test/CodeGen/LoongArch/bstrpick_w.ll b/test/CodeGen/LoongArch/bstrpick_w.ll new file mode 100644 index 00000000..e60de473 --- /dev/null +++ b/test/CodeGen/LoongArch/bstrpick_w.ll @@ -0,0 +1,18 @@ +; RUN: llc -march=loongarch64 -o - %s | FileCheck %s + +define i32 @bstrpickw_and24(i32 signext %a) { +; CHECK-LABEL: bstrpickw_and24: +; CHECK: bstrpick.w $r[[REG:[0-9]+]], $r[[REG:[0-9]+]], 23, 0 +entry: + %and = and i32 %a, 16777215 + ret i32 %and +} + +define i32 @bstrpickw_lshr_and(i32 %s, i32 %pos, i32 %sz) nounwind readnone { +; CHECK-LABEL: bstrpickw_lshr_and: +; CHECK: bstrpick.w $r[[REG:[0-9]+]], $r[[REG:[0-9]+]], 13, 5 +entry: + %shr = lshr i32 %s, 5 + %and = and i32 %shr, 511 + ret i32 %and +} diff --git a/test/CodeGen/LoongArch/builtins-loongarch-base.ll b/test/CodeGen/LoongArch/builtins-loongarch-base.ll new file mode 100644 index 
00000000..41553082 --- /dev/null +++ b/test/CodeGen/LoongArch/builtins-loongarch-base.ll @@ -0,0 +1,752 @@ +; Test the base intrinsics. +; RUN: llc -march=loongarch64 -o - %s | FileCheck %s + +define void @cpucfg() { +entry: + %u32_r = alloca i32, align 4 + %u32_a = alloca i32, align 4 + %0 = load i32, i32* %u32_a, align 4 + %1 = call i32 @llvm.loongarch.cpucfg(i32 %0) + store i32 %1, i32* %u32_r, align 4 + ret void +} + +declare i32 @llvm.loongarch.cpucfg(i32) + +; CHECK-LABEL: cpucfg: +; CHECK: ld.w $r[[REG:[0-9]+]], $sp, 8 +; CHECK: cpucfg $r[[REG:[0-9]+]], $r[[REG:[0-9]+]] +; CHECK: st.w $r[[REG:[0-9]+]], $sp, 12 +; CHECK: jr $ra +; + +define void @csrrd() { +entry: + %u32_r = alloca i32, align 4 + %0 = call i32 @llvm.loongarch.csrrd(i32 1) + store i32 %0, i32* %u32_r, align 4 + ret void +} + +declare i32 @llvm.loongarch.csrrd(i32) + +; CHECK-LABEL: csrrd: +; CHECK: csrrd $r[[REG:[0-9]+]], 1 +; CHECK: st.w $r[[REG:[0-9]+]], $sp, 12 +; CHECK: jr $ra +; + +define void @dcsrrd() { +entry: + %u64_r = alloca i64, align 8 + %0 = call i64 @llvm.loongarch.dcsrrd(i64 1) + store i64 %0, i64* %u64_r, align 8 + ret void +} + +declare i64 @llvm.loongarch.dcsrrd(i64) + +; CHECK-LABEL: dcsrrd: +; CHECK: csrrd $r[[REG:[0-9]+]], 1 +; CHECK: st.d $r[[REG:[0-9]+]], $sp, 8 +; CHECK: jr $ra +; + +define void @csrwr() { +entry: + %u32_r = alloca i32, align 4 + %u32_a = alloca i32, align 4 + %0 = load i32, i32* %u32_a, align 4 + %1 = call i32 @llvm.loongarch.csrwr(i32 %0, i32 1) + store i32 %1, i32* %u32_r, align 4 + ret void +} + +declare i32 @llvm.loongarch.csrwr(i32, i32) + +; CHECK-LABEL: csrwr: +; CHECK: ld.w $r[[REG:[0-9]+]], $sp, 8 +; CHECK: csrwr $r[[REG:[0-9]+]], 1 +; CHECK: st.w $r[[REG:[0-9]+]], $sp, 12 +; CHECK: jr $ra +; + +define void @dcsrwr() { +entry: + %u64_r = alloca i64, align 8 + %u64_a = alloca i64, align 8 + %0 = load i64, i64* %u64_a, align 8 + %1 = call i64 @llvm.loongarch.dcsrwr(i64 %0, i64 1) + store i64 %1, i64* %u64_r, align 8 + ret void +} + +declare i64 @llvm.loongarch.dcsrwr(i64, i64) + +; CHECK-LABEL: dcsrwr: +; CHECK: ld.d $r[[REG:[0-9]+]], $sp, 0 +; CHECK: csrwr $r[[REG:[0-9]+]], 1 +; CHECK: st.d $r[[REG:[0-9]+]], $sp, 8 +; CHECK: jr $ra +; + +define void @csrxchg() { +entry: + %u32_r = alloca i32, align 4 + %u32_a = alloca i32, align 4 + %u32_b = alloca i32, align 4 + %0 = load i32, i32* %u32_a, align 4 + %1 = load i32, i32* %u32_b, align 4 + %2 = call i32 @llvm.loongarch.csrxchg(i32 %0, i32 %1, i32 1) + store i32 %2, i32* %u32_r, align 4 + ret void +} + +declare i32 @llvm.loongarch.csrxchg(i32, i32, i32) + +; CHECK-LABEL: csrxchg: +; CHECK: ld.w $r[[REG1:[0-9]+]], $sp, 4 +; CHECK: ld.w $r[[REG2:[0-9]+]], $sp, 8 +; CHECK: csrxchg $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]], 1 +; CHECK: st.w $r[[REG1:[0-9]+]], $sp, 12 +; CHECK: jr $ra +; + +define void @dcsrxchg() { +entry: + %u64_r = alloca i64, align 8 + %u64_a = alloca i64, align 8 + %u64_b = alloca i64, align 8 + %0 = load i64, i64* %u64_a, align 8 + %1 = load i64, i64* %u64_b, align 8 + %2 = call i64 @llvm.loongarch.dcsrxchg(i64 %0, i64 %1, i64 1) + store i64 %2, i64* %u64_r, align 8 + ret void +} + +declare i64 @llvm.loongarch.dcsrxchg(i64, i64, i64) + +; CHECK-LABEL: dcsrxchg: +; CHECK: ld.d $r[[REG1:[0-9]+]], $sp, 8 +; CHECK: ld.d $r[[REG2:[0-9]+]], $sp, 16 +; CHECK: csrxchg $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]], 1 +; CHECK: st.d $r[[REG1:[0-9]+]], $sp, 24 +; CHECK: jr $ra +; + +define void @iocsrrd_b() { +entry: + %u32_a = alloca i32, align 4 + %u8_r = alloca i8, align 1 + %0 = load i32, i32* %u32_a, align 4 + %1 = call 
i32 @llvm.loongarch.iocsrrd.b(i32 %0) + %conv = trunc i32 %1 to i8 + store i8 %conv, i8* %u8_r, align 1 + ret void +} + +declare i32 @llvm.loongarch.iocsrrd.b(i32) + +; CHECK-LABEL: iocsrrd_b: +; CHECK: ld.w $r[[REG:[0-9]+]], $sp, 12 +; CHECK: iocsrrd.b $r[[REG:[0-9]+]], $r[[REG:[0-9]+]] +; CHECK: st.b $r[[REG:[0-9]+]], $sp, 8 +; CHECK: jr $ra +; + +define void @iocsrrd_h() { +entry: + %u32_a = alloca i32, align 4 + %u16_r = alloca i16, align 2 + %0 = load i32, i32* %u32_a, align 4 + %1 = call i32 @llvm.loongarch.iocsrrd.h(i32 %0) + %conv = trunc i32 %1 to i16 + store i16 %conv, i16* %u16_r, align 2 + ret void +} + +declare i32 @llvm.loongarch.iocsrrd.h(i32) + +; CHECK-LABEL: iocsrrd_h: +; CHECK: ld.w $r[[REG:[0-9]+]], $sp, 12 +; CHECK: iocsrrd.h $r[[REG:[0-9]+]], $r[[REG:[0-9]+]] +; CHECK: st.h $r[[REG:[0-9]+]], $sp, 8 +; CHECK: jr $ra +; + +define void @iocsrrd_w() { +entry: + %u32_r = alloca i32, align 4 + %u32_a = alloca i32, align 4 + %0 = load i32, i32* %u32_a, align 4 + %1 = call i32 @llvm.loongarch.iocsrrd.w(i32 %0) + store i32 %1, i32* %u32_r, align 4 + ret void +} + +declare i32 @llvm.loongarch.iocsrrd.w(i32) + +; CHECK-LABEL: iocsrrd_w: +; CHECK: ld.w $r[[REG:[0-9]+]], $sp, 8 +; CHECK: iocsrrd.w $r[[REG:[0-9]+]], $r[[REG:[0-9]+]] +; CHECK: st.w $r[[REG:[0-9]+]], $sp, 12 +; CHECK: jr $ra +; + +define void @iocsrrd_d() { +entry: + %u32_a = alloca i32, align 4 + %u64_r = alloca i64, align 8 + %0 = load i32, i32* %u32_a, align 4 + %1 = call i64 @llvm.loongarch.iocsrrd.d(i32 %0) + store i64 %1, i64* %u64_r, align 8 + ret void +} + +declare i64 @llvm.loongarch.iocsrrd.d(i32) + +; CHECK-LABEL: iocsrrd_d: +; CHECK: ld.w $r[[REG:[0-9]+]], $sp, 12 +; CHECK: iocsrrd.d $r[[REG:[0-9]+]], $r[[REG:[0-9]+]] +; CHECK: st.d $r[[REG:[0-9]+]], $sp, 0 +; CHECK: jr $ra +; + +define void @iocsrwr_b() { +entry: + %u32_a = alloca i32, align 4 + %u8_a = alloca i8, align 1 + %0 = load i8, i8* %u8_a, align 1 + %conv = zext i8 %0 to i32 + %1 = load i32, i32* %u32_a, align 4 + call void @llvm.loongarch.iocsrwr.b(i32 %conv, i32 %1) + ret void +} + +declare void @llvm.loongarch.iocsrwr.b(i32, i32) + +; CHECK-LABEL: iocsrwr_b: +; CHECK: ld.w $r[[REG1:[0-9]+]], $sp, 12 +; CHECK: ld.bu $r[[REG2:[0-9]+]], $sp, 8 +; CHECK: iocsrwr.b $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]] +; CHECK: jr $ra +; + +define void @iocsrwr_h() { +entry: + %u32_a = alloca i32, align 4 + %u16_a = alloca i16, align 2 + %0 = load i16, i16* %u16_a, align 2 + %conv = zext i16 %0 to i32 + %1 = load i32, i32* %u32_a, align 4 + call void @llvm.loongarch.iocsrwr.h(i32 %conv, i32 %1) + ret void +} + +declare void @llvm.loongarch.iocsrwr.h(i32, i32) + +; CHECK-LABEL: iocsrwr_h: +; CHECK: ld.w $r[[REG1:[0-9]+]], $sp, 12 +; CHECK: ld.hu $r[[REG2:[0-9]+]], $sp, 8 +; CHECK: iocsrwr.h $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]] +; CHECK: jr $ra +; + +define void @iocsrwr_w() { +entry: + %u32_a = alloca i32, align 4 + %u32_b = alloca i32, align 4 + %0 = load i32, i32* %u32_a, align 4 + %1 = load i32, i32* %u32_b, align 4 + call void @llvm.loongarch.iocsrwr.w(i32 %0, i32 %1) + ret void +} + +declare void @llvm.loongarch.iocsrwr.w(i32, i32) + +; CHECK-LABEL: iocsrwr_w: +; CHECK: ld.w $r[[REG1:[0-9]+]], $sp, 8 +; CHECK: ld.w $r[[REG2:[0-9]+]], $sp, 12 +; CHECK: iocsrwr.w $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]] +; CHECK: jr $ra +; + +define void @iocsrwr_d() { +entry: + %u32_a = alloca i32, align 4 + %u64_a = alloca i64, align 8 + %0 = load i64, i64* %u64_a, align 8 + %1 = load i32, i32* %u32_a, align 4 + call void @llvm.loongarch.iocsrwr.d(i64 %0, i32 %1) + ret void +} + 
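+; An editorial sketch, not part of the original patch: like the other iocsr tests here, iocsrwr_d keeps its operands in stack slots so that FileCheck can pin the ld.*/st.* offsets. Assuming hypothetical values %val and %addr, a minimal direct use of the 64-bit IO CSR write intrinsic would be: +; +;   call void @llvm.loongarch.iocsrwr.d(i64 %val, i32 %addr) +; +; which is expected to select a single iocsrwr.d instruction, as the CHECK lines below verify for the stack-based variant. + 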
+declare void @llvm.loongarch.iocsrwr.d(i64, i32) + +; CHECK-LABEL: iocsrwr_d: +; CHECK: ld.w $r[[REG1:[0-9]+]], $sp, 12 +; CHECK: ld.d $r[[REG2:[0-9]+]], $sp, 0 +; CHECK: iocsrwr.d $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]] +; CHECK: jr $ra +; + +define void @cacop() { +entry: + %i32_a = alloca i32, align 4 + %0 = load i32, i32* %i32_a, align 4 + call void @llvm.loongarch.cacop(i32 1, i32 %0, i32 2) + ret void +} + +declare void @llvm.loongarch.cacop(i32, i32, i32) + +; CHECK-LABEL: cacop: +; CHECK: ld.w $r[[REG:[0-9]+]], $sp, 12 +; CHECK: cacop 1, $r[[REG:[0-9]+]], 2 +; CHECK: jr $ra +; + +define void @dcacop() { +entry: + %i64_a = alloca i64, align 8 + %0 = load i64, i64* %i64_a, align 8 + call void @llvm.loongarch.dcacop(i32 1, i64 %0, i64 2) + ret void +} + +declare void @llvm.loongarch.dcacop(i32, i64, i64) + +; CHECK-LABEL: dcacop: +; CHECK: ld.d $r[[REG:[0-9]+]], $sp, 8 +; CHECK: cacop 1, $r[[REG:[0-9]+]], 2 +; CHECK: jr $ra +; + +define void @rdtime_d() { +entry: + %value = alloca i64, align 8 + %timeid = alloca i64, align 8 + %0 = call { i64, i64 } asm sideeffect "rdtime.d\09$0,$1\0A\09", "=&r,=&r"() nounwind + %asmresult0 = extractvalue { i64, i64 } %0, 0 + %asmresult1 = extractvalue { i64, i64 } %0, 1 + store i64 %asmresult0, i64* %value, align 8 + store i64 %asmresult1, i64* %timeid, align 8 + ret void +} + +; CHECK-LABEL: rdtime_d: +; CHECK: rdtime.d $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]] +; CHECK: st.d $r[[REG2:[0-9]+]], $sp, 8 +; CHECK: st.d $r[[REG1:[0-9]+]], $sp, 0 +; CHECK: jr $ra +; + +define void @rdtimeh_w() { +entry: + %value = alloca i32, align 4 + %timeid = alloca i32, align 4 + %0 = call { i32, i32 } asm sideeffect "rdtimeh.w\09$0,$1\0A\09", "=&r,=&r"() nounwind + %asmresult0 = extractvalue { i32, i32 } %0, 0 + %asmresult1 = extractvalue { i32, i32 } %0, 1 + store i32 %asmresult0, i32* %value, align 4 + store i32 %asmresult1, i32* %timeid, align 4 + ret void +} + +; CHECK-LABEL: rdtimeh_w: +; CHECK: rdtimeh.w $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]] +; CHECK: st.w $r[[REG2:[0-9]+]], $sp, 12 +; CHECK: st.w $r[[REG1:[0-9]+]], $sp, 8 +; CHECK: jr $ra +; + +define void @rdtimel_w() { +entry: + %value = alloca i32, align 4 + %timeid = alloca i32, align 4 + %0 = call { i32, i32 } asm sideeffect "rdtimel.w\09$0,$1\0A\09", "=&r,=&r"() nounwind + %asmresult0 = extractvalue { i32, i32 } %0, 0 + %asmresult1 = extractvalue { i32, i32 } %0, 1 + store i32 %asmresult0, i32* %value, align 4 + store i32 %asmresult1, i32* %timeid, align 4 + ret void +} + +; CHECK-LABEL: rdtimel_w: +; CHECK: rdtimel.w $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]] +; CHECK: st.w $r[[REG2:[0-9]+]], $sp, 12 +; CHECK: st.w $r[[REG1:[0-9]+]], $sp, 8 +; CHECK: jr $ra +; + +define void @crc_w_b_w() { +entry: + %i32_r = alloca i32, align 4 + %i32_a = alloca i32, align 4 + %i8_a = alloca i8, align 1 + %0 = load i8, i8* %i8_a, align 1 + %conv = sext i8 %0 to i32 + %1 = load i32, i32* %i32_a, align 4 + %2 = call i32 @llvm.loongarch.crc.w.b.w(i32 %conv, i32 %1) + store i32 %2, i32* %i32_r, align 4 + ret void +} + +declare i32 @llvm.loongarch.crc.w.b.w(i32, i32) + +; CHECK-LABEL: crc_w_b_w: +; CHECK: ld.w $r[[REG1:[0-9]+]], $sp, 8 +; CHECK: ld.b $r[[REG2:[0-9]+]], $sp, 4 +; CHECK: crc.w.b.w $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]] +; CHECK: jr $ra +; + +define void @crc_w_h_w() { +entry: + %i32_r = alloca i32, align 4 + %i32_a = alloca i32, align 4 + %i16_a = alloca i16, align 2 + %0 = load i16, i16* %i16_a, align 2 + %conv = sext i16 %0 to i32 + %1 = load i32, i32* %i32_a, align 4 + %2 = call i32 
@llvm.loongarch.crc.w.h.w(i32 %conv, i32 %1) + store i32 %2, i32* %i32_r, align 4 + ret void +} + +declare i32 @llvm.loongarch.crc.w.h.w(i32, i32) + +; CHECK-LABEL: crc_w_h_w: +; CHECK: ld.w $r[[REG1:[0-9]+]], $sp, 8 +; CHECK: ld.h $r[[REG2:[0-9]+]], $sp, 4 +; CHECK: crc.w.h.w $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]] +; CHECK: jr $ra +; + +define void @crc_w_w_w() { +entry: + %i32_r = alloca i32, align 4 + %i32_a = alloca i32, align 4 + %i32_b = alloca i32, align 4 + %0 = load i32, i32* %i32_a, align 4 + %1 = load i32, i32* %i32_b, align 4 + %2 = call i32 @llvm.loongarch.crc.w.w.w(i32 %0, i32 %1) + store i32 %2, i32* %i32_r, align 4 + ret void +} + +declare i32 @llvm.loongarch.crc.w.w.w(i32, i32) + +; CHECK-LABEL: crc_w_w_w: +; CHECK: ld.w $r[[REG1:[0-9]+]], $sp, 4 +; CHECK: ld.w $r[[REG2:[0-9]+]], $sp, 8 +; CHECK: crc.w.w.w $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]] +; CHECK: jr $ra +; + +define void @crc_w_d_w() { +entry: + %i32_r = alloca i32, align 4 + %i32_a = alloca i32, align 4 + %i64_a = alloca i64, align 8 + %0 = load i64, i64* %i64_a, align 8 + %1 = load i32, i32* %i32_a, align 4 + %2 = call i32 @llvm.loongarch.crc.w.d.w(i64 %0, i32 %1) + store i32 %2, i32* %i32_r, align 4 + ret void +} + +declare i32 @llvm.loongarch.crc.w.d.w(i64, i32) + +; CHECK-LABEL: crc_w_d_w: +; CHECK: ld.w $r[[REG1:[0-9]+]], $sp, 8 +; CHECK: ld.d $r[[REG2:[0-9]+]], $sp, 0 +; CHECK: crc.w.d.w $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]] +; CHECK: jr $ra +; + +define void @crcc_w_b_w() { +entry: + %i32_r = alloca i32, align 4 + %i32_a = alloca i32, align 4 + %i8_a = alloca i8, align 1 + %0 = load i8, i8* %i8_a, align 1 + %conv = sext i8 %0 to i32 + %1 = load i32, i32* %i32_a, align 4 + %2 = call i32 @llvm.loongarch.crcc.w.b.w(i32 %conv, i32 %1) + store i32 %2, i32* %i32_r, align 4 + ret void +} + +declare i32 @llvm.loongarch.crcc.w.b.w(i32, i32) + +; CHECK-LABEL: crcc_w_b_w: +; CHECK: ld.w $r[[REG1:[0-9]+]], $sp, 8 +; CHECK: ld.b $r[[REG2:[0-9]+]], $sp, 4 +; CHECK: crcc.w.b.w $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]] +; CHECK: jr $ra +; + +define void @crcc_w_h_w() { +entry: + %i32_r = alloca i32, align 4 + %i32_a = alloca i32, align 4 + %i16_a = alloca i16, align 2 + %0 = load i16, i16* %i16_a, align 2 + %conv = sext i16 %0 to i32 + %1 = load i32, i32* %i32_a, align 4 + %2 = call i32 @llvm.loongarch.crcc.w.h.w(i32 %conv, i32 %1) + store i32 %2, i32* %i32_r, align 4 + ret void +} + +declare i32 @llvm.loongarch.crcc.w.h.w(i32, i32) + +; CHECK-LABEL: crcc_w_h_w: +; CHECK: ld.w $r[[REG1:[0-9]+]], $sp, 8 +; CHECK: ld.h $r[[REG2:[0-9]+]], $sp, 4 +; CHECK: crcc.w.h.w $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]] +; CHECK: jr $ra +; + +define void @crcc_w_w_w() { +entry: + %i32_r = alloca i32, align 4 + %i32_a = alloca i32, align 4 + %i32_b = alloca i32, align 4 + %0 = load i32, i32* %i32_a, align 4 + %1 = load i32, i32* %i32_b, align 4 + %2 = call i32 @llvm.loongarch.crcc.w.w.w(i32 %0, i32 %1) + store i32 %2, i32* %i32_r, align 4 + ret void +} + +declare i32 @llvm.loongarch.crcc.w.w.w(i32, i32) + +; CHECK-LABEL: crcc_w_w_w: +; CHECK: ld.w $r[[REG1:[0-9]+]], $sp, 4 +; CHECK: ld.w $r[[REG2:[0-9]+]], $sp, 8 +; CHECK: crcc.w.w.w $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]] +; CHECK: jr $ra +; + +define void @crcc_w_d_w() { +entry: + %i32_r = alloca i32, align 4 + %i32_a = alloca i32, align 4 + %i64_a = alloca i64, align 8 + %0 = load i64, i64* %i64_a, align 8 + %1 = load i32, i32* %i32_a, align 4 + %2 = call i32 @llvm.loongarch.crcc.w.d.w(i64 
%0, i32 %1) + store i32 %2, i32* %i32_r, align 4 + ret void +} + +declare i32 @llvm.loongarch.crcc.w.d.w(i64, i32) + +; CHECK-LABEL: crcc_w_d_w: +; CHECK: ld.w $r[[REG1:[0-9]+]], $sp, 8 +; CHECK: ld.d $r[[REG2:[0-9]+]], $sp, 0 +; CHECK: crcc.w.d.w $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]] +; CHECK: jr $ra +; + +define void @tlbclr() { +entry: + call void @llvm.loongarch.tlbclr() + ret void +} + +declare void @llvm.loongarch.tlbclr() + +; CHECK-LABEL: tlbclr: +; CHECK: tlbclr +; CHECK: jr $ra +; + +define void @tlbflush() { +entry: + call void @llvm.loongarch.tlbflush() + ret void +} + +declare void @llvm.loongarch.tlbflush() + +; CHECK-LABEL: tlbflush: +; CHECK: tlbflush +; CHECK: jr $ra +; + +define void @tlbfill() { +entry: + call void @llvm.loongarch.tlbfill() + ret void +} + +declare void @llvm.loongarch.tlbfill() + +; CHECK-LABEL: tlbfill: +; CHECK: tlbfill +; CHECK: jr $ra +; + +define void @tlbrd() { +entry: + call void @llvm.loongarch.tlbrd() + ret void +} + +declare void @llvm.loongarch.tlbrd() + +; CHECK-LABEL: tlbrd: +; CHECK: tlbrd +; CHECK: jr $ra +; + +define void @tlbwr() { +entry: + call void @llvm.loongarch.tlbwr() + ret void +} + +declare void @llvm.loongarch.tlbwr() + +; CHECK-LABEL: tlbwr: +; CHECK: tlbwr +; CHECK: jr $ra +; + +define void @tlbsrch() { +entry: + call void @llvm.loongarch.tlbsrch() + ret void +} + +declare void @llvm.loongarch.tlbsrch() + +; CHECK-LABEL: tlbsrch: +; CHECK: tlbsrch +; CHECK: jr $ra +; + +define void @syscall() { +entry: + call void @llvm.loongarch.syscall(i64 1) + ret void +} + +declare void @llvm.loongarch.syscall(i64) + +; CHECK-LABEL: syscall: +; CHECK: syscall 1 +; CHECK: jr $ra +; + +define void @break_builtin() { +entry: + call void @llvm.loongarch.break(i64 1) + ret void +} + +declare void @llvm.loongarch.break(i64) + +; CHECK-LABEL: break_builtin: +; CHECK: break 1 +; CHECK: jr $ra +; + +define void @asrtle_d() { +entry: + %i64_a = alloca i64, align 8 + %i64_b = alloca i64, align 8 + %0 = load i64, i64* %i64_a, align 8 + %1 = load i64, i64* %i64_b, align 8 + call void @llvm.loongarch.asrtle.d(i64 %0, i64 %1) + ret void +} + +declare void @llvm.loongarch.asrtle.d(i64, i64) + +; CHECK-LABEL: asrtle_d: +; CHECK: ld.d $r[[REG1:[0-9]+]], $sp, 0 +; CHECK: ld.d $r[[REG2:[0-9]+]], $sp, 8 +; CHECK: asrtle.d $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]] +; CHECK: jr $ra +; + +define void @asrtgt_d() { +entry: + %i64_a = alloca i64, align 8 + %i64_b = alloca i64, align 8 + %0 = load i64, i64* %i64_a, align 8 + %1 = load i64, i64* %i64_b, align 8 + call void @llvm.loongarch.asrtgt.d(i64 %0, i64 %1) + ret void +} + +declare void @llvm.loongarch.asrtgt.d(i64, i64) + +; CHECK-LABEL: asrtgt_d: +; CHECK: ld.d $r[[REG1:[0-9]+]], $sp, 0 +; CHECK: ld.d $r[[REG2:[0-9]+]], $sp, 8 +; CHECK: asrtgt.d $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]] +; CHECK: jr $ra +; + +define void @dbar() { +entry: + call void @llvm.loongarch.dbar(i64 0) + ret void +} + +declare void @llvm.loongarch.dbar(i64) + +; CHECK-LABEL: dbar: +; CHECK: dbar 0 +; CHECK: jr $ra +; + +define void @ibar() { +entry: + call void @llvm.loongarch.ibar(i64 0) + ret void +} + +declare void @llvm.loongarch.ibar(i64) + +; CHECK-LABEL: ibar: +; CHECK: ibar 0 +; CHECK: jr $ra +; + +define void @movfcsr2gr() { +entry: + %u32_r = alloca i32, align 4 + %rd = alloca i32, align 4 + %0 = call i32 asm sideeffect "movfcsr2gr $0, $$fcsr0", "=&r"() + store i32 %0, i32* %rd, align 4 + %1 = load i32, i32* %rd, align 4 + store i32 %1, i32* %u32_r, align 4 + ret void +} + +; CHECK-LABEL: movfcsr2gr: +; CHECK: 
movfcsr2gr $r[[REG:[0-9]+]], $fcsr[[REG:[0-9]+]] +; CHECK: st.w $r[[REG:[0-9]+]], $sp, 8 +; CHECK: st.w $r[[REG:[0-9]+]], $sp, 12 +; CHECK: jr $ra +; + +define void @movgr2fcsr() { +entry: + %u32_a = alloca i32, align 4 + %0 = load i32, i32* %u32_a, align 4 + call void asm sideeffect "movgr2fcsr $$fcsr0, $0", "r"(i32 %0) + ret void +} + +; CHECK-LABEL: movgr2fcsr: +; CHECK: ld.w $r[[REG:[0-9]+]], $sp, 12 +; CHECK: movgr2fcsr $fcsr[[REG:[0-9]+]], $r[[REG:[0-9]+]] +; CHECK: jr $ra +; diff --git a/test/CodeGen/LoongArch/const-mult.ll b/test/CodeGen/LoongArch/const-mult.ll new file mode 100644 index 00000000..955e1626 --- /dev/null +++ b/test/CodeGen/LoongArch/const-mult.ll @@ -0,0 +1,245 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=loongarch64-linux-gnu < %s | FileCheck %s + + +; This test is copied from Mips, except for the mul2730_32 and mul2730_64 cases. + +define i32 @mul5_32(i32 signext %a) { +; CHECK-LABEL: mul5_32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli.w $r4, $r4, 0 +; CHECK-NEXT: alsl.w $r4, $r4, $r4, 2 +; CHECK-NEXT: jr $ra +entry: + %mul = mul nsw i32 %a, 5 + ret i32 %mul +} + +define i32 @mul27_32(i32 signext %a) { +; CHECK-LABEL: mul27_32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli.w $r4, $r4, 0 +; CHECK-NEXT: alsl.w $r5, $r4, $r4, 2 +; CHECK-NEXT: slli.w $r4, $r4, 5 +; CHECK-NEXT: sub.w $r4, $r4, $r5 +; CHECK-NEXT: jr $ra +entry: + %mul = mul nsw i32 %a, 27 + ret i32 %mul +} + +define i32 @muln2147483643_32(i32 signext %a) { +; CHECK-LABEL: muln2147483643_32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli.w $r4, $r4, 0 +; CHECK-NEXT: alsl.w $r5, $r4, $r4, 2 +; CHECK-NEXT: slli.w $r4, $r4, 31 +; CHECK-NEXT: add.w $r4, $r4, $r5 +; CHECK-NEXT: jr $ra +entry: + %mul = mul nsw i32 %a, -2147483643 + ret i32 %mul +} + +define i64 @muln9223372036854775805_64(i64 signext %a) { +; CHECK-LABEL: muln9223372036854775805_64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: alsl.d $r5, $r4, $r4, 1 +; CHECK-NEXT: slli.d $r4, $r4, 63 +; CHECK-NEXT: add.d $r4, $r4, $r5 +; CHECK-NEXT: jr $ra +entry: + %mul = mul nsw i64 %a, -9223372036854775805 + ret i64 %mul +} + +define i128 @muln170141183460469231731687303715884105725_128(i128 signext %a) { +; CHECK-LABEL: muln170141183460469231731687303715884105725_128: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: srli.d $r6, $r4, 63 +; CHECK-NEXT: slli.d $r7, $r5, 1 +; CHECK-NEXT: or $r6, $r7, $r6 +; CHECK-NEXT: add.d $r5, $r6, $r5 +; CHECK-NEXT: slli.d $r7, $r4, 1 +; CHECK-NEXT: alsl.d $r6, $r4, $r4, 1 +; CHECK-NEXT: sltu $r7, $r6, $r7 +; CHECK-NEXT: bstrpick.d $r7, $r7, 31, 0 +; CHECK-NEXT: add.d $r5, $r5, $r7 +; CHECK-NEXT: slli.d $r4, $r4, 63 +; CHECK-NEXT: add.d $r5, $r4, $r5 +; CHECK-NEXT: move $r4, $r6 +; CHECK-NEXT: jr $ra +entry: + %mul = mul nsw i128 %a, -170141183460469231731687303715884105725 + ret i128 %mul +} + +define i128 @mul170141183460469231731687303715884105723_128(i128 signext %a) { +; CHECK-LABEL: mul170141183460469231731687303715884105723_128: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: srli.d $r6, $r4, 62 +; CHECK-NEXT: slli.d $r7, $r5, 2 +; CHECK-NEXT: or $r6, $r7, $r6 +; CHECK-NEXT: add.d $r5, $r6, $r5 +; CHECK-NEXT: slli.d $r6, $r4, 2 +; CHECK-NEXT: alsl.d $r7, $r4, $r4, 2 +; CHECK-NEXT: sltu $r6, $r7, $r6 +; CHECK-NEXT: bstrpick.d $r6, $r6, 31, 0 +; CHECK-NEXT: add.d $r5, $r5, $r6 +; CHECK-NEXT: slli.d $r4, $r4, 63 +; CHECK-NEXT: sub.d $r4, $r4, $r5 +; CHECK-NEXT: sltu $r5, $zero, $r7 +; CHECK-NEXT: bstrpick.d $r5, $r5, 31, 0 +; CHECK-NEXT: sub.d $r5, $r4, $r5 +; CHECK-NEXT: 
addi.d $r4, $zero, 0 +; CHECK-NEXT: sub.d $r4, $r4, $r7 +; CHECK-NEXT: jr $ra +entry: + %mul = mul nsw i128 %a, 170141183460469231731687303715884105723 + ret i128 %mul +} + +define i32 @mul42949673_32(i32 %a) { +; CHECK-LABEL: mul42949673_32: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r5, 10485 +; CHECK-NEXT: ori $r5, $r5, 3113 +; CHECK-NEXT: slli.w $r4, $r4, 0 +; CHECK-NEXT: mul.w $r4, $r4, $r5 +; CHECK-NEXT: jr $ra + %b = mul i32 %a, 42949673 + ret i32 %b +} + +define i64 @mul42949673_64(i64 %a) { +; CHECK-LABEL: mul42949673_64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lu12i.w $r5, 10485 +; CHECK-NEXT: ori $r5, $r5, 3113 +; CHECK-NEXT: mul.d $r4, $r4, $r5 +; CHECK-NEXT: jr $ra +entry: + %b = mul i64 %a, 42949673 + ret i64 %b +} + +define i32 @mul22224078_32(i32 %a) { +; CHECK-LABEL: mul22224078_32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lu12i.w $r5, 5425 +; CHECK-NEXT: ori $r5, $r5, 3278 +; CHECK-NEXT: slli.w $r4, $r4, 0 +; CHECK-NEXT: mul.w $r4, $r4, $r5 +; CHECK-NEXT: jr $ra +entry: + %b = mul i32 %a, 22224078 + ret i32 %b +} + +define i64 @mul22224078_64(i64 %a) { +; CHECK-LABEL: mul22224078_64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lu12i.w $r5, 5425 +; CHECK-NEXT: ori $r5, $r5, 3278 +; CHECK-NEXT: mul.d $r4, $r4, $r5 +; CHECK-NEXT: jr $ra +entry: + %b = mul i64 %a, 22224078 + ret i64 %b +} + +define i32 @mul22245375_32(i32 %a) { +; CHECK-LABEL: mul22245375_32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lu12i.w $r5, 5430 +; CHECK-NEXT: ori $r5, $r5, 4095 +; CHECK-NEXT: slli.w $r4, $r4, 0 +; CHECK-NEXT: mul.w $r4, $r4, $r5 +; CHECK-NEXT: jr $ra +entry: + %b = mul i32 %a, 22245375 + ret i32 %b +} + +define i64 @mul22245375_64(i64 %a) { +; CHECK-LABEL: mul22245375_64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lu12i.w $r5, 5430 +; CHECK-NEXT: ori $r5, $r5, 4095 +; CHECK-NEXT: mul.d $r4, $r4, $r5 +; CHECK-NEXT: jr $ra +entry: + %b = mul i64 %a, 22245375 + ret i64 %b +} + +define i32 @mul25165824_32(i32 %a) { +; CHECK-LABEL: mul25165824_32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lu12i.w $r5, 5430 +; CHECK-NEXT: ori $r5, $r5, 4095 +; CHECK-NEXT: slli.w $r4, $r4, 0 +; CHECK-NEXT: mul.w $r4, $r4, $r5 +; CHECK-NEXT: jr $ra +entry: + %b = mul i32 %a, 22245375 + ret i32 %b +} + +define i64 @mul25165824_64(i64 %a) { +; CHECK-LABEL: mul25165824_64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli.d $r5, $r4, 23 +; CHECK-NEXT: slli.d $r4, $r4, 24 +; CHECK-NEXT: add.d $r4, $r4, $r5 +; CHECK-NEXT: jr $ra +entry: + %b = mul i64 %a, 25165824 + ret i64 %b +} + +define i32 @mul33554432_32(i32 %a) { +; CHECK-LABEL: mul33554432_32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lu12i.w $r5, 5430 +; CHECK-NEXT: ori $r5, $r5, 4095 +; CHECK-NEXT: slli.w $r4, $r4, 0 +; CHECK-NEXT: mul.w $r4, $r4, $r5 +; CHECK-NEXT: jr $ra +entry: + %b = mul i32 %a, 22245375 + ret i32 %b +} + +define i64 @mul33554432_64(i64 %a) { +; CHECK-LABEL: mul33554432_64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli.d $r4, $r4, 25 +; CHECK-NEXT: jr $ra +entry: + %b = mul i64 %a, 33554432 + ret i64 %b +} + +define i32 @mul2730_32(i32 %a) { +; CHECK-LABEL: mul2730_32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli.w $r4, $r4, 0 +; CHECK-NEXT: ori $r5, $zero, 2730 +; CHECK-NEXT: mul.w $r4, $r4, $r5 +; CHECK-NEXT: jr $ra +entry: + %b = mul i32 %a, 2730 + ret i32 %b +} + +define i64 @mul2730_64(i64 %a) { +; CHECK-LABEL: mul2730_64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ori $r5, $zero, 2730 +; CHECK-NEXT: mul.d $r4, $r4, $r5 +; CHECK-NEXT: jr $ra +entry: + %b = mul i64 %a, 2730 + ret i64 %b +} diff --git 
a/test/CodeGen/LoongArch/disable-tail-calls.ll b/test/CodeGen/LoongArch/disable-tail-calls.ll new file mode 100644 index 00000000..586daca2 --- /dev/null +++ b/test/CodeGen/LoongArch/disable-tail-calls.ll @@ -0,0 +1,94 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 -relocation-model=pic < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK1 +; RUN: llc -march=loongarch64 -relocation-model=pic -disable-tail-calls < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK2 +; RUN: llc -march=loongarch64 -relocation-model=pic -disable-tail-calls=false < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK3 + +; Function with attribute #0 = { "disable-tail-calls"="true" } +define i32 @caller1(i32 %a) #0 { +; CHECK1-LABEL: caller1: +; CHECK1: # %bb.0: # %entry +; CHECK1-NEXT: addi.d $sp, $sp, -16 +; CHECK1-NEXT: .cfi_def_cfa_offset 16 +; CHECK1-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK1-NEXT: .cfi_offset 1, -8 +; CHECK1-NEXT: bl callee +; CHECK1-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK1-NEXT: addi.d $sp, $sp, 16 +; CHECK1-NEXT: jr $ra +; +; CHECK2-LABEL: caller1: +; CHECK2: # %bb.0: # %entry +; CHECK2-NEXT: addi.d $sp, $sp, -16 +; CHECK2-NEXT: .cfi_def_cfa_offset 16 +; CHECK2-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK2-NEXT: .cfi_offset 1, -8 +; CHECK2-NEXT: bl callee +; CHECK2-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK2-NEXT: addi.d $sp, $sp, 16 +; CHECK2-NEXT: jr $ra +; +; CHECK3-LABEL: caller1: +; CHECK3: # %bb.0: # %entry +; CHECK3-NEXT: b callee +entry: + %call = tail call i32 @callee(i32 %a) + ret i32 %call +} + + +; Function with attribute #1 = { "disable-tail-calls"="false" } +define i32 @caller2(i32 %a) #1 { +; CHECK1-LABEL: caller2: +; CHECK1: # %bb.0: # %entry +; CHECK1-NEXT: b callee +; +; CHECK2-LABEL: caller2: +; CHECK2: # %bb.0: # %entry +; CHECK2-NEXT: addi.d $sp, $sp, -16 +; CHECK2-NEXT: .cfi_def_cfa_offset 16 +; CHECK2-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK2-NEXT: .cfi_offset 1, -8 +; CHECK2-NEXT: bl callee +; CHECK2-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK2-NEXT: addi.d $sp, $sp, 16 +; CHECK2-NEXT: jr $ra +; +; CHECK3-LABEL: caller2: +; CHECK3: # %bb.0: # %entry +; CHECK3-NEXT: b callee +entry: + %call = tail call i32 @callee(i32 %a) + ret i32 %call +} + +define i32 @caller3(i32 %a) { +; CHECK1-LABEL: caller3: +; CHECK1: # %bb.0: # %entry +; CHECK1-NEXT: b callee +; +; CHECK2-LABEL: caller3: +; CHECK2: # %bb.0: # %entry +; CHECK2-NEXT: addi.d $sp, $sp, -16 +; CHECK2-NEXT: .cfi_def_cfa_offset 16 +; CHECK2-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK2-NEXT: .cfi_offset 1, -8 +; CHECK2-NEXT: bl callee +; CHECK2-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK2-NEXT: addi.d $sp, $sp, 16 +; CHECK2-NEXT: jr $ra +; +; CHECK3-LABEL: caller3: +; CHECK3: # %bb.0: # %entry +; CHECK3-NEXT: b callee +entry: + %call = tail call i32 @callee(i32 %a) + ret i32 %call +} + +declare i32 @callee(i32) + +attributes #0 = { "disable-tail-calls"="true" } +attributes #1 = { "disable-tail-calls"="false" } diff --git a/test/CodeGen/LoongArch/divrem.ll b/test/CodeGen/LoongArch/divrem.ll new file mode 100644 index 00000000..34293a83 --- /dev/null +++ b/test/CodeGen/LoongArch/divrem.ll @@ -0,0 +1,68 @@ +; RUN: llc -march=loongarch64 -relocation-model=pic < %s | FileCheck %s -check-prefixes=CHECK,CHECK-TRAP + +; RUN: llc -march=loongarch64 -mnocheck-zero-division -relocation-model=pic < %s | FileCheck %s -check-prefixes=CHECK,NOCHECK + +; FileCheck Prefixes: 
+; CHECK-TRAP - Division by zero will be detected and will trap +; NOCHECK - Division by zero will not be detected + +define i32 @sdiv1(i32 signext %a0, i32 signext %a1) nounwind readnone { +entry: +; CHECK-LABEL: sdiv1: + +; CHECK: div.w $r4, $r4, $r5 +; CHECK-TRAP: bne $r5, $zero, 8 +; CHECK-TRAP: break 7 + +; NOCHECK-NOT: bne +; NOCHECK-NOT: break + + %div = sdiv i32 %a0, %a1 + ret i32 %div +} + +define i32 @srem1(i32 signext %a0, i32 signext %a1) nounwind readnone { +entry: +; CHECK-LABEL: srem1: + +; CHECK: mod.w $r4, $r4, $r5 +; CHECK-TRAP: bne $r5, $zero, 8 +; CHECK-TRAP: break 7 + +; NOCHECK-NOT: bne +; NOCHECK-NOT: break + + %rem = srem i32 %a0, %a1 + ret i32 %rem +} + +define i32 @udiv1(i32 signext %a0, i32 signext %a1) nounwind readnone { +entry: +; CHECK-LABEL: udiv1: + +; CHECK: div.wu $r4, $r4, $r5 +; CHECK-TRAP: bne $r5, $zero, 8 +; CHECK-TRAP: break 7 + +; NOCHECK-NOT: bne +; NOCHECK-NOT: break + + %div = udiv i32 %a0, %a1 + ret i32 %div +} + +define i32 @urem1(i32 signext %a0, i32 signext %a1) nounwind readnone { +entry: +; CHECK-LABEL: urem1: + +; CHECK: mod.wu $r4, $r4, $r5 +; CHECK-TRAP: bne $r5, $zero, 8 +; CHECK-TRAP: break 7 + +; NOCHECK-NOT: bne +; NOCHECK-NOT: break + + %rem = urem i32 %a0, %a1 + ret i32 %rem +} diff --git a/test/CodeGen/LoongArch/e_flags.ll b/test/CodeGen/LoongArch/e_flags.ll new file mode 100644 index 00000000..c1817dfa --- /dev/null +++ b/test/CodeGen/LoongArch/e_flags.ll @@ -0,0 +1,15 @@ +; RUN: llc --mtriple=loongarch64 --filetype=obj %s -o %t-la64 +; RUN: llvm-readelf -h %t-la64 | FileCheck %s --check-prefixes=LP64,ABI-D --match-full-lines + +;; Note that we do not yet support the -target-abi option for selecting a specific ABI. +;; See the comments in LoongArchELFStreamer.cpp. So here we only check the default behaviour. +;; After -target-abi is supported, we can add more tests. 
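+;; An editorial note, stated as an assumption rather than asserted by this test: the 0x43 flags value checked below is believed to decompose as 0x3 (EF_LOONGARCH_ABI_DOUBLE_FLOAT) | 0x40 (EF_LOONGARCH_OBJABI_V1), which llvm-readelf decodes as DOUBLE-FLOAT, OBJ-v1.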
+ +; LP64: Class: ELF64 +; ILP32: Class: ELF32 + +; ABI-D: Flags: 0x43, DOUBLE-FLOAT, OBJ-v1 + +define void @foo() { + ret void +} diff --git a/test/CodeGen/LoongArch/eliminateFI.ll b/test/CodeGen/LoongArch/eliminateFI.ll new file mode 100644 index 00000000..0272c95b --- /dev/null +++ b/test/CodeGen/LoongArch/eliminateFI.ll @@ -0,0 +1,106 @@ +; Check whether LoongArchSERegisterInfo::eliminateFI works correctly +; RUN: llc -march=loongarch64 -o - %s | FileCheck %s + +define signext i32 @ldptr_w_unaligned() { +; CHECK-LABEL: ldptr_w_unaligned: +; CHECK: # %bb.0: # %entry +entry: + %array = alloca [6000 x i8], align 1 + %arrayidx = getelementptr inbounds [6000 x i8], [6000 x i8]* %array, i64 0, i64 5001 + %0 = bitcast i8* %arrayidx to i32* +; the offset MUST be 0 +; CHECK: ldptr.w $r{{[0-9]+}}, $r{{[0-9]+}}, 0 + %1 = load i32, i32* %0, align 1 + ret i32 %1 +} + +define signext i32 @ldptr_w_aligned() { +; CHECK-LABEL: ldptr_w_aligned: +; CHECK: # %bb.0: # %entry +entry: + %array = alloca [6000 x i8], align 1 + %arrayidx = getelementptr inbounds [6000 x i8], [6000 x i8]* %array, i64 0, i64 5000 + %0 = bitcast i8* %arrayidx to i32* +; the offset may be non-zero, but MUST be 4-byte aligned +; CHECK: ldptr.w $r{{[0-9]+}}, $r{{[0-9]+}}, {{[0-9]+}} + %1 = load i32, i32* %0, align 1 + ret i32 %1 +} + +define signext i64 @ldptr_d_unaligned() { +; CHECK-LABEL: ldptr_d_unaligned: +; CHECK: # %bb.0: # %entry +entry: + %array = alloca [6000 x i8], align 1 + %arrayidx = getelementptr inbounds [6000 x i8], [6000 x i8]* %array, i64 0, i64 5001 + %0 = bitcast i8* %arrayidx to i64* +; the offset MUST be 0 +; CHECK: ldptr.d $r{{[0-9]+}}, $r{{[0-9]+}}, 0 + %1 = load i64, i64* %0, align 1 + ret i64 %1 +} + +define signext i64 @ldptr_d_aligned() { +; CHECK-LABEL: ldptr_d_aligned: +; CHECK: # %bb.0: # %entry +entry: + %array = alloca [6000 x i8], align 1 + %arrayidx = getelementptr inbounds [6000 x i8], [6000 x i8]* %array, i64 0, i64 5000 + %0 = bitcast i8* %arrayidx to i64* +; the offset may be non-zero, but MUST be 4-byte aligned +; CHECK: ldptr.d $r{{[0-9]+}}, $r{{[0-9]+}}, {{[0-9]+}} + %1 = load i64, i64* %0, align 1 + ret i64 %1 +} + +define void @stptr_w_unaligned(i32 signext %val) { +; CHECK-LABEL: stptr_w_unaligned: +; CHECK: # %bb.0: # %entry +entry: + %array = alloca [6000 x i8], align 1 + %arrayidx = getelementptr inbounds [6000 x i8], [6000 x i8]* %array, i64 0, i64 5001 + %0 = bitcast i8* %arrayidx to i32* +; the offset MUST be 0 +; CHECK: stptr.w $r{{[0-9]+}}, $r{{[0-9]+}}, 0 + store i32 %val, i32* %0, align 1 + ret void +} + +define void @stptr_w_aligned(i32 signext %val) { +; CHECK-LABEL: stptr_w_aligned: +; CHECK: # %bb.0: # %entry +entry: + %array = alloca [6000 x i8], align 1 + %arrayidx = getelementptr inbounds [6000 x i8], [6000 x i8]* %array, i64 0, i64 5000 + %0 = bitcast i8* %arrayidx to i32* +; the offset may be non-zero, but MUST be 4-byte aligned +; CHECK: stptr.w $r{{[0-9]+}}, $r{{[0-9]+}}, {{[0-9]+}} + store i32 %val, i32* %0, align 1 + ret void +} + +define void @stptr_d_unaligned(i64 %val) { +; CHECK-LABEL: stptr_d_unaligned: +; CHECK: # %bb.0: # %entry +entry: + %array = alloca [6000 x i8], align 1 + %arrayidx = getelementptr inbounds [6000 x i8], [6000 x i8]* %array, i64 0, i64 5001 + %0 = bitcast i8* %arrayidx to i64* +; the offset MUST be 0 +; CHECK: stptr.d $r{{[0-9]+}}, $r{{[0-9]+}}, 0 + store i64 %val, i64* %0, align 1 + ret void +} + +define void @stptr_d_aligned(i64 %val) { +; CHECK-LABEL: stptr_d_aligned: +; CHECK: # %bb.0: # %entry +entry: + %array = alloca [6000 x i8], align 1 + 
%arrayidx = getelementptr inbounds [6000 x i8], [6000 x i8]* %array, i64 0, i64 5000 + %0 = bitcast i8* %arrayidx to i64* +; the offset may be non-zero, but MUST be 4-byte aligned +; CHECK: stptr.d $r{{[0-9]+}}, $r{{[0-9]+}}, {{[0-9]+}} + store i64 %val, i64* %0, align 1 + ret void +} diff --git a/test/CodeGen/LoongArch/emergency-spill-slot.ll b/test/CodeGen/LoongArch/emergency-spill-slot.ll new file mode 100644 index 00000000..80fa7a85 --- /dev/null +++ b/test/CodeGen/LoongArch/emergency-spill-slot.ll @@ -0,0 +1,103 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 -O0 < %s | FileCheck %s + +@var = external global i32 + +define void @func() { +; CHECK-LABEL: func: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -2048 +; CHECK-NEXT: addi.d $sp, $sp, -2048 +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 4112 +; CHECK-NEXT: lu12i.w $r5, var +; CHECK-NEXT: ori $r5, $r5, var +; CHECK-NEXT: lu32i.d $r5, var +; CHECK-NEXT: lu52i.d $r5, $r5, var +; CHECK-NEXT: ld.w $r20, $r5, 0 +; CHECK-NEXT: ld.w $r19, $r5, 0 +; CHECK-NEXT: ld.w $r18, $r5, 0 +; CHECK-NEXT: ld.w $r17, $r5, 0 +; CHECK-NEXT: ld.w $r16, $r5, 0 +; CHECK-NEXT: ld.w $r15, $r5, 0 +; CHECK-NEXT: ld.w $r14, $r5, 0 +; CHECK-NEXT: ld.w $r13, $r5, 0 +; CHECK-NEXT: ld.w $r12, $r5, 0 +; CHECK-NEXT: ld.w $r11, $r5, 0 +; CHECK-NEXT: ld.w $r10, $r5, 0 +; CHECK-NEXT: ld.w $r9, $r5, 0 +; CHECK-NEXT: ld.w $r8, $r5, 0 +; CHECK-NEXT: ld.w $r7, $r5, 0 +; CHECK-NEXT: ld.w $r6, $r5, 0 +; CHECK-NEXT: ld.w $r4, $r5, 0 +; CHECK-NEXT: st.d $r23, $sp, 0 +; CHECK-NEXT: lu12i.w $r23, 1 +; CHECK-NEXT: ori $r23, $r23, 12 +; CHECK-NEXT: add.d $r23, $sp, $r23 +; CHECK-NEXT: st.w $r20, $r23, 0 +; CHECK-NEXT: ld.d $r23, $sp, 0 +; CHECK-NEXT: st.w $r20, $r5, 0 +; CHECK-NEXT: st.w $r19, $r5, 0 +; CHECK-NEXT: st.w $r18, $r5, 0 +; CHECK-NEXT: st.w $r17, $r5, 0 +; CHECK-NEXT: st.w $r16, $r5, 0 +; CHECK-NEXT: st.w $r15, $r5, 0 +; CHECK-NEXT: st.w $r14, $r5, 0 +; CHECK-NEXT: st.w $r13, $r5, 0 +; CHECK-NEXT: st.w $r12, $r5, 0 +; CHECK-NEXT: st.w $r11, $r5, 0 +; CHECK-NEXT: st.w $r10, $r5, 0 +; CHECK-NEXT: st.w $r9, $r5, 0 +; CHECK-NEXT: st.w $r8, $r5, 0 +; CHECK-NEXT: st.w $r7, $r5, 0 +; CHECK-NEXT: st.w $r6, $r5, 0 +; CHECK-NEXT: st.w $r4, $r5, 0 +; CHECK-NEXT: lu12i.w $r4, 1 +; CHECK-NEXT: ori $r4, $r4, 16 +; CHECK-NEXT: add.d $sp, $sp, $r4 +; CHECK-NEXT: jr $ra + %space = alloca i32, align 4 + %stackspace = alloca [1024 x i32], align 4 + + ;; Load values to increase register pressure. + %v0 = load volatile i32, i32* @var + %v1 = load volatile i32, i32* @var + %v2 = load volatile i32, i32* @var + %v3 = load volatile i32, i32* @var + %v4 = load volatile i32, i32* @var + %v5 = load volatile i32, i32* @var + %v6 = load volatile i32, i32* @var + %v7 = load volatile i32, i32* @var + %v8 = load volatile i32, i32* @var + %v9 = load volatile i32, i32* @var + %v10 = load volatile i32, i32* @var + %v11 = load volatile i32, i32* @var + %v12 = load volatile i32, i32* @var + %v13 = load volatile i32, i32* @var + %v14 = load volatile i32, i32* @var + %v15 = load volatile i32, i32* @var + + ;; Computing a stack-relative value needs an additional register. + ;; We should get an emergency spill/reload for this. + store volatile i32 %v0, i32* %space + + ;; Store values so they are used. 
+ store volatile i32 %v0, i32* @var + store volatile i32 %v1, i32* @var + store volatile i32 %v2, i32* @var + store volatile i32 %v3, i32* @var + store volatile i32 %v4, i32* @var + store volatile i32 %v5, i32* @var + store volatile i32 %v6, i32* @var + store volatile i32 %v7, i32* @var + store volatile i32 %v8, i32* @var + store volatile i32 %v9, i32* @var + store volatile i32 %v10, i32* @var + store volatile i32 %v11, i32* @var + store volatile i32 %v12, i32* @var + store volatile i32 %v13, i32* @var + store volatile i32 %v14, i32* @var + store volatile i32 %v15, i32* @var + + ret void +} diff --git a/test/CodeGen/LoongArch/fcopysign.ll b/test/CodeGen/LoongArch/fcopysign.ll new file mode 100644 index 00000000..7d8d6a9b --- /dev/null +++ b/test/CodeGen/LoongArch/fcopysign.ll @@ -0,0 +1,17 @@ +; RUN: llc -march=loongarch64 -mattr=+d -o - %s | FileCheck %s + +define float @fcopysign_s(float %a, float %b) { +; CHECK-LABEL: fcopysign_s: +; CHECK: fcopysign.s $f0, $f0, $f1 + %ret = call float @llvm.copysign.f32(float %a, float %b) + ret float %ret +} +declare float @llvm.copysign.f32(float %a, float %b) + +define double @fcopysign_d(double %a, double %b) { +; CHECK-LABEL: fcopysign_d: +; CHECK: fcopysign.d $f0, $f0, $f1 + %ret = call double @llvm.copysign.f64(double %a, double %b) + ret double %ret +} +declare double @llvm.copysign.f64(double %a, double %b) diff --git a/test/CodeGen/LoongArch/frame-info.ll b/test/CodeGen/LoongArch/frame-info.ll new file mode 100644 index 00000000..eb4fc69f --- /dev/null +++ b/test/CodeGen/LoongArch/frame-info.ll @@ -0,0 +1,132 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -relocation-model=pic -mtriple=loongarch64 -frame-pointer=all < %s | FileCheck %s + +define void @trivial() { +; CHECK-LABEL: trivial: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: st.d $r22, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset 22, -8 +; CHECK-NEXT: addi.d $r22, $sp, 16 +; CHECK-NEXT: .cfi_def_cfa 22, 0 +; CHECK-NEXT: ld.d $r22, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: jr $ra + ret void +} + +define void @stack_alloc(i32 signext %size) { +; CHECK-LABEL: stack_alloc: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -32 +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r22, $sp, 16 # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset 1, -8 +; CHECK-NEXT: .cfi_offset 22, -16 +; CHECK-NEXT: addi.d $r22, $sp, 32 +; CHECK-NEXT: .cfi_def_cfa 22, 0 +; CHECK-NEXT: addi.w $r5, $zero, -16 +; CHECK-NEXT: lu32i.d $r5, 1 +; CHECK-NEXT: bstrpick.d $r4, $r4, 31, 0 +; CHECK-NEXT: addi.d $r4, $r4, 15 +; CHECK-NEXT: and $r4, $r4, $r5 +; CHECK-NEXT: sub.d $r4, $sp, $r4 +; CHECK-NEXT: move $sp, $r4 +; CHECK-NEXT: bl callee_with_args +; CHECK-NEXT: addi.d $sp, $r22, -32 +; CHECK-NEXT: ld.d $r22, $sp, 16 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 32 +; CHECK-NEXT: jr $ra +entry: + %0 = alloca i8, i32 %size, align 16 + call void @callee_with_args(i8* nonnull %0) + ret void +} + +define void @branch_and_tail_call(i1 %a) { +; CHECK-LABEL: branch_and_tail_call: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r4, $r4, 0 +; CHECK-NEXT: andi $r4, $r4, 1 +; CHECK-NEXT: beqz $r4, .LBB2_2 +; CHECK-NEXT: # %bb.1: # %blue_pill +; CHECK-NEXT: b callee1 +; CHECK-NEXT: .LBB2_2: # %red_pill +; CHECK-NEXT: addi.d $sp, $sp, -16 +; 
CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r22, $sp, 0 # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset 1, -8 +; CHECK-NEXT: .cfi_offset 22, -16 +; CHECK-NEXT: addi.d $r22, $sp, 16 +; CHECK-NEXT: .cfi_def_cfa 22, 0 +; CHECK-NEXT: bl callee2 +; CHECK-NEXT: ld.d $r22, $sp, 0 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: jr $ra + br i1 %a, label %blue_pill, label %red_pill +blue_pill: + tail call void @callee1() + ret void +red_pill: + call void @callee2() + ret void +} + +define void @big_frame() { +; CHECK-LABEL: big_frame: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -2032 +; CHECK-NEXT: .cfi_def_cfa_offset 2032 +; CHECK-NEXT: st.d $ra, $sp, 2024 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r22, $sp, 2016 # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset 1, -8 +; CHECK-NEXT: .cfi_offset 22, -16 +; CHECK-NEXT: addi.d $r22, $sp, 2032 +; CHECK-NEXT: .cfi_def_cfa 22, 0 +; CHECK-NEXT: addi.d $sp, $sp, -48 +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: ori $r4, $r4, 2016 +; CHECK-NEXT: add.d $r4, $r22, $r4 +; CHECK-NEXT: addi.d $r4, $r4, 0 +; CHECK-NEXT: bl callee_with_args +; CHECK-NEXT: addi.d $sp, $sp, 48 +; CHECK-NEXT: ld.d $r22, $sp, 2016 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 2024 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 2032 +; CHECK-NEXT: jr $ra +entry: + %0 = alloca i8, i32 2048, align 16 + call void @callee_with_args(i8* nonnull %0) + ret void +} + +define void @varargs_frame(i32 %i, ...) { +; CHECK-LABEL: varargs_frame: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -80 +; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: st.d $r22, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset 22, -72 +; CHECK-NEXT: addi.d $r22, $sp, 16 +; CHECK-NEXT: .cfi_def_cfa 22, 64 +; CHECK-NEXT: st.d $r11, $r22, 56 +; CHECK-NEXT: st.d $r10, $r22, 48 +; CHECK-NEXT: st.d $r9, $r22, 40 +; CHECK-NEXT: st.d $r8, $r22, 32 +; CHECK-NEXT: st.d $r7, $r22, 24 +; CHECK-NEXT: st.d $r6, $r22, 16 +; CHECK-NEXT: st.d $r5, $r22, 8 +; CHECK-NEXT: ld.d $r22, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 80 +; CHECK-NEXT: jr $ra + ret void +} + +declare void @callee1() +declare void @callee2() +declare void @callee_with_args(i8*) diff --git a/test/CodeGen/LoongArch/fsel.ll b/test/CodeGen/LoongArch/fsel.ll new file mode 100644 index 00000000..b7b0f835 --- /dev/null +++ b/test/CodeGen/LoongArch/fsel.ll @@ -0,0 +1,47 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 -mattr=+d -o - %s | FileCheck %s + + +define double @olt_f64(double %a, double %b) { +; CHECK-LABEL: olt_f64: +; CHECK: # %bb.0: +; CHECK-NEXT: fcmp.clt.d $fcc0, $f0, $f1 +; CHECK-NEXT: fsel $f0, $f1, $f0, $fcc0 +; CHECK-NEXT: jr $ra + %cond = fcmp olt double %a, %b + %ret = select i1 %cond, double %a, double %b + ret double %ret +} + +define double @ogt_f64(double %a, double %b) { +; CHECK-LABEL: ogt_f64: +; CHECK: # %bb.0: +; CHECK-NEXT: fcmp.cule.d $fcc0, $f0, $f1 +; CHECK-NEXT: fsel $f0, $f0, $f1, $fcc0 +; CHECK-NEXT: jr $ra + %cond = fcmp ogt double %a, %b + %ret = select i1 %cond, double %a, double %b + ret double %ret +} + +define float @olt_f32(float %a, float %b) { +; CHECK-LABEL: olt_f32: +; CHECK: # %bb.0: +; CHECK-NEXT: fcmp.clt.s $fcc0, $f0, $f1 +; CHECK-NEXT: fsel $f0, $f1, $f0, $fcc0 +; CHECK-NEXT: jr $ra + %cond = fcmp olt float %a, %b + %ret = select i1 %cond, float %a, float 
%b + ret float %ret +} + +define float @ogt_f32(float %a, float %b) { +; CHECK-LABEL: ogt_f32: +; CHECK: # %bb.0: +; CHECK-NEXT: fcmp.cule.s $fcc0, $f0, $f1 +; CHECK-NEXT: fsel $f0, $f0, $f1, $fcc0 +; CHECK-NEXT: jr $ra + %cond = fcmp ogt float %a, %b + %ret = select i1 %cond, float %a, float %b + ret float %ret +} diff --git a/test/CodeGen/LoongArch/immediate.ll b/test/CodeGen/LoongArch/immediate.ll new file mode 100644 index 00000000..1de3ef0f --- /dev/null +++ b/test/CodeGen/LoongArch/immediate.ll @@ -0,0 +1,2542 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=loongarch64 < %s | FileCheck %s +define i64 @li0000000000000000() { +; CHECK-LABEL: li0000000000000000: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $r4, $zero, 0 +; CHECK-NEXT: jr $ra + ret i64 0 +} + +define i64 @li00000000000007ff() { +; CHECK-LABEL: li00000000000007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $r4, $zero, 2047 +; CHECK-NEXT: jr $ra + ret i64 2047 +} + +define i64 @li0000000000000800() { +; CHECK-LABEL: li0000000000000800: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2048 +; CHECK-NEXT: jr $ra + ret i64 2048 +} + +define i64 @li0000000000000fff() { +; CHECK-LABEL: li0000000000000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 4095 +; CHECK-NEXT: jr $ra + ret i64 4095 +} + +define i64 @li000000007ffff000() { +; CHECK-LABEL: li000000007ffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: jr $ra + ret i64 2147479552 +} + +define i64 @li000000007ffff7ff() { +; CHECK-LABEL: li000000007ffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 2147481599 +} + +define i64 @li000000007ffff800() { +; CHECK-LABEL: li000000007ffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: jr $ra + ret i64 2147481600 +} + +define i64 @li000000007fffffff() { +; CHECK-LABEL: li000000007fffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: jr $ra + ret i64 2147483647 +} + +define i64 @li0000000080000000() { +; CHECK-LABEL: li0000000080000000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: jr $ra + ret i64 2147483648 +} + +define i64 @li00000000800007ff() { +; CHECK-LABEL: li00000000800007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: jr $ra + ret i64 2147485695 +} + +define i64 @li0000000080000800() { +; CHECK-LABEL: li0000000080000800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: jr $ra + ret i64 2147485696 +} + +define i64 @li0000000080000fff() { +; CHECK-LABEL: li0000000080000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: jr $ra + ret i64 2147487743 +} + +define i64 @li00000000fffff000() { +; CHECK-LABEL: li00000000fffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: jr $ra + ret i64 4294963200 +} + +define i64 @li00000000fffff7ff() { +; CHECK-LABEL: li00000000fffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: jr $ra + ret i64 4294965247 +} + +define i64 @li00000000fffff800() { +; CHECK-LABEL: 
li00000000fffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -2048 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: jr $ra + ret i64 4294965248 +} + +define i64 @li00000000ffffffff() { +; CHECK-LABEL: li00000000ffffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -1 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: jr $ra + ret i64 4294967295 +} + +define i64 @li0007ffff00000000() { +; CHECK-LABEL: li0007ffff00000000: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 0 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: jr $ra + ret i64 2251795518717952 +} + +define i64 @li0007ffff000007ff() { +; CHECK-LABEL: li0007ffff000007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2047 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: jr $ra + ret i64 2251795518719999 +} + +define i64 @li0007ffff00000800() { +; CHECK-LABEL: li0007ffff00000800: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2048 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: jr $ra + ret i64 2251795518720000 +} + +define i64 @li0007ffff00000fff() { +; CHECK-LABEL: li0007ffff00000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 4095 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: jr $ra + ret i64 2251795518722047 +} + +define i64 @li0007ffff7ffff000() { +; CHECK-LABEL: li0007ffff7ffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: jr $ra + ret i64 2251797666197504 +} + +define i64 @li0007ffff7ffff7ff() { +; CHECK-LABEL: li0007ffff7ffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: jr $ra + ret i64 2251797666199551 +} + +define i64 @li0007ffff7ffff800() { +; CHECK-LABEL: li0007ffff7ffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: jr $ra + ret i64 2251797666199552 +} + +define i64 @li0007ffff7fffffff() { +; CHECK-LABEL: li0007ffff7fffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: jr $ra + ret i64 2251797666201599 +} + +define i64 @li0007ffff80000000() { +; CHECK-LABEL: li0007ffff80000000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: jr $ra + ret i64 2251797666201600 +} + +define i64 @li0007ffff800007ff() { +; CHECK-LABEL: li0007ffff800007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: jr $ra + ret i64 2251797666203647 +} + +define i64 @li0007ffff80000800() { +; CHECK-LABEL: li0007ffff80000800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: jr $ra + ret i64 2251797666203648 +} + +define i64 @li0007ffff80000fff() { +; CHECK-LABEL: li0007ffff80000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: jr $ra + ret i64 2251797666205695 +} + +define i64 @li0007fffffffff000() { +; CHECK-LABEL: li0007fffffffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: jr $ra + ret i64 2251799813681152 +} + +define i64 @li0007fffffffff7ff() { +; CHECK-LABEL: li0007fffffffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, 524287 
+; CHECK-NEXT: jr $ra + ret i64 2251799813683199 +} + +define i64 @li0007fffffffff800() { +; CHECK-LABEL: li0007fffffffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -2048 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: jr $ra + ret i64 2251799813683200 +} + +define i64 @li0007ffffffffffff() { +; CHECK-LABEL: li0007ffffffffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -1 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: jr $ra + ret i64 2251799813685247 +} + +define i64 @li0008000000000000() { +; CHECK-LABEL: li0008000000000000: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 0 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: jr $ra + ret i64 2251799813685248 +} + +define i64 @li00080000000007ff() { +; CHECK-LABEL: li00080000000007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2047 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: jr $ra + ret i64 2251799813687295 +} + +define i64 @li0008000000000800() { +; CHECK-LABEL: li0008000000000800: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2048 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: jr $ra + ret i64 2251799813687296 +} + +define i64 @li0008000000000fff() { +; CHECK-LABEL: li0008000000000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 4095 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: jr $ra + ret i64 2251799813689343 +} + +define i64 @li000800007ffff000() { +; CHECK-LABEL: li000800007ffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: jr $ra + ret i64 2251801961164800 +} + +define i64 @li000800007ffff7ff() { +; CHECK-LABEL: li000800007ffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: jr $ra + ret i64 2251801961166847 +} + +define i64 @li000800007ffff800() { +; CHECK-LABEL: li000800007ffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: jr $ra + ret i64 2251801961166848 +} + +define i64 @li000800007fffffff() { +; CHECK-LABEL: li000800007fffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: jr $ra + ret i64 2251801961168895 +} + +define i64 @li0008000080000000() { +; CHECK-LABEL: li0008000080000000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: jr $ra + ret i64 2251801961168896 +} + +define i64 @li00080000800007ff() { +; CHECK-LABEL: li00080000800007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: jr $ra + ret i64 2251801961170943 +} + +define i64 @li0008000080000800() { +; CHECK-LABEL: li0008000080000800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: jr $ra + ret i64 2251801961170944 +} + +define i64 @li0008000080000fff() { +; CHECK-LABEL: li0008000080000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 
+; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: jr $ra + ret i64 2251801961172991 +} + +define i64 @li00080000fffff000() { +; CHECK-LABEL: li00080000fffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: jr $ra + ret i64 2251804108648448 +} + +define i64 @li00080000fffff7ff() { +; CHECK-LABEL: li00080000fffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: jr $ra + ret i64 2251804108650495 +} + +define i64 @li00080000fffff800() { +; CHECK-LABEL: li00080000fffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -2048 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: jr $ra + ret i64 2251804108650496 +} + +define i64 @li00080000ffffffff() { +; CHECK-LABEL: li00080000ffffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -1 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: jr $ra + ret i64 2251804108652543 +} + +define i64 @li000fffff00000000() { +; CHECK-LABEL: li000fffff00000000: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 0 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: jr $ra + ret i64 4503595332403200 +} + +define i64 @li000fffff000007ff() { +; CHECK-LABEL: li000fffff000007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2047 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: jr $ra + ret i64 4503595332405247 +} + +define i64 @li000fffff00000800() { +; CHECK-LABEL: li000fffff00000800: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2048 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: jr $ra + ret i64 4503595332405248 +} + +define i64 @li000fffff00000fff() { +; CHECK-LABEL: li000fffff00000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 4095 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: jr $ra + ret i64 4503595332407295 +} + +define i64 @li000fffff7ffff000() { +; CHECK-LABEL: li000fffff7ffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: jr $ra + ret i64 4503597479882752 +} + +define i64 @li000fffff7ffff7ff() { +; CHECK-LABEL: li000fffff7ffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: jr $ra + ret i64 4503597479884799 +} + +define i64 @li000fffff7ffff800() { +; CHECK-LABEL: li000fffff7ffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: jr $ra + ret i64 4503597479884800 +} + +define i64 @li000fffff7fffffff() { +; CHECK-LABEL: li000fffff7fffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: jr $ra + ret i64 4503597479886847 +} + +define i64 @li000fffff80000000() { +; CHECK-LABEL: li000fffff80000000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: jr $ra + ret i64 4503597479886848 +} + +define i64 @li000fffff800007ff() { +; CHECK-LABEL: li000fffff800007ff: 
+; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: jr $ra + ret i64 4503597479888895 +} + +define i64 @li000fffff80000800() { +; CHECK-LABEL: li000fffff80000800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: jr $ra + ret i64 4503597479888896 +} + +define i64 @li000fffff80000fff() { +; CHECK-LABEL: li000fffff80000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: jr $ra + ret i64 4503597479890943 +} + +define i64 @li000ffffffffff000() { +; CHECK-LABEL: li000ffffffffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: jr $ra + ret i64 4503599627366400 +} + +define i64 @li000ffffffffff7ff() { +; CHECK-LABEL: li000ffffffffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: jr $ra + ret i64 4503599627368447 +} + +define i64 @li000ffffffffff800() { +; CHECK-LABEL: li000ffffffffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -2048 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: jr $ra + ret i64 4503599627368448 +} + +define i64 @li000fffffffffffff() { +; CHECK-LABEL: li000fffffffffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: jr $ra + ret i64 4503599627370495 +} + +define i64 @li7ff0000000000000() { +; CHECK-LABEL: li7ff0000000000000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu52i.d $r4, $zero, 2047 +; CHECK-NEXT: jr $ra + ret i64 9218868437227405312 +} + +define i64 @li7ff00000000007ff() { +; CHECK-LABEL: li7ff00000000007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2047 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9218868437227407359 +} + +define i64 @li7ff0000000000800() { +; CHECK-LABEL: li7ff0000000000800: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2048 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9218868437227407360 +} + +define i64 @li7ff0000000000fff() { +; CHECK-LABEL: li7ff0000000000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 4095 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9218868437227409407 +} + +define i64 @li7ff000007ffff000() { +; CHECK-LABEL: li7ff000007ffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9218868439374884864 +} + +define i64 @li7ff000007ffff7ff() { +; CHECK-LABEL: li7ff000007ffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9218868439374886911 +} + +define i64 @li7ff000007ffff800() { +; CHECK-LABEL: li7ff000007ffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9218868439374886912 +} + +define i64 @li7ff000007fffffff() { +; CHECK-LABEL: li7ff000007fffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9218868439374888959 +} + +define i64 @li7ff0000080000000() { +; CHECK-LABEL: li7ff0000080000000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: 
lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9218868439374888960 +} + +define i64 @li7ff00000800007ff() { +; CHECK-LABEL: li7ff00000800007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9218868439374891007 +} + +define i64 @li7ff0000080000800() { +; CHECK-LABEL: li7ff0000080000800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9218868439374891008 +} + +define i64 @li7ff0000080000fff() { +; CHECK-LABEL: li7ff0000080000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9218868439374893055 +} + +define i64 @li7ff00000fffff000() { +; CHECK-LABEL: li7ff00000fffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9218868441522368512 +} + +define i64 @li7ff00000fffff7ff() { +; CHECK-LABEL: li7ff00000fffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9218868441522370559 +} + +define i64 @li7ff00000fffff800() { +; CHECK-LABEL: li7ff00000fffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -2048 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9218868441522370560 +} + +define i64 @li7ff00000ffffffff() { +; CHECK-LABEL: li7ff00000ffffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -1 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9218868441522372607 +} + +define i64 @li7ff7ffff00000000() { +; CHECK-LABEL: li7ff7ffff00000000: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 0 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120232746123264 +} + +define i64 @li7ff7ffff000007ff() { +; CHECK-LABEL: li7ff7ffff000007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2047 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120232746125311 +} + +define i64 @li7ff7ffff00000800() { +; CHECK-LABEL: li7ff7ffff00000800: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2048 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120232746125312 +} + +define i64 @li7ff7ffff00000fff() { +; CHECK-LABEL: li7ff7ffff00000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 4095 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120232746127359 +} + +define i64 @li7ff7ffff7ffff000() { +; CHECK-LABEL: li7ff7ffff7ffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120234893602816 +} + +define i64 @li7ff7ffff7ffff7ff() { +; CHECK-LABEL: li7ff7ffff7ffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 
9221120234893604863 +} + +define i64 @li7ff7ffff7ffff800() { +; CHECK-LABEL: li7ff7ffff7ffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120234893604864 +} + +define i64 @li7ff7ffff7fffffff() { +; CHECK-LABEL: li7ff7ffff7fffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120234893606911 +} + +define i64 @li7ff7ffff80000000() { +; CHECK-LABEL: li7ff7ffff80000000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120234893606912 +} + +define i64 @li7ff7ffff800007ff() { +; CHECK-LABEL: li7ff7ffff800007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120234893608959 +} + +define i64 @li7ff7ffff80000800() { +; CHECK-LABEL: li7ff7ffff80000800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120234893608960 +} + +define i64 @li7ff7ffff80000fff() { +; CHECK-LABEL: li7ff7ffff80000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120234893611007 +} + +define i64 @li7ff7fffffffff000() { +; CHECK-LABEL: li7ff7fffffffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120237041086464 +} + +define i64 @li7ff7fffffffff7ff() { +; CHECK-LABEL: li7ff7fffffffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120237041088511 +} + +define i64 @li7ff7fffffffff800() { +; CHECK-LABEL: li7ff7fffffffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -2048 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120237041088512 +} + +define i64 @li7ff7ffffffffffff() { +; CHECK-LABEL: li7ff7ffffffffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -1 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120237041090559 +} + +define i64 @li7ff8000000000000() { +; CHECK-LABEL: li7ff8000000000000: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 0 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120237041090560 +} + +define i64 @li7ff80000000007ff() { +; CHECK-LABEL: li7ff80000000007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2047 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120237041092607 +} + +define i64 @li7ff8000000000800() { +; CHECK-LABEL: li7ff8000000000800: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2048 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 
9221120237041092608 +} + +define i64 @li7ff8000000000fff() { +; CHECK-LABEL: li7ff8000000000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 4095 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120237041094655 +} + +define i64 @li7ff800007ffff000() { +; CHECK-LABEL: li7ff800007ffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120239188570112 +} + +define i64 @li7ff800007ffff7ff() { +; CHECK-LABEL: li7ff800007ffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120239188572159 +} + +define i64 @li7ff800007ffff800() { +; CHECK-LABEL: li7ff800007ffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120239188572160 +} + +define i64 @li7ff800007fffffff() { +; CHECK-LABEL: li7ff800007fffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120239188574207 +} + +define i64 @li7ff8000080000000() { +; CHECK-LABEL: li7ff8000080000000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120239188574208 +} + +define i64 @li7ff80000800007ff() { +; CHECK-LABEL: li7ff80000800007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120239188576255 +} + +define i64 @li7ff8000080000800() { +; CHECK-LABEL: li7ff8000080000800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120239188576256 +} + +define i64 @li7ff8000080000fff() { +; CHECK-LABEL: li7ff8000080000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120239188578303 +} + +define i64 @li7ff80000fffff000() { +; CHECK-LABEL: li7ff80000fffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120241336053760 +} + +define i64 @li7ff80000fffff7ff() { +; CHECK-LABEL: li7ff80000fffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120241336055807 +} + +define i64 @li7ff80000fffff800() { +; CHECK-LABEL: li7ff80000fffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -2048 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120241336055808 +} + +define i64 @li7ff80000ffffffff() { +; CHECK-LABEL: li7ff80000ffffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -1 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; 
CHECK-NEXT: jr $ra + ret i64 9221120241336057855 +} + +define i64 @li7fffffff00000000() { +; CHECK-LABEL: li7fffffff00000000: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 0 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9223372032559808512 +} + +define i64 @li7fffffff000007ff() { +; CHECK-LABEL: li7fffffff000007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2047 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9223372032559810559 +} + +define i64 @li7fffffff00000800() { +; CHECK-LABEL: li7fffffff00000800: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2048 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9223372032559810560 +} + +define i64 @li7fffffff00000fff() { +; CHECK-LABEL: li7fffffff00000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 4095 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9223372032559812607 +} + +define i64 @li7fffffff7ffff000() { +; CHECK-LABEL: li7fffffff7ffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9223372034707288064 +} + +define i64 @li7fffffff7ffff7ff() { +; CHECK-LABEL: li7fffffff7ffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9223372034707290111 +} + +define i64 @li7fffffff7ffff800() { +; CHECK-LABEL: li7fffffff7ffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9223372034707290112 +} + +define i64 @li7fffffff7fffffff() { +; CHECK-LABEL: li7fffffff7fffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9223372034707292159 +} + +define i64 @li7fffffff80000000() { +; CHECK-LABEL: li7fffffff80000000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9223372034707292160 +} + +define i64 @li7fffffff800007ff() { +; CHECK-LABEL: li7fffffff800007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9223372034707294207 +} + +define i64 @li7fffffff80000800() { +; CHECK-LABEL: li7fffffff80000800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9223372034707294208 +} + +define i64 @li7fffffff80000fff() { +; CHECK-LABEL: li7fffffff80000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9223372034707296255 +} + +define i64 @li7ffffffffffff000() { +; CHECK-LABEL: li7ffffffffffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9223372036854771712 +} + +define i64 @li7ffffffffffff7ff() { +; CHECK-LABEL: li7ffffffffffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: 
lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9223372036854773759 +} + +define i64 @li7ffffffffffff800() { +; CHECK-LABEL: li7ffffffffffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -2048 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9223372036854773760 +} + +define i64 @li7fffffffffffffff() { +; CHECK-LABEL: li7fffffffffffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9223372036854775807 +} + +define i64 @li8000000000000000() { +; CHECK-LABEL: li8000000000000000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu52i.d $r4, $zero, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9223372036854775808 +} + +define i64 @li80000000000007ff() { +; CHECK-LABEL: li80000000000007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2047 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9223372036854773761 +} + +define i64 @li8000000000000800() { +; CHECK-LABEL: li8000000000000800: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2048 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9223372036854773760 +} + +define i64 @li8000000000000fff() { +; CHECK-LABEL: li8000000000000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 4095 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9223372036854771713 +} + +define i64 @li800000007ffff000() { +; CHECK-LABEL: li800000007ffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9223372034707296256 +} + +define i64 @li800000007ffff7ff() { +; CHECK-LABEL: li800000007ffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9223372034707294209 +} + +define i64 @li800000007ffff800() { +; CHECK-LABEL: li800000007ffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9223372034707294208 +} + +define i64 @li800000007fffffff() { +; CHECK-LABEL: li800000007fffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9223372034707292161 +} + +define i64 @li8000000080000000() { +; CHECK-LABEL: li8000000080000000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9223372034707292160 +} + +define i64 @li80000000800007ff() { +; CHECK-LABEL: li80000000800007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9223372034707290113 +} + +define i64 @li8000000080000800() { +; CHECK-LABEL: li8000000080000800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9223372034707290112 +} + +define i64 @li8000000080000fff() { +; CHECK-LABEL: li8000000080000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9223372034707288065 +} + +define i64 @li80000000fffff000() { +; CHECK-LABEL: 
li80000000fffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9223372032559812608 +} + +define i64 @li80000000fffff7ff() { +; CHECK-LABEL: li80000000fffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9223372032559810561 +} + +define i64 @li80000000fffff800() { +; CHECK-LABEL: li80000000fffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -2048 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9223372032559810560 +} + +define i64 @li80000000ffffffff() { +; CHECK-LABEL: li80000000ffffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -1 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9223372032559808513 +} + +define i64 @li8007ffff00000000() { +; CHECK-LABEL: li8007ffff00000000: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 0 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120241336057856 +} + +define i64 @li8007ffff000007ff() { +; CHECK-LABEL: li8007ffff000007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2047 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120241336055809 +} + +define i64 @li8007ffff00000800() { +; CHECK-LABEL: li8007ffff00000800: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2048 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120241336055808 +} + +define i64 @li8007ffff00000fff() { +; CHECK-LABEL: li8007ffff00000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 4095 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120241336053761 +} + +define i64 @li8007ffff7ffff000() { +; CHECK-LABEL: li8007ffff7ffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120239188578304 +} + +define i64 @li8007ffff7ffff7ff() { +; CHECK-LABEL: li8007ffff7ffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120239188576257 +} + +define i64 @li8007ffff7ffff800() { +; CHECK-LABEL: li8007ffff7ffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120239188576256 +} + +define i64 @li8007ffff7fffffff() { +; CHECK-LABEL: li8007ffff7fffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120239188574209 +} + +define i64 @li8007ffff80000000() { +; CHECK-LABEL: li8007ffff80000000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120239188574208 +} + +define i64 @li8007ffff800007ff() { +; CHECK-LABEL: li8007ffff800007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; 
CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120239188572161 +} + +define i64 @li8007ffff80000800() { +; CHECK-LABEL: li8007ffff80000800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120239188572160 +} + +define i64 @li8007ffff80000fff() { +; CHECK-LABEL: li8007ffff80000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120239188570113 +} + +define i64 @li8007fffffffff000() { +; CHECK-LABEL: li8007fffffffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120237041094656 +} + +define i64 @li8007fffffffff7ff() { +; CHECK-LABEL: li8007fffffffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120237041092609 +} + +define i64 @li8007fffffffff800() { +; CHECK-LABEL: li8007fffffffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -2048 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120237041092608 +} + +define i64 @li8007ffffffffffff() { +; CHECK-LABEL: li8007ffffffffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -1 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120237041090561 +} + +define i64 @li8008000000000000() { +; CHECK-LABEL: li8008000000000000: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 0 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120237041090560 +} + +define i64 @li80080000000007ff() { +; CHECK-LABEL: li80080000000007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2047 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120237041088513 +} + +define i64 @li8008000000000800() { +; CHECK-LABEL: li8008000000000800: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2048 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120237041088512 +} + +define i64 @li8008000000000fff() { +; CHECK-LABEL: li8008000000000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 4095 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120237041086465 +} + +define i64 @li800800007ffff000() { +; CHECK-LABEL: li800800007ffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120234893611008 +} + +define i64 @li800800007ffff7ff() { +; CHECK-LABEL: li800800007ffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120234893608961 +} + +define i64 @li800800007ffff800() { +; CHECK-LABEL: li800800007ffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, 
$r4, 2048 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120234893608960 +} + +define i64 @li800800007fffffff() { +; CHECK-LABEL: li800800007fffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120234893606913 +} + +define i64 @li8008000080000000() { +; CHECK-LABEL: li8008000080000000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120234893606912 +} + +define i64 @li80080000800007ff() { +; CHECK-LABEL: li80080000800007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120234893604865 +} + +define i64 @li8008000080000800() { +; CHECK-LABEL: li8008000080000800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120234893604864 +} + +define i64 @li8008000080000fff() { +; CHECK-LABEL: li8008000080000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120234893602817 +} + +define i64 @li80080000fffff000() { +; CHECK-LABEL: li80080000fffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120232746127360 +} + +define i64 @li80080000fffff7ff() { +; CHECK-LABEL: li80080000fffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120232746125313 +} + +define i64 @li80080000fffff800() { +; CHECK-LABEL: li80080000fffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -2048 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120232746125312 +} + +define i64 @li80080000ffffffff() { +; CHECK-LABEL: li80080000ffffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -1 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120232746123265 +} + +define i64 @li800fffff00000000() { +; CHECK-LABEL: li800fffff00000000: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 0 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9218868441522372608 +} + +define i64 @li800fffff000007ff() { +; CHECK-LABEL: li800fffff000007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2047 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9218868441522370561 +} + +define i64 @li800fffff00000800() { +; CHECK-LABEL: li800fffff00000800: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2048 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9218868441522370560 +} + +define i64 @li800fffff00000fff() { +; CHECK-LABEL: li800fffff00000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 4095 +; CHECK-NEXT: 
lu32i.d $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9218868441522368513 +} + +define i64 @li800fffff7ffff000() { +; CHECK-LABEL: li800fffff7ffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9218868439374893056 +} + +define i64 @li800fffff7ffff7ff() { +; CHECK-LABEL: li800fffff7ffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9218868439374891009 +} + +define i64 @li800fffff7ffff800() { +; CHECK-LABEL: li800fffff7ffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9218868439374891008 +} + +define i64 @li800fffff7fffffff() { +; CHECK-LABEL: li800fffff7fffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9218868439374888961 +} + +define i64 @li800fffff80000000() { +; CHECK-LABEL: li800fffff80000000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9218868439374888960 +} + +define i64 @li800fffff800007ff() { +; CHECK-LABEL: li800fffff800007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9218868439374886913 +} + +define i64 @li800fffff80000800() { +; CHECK-LABEL: li800fffff80000800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9218868439374886912 +} + +define i64 @li800fffff80000fff() { +; CHECK-LABEL: li800fffff80000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9218868439374884865 +} + +define i64 @li800ffffffffff000() { +; CHECK-LABEL: li800ffffffffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9218868437227409408 +} + +define i64 @li800ffffffffff7ff() { +; CHECK-LABEL: li800ffffffffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9218868437227407361 +} + +define i64 @li800ffffffffff800() { +; CHECK-LABEL: li800ffffffffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -2048 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9218868437227407360 +} + +define i64 @li800fffffffffffff() { +; CHECK-LABEL: li800fffffffffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9218868437227405313 +} + +define i64 @lifff0000000000000() { +; CHECK-LABEL: lifff0000000000000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu52i.d $r4, $zero, -1 +; CHECK-NEXT: jr $ra + ret i64 -4503599627370496 +} + +define i64 @lifff00000000007ff() { +; CHECK-LABEL: lifff00000000007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2047 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -4503599627368449 +} 
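+ +;; A reading aid for the cases in this file (an observation drawn from the CHECK lines, not a normative claim about the ISel algorithm): llc appears to materialize a 64-bit immediate in up to four slices, with ori filling bits [11:0], lu12i.w bits [31:12] (sign-extended to 64 bits), lu32i.d bits [51:32], and lu52i.d bits [63:52]; a slice is omitted when it already equals the sign extension of the slices below it. For example, 0xfff0000000000800 in the next test needs only two instructions: ori $r4, $zero, 2048 followed by lu52i.d $r4, $r4, -1.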
+ +define i64 @lifff0000000000800() { +; CHECK-LABEL: lifff0000000000800: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2048 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -4503599627368448 +} + +define i64 @lifff0000000000fff() { +; CHECK-LABEL: lifff0000000000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 4095 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -4503599627366401 +} + +define i64 @lifff000007ffff000() { +; CHECK-LABEL: lifff000007ffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -4503597479890944 +} + +define i64 @lifff000007ffff7ff() { +; CHECK-LABEL: lifff000007ffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -4503597479888897 +} + +define i64 @lifff000007ffff800() { +; CHECK-LABEL: lifff000007ffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -4503597479888896 +} + +define i64 @lifff000007fffffff() { +; CHECK-LABEL: lifff000007fffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -4503597479886849 +} + +define i64 @lifff0000080000000() { +; CHECK-LABEL: lifff0000080000000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -4503597479886848 +} + +define i64 @lifff00000800007ff() { +; CHECK-LABEL: lifff00000800007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -4503597479884801 +} + +define i64 @lifff0000080000800() { +; CHECK-LABEL: lifff0000080000800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -4503597479884800 +} + +define i64 @lifff0000080000fff() { +; CHECK-LABEL: lifff0000080000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -4503597479882753 +} + +define i64 @lifff00000fffff000() { +; CHECK-LABEL: lifff00000fffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -4503595332407296 +} + +define i64 @lifff00000fffff7ff() { +; CHECK-LABEL: lifff00000fffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -4503595332405249 +} + +define i64 @lifff00000fffff800() { +; CHECK-LABEL: lifff00000fffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -2048 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -4503595332405248 +} + +define i64 @lifff00000ffffffff() { +; CHECK-LABEL: lifff00000ffffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -1 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -4503595332403201 +} + +define i64 
@lifff7ffff00000000() { +; CHECK-LABEL: lifff7ffff00000000: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 0 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -2251804108652544 +} + +define i64 @lifff7ffff000007ff() { +; CHECK-LABEL: lifff7ffff000007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2047 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -2251804108650497 +} + +define i64 @lifff7ffff00000800() { +; CHECK-LABEL: lifff7ffff00000800: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2048 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -2251804108650496 +} + +define i64 @lifff7ffff00000fff() { +; CHECK-LABEL: lifff7ffff00000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 4095 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -2251804108648449 +} + +define i64 @lifff7ffff7ffff000() { +; CHECK-LABEL: lifff7ffff7ffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -2251801961172992 +} + +define i64 @lifff7ffff7ffff7ff() { +; CHECK-LABEL: lifff7ffff7ffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -2251801961170945 +} + +define i64 @lifff7ffff7ffff800() { +; CHECK-LABEL: lifff7ffff7ffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -2251801961170944 +} + +define i64 @lifff7ffff7fffffff() { +; CHECK-LABEL: lifff7ffff7fffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -2251801961168897 +} + +define i64 @lifff7ffff80000000() { +; CHECK-LABEL: lifff7ffff80000000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -2251801961168896 +} + +define i64 @lifff7ffff800007ff() { +; CHECK-LABEL: lifff7ffff800007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -2251801961166849 +} + +define i64 @lifff7ffff80000800() { +; CHECK-LABEL: lifff7ffff80000800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -2251801961166848 +} + +define i64 @lifff7ffff80000fff() { +; CHECK-LABEL: lifff7ffff80000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -2251801961164801 +} + +define i64 @lifff7fffffffff000() { +; CHECK-LABEL: lifff7fffffffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -2251799813689344 +} + +define i64 @lifff7fffffffff7ff() { +; CHECK-LABEL: lifff7fffffffff7ff: +; CHECK: # 
%bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -2251799813687297 +} + +define i64 @lifff7fffffffff800() { +; CHECK-LABEL: lifff7fffffffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -2048 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -2251799813687296 +} + +define i64 @lifff7ffffffffffff() { +; CHECK-LABEL: lifff7ffffffffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -1 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -2251799813685249 +} + +define i64 @lifff8000000000000() { +; CHECK-LABEL: lifff8000000000000: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 0 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: jr $ra + ret i64 -2251799813685248 +} + +define i64 @lifff80000000007ff() { +; CHECK-LABEL: lifff80000000007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2047 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: jr $ra + ret i64 -2251799813683201 +} + +define i64 @lifff8000000000800() { +; CHECK-LABEL: lifff8000000000800: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2048 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: jr $ra + ret i64 -2251799813683200 +} + +define i64 @lifff8000000000fff() { +; CHECK-LABEL: lifff8000000000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 4095 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: jr $ra + ret i64 -2251799813681153 +} + +define i64 @lifff800007ffff000() { +; CHECK-LABEL: lifff800007ffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: jr $ra + ret i64 -2251797666205696 +} + +define i64 @lifff800007ffff7ff() { +; CHECK-LABEL: lifff800007ffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: jr $ra + ret i64 -2251797666203649 +} + +define i64 @lifff800007ffff800() { +; CHECK-LABEL: lifff800007ffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: jr $ra + ret i64 -2251797666203648 +} + +define i64 @lifff800007fffffff() { +; CHECK-LABEL: lifff800007fffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: jr $ra + ret i64 -2251797666201601 +} + +define i64 @lifff8000080000000() { +; CHECK-LABEL: lifff8000080000000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: jr $ra + ret i64 -2251797666201600 +} + +define i64 @lifff80000800007ff() { +; CHECK-LABEL: lifff80000800007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: jr $ra + ret i64 -2251797666199553 +} + +define i64 @lifff8000080000800() { +; CHECK-LABEL: lifff8000080000800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: jr $ra + ret i64 -2251797666199552 +} + +define i64 @lifff8000080000fff() { +; CHECK-LABEL: lifff8000080000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: jr $ra + ret i64 -2251797666197505 +} + +define i64 
@lifff80000fffff000() { +; CHECK-LABEL: lifff80000fffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: jr $ra + ret i64 -2251795518722048 +} + +define i64 @lifff80000fffff7ff() { +; CHECK-LABEL: lifff80000fffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: jr $ra + ret i64 -2251795518720001 +} + +define i64 @lifff80000fffff800() { +; CHECK-LABEL: lifff80000fffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -2048 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: jr $ra + ret i64 -2251795518720000 +} + +define i64 @lifff80000ffffffff() { +; CHECK-LABEL: lifff80000ffffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -1 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: jr $ra + ret i64 -2251795518717953 +} + +define i64 @liffffffff00000000() { +; CHECK-LABEL: liffffffff00000000: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 0 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -4294967296 +} + +define i64 @liffffffff000007ff() { +; CHECK-LABEL: liffffffff000007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2047 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -4294965249 +} + +define i64 @liffffffff00000800() { +; CHECK-LABEL: liffffffff00000800: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2048 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -4294965248 +} + +define i64 @liffffffff00000fff() { +; CHECK-LABEL: liffffffff00000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 4095 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -4294963201 +} + +define i64 @liffffffff7ffff000() { +; CHECK-LABEL: liffffffff7ffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -2147487744 +} + +define i64 @liffffffff7ffff7ff() { +; CHECK-LABEL: liffffffff7ffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -2147485697 +} + +define i64 @liffffffff7ffff800() { +; CHECK-LABEL: liffffffff7ffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -2147485696 +} + +define i64 @liffffffff7fffffff() { +; CHECK-LABEL: liffffffff7fffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -2147483649 +} + +define i64 @liffffffff80000000() { +; CHECK-LABEL: liffffffff80000000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: jr $ra + ret i64 -2147483648 +} + +define i64 @liffffffff800007ff() { +; CHECK-LABEL: liffffffff800007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 -2147481601 +} + +define i64 @liffffffff80000800() { +; CHECK-LABEL: liffffffff80000800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: jr $ra + ret i64 -2147481600 +} + +define i64 @liffffffff80000fff() { +; CHECK-LABEL: liffffffff80000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: jr $ra + ret i64 -2147479553 +} + +define i64 @lifffffffffffff000() { +; CHECK-LABEL: lifffffffffffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: 
lu12i.w $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -4096 +} + +define i64 @lifffffffffffff7ff() { +; CHECK-LABEL: lifffffffffffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 -2049 +} + +define i64 @lifffffffffffff800() { +; CHECK-LABEL: lifffffffffffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $r4, $zero, -2048 +; CHECK-NEXT: jr $ra + ret i64 -2048 +} + +define i64 @liffffffffffffffff() { +; CHECK-LABEL: liffffffffffffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $r4, $zero, -1 +; CHECK-NEXT: jr $ra + ret i64 -1 +} diff --git a/test/CodeGen/LoongArch/inlineasm/extra-code.ll b/test/CodeGen/LoongArch/inlineasm/extra-code.ll new file mode 100644 index 00000000..986e27e2 --- /dev/null +++ b/test/CodeGen/LoongArch/inlineasm/extra-code.ll @@ -0,0 +1,8 @@ +; RUN: llc -march=loongarch64 -no-integrated-as -o - %s | FileCheck %s + +define i64 @test(i64 %a) { +; CHECK: add.d $r4, $r4, $r0 +entry: + %0 = tail call i64 asm sideeffect "add.d $0, $1, ${2:z} \0A", "=r,r,Jr"(i64 %a, i64 0) + ret i64 %0 +} diff --git a/test/CodeGen/LoongArch/inlineasm/floating-point-in-gpr.ll b/test/CodeGen/LoongArch/inlineasm/floating-point-in-gpr.ll new file mode 100644 index 00000000..29cb2bab --- /dev/null +++ b/test/CodeGen/LoongArch/inlineasm/floating-point-in-gpr.ll @@ -0,0 +1,31 @@ +; RUN: llc -march=loongarch64 -mattr=+d -target-abi=lp64d -o - %s | FileCheck %s + +;; Test that floating-point bits can be stored in a GPR. + +define void @reg_float(float %x) { +; CHECK-LABEL: reg_float: +; CHECK: movfr2gr.s $r{{[0-9]+}}, $f0 + call void asm "", "r"(float %x) + ret void +} + +define void @r10_float(float %x) { +; CHECK-LABEL: r10_float: +; CHECK: movfr2gr.s $r10, $f0 + call void asm "", "{$r10}"(float %x) + ret void +} + +define void @reg_double(double %x) { +; CHECK-LABEL: reg_double: +; CHECK: movfr2gr.d $r{{[0-9]+}}, $f0 + call void asm "", "r"(double %x) + ret void +} + +define void @r10_double(double %x) { +; CHECK-LABEL: r10_double: +; CHECK: movfr2gr.d $r10, $f0 + call void asm "", "{$r10}"(double %x) + ret void +} diff --git a/test/CodeGen/LoongArch/inlineasm/non-native-value-type-registers-error.ll b/test/CodeGen/LoongArch/inlineasm/non-native-value-type-registers-error.ll new file mode 100644 index 00000000..7f58ea2e --- /dev/null +++ b/test/CodeGen/LoongArch/inlineasm/non-native-value-type-registers-error.ll @@ -0,0 +1,8 @@ +; RUN: not llc -march=loongarch64 %s 2>&1 | FileCheck %s + +define void @test_i128() { +; CHECK: error: couldn't allocate input reg for constraint '{$r20}' +start: + call void asm "", "{$r20}"(i128 5) + ret void +} diff --git a/test/CodeGen/LoongArch/inlineasm/non-native-value-type-registers.ll b/test/CodeGen/LoongArch/inlineasm/non-native-value-type-registers.ll new file mode 100644 index 00000000..d18a184a --- /dev/null +++ b/test/CodeGen/LoongArch/inlineasm/non-native-value-type-registers.ll @@ -0,0 +1,42 @@ +; RUN: llc -march=loongarch64 -o - %s 2>&1 | FileCheck %s + +;; Test that non-native value types can be parsed. + +define void @test_i1() { +; CHECK-LABEL: test_i1: +; CHECK: ori $r6, $zero, 0 +; CHECK: jr $ra +start: + call void asm "", "{$r6}"(i1 0) + ret void +} + +;; Note: non-simple values like `i3` are only allowed in newer LLVM versions (>= 12). +;; In older LLVM versions (<= 11), SelectionDAGBuilder::visitInlineAsm asserts that simple +;; values must be used. For details, please see https://reviews.llvm.org/D91710.
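+;; Under the newer behavior, the `i3` input below is presumably just promoted to the +;; native GPR width during legalization, so it is materialized like any other scalar +;; zero (hence the plain `ori` expected by the CHECK line).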
+define void @test_i3() { +; CHECK-LABEL: test_i3: +; CHECK: ori $r7, $zero, 0 +; CHECK: jr $ra +start: + call void asm "", "{$r7}"(i3 0) + ret void +} + +define void @test_i8() { +; CHECK-LABEL: test_i8: +; CHECK: ori $r5, $zero, 0 +; CHECK: jr $ra +start: + call void asm "", "{$r5}"(i8 0) + ret void +} + +define void @test_i16() { +; CHECK-LABEL: test_i16: +; CHECK: ori $r20, $zero, 5 +; CHECK: jr $ra +start: + call void asm "", "{$r20}"(i16 5) + ret void +} diff --git a/test/CodeGen/LoongArch/inlineasm/preld.ll b/test/CodeGen/LoongArch/inlineasm/preld.ll new file mode 100644 index 00000000..8dbbed99 --- /dev/null +++ b/test/CodeGen/LoongArch/inlineasm/preld.ll @@ -0,0 +1,8 @@ +; RUN: llc -march=loongarch64 -o - %s | FileCheck %s + +define void @preld(i32* %p) { +entry: + ; CHECK: preld 10, $r4, 23 + tail call void asm sideeffect "preld 10, $0, 23 \0A\09", "r"(i32* %p) + ret void +} diff --git a/test/CodeGen/LoongArch/jirl-verify.ll b/test/CodeGen/LoongArch/jirl-verify.ll new file mode 100644 index 00000000..ed72e51f --- /dev/null +++ b/test/CodeGen/LoongArch/jirl-verify.ll @@ -0,0 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 -verify-machineinstrs -o - %s \ +; RUN: | FileCheck %s --check-prefix=STATIC + +define void @test() nounwind { +; STATIC-LABEL: test: +; STATIC: # %bb.0: +; STATIC-NEXT: addi.d $sp, $sp, -16 +; STATIC-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; STATIC-NEXT: lu12i.w $ra, foo +; STATIC-NEXT: ori $ra, $ra, foo +; STATIC-NEXT: lu32i.d $ra, foo +; STATIC-NEXT: lu52i.d $ra, $ra, foo +; STATIC-NEXT: jirl $ra, $ra, 0 +; STATIC-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; STATIC-NEXT: addi.d $sp, $sp, 16 +; STATIC-NEXT: jr $ra + call void @foo() nounwind + ret void +} + +declare void @foo() diff --git a/test/CodeGen/LoongArch/lasx/VExtend.ll b/test/CodeGen/LoongArch/lasx/VExtend.ll new file mode 100644 index 00000000..895592a0 --- /dev/null +++ b/test/CodeGen/LoongArch/lasx/VExtend.ll @@ -0,0 +1,54 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 -mattr=+lasx -mattr=+d < %s | FileCheck %s + +define <4 x i64> @uvadd(<8 x i32> %b, <8 x i32> %c) { +; CHECK-LABEL: uvadd: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvhaddw.du.wu $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra +entry: + %0 = shufflevector <8 x i32> %c, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %1 = shufflevector <8 x i32> %b, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + %2 = add <4 x i32> %0, %1 + %3 = zext <4 x i32> %2 to <4 x i64> + ret <4 x i64> %3 +} + +define <4 x i64> @svadd(<8 x i32> %b, <8 x i32> %c) { +; CHECK-LABEL: svadd: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvhaddw.d.w $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra +entry: + %0 = shufflevector <8 x i32> %c, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %1 = shufflevector <8 x i32> %b, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + %2 = add nsw <4 x i32> %0, %1 + %3 = sext <4 x i32> %2 to <4 x i64> + ret <4 x i64> %3 +} + +define <4 x i64> @uvsub(<8 x i32> %b, <8 x i32> %c) { +; CHECK-LABEL: uvsub: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvhsubw.du.wu $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra +entry: + %0 = shufflevector <8 x i32> %b, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + %1 = shufflevector <8 x i32> %c, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %2 = sub <4 x i32> %0, %1 + %3 = zext <4 x i32> %2 to <4 x i64> + ret <4 x i64> %3 +} + +define <4 x i64> @svsub(<8 x i32> %b, <8 x i32> %c) { +; CHECK-LABEL: svsub: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvhsubw.d.w $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra
+entry: + %0 = shufflevector <8 x i32> %b, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + %1 = shufflevector <8 x i32> %c, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %2 = sub nsw <4 x i32> %0, %1 + %3 = sext <4 x i32> %2 to <4 x i64> + ret <4 x i64> %3 +} diff --git a/test/CodeGen/LoongArch/lasx/imm_vector_lasx.ll b/test/CodeGen/LoongArch/lasx/imm_vector_lasx.ll new file mode 100644 index 00000000..6a7a35e4 --- /dev/null +++ b/test/CodeGen/LoongArch/lasx/imm_vector_lasx.ll @@ -0,0 +1,176 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 -mattr=+lasx -mattr=+d < %s | FileCheck %s + +define <4 x i64> @build_lasx0(<4 x i64> %a) { +; CHECK-LABEL: build_lasx0: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -1 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 +; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %b = add <4 x i64> %a, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295> + ret <4 x i64> %b +} + +define <4 x i64> @build_lasx1(<4 x i64> %a) { +; CHECK-LABEL: build_lasx1: +; CHECK: # %bb.0: +; CHECK-NEXT: lu52i.d $r4, $zero, 2047 +; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 +; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %b = add <4 x i64> %a, <i64 9218868437227405312, i64 9218868437227405312, i64 9218868437227405312, i64 9218868437227405312> + ret <4 x i64> %b +} + +define <4 x i64> @build_lasx2(<4 x i64> %a) { +; CHECK-LABEL: build_lasx2: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2048 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 +; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %b = add <4 x i64> %a, <i64 2251795518720000, i64 2251795518720000, i64 2251795518720000, i64 2251795518720000> + ret <4 x i64> %b +} + +define <4 x i64> @build_lasx3(<4 x i64> %a) { +; CHECK-LABEL: build_lasx3: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 4095 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 +; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %b = add <4 x i64> %a, <i64 -4503599627366401, i64 -4503599627366401, i64 -4503599627366401, i64 -4503599627366401> + ret <4 x i64> %b +} + +define <4 x i64> @build_lasx4(<4 x i64> %a) { +; CHECK-LABEL: build_lasx4: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 4095 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 +; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %b = add <4 x i64> %a, <i64 2251799813689343, i64 2251799813689343, i64 2251799813689343, i64 2251799813689343> + ret <4 x i64> %b +} + +define <4 x i64> @build_lasx5(<4 x i64> %a) { +; CHECK-LABEL: build_lasx5: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -2048 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 +; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %b = add <4 x i64> %a, <i64 4294965248, i64 4294965248, i64 4294965248, i64 4294965248> + ret <4 x i64> %b +} + +define <4 x i64> @build_lasx6(<4 x i64> %a) { +; CHECK-LABEL: build_lasx6: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -2048 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 +; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %b = add <4 x i64> %a, <i64 4503599627368448, i64 4503599627368448, i64 4503599627368448, i64 4503599627368448> + ret <4 x i64> %b +} + +define <4 x i64> @build_lasx7(<4 x i64> %a) { +; CHECK-LABEL: build_lasx7: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -2048 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 +; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %b = add <4 x i64> %a, <i64 2251804108650496, i64 2251804108650496, i64 2251804108650496, i64 2251804108650496> + ret <4 x i64> %b +} + +define <4 x i64> @build_lasx8(<4 x i64> %a) { +; CHECK-LABEL: build_lasx8: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 +; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %b = add <4 x i64> %a, <i64 2147483648, i64 2147483648, i64 2147483648, i64 2147483648> + ret <4 x i64> %b +} + +define <4 x i64> @build_lasx9(<4 x i64> %a) { +;
CHECK-LABEL: build_lasx9: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 +; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %b = add <4 x i64> %a, <i64 4503599627366400, i64 4503599627366400, i64 4503599627366400, i64 4503599627366400> + ret <4 x i64> %b +} + +define <4 x i64> @build_lasx10(<4 x i64> %a) { +; CHECK-LABEL: build_lasx10: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 +; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %b = add <4 x i64> %a, <i64 9218868439374888960, i64 9218868439374888960, i64 9218868439374888960, i64 9218868439374888960> + ret <4 x i64> %b +} + +define <4 x i64> @build_lasx11(<4 x i64> %a) { +; CHECK-LABEL: build_lasx11: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 +; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %b = add <4 x i64> %a, <i64 -2147485696, i64 -2147485696, i64 -2147485696, i64 -2147485696> + ret <4 x i64> %b +} + +define <4 x i64> @build_lasx12(<4 x i64> %a) { +; CHECK-LABEL: build_lasx12: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 +; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %b = add <4 x i64> %a, <i64 9223372034707294207, i64 9223372034707294207, i64 9223372034707294207, i64 9223372034707294207> + ret <4 x i64> %b +} + +define <4 x i64> @build_lasx13(<4 x i64> %a) { +; CHECK-LABEL: build_lasx13: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 +; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %b = add <4 x i64> %a, <i64 2251801961170944, i64 2251801961170944, i64 2251801961170944, i64 2251801961170944> + ret <4 x i64> %b +} diff --git a/test/CodeGen/LoongArch/lasx/inline-asm.ll b/test/CodeGen/LoongArch/lasx/inline-asm.ll new file mode 100644 index 00000000..2d7adf73 --- /dev/null +++ b/test/CodeGen/LoongArch/lasx/inline-asm.ll @@ -0,0 +1,55 @@ +; A basic inline assembly test + +; RUN: llc -march=loongarch64 -mattr=+lasx -mattr=+d < %s | FileCheck %s + +@v4i64_r = global <4 x i64> zeroinitializer, align 32 +@v8i32_r = global <8 x i32> zeroinitializer, align 32 + +define void @test1() nounwind { +entry: + ; CHECK-LABEL: test1: + %0 = call <4 x i64> asm "xvldi ${0:u}, 1", "=f"() + ; CHECK: xvldi $xr{{[1-3]?[0-9]}}, 1 + store <4 x i64> %0, <4 x i64>* @v4i64_r + ret void +} + +define void @test2() nounwind { +entry: + ; CHECK-LABEL: test2: + %0 = load <8 x i32>, <8 x i32>* @v8i32_r + %1 = call <8 x i32> asm "xvaddi.wu ${0:u}, ${1:u}, 1", "=f,f"(<8 x i32> %0) + ; CHECK: xvaddi.wu $xr{{[1-3]?[0-9]}}, $xr{{[1-3]?[0-9]}}, 1 + store <8 x i32> %1, <8 x i32>* @v8i32_r + ret void +} + +define void @test2_d() nounwind { +entry: + ; CHECK-LABEL: test2_d: + %0 = load <4 x i64>, <4 x i64>* @v4i64_r + %1 = call <4 x i64> asm "xvaddi.wu ${0:u}, ${1:u}, 1", "=f,f"(<4 x i64> %0) + ; CHECK: xvaddi.wu $xr{{[1-3]?[0-9]}}, $xr{{[1-3]?[0-9]}}, 1 + store <4 x i64> %1, <4 x i64>* @v4i64_r + ret void +} + +define void @test3() nounwind { +entry: + ; CHECK-LABEL: test3: + %0 = load <8 x i32>, <8 x i32>* @v8i32_r + %1 = call <8 x i32> asm sideeffect "xvaddi.wu ${0:u}, ${1:u}, 1", "=f,f,~{$xr0}"(<8 x i32> %0) + ; CHECK: xvaddi.wu $xr{{([1-9]|[1-3][0-9])}}, $xr{{([1-9]|[1-3][0-9])}}, 1 + store <8 x i32> %1, <8 x i32>* @v8i32_r + ret void +} + +define void @test3_d() nounwind { +entry: + ; CHECK-LABEL: test3_d: + %0 = load <4 x i64>, <4 x i64>* @v4i64_r + %1 = call <4 x i64> asm sideeffect "xvaddi.wu ${0:u}, ${1:u}, 1", "=f,f,~{$xr0}"(<4 x i64> %0) + ; CHECK: xvaddi.wu
$xr{{([1-9]|[1-3][0-9])}}, $xr{{([1-9]|[1-3][0-9])}}, 1 + store <4 x i64> %1, <4 x i64>* @v4i64_r + ret void +} diff --git a/test/CodeGen/LoongArch/lasx/lasxvclr.ll b/test/CodeGen/LoongArch/lasx/lasxvclr.ll new file mode 100644 index 00000000..54b9867e --- /dev/null +++ b/test/CodeGen/LoongArch/lasx/lasxvclr.ll @@ -0,0 +1,46 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 -mattr=+lasx -mattr=+d < %s | FileCheck %s + +define <32 x i8> @clri8(<32 x i8> %0, <32 x i8> %1) { +; CHECK-LABEL: clri8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvbitclr.b $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %3 = shl <32 x i8> , %1 + %4 = xor <32 x i8> %3, + %5 = and <32 x i8> %4, %0 + ret <32 x i8> %5 +} + +define <16 x i16> @clri16(<16 x i16> %0, <16 x i16> %1) { +; CHECK-LABEL: clri16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvbitclr.h $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %3 = shl <16 x i16> , %1 + %4 = xor <16 x i16> %3, + %5 = and <16 x i16> %4, %0 + ret <16 x i16> %5 +} + +define <8 x i32> @clri32(<8 x i32> %0, <8 x i32> %1) { +; CHECK-LABEL: clri32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvbitclr.w $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %3 = shl <8 x i32> , %1 + %4 = xor <8 x i32> %3, + %5 = and <8 x i32> %4, %0 + ret <8 x i32> %5 +} + +define <4 x i64> @clri64(<4 x i64> %0, <4 x i64> %1) { +; CHECK-LABEL: clri64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvbitclr.d $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %3 = shl <4 x i64> , %1 + %4 = xor <4 x i64> %3, + %5 = and <4 x i64> %4, %0 + ret <4 x i64> %5 +} diff --git a/test/CodeGen/LoongArch/lasx/logic-lasx.ll b/test/CodeGen/LoongArch/lasx/logic-lasx.ll new file mode 100644 index 00000000..3d9eefe2 --- /dev/null +++ b/test/CodeGen/LoongArch/lasx/logic-lasx.ll @@ -0,0 +1,130 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 -mattr=+lasx -mattr=+d < %s | FileCheck %s + +define <4 x i64> @not_v4i64(<4 x i64> %a) { +; CHECK-LABEL: not_v4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvnor.v $xr0, $xr0, $xr0 +; CHECK-NEXT: jr $ra +entry: + %not = xor <4 x i64> %a, + ret <4 x i64> %not +} + +define <8 x i32> @not_v8i32(<8 x i32> %a) { +; CHECK-LABEL: not_v8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvnor.v $xr0, $xr0, $xr0 +; CHECK-NEXT: jr $ra +entry: + %not = xor <8 x i32> %a, + ret <8 x i32> %not +} + +define <16 x i16> @not_v16i16(<16 x i16> %a) { +; CHECK-LABEL: not_v16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvnor.v $xr0, $xr0, $xr0 +; CHECK-NEXT: jr $ra +entry: + %not = xor <16 x i16> %a, + ret <16 x i16> %not +} + +define <32 x i8> @not_v32i8(<32 x i8> %a) { +; CHECK-LABEL: not_v32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvxori.b $xr0, $xr0, 255 +; CHECK-NEXT: jr $ra +entry: + %not = xor <32 x i8> %a, + ret <32 x i8> %not +} + +define <4 x i64> @andn_v4i64(<4 x i64> %a, <4 x i64> %b) { +; CHECK-LABEL: andn_v4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvandn.v $xr0, $xr1, $xr0 +; CHECK-NEXT: jr $ra +entry: + %not = xor <4 x i64> %b, + %and = and <4 x i64> %not, %a + ret <4 x i64> %and +} + +define <8 x i32> @andn_v8i32(<8 x i32> %a, <8 x i32> %b) { +; CHECK-LABEL: andn_v8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvandn.v $xr0, $xr1, $xr0 +; CHECK-NEXT: jr $ra +entry: + %not = xor <8 x i32> %b, + %and = and <8 x i32> %not, %a + ret <8 x i32> %and +} + +define <16 x i16> @andn_v16i16(<16 x i16> %a, <16 x i16> %b) { +; CHECK-LABEL: andn_v16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvandn.v $xr0, $xr1, $xr0 +; CHECK-NEXT: 
jr $ra +entry: + %not = xor <16 x i16> %b, + %and = and <16 x i16> %not, %a + ret <16 x i16> %and +} + +define <32 x i8> @andn_v32i8(<32 x i8> %a, <32 x i8> %b) { +; CHECK-LABEL: andn_v32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvandn.v $xr0, $xr1, $xr0 +; CHECK-NEXT: jr $ra +entry: + %not = xor <32 x i8> %b, + %and = and <32 x i8> %not, %a + ret <32 x i8> %and +} + +define <4 x i64> @orn_v4i64(<4 x i64> %a, <4 x i64> %b) { +; CHECK-LABEL: orn_v4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvorn.v $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra +entry: + %not = xor <4 x i64> %b, + %or = or <4 x i64> %not, %a + ret <4 x i64> %or +} + +define <8 x i32> @orn_v8i32(<8 x i32> %a, <8 x i32> %b) { +; CHECK-LABEL: orn_v8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvorn.v $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra +entry: + %not = xor <8 x i32> %b, + %or = or <8 x i32> %not, %a + ret <8 x i32> %or +} + +define <16 x i16> @orn_v16i16(<16 x i16> %a, <16 x i16> %b) { +; CHECK-LABEL: orn_v16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvorn.v $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra +entry: + %not = xor <16 x i16> %b, + %or = or <16 x i16> %not, %a + ret <16 x i16> %or +} + +define <32 x i8> @orn_v32i8(<32 x i8> %a, <32 x i8> %b) { +; CHECK-LABEL: orn_v32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvorn.v $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra +entry: + %not = xor <32 x i8> %b, + %or = or <32 x i8> %not, %a + ret <32 x i8> %or +} diff --git a/test/CodeGen/LoongArch/lasx/set-lasx.ll b/test/CodeGen/LoongArch/lasx/set-lasx.ll new file mode 100644 index 00000000..8d6930f0 --- /dev/null +++ b/test/CodeGen/LoongArch/lasx/set-lasx.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 -mattr=+lasx -mattr=+d < %s | FileCheck %s + +define <32 x i8> @seti8(<32 x i8>) { +; CHECK-LABEL: seti8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvbitseti.b $xr0, $xr0, 6 +; CHECK-NEXT: jr $ra + %2 = or <32 x i8> %0, + ret <32 x i8> %2 +} + +define <16 x i16> @seti16(<16 x i16>) { +; CHECK-LABEL: seti16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvbitseti.h $xr0, $xr0, 6 +; CHECK-NEXT: jr $ra + %2 = or <16 x i16> %0, + ret <16 x i16> %2 +} + +define <8 x i32> @seti32(<8 x i32>) { +; CHECK-LABEL: seti32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvbitseti.w $xr0, $xr0, 6 +; CHECK-NEXT: jr $ra + %2 = or <8 x i32> %0, + ret <8 x i32> %2 +} + +define <4 x i64> @seti64(<4 x i64>) { +; CHECK-LABEL: seti64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvbitseti.d $xr0, $xr0, 6 +; CHECK-NEXT: jr $ra + %2 = or <4 x i64> %0, + ret <4 x i64> %2 +} diff --git a/test/CodeGen/LoongArch/lasx/vext2xv.ll b/test/CodeGen/LoongArch/lasx/vext2xv.ll new file mode 100644 index 00000000..aa31d5e8 --- /dev/null +++ b/test/CodeGen/LoongArch/lasx/vext2xv.ll @@ -0,0 +1,65 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 -mattr=+lasx -mattr=+d < %s | FileCheck %s + +define <4 x i64> @s_v4i32_v4i64(<4 x i32> %a0) { +; CHECK-LABEL: s_v4i32_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: vext2xv.d.w $xr0, $xr0 +; CHECK-NEXT: jr $ra + %1 = sext <4 x i32> %a0 to <4 x i64> + ret <4 x i64> %1 +} + +define <4 x i64> @z_v4i32_v4i64(<4 x i32> %a0) { +; CHECK-LABEL: z_v4i32_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: vext2xv.du.wu $xr0, $xr0 +; CHECK-NEXT: jr $ra + %1 = zext <4 x i32> %a0 to <4 x i64> + ret <4 x i64> %1 +} + +define <16 x i16> @s_v16i8_v16i16(<16 x 
i8> %A) { +; CHECK-LABEL: s_v16i8_v16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: vext2xv.h.b $xr0, $xr0 +; CHECK-NEXT: jr $ra + entry: + %B = sext <16 x i8> %A to <16 x i16> + ret <16 x i16> %B +} + +define <16 x i16> @z_v16i8_v16i16(<16 x i8> %A) { +; CHECK-LABEL: z_v16i8_v16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: vext2xv.hu.bu $xr0, $xr0 +; CHECK-NEXT: jr $ra + entry: + %B = zext <16 x i8> %A to <16 x i16> + ret <16 x i16> %B +} + +define <8 x i32> @s_v8i16_v8i32(<8 x i16> %x) { +; CHECK-LABEL: s_v8i16_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: vext2xv.w.h $xr0, $xr0 +; CHECK-NEXT: jr $ra + %1 = sext <8 x i16> %x to <8 x i32> + ret <8 x i32> %1 +} + +define <8 x i32> @z_v8i16_v8i32(<8 x i16> %x) { +; CHECK-LABEL: z_v8i16_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: vext2xv.wu.hu $xr0, $xr0 +; CHECK-NEXT: jr $ra + %1 = zext <8 x i16> %x to <8 x i32> + ret <8 x i32> %1 +} + diff --git a/test/CodeGen/LoongArch/lasx/xvadda.ll b/test/CodeGen/LoongArch/lasx/xvadda.ll new file mode 100644 index 00000000..f1e6d9c2 --- /dev/null +++ b/test/CodeGen/LoongArch/lasx/xvadda.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 -mattr=+lasx -mattr=+d < %s | FileCheck %s + +define <32 x i8> @xvaddab(<32 x i8>, <32 x i8>) { +; CHECK-LABEL: xvaddab: +; CHECK: # %bb.0: +; CHECK-NEXT: xvadda.b $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %3 = icmp slt <32 x i8> %0, zeroinitializer + %4 = sub <32 x i8> zeroinitializer, %0 + %5 = select <32 x i1> %3, <32 x i8> %4, <32 x i8> %0 + %6 = icmp slt <32 x i8> %1, zeroinitializer + %7 = sub <32 x i8> zeroinitializer, %1 + %8 = select <32 x i1> %6, <32 x i8> %7, <32 x i8> %1 + %9 = add <32 x i8> %5, %8 + ret <32 x i8> %9 +} + +define <16 x i16> @xvaddah(<16 x i16>, <16 x i16>) { +; CHECK-LABEL: xvaddah: +; CHECK: # %bb.0: +; CHECK-NEXT: xvadda.h $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %3 = icmp slt <16 x i16> %0, zeroinitializer + %4 = sub <16 x i16> zeroinitializer, %0 + %5 = select <16 x i1> %3, <16 x i16> %4, <16 x i16> %0 + %6 = icmp slt <16 x i16> %1, zeroinitializer + %7 = sub <16 x i16> zeroinitializer, %1 + %8 = select <16 x i1> %6, <16 x i16> %7, <16 x i16> %1 + %9 = add <16 x i16> %5, %8 + ret <16 x i16> %9 +} + +define <8 x i32> @xvaddaw(<8 x i32>, <8 x i32>) { +; CHECK-LABEL: xvaddaw: +; CHECK: # %bb.0: +; CHECK-NEXT: xvadda.w $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %3 = icmp slt <8 x i32> %0, zeroinitializer + %4 = sub nsw <8 x i32> zeroinitializer, %0 + %5 = select <8 x i1> %3, <8 x i32> %4, <8 x i32> %0 + %6 = icmp slt <8 x i32> %1, zeroinitializer + %7 = sub nsw <8 x i32> zeroinitializer, %1 + %8 = select <8 x i1> %6, <8 x i32> %7, <8 x i32> %1 + %9 = add nuw nsw <8 x i32> %5, %8 + ret <8 x i32> %9 +} + +define <4 x i64> @xvaddad(<4 x i64>, <4 x i64>) { +; CHECK-LABEL: xvaddad: +; CHECK: # %bb.0: +; CHECK-NEXT: xvadda.d $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %3 = icmp slt <4 x i64> %0, zeroinitializer + %4 = sub nsw <4 x i64> zeroinitializer, %0 + %5 = select <4 x i1> %3, <4 x i64> %4, <4 x i64> %0 + %6 = icmp slt <4 x i64> %1, zeroinitializer + %7 = sub nsw <4 x i64> zeroinitializer, %1 + %8 = select <4 x i1> %6, <4 x i64> %7, <4 x i64> %1 + %9 = add nuw nsw <4 x i64> %5, %8 + ret <4 x i64> %9 +} diff --git a/test/CodeGen/LoongArch/lasx/xvaddsub.ll 
b/test/CodeGen/LoongArch/lasx/xvaddsub.ll new file mode 100644 index 00000000..68fff0a6 --- /dev/null +++ b/test/CodeGen/LoongArch/lasx/xvaddsub.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 -mattr=+lasx -mattr=+d < %s | FileCheck %s + +define <4 x i64> @svaddev(<8 x i32> %b, <8 x i32> %c) { +; CHECK-LABEL: svaddev: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwev.d.w $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra +entry: + %0 = add nsw <8 x i32> %c, %b + %1 = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> + %2 = sext <4 x i32> %1 to <4 x i64> + ret <4 x i64> %2 +} + +define <4 x i64> @uvaddev(<8 x i32> %b, <8 x i32> %c) { +; CHECK-LABEL: uvaddev: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwev.d.wu $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra +entry: + %0 = add <8 x i32> %c, %b + %1 = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> + %2 = zext <4 x i32> %1 to <4 x i64> + ret <4 x i64> %2 +} + +define <4 x i64> @uvsubev(<8 x i32> %b, <8 x i32> %c) { +; CHECK-LABEL: uvsubev: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsubwev.d.wu $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra +entry: + %0 = sub <8 x i32> %b, %c + %1 = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> + %2 = zext <4 x i32> %1 to <4 x i64> + ret <4 x i64> %2 +} + +define <4 x i64> @svsubev(<8 x i32> %b, <8 x i32> %c) { +; CHECK-LABEL: svsubev: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsubwev.d.w $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra +entry: + %0 = sub nsw <8 x i32> %b, %c + %1 = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> + %2 = sext <4 x i32> %1 to <4 x i64> + ret <4 x i64> %2 +} + +define <4 x i64> @uvaddod(<8 x i32> %b, <8 x i32> %c) { +; CHECK-LABEL: uvaddod: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwod.d.wu $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra +entry: + %0 = add <8 x i32> %c, %b + %1 = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> + %2 = zext <4 x i32> %1 to <4 x i64> + ret <4 x i64> %2 +} + +define <4 x i64> @svaddod(<8 x i32> %b, <8 x i32> %c) { +; CHECK-LABEL: svaddod: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwod.d.w $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra +entry: + %0 = add nsw <8 x i32> %c, %b + %1 = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> + %2 = sext <4 x i32> %1 to <4 x i64> + ret <4 x i64> %2 +} + +define <4 x i64> @uvsubod(<8 x i32> %b, <8 x i32> %c) { +; CHECK-LABEL: uvsubod: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsubwod.d.wu $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra +entry: + %0 = sub <8 x i32> %b, %c + %1 = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> + %2 = zext <4 x i32> %1 to <4 x i64> + ret <4 x i64> %2 +} + +define <4 x i64> @svsubod(<8 x i32> %b, <8 x i32> %c) { +; CHECK-LABEL: svsubod: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsubwod.d.w $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra +entry: + %0 = sub nsw <8 x i32> %b, %c + %1 = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> + %2 = sext <4 x i32> %1 to <4 x i64> + ret <4 x i64> %2 +} diff --git a/test/CodeGen/LoongArch/lasx/xvhadd.ll b/test/CodeGen/LoongArch/lasx/xvhadd.ll new file mode 100644 index 00000000..21bb0dcd --- /dev/null +++ b/test/CodeGen/LoongArch/lasx/xvhadd.ll @@ -0,0 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 -mattr=+lasx -mattr=+d < %s | FileCheck %s + +define <4 x i64> @mul(<4 x i64> %a, <8 x i32> %m, <8 x i32> %n) { +; CHECK-LABEL: mul: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvhaddw.d.w $xr0, 
$xr1, $xr2 +; CHECK-NEXT: jr $ra +entry: + %0 = shufflevector <8 x i32> %n, <8 x i32> undef, <2 x i32> + %1 = shufflevector <8 x i32> %m, <8 x i32> undef, <2 x i32> + %2 = add nsw <2 x i32> %0, %1 + %3 = sext <2 x i32> %2 to <2 x i64> + %4 = shufflevector <8 x i32> %n, <8 x i32> undef, <2 x i32> + %5 = shufflevector <8 x i32> %m, <8 x i32> undef, <2 x i32> + %6 = add nsw <2 x i32> %4, %5 + %7 = sext <2 x i32> %6 to <2 x i64> + %vecins16 = shufflevector <2 x i64> %3, <2 x i64> %7, <4 x i32> + ret <4 x i64> %vecins16 +} + diff --git a/test/CodeGen/LoongArch/ldptr.ll b/test/CodeGen/LoongArch/ldptr.ll new file mode 100644 index 00000000..8395b264 --- /dev/null +++ b/test/CodeGen/LoongArch/ldptr.ll @@ -0,0 +1,70 @@ +; Check whether ld.w/ld.d/ldptr.w/ldptr.d/ldx.w/ldx.d instructions are properly generated +; RUN: llc -march=loongarch64 -o - %s | FileCheck %s + +define signext i32 @ld_w(i32* %p) { +; CHECK-LABEL: ld_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ld.w $r4, $r4, 2044 +; CHECK-NEXT: jr $ra +entry: + %addr = getelementptr inbounds i32, i32* %p, i64 511 + %val = load i32, i32* %addr, align 4 + ret i32 %val +} + +define signext i32 @ldptr_w(i32* %p) { +; CHECK-LABEL: ldptr_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ldptr.w $r4, $r4, 2048 +; CHECK-NEXT: jr $ra +entry: + %addr = getelementptr inbounds i32, i32* %p, i64 512 + %val = load i32, i32* %addr, align 4 + ret i32 %val +} + +define signext i32 @ldx_w(i32* %p) { +; CHECK-LABEL: ldx_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lu12i.w $r[[REG:[0-9]+]], 8 +; CHECK-NEXT: ldx.w $r4, $r4, $r[[REG:[0-9]+]] +; CHECK-NEXT: jr $ra +entry: + %addr = getelementptr inbounds i32, i32* %p, i64 8192 + %val = load i32, i32* %addr, align 4 + ret i32 %val +} + +define i64 @ld_d(i64* %p) { +; CHECK-LABEL: ld_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ld.d $r4, $r4, 2040 +; CHECK-NEXT: jr $ra +entry: + %addr = getelementptr inbounds i64, i64* %p, i64 255 + %val = load i64, i64* %addr, align 8 + ret i64 %val +} + +define i64 @ldptr_d(i64* %p) { +; CHECK-LABEL: ldptr_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ldptr.d $r4, $r4, 2048 +; CHECK-NEXT: jr $ra +entry: + %addr = getelementptr inbounds i64, i64* %p, i64 256 + %val = load i64, i64* %addr, align 8 + ret i64 %val +} + +define i64 @ldx_d(i64* %p) { +; CHECK-LABEL: ldx_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lu12i.w $r[[REG:[0-9]+]], 8 +; CHECK-NEXT: ldx.d $r4, $r4, $r[[REG:[0-9]+]] +; CHECK-NEXT: jr $ra +entry: + %addr = getelementptr inbounds i64, i64* %p, i64 4096 + %val = load i64, i64* %addr, align 8 + ret i64 %val +} diff --git a/test/CodeGen/LoongArch/lit.local.cfg b/test/CodeGen/LoongArch/lit.local.cfg new file mode 100644 index 00000000..6223fc69 --- /dev/null +++ b/test/CodeGen/LoongArch/lit.local.cfg @@ -0,0 +1,3 @@ +if not 'LoongArch' in config.root.targets: + config.unsupported = True + diff --git a/test/CodeGen/LoongArch/logic-op.ll b/test/CodeGen/LoongArch/logic-op.ll new file mode 100644 index 00000000..c1029c1f --- /dev/null +++ b/test/CodeGen/LoongArch/logic-op.ll @@ -0,0 +1,171 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 < %s | FileCheck %s + +define signext i32 @foo32(i32 signext %a) { +; CHECK-LABEL: foo32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sltui $r4, $r4, 1 +; CHECK-NEXT: jr $ra +entry: + %tobool = icmp eq i32 %a, 0 + %conv = zext i1 %tobool to i32 + ret i32 %conv +} + +define i64 @foo(i64 %a) { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sltui $r4, $r4, 1 +; 
CHECK-NEXT: jr $ra +entry: + %tobool = icmp eq i64 %a, 0 + %conv = zext i1 %tobool to i64 + ret i64 %conv +} + +define i64 @not(i64 %a) { +; CHECK-LABEL: not: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: nor $r4, $zero, $r4 +; CHECK-NEXT: jr $ra +entry: + %not = xor i64 %a, -1 + ret i64 %not +} + +define i64 @and(i64 %a, i64 %b) { +; CHECK-LABEL: and: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: and $r4, $r5, $r4 +; CHECK-NEXT: jr $ra +entry: + %and = and i64 %b, %a + ret i64 %and +} + +define i64 @or(i64 %a, i64 %b) { +; CHECK-LABEL: or: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: or $r4, $r5, $r4 +; CHECK-NEXT: jr $ra +entry: + %or = or i64 %b, %a + ret i64 %or +} + +define i64 @xor(i64 %a, i64 %b) { +; CHECK-LABEL: xor: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xor $r4, $r5, $r4 +; CHECK-NEXT: jr $ra +entry: + %xor = xor i64 %b, %a + ret i64 %xor +} + +define i64 @nor(i64 %a, i64 %b) { +; CHECK-LABEL: nor: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: nor $r4, $r5, $r4 +; CHECK-NEXT: jr $ra +entry: + %or = or i64 %b, %a + %not = xor i64 %or, -1 + ret i64 %not +} + +define i64 @andn(i64 %a, i64 %b) { +; CHECK-LABEL: andn: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: andn $r4, $r4, $r5 +; CHECK-NEXT: jr $ra +entry: + %not = xor i64 %b, -1 + %and = and i64 %not, %a + ret i64 %and +} + +define signext i32 @andn32(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: andn32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: andn $r4, $r4, $r5 +; CHECK-NEXT: jr $ra +entry: + %not = xor i32 %b, -1 + %and = and i32 %not, %a + ret i32 %and +} + +define i64 @orn(i64 %a, i64 %b) { +; CHECK-LABEL: orn: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: orn $r4, $r4, $r5 +; CHECK-NEXT: jr $ra +entry: + %not = xor i64 %b, -1 + %or = or i64 %not, %a + ret i64 %or +} + +define signext i32 @orn32(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: orn32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: orn $r4, $r4, $r5 +; CHECK-NEXT: jr $ra +entry: + %not = xor i32 %b, -1 + %or = or i32 %not, %a + ret i32 %or +} + +define signext i32 @and32(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: and32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: and $r4, $r5, $r4 +; CHECK-NEXT: jr $ra +entry: + %and = and i32 %b, %a + ret i32 %and +} + +define signext i32 @or32(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: or32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: or $r4, $r5, $r4 +; CHECK-NEXT: jr $ra +entry: + %or = or i32 %b, %a + ret i32 %or +} + +define signext i32 @xor32(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: xor32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xor $r4, $r5, $r4 +; CHECK-NEXT: jr $ra +entry: + %xor = xor i32 %b, %a + ret i32 %xor +} + +define signext i32 @nor32(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: nor32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: nor $r4, $r4, $r5 +; CHECK-NEXT: jr $ra +entry: + %or = or i32 %b, %a + %not = xor i32 %or, -1 + ret i32 %not +} + +define signext i32 @not32(i32 signext %a) { +; CHECK-LABEL: not32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: nor $r4, $zero, $r4 +; CHECK-NEXT: jr $ra +entry: + %not = xor i32 %a, -1 + ret i32 %not +} + diff --git a/test/CodeGen/LoongArch/lshr.ll b/test/CodeGen/LoongArch/lshr.ll new file mode 100644 index 00000000..54e4a5f2 --- /dev/null +++ b/test/CodeGen/LoongArch/lshr.ll @@ -0,0 +1,12 @@ +; RUN: llc -march=loongarch64 < %s | FileCheck %s + +define signext i32 @foo(i32 %a) { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli.w $r4, $r4, 0 +; CHECK-NEXT: bstrpick.d $r4, $r4, 31, 1 +; CHECK-NEXT: jr $ra +entry: + %b = lshr i32 %a, 1 
+ ret i32 %b +} diff --git a/test/CodeGen/LoongArch/lsx/imm_vector_lsx.ll b/test/CodeGen/LoongArch/lsx/imm_vector_lsx.ll new file mode 100644 index 00000000..6474cd7e --- /dev/null +++ b/test/CodeGen/LoongArch/lsx/imm_vector_lsx.ll @@ -0,0 +1,176 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 -mattr=+lsx -mattr=+d < %s | FileCheck %s + +define <2 x i64> @build_lsx0(<2 x i64> %a) { +; CHECK-LABEL: build_lsx0: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -1 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: vreplgr2vr.d $vr1, $r4 +; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 +; CHECK-NEXT: jr $ra + %b = add <2 x i64> %a, + ret <2 x i64> %b +} + +define <2 x i64> @build_lsx1(<2 x i64> %a) { +; CHECK-LABEL: build_lsx1: +; CHECK: # %bb.0: +; CHECK-NEXT: lu52i.d $r4, $zero, 2047 +; CHECK-NEXT: vreplgr2vr.d $vr1, $r4 +; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 +; CHECK-NEXT: jr $ra + %b = add <2 x i64> %a, + ret <2 x i64> %b +} + +define <2 x i64> @build_lsx2(<2 x i64> %a) { +; CHECK-LABEL: build_lsx2: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2048 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: vreplgr2vr.d $vr1, $r4 +; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 +; CHECK-NEXT: jr $ra + %b = add <2 x i64> %a, + ret <2 x i64> %b +} + +define <2 x i64> @build_lsx3(<2 x i64> %a) { +; CHECK-LABEL: build_lsx3: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 4095 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: vreplgr2vr.d $vr1, $r4 +; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 +; CHECK-NEXT: jr $ra + %b = add <2 x i64> %a, + ret <2 x i64> %b +} + +define <2 x i64> @build_lsx4(<2 x i64> %a) { +; CHECK-LABEL: build_lsx4: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 4095 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: vreplgr2vr.d $vr1, $r4 +; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 +; CHECK-NEXT: jr $ra + %b = add <2 x i64> %a, + ret <2 x i64> %b +} + +define <2 x i64> @build_lsx5(<2 x i64> %a) { +; CHECK-LABEL: build_lsx5: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -2048 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: vreplgr2vr.d $vr1, $r4 +; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 +; CHECK-NEXT: jr $ra + %b = add <2 x i64> %a, + ret <2 x i64> %b +} + +define <2 x i64> @build_lsx6(<2 x i64> %a) { +; CHECK-LABEL: build_lsx6: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -2048 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: vreplgr2vr.d $vr1, $r4 +; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 +; CHECK-NEXT: jr $ra + %b = add <2 x i64> %a, + ret <2 x i64> %b +} + +define <2 x i64> @build_lsx7(<2 x i64> %a) { +; CHECK-LABEL: build_lsx7: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -2048 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: vreplgr2vr.d $vr1, $r4 +; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 +; CHECK-NEXT: jr $ra + %b = add <2 x i64> %a, + ret <2 x i64> %b +} + +define <2 x i64> @build_lsx8(<2 x i64> %a) { +; CHECK-LABEL: build_lsx8: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: vreplgr2vr.d $vr1, $r4 +; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 +; CHECK-NEXT: jr $ra + %b = add <2 x i64> %a, + ret <2 x i64> %b +} + +define <2 x i64> @build_lsx9(<2 x i64> %a) { +; CHECK-LABEL: build_lsx9: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: vreplgr2vr.d $vr1, $r4 +; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 +; CHECK-NEXT: jr $ra + %b = add <2 x i64> %a, + ret <2 x i64> %b 
+} + +define <2 x i64> @build_lsx10(<2 x i64> %a) { +; CHECK-LABEL: build_lsx10: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: vreplgr2vr.d $vr1, $r4 +; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 +; CHECK-NEXT: jr $ra + %b = add <2 x i64> %a, + ret <2 x i64> %b +} + +define <2 x i64> @build_lsx11(<2 x i64> %a) { +; CHECK-LABEL: build_lsx11: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: vreplgr2vr.d $vr1, $r4 +; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 +; CHECK-NEXT: jr $ra + %b = add <2 x i64> %a, + ret <2 x i64> %b +} + +define <2 x i64> @build_lsx12(<2 x i64> %a) { +; CHECK-LABEL: build_lsx12: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: vreplgr2vr.d $vr1, $r4 +; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 +; CHECK-NEXT: jr $ra + %b = add <2 x i64> %a, + ret <2 x i64> %b +} + +define <2 x i64> @build_lsx13(<2 x i64> %a) { +; CHECK-LABEL: build_lsx13: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: vreplgr2vr.d $vr1, $r4 +; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 +; CHECK-NEXT: jr $ra + %b = add <2 x i64> %a, + ret <2 x i64> %b +} diff --git a/test/CodeGen/LoongArch/lsx/inline-asm.ll b/test/CodeGen/LoongArch/lsx/inline-asm.ll new file mode 100644 index 00000000..dbc4b6ab --- /dev/null +++ b/test/CodeGen/LoongArch/lsx/inline-asm.ll @@ -0,0 +1,34 @@ +; A basic inline assembly test + +; RUN: llc -march=loongarch64 -mattr=+lsx -mattr=+d < %s | FileCheck %s + +@v2i64_r = global <2 x i64> zeroinitializer, align 16 + +define void @test1() nounwind { +entry: + ; CHECK-LABEL: test1: + %0 = call <2 x i64> asm "vldi ${0:w}, 1", "=f"() + ; CHECK: vldi $vr{{[1-3]?[0-9]}}, 1 + store <2 x i64> %0, <2 x i64>* @v2i64_r + ret void +} + +define void @test2() nounwind { +entry: + ; CHECK-LABEL: test2: + %0 = load <2 x i64>, <2 x i64>* @v2i64_r + %1 = call <2 x i64> asm "vaddi.wu ${0:w}, ${1:w}, 1", "=f,f"(<2 x i64> %0) + ; CHECK: vaddi.wu $vr{{[1-3]?[0-9]}}, $vr{{[1-3]?[0-9]}}, 1 + store <2 x i64> %1, <2 x i64>* @v2i64_r + ret void +} + +define void @test3() nounwind { +entry: + ; CHECK-LABEL: test3: + %0 = load <2 x i64>, <2 x i64>* @v2i64_r + %1 = call <2 x i64> asm sideeffect "vaddi.wu ${0:w}, ${1:w}, 1", "=f,f,~{$vr0}"(<2 x i64> %0) + ; CHECK: vaddi.wu $vr{{([1-9]|[1-3][0-9])}}, $vr{{([1-9]|[1-3][0-9])}}, 1 + store <2 x i64> %1, <2 x i64>* @v2i64_r + ret void +} diff --git a/test/CodeGen/LoongArch/lsx/logic-lsx.ll b/test/CodeGen/LoongArch/lsx/logic-lsx.ll new file mode 100644 index 00000000..3c869ea8 --- /dev/null +++ b/test/CodeGen/LoongArch/lsx/logic-lsx.ll @@ -0,0 +1,132 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 -mattr=+lsx -mattr=+d < %s | FileCheck %s + +define <2 x i64> @not_v2i64(<2 x i64> %a) { +; CHECK-LABEL: not_v2i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vnor.v $vr0, $vr0, $vr0 +; CHECK-NEXT: jr $ra +entry: + %not = xor <2 x i64> %a, + ret <2 x i64> %not +} + +define <4 x i32> @not_v4i32(<4 x i32> %a) { +; CHECK-LABEL: not_v4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vnor.v $vr0, $vr0, $vr0 +; CHECK-NEXT: jr $ra +entry: + %not = xor <4 x i32> %a, + ret <4 x i32> %not +} + +define <8 x i16> @not_v8i16(<8 x i16> %a) { +; CHECK-LABEL: 
not_v8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vnor.v $vr0, $vr0, $vr0 +; CHECK-NEXT: jr $ra +entry: + %not = xor <8 x i16> %a, + ret <8 x i16> %not +} + +define <16 x i8> @not_v16i8(<16 x i8> %a) { +; CHECK-LABEL: not_v16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vxori.b $vr0, $vr0, 255 +; CHECK-NEXT: jr $ra +entry: + %not = xor <16 x i8> %a, + ret <16 x i8> %not +} + + +define <2 x i64> @andn_v2i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: andn_v2i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vandn.v $vr0, $vr1, $vr0 +; CHECK-NEXT: jr $ra +entry: + %not = xor <2 x i64> %b, + %and = and <2 x i64> %not, %a + ret <2 x i64> %and +} + +define <4 x i32> @andn_v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: andn_v4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vandn.v $vr0, $vr1, $vr0 +; CHECK-NEXT: jr $ra +entry: + %not = xor <4 x i32> %b, + %and = and <4 x i32> %not, %a + ret <4 x i32> %and +} + +define <8 x i16> @andn_v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: andn_v8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vandn.v $vr0, $vr1, $vr0 +; CHECK-NEXT: jr $ra +entry: + %not = xor <8 x i16> %b, + %and = and <8 x i16> %not, %a + ret <8 x i16> %and +} + +define <16 x i8> @andn_v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: andn_v16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vandn.v $vr0, $vr1, $vr0 +; CHECK-NEXT: jr $ra +entry: + %not = xor <16 x i8> %b, + %and = and <16 x i8> %not, %a + ret <16 x i8> %and +} + + +define <2 x i64> @orn_v2i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: orn_v2i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vorn.v $vr0, $vr0, $vr1 +; CHECK-NEXT: jr $ra +entry: + %not = xor <2 x i64> %b, + %or = or <2 x i64> %not, %a + ret <2 x i64> %or +} + +define <4 x i32> @orn_v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: orn_v4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vorn.v $vr0, $vr0, $vr1 +; CHECK-NEXT: jr $ra +entry: + %not = xor <4 x i32> %b, + %or = or <4 x i32> %not, %a + ret <4 x i32> %or +} + +define <8 x i16> @orn_v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: orn_v8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vorn.v $vr0, $vr0, $vr1 +; CHECK-NEXT: jr $ra +entry: + %not = xor <8 x i16> %b, + %or = or <8 x i16> %not, %a + ret <8 x i16> %or +} + +define <16 x i8> @orn_v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: orn_v16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vorn.v $vr0, $vr0, $vr1 +; CHECK-NEXT: jr $ra +entry: + %not = xor <16 x i8> %b, + %or = or <16 x i8> %not, %a + ret <16 x i8> %or +} diff --git a/test/CodeGen/LoongArch/lsx/lsxvclr.ll b/test/CodeGen/LoongArch/lsx/lsxvclr.ll new file mode 100644 index 00000000..5b345b1d --- /dev/null +++ b/test/CodeGen/LoongArch/lsx/lsxvclr.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 -mattr=+lsx -mattr=+d < %s | FileCheck %s + +define <16 x i8> @clri8(<16 x i8> %b, <16 x i8> %c) { +; CHECK-LABEL: clri8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitclr.b $vr0, $vr0, $vr1 +; CHECK-NEXT: jr $ra +entry: + %shl = shl <16 x i8> , %c + %xor = xor <16 x i8> %shl, + %and = and <16 x i8> %xor, %b + ret <16 x i8> %and +} + +define <8 x i16> @clri16(<8 x i16> %b, <8 x i16> %c) { +; CHECK-LABEL: clri16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitclr.h $vr0, $vr0, $vr1 +; CHECK-NEXT: jr $ra +entry: + %shl = shl <8 x i16> , %c + %xor = xor <8 x i16> %shl, + %and = and <8 x i16> %xor, %b + ret <8 x i16> %and +} + +define <4 x i32> @clri32(<4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: 
clri32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitclr.w $vr0, $vr0, $vr1 +; CHECK-NEXT: jr $ra +entry: + %shl = shl <4 x i32> , %c + %xor = xor <4 x i32> %shl, + %and = and <4 x i32> %xor, %b + ret <4 x i32> %and +} + +define <2 x i64> @clri64(<2 x i64> %b, <2 x i64> %c) { +; CHECK-LABEL: clri64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitclr.d $vr0, $vr0, $vr1 +; CHECK-NEXT: jr $ra +entry: + %shl = shl <2 x i64> , %c + %xor = xor <2 x i64> %shl, + %and = and <2 x i64> %xor, %b + ret <2 x i64> %and +} diff --git a/test/CodeGen/LoongArch/lsx/set-lsx.ll b/test/CodeGen/LoongArch/lsx/set-lsx.ll new file mode 100644 index 00000000..e5e307a8 --- /dev/null +++ b/test/CodeGen/LoongArch/lsx/set-lsx.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 -mattr=+lsx -mattr=+d < %s | FileCheck %s + +define <16 x i8> @seti8(<16 x i8>) { +; CHECK-LABEL: seti8: +; CHECK: # %bb.0: +; CHECK-NEXT: vbitseti.b $vr0, $vr0, 6 +; CHECK-NEXT: jr $ra + %2 = or <16 x i8> %0, + ret <16 x i8> %2 +} + +define <8 x i16> @seti16(<8 x i16>) { +; CHECK-LABEL: seti16: +; CHECK: # %bb.0: +; CHECK-NEXT: vbitseti.h $vr0, $vr0, 6 +; CHECK-NEXT: jr $ra + %2 = or <8 x i16> %0, + ret <8 x i16> %2 +} + +define <4 x i32> @seti32(<4 x i32>) { +; CHECK-LABEL: seti32: +; CHECK: # %bb.0: +; CHECK-NEXT: vbitseti.w $vr0, $vr0, 6 +; CHECK-NEXT: jr $ra + %2 = or <4 x i32> %0, + ret <4 x i32> %2 +} + +define <2 x i64> @seti64(<2 x i64>) { +; CHECK-LABEL: seti64: +; CHECK: # %bb.0: +; CHECK-NEXT: vbitseti.d $vr0, $vr0, 6 +; CHECK-NEXT: jr $ra + %2 = or <2 x i64> %0, + ret <2 x i64> %2 +} diff --git a/test/CodeGen/LoongArch/lsx/vadda.ll b/test/CodeGen/LoongArch/lsx/vadda.ll new file mode 100644 index 00000000..d4270419 --- /dev/null +++ b/test/CodeGen/LoongArch/lsx/vadda.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 -mattr=+lsx -mattr=+d < %s | FileCheck %s + +define <16 x i8> @vaddab(<16 x i8>, <16 x i8>) { +; CHECK-LABEL: vaddab: +; CHECK: # %bb.0: +; CHECK-NEXT: vadda.b $vr0, $vr0, $vr1 +; CHECK-NEXT: jr $ra + %3 = icmp slt <16 x i8> %0, zeroinitializer + %4 = sub <16 x i8> zeroinitializer, %0 + %5 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> %0 + %6 = icmp slt <16 x i8> %1, zeroinitializer + %7 = sub <16 x i8> zeroinitializer, %1 + %8 = select <16 x i1> %6, <16 x i8> %7, <16 x i8> %1 + %9 = add <16 x i8> %5, %8 + ret <16 x i8> %9 +} + +define <8 x i16> @vaddah(<8 x i16>, <8 x i16>) { +; CHECK-LABEL: vaddah: +; CHECK: # %bb.0: +; CHECK-NEXT: vadda.h $vr0, $vr0, $vr1 +; CHECK-NEXT: jr $ra + %3 = icmp slt <8 x i16> %0, zeroinitializer + %4 = sub <8 x i16> zeroinitializer, %0 + %5 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> %0 + %6 = icmp slt <8 x i16> %1, zeroinitializer + %7 = sub <8 x i16> zeroinitializer, %1 + %8 = select <8 x i1> %6, <8 x i16> %7, <8 x i16> %1 + %9 = add <8 x i16> %5, %8 + ret <8 x i16> %9 +} + +define <4 x i32> @vaddaw(<4 x i32>, <4 x i32>) { +; CHECK-LABEL: vaddaw: +; CHECK: # %bb.0: +; CHECK-NEXT: vadda.w $vr0, $vr0, $vr1 +; CHECK-NEXT: jr $ra + %3 = icmp slt <4 x i32> %0, zeroinitializer + %4 = sub nsw <4 x i32> zeroinitializer, %0 + %5 = select <4 x i1> %3, <4 x i32> %4, <4 x i32> %0 + %6 = icmp slt <4 x i32> %1, zeroinitializer + %7 = sub nsw <4 x i32> zeroinitializer, %1 + %8 = select <4 x i1> %6, <4 x i32> %7, <4 x i32> %1 + %9 = add nuw nsw <4 x i32> %5, %8 + ret <4 x i32> %9 +} + +define <2 x i64> @vaddad(<2 x i64>, <2 x i64>) { +; 
CHECK-LABEL: vaddad: +; CHECK: # %bb.0: +; CHECK-NEXT: vadda.d $vr0, $vr0, $vr1 +; CHECK-NEXT: jr $ra + %3 = icmp slt <2 x i64> %0, zeroinitializer + %4 = sub nsw <2 x i64> zeroinitializer, %0 + %5 = select <2 x i1> %3, <2 x i64> %4, <2 x i64> %0 + %6 = icmp slt <2 x i64> %1, zeroinitializer + %7 = sub nsw <2 x i64> zeroinitializer, %1 + %8 = select <2 x i1> %6, <2 x i64> %7, <2 x i64> %1 + %9 = add nuw nsw <2 x i64> %5, %8 + ret <2 x i64> %9 +} diff --git a/test/CodeGen/LoongArch/lu12i.ll b/test/CodeGen/LoongArch/lu12i.ll new file mode 100644 index 00000000..55fd40ed --- /dev/null +++ b/test/CodeGen/LoongArch/lu12i.ll @@ -0,0 +1,7 @@ +; RUN: llc -march=loongarch64 -o - %s | FileCheck %s + +define i32 @foo() { +; CHECK: lu12i.w $r4, -1 +entry: + ret i32 -4096 +} diff --git a/test/CodeGen/LoongArch/named-register.ll b/test/CodeGen/LoongArch/named-register.ll new file mode 100644 index 00000000..0b0660fc --- /dev/null +++ b/test/CodeGen/LoongArch/named-register.ll @@ -0,0 +1,29 @@ +; RUN: llc -march=loongarch64 < %s | FileCheck %s + +define i64 @get_r2() { +; CHECK-LABEL: get_r2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: move $r4, $tp +; CHECK-NEXT: jr $ra +entry: + %0 = call i64 @llvm.read_register.i64(metadata !0) + ret i64 %0 +} + +define i64 @get_r21() { +; CHECK-LABEL: get_r21: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: move $r4, $r21 +; CHECK-NEXT: jr $ra +entry: + %0 = call i64 @llvm.read_register.i64(metadata !1) + ret i64 %0 +} + +declare i64 @llvm.read_register.i64(metadata) + +!llvm.named.register.$r2 = !{!0} +!llvm.named.register.$r21 = !{!1} + +!0 = !{!"$r2"} +!1 = !{!"$r21"} diff --git a/test/CodeGen/LoongArch/nomerge.ll b/test/CodeGen/LoongArch/nomerge.ll new file mode 100644 index 00000000..a8ce6322 --- /dev/null +++ b/test/CodeGen/LoongArch/nomerge.ll @@ -0,0 +1,35 @@ +; RUN: llc < %s -mtriple=loongarch64 -relocation-model=pic -o - | FileCheck %s + +define void @foo(i32 %i) { +entry: + switch i32 %i, label %if.end3 [ + i32 5, label %if.then + i32 7, label %if.then2 + ] + +if.then: + tail call void @bar() #0 + br label %if.end3 + +if.then2: + tail call void @bar() #0 + br label %if.end3 + +if.end3: + tail call void @bar() #0 + ret void +} + +declare void @bar() + +attributes #0 = { nomerge } + +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK: .LBB0_1: # %entry +; CHECK: .LBB0_2: # %if.then +; CHECK-NEXT: bl bar +; CHECK: .LBB0_3: # %if.then2 +; CHECK-NEXT: bl bar +; CHECK: .LBB0_4: # %if.end3 +; CHECK: b bar diff --git a/test/CodeGen/LoongArch/noti32.ll b/test/CodeGen/LoongArch/noti32.ll new file mode 100644 index 00000000..42e4bd53 --- /dev/null +++ b/test/CodeGen/LoongArch/noti32.ll @@ -0,0 +1,119 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 < %s | FileCheck %s + +define i8 @nor_i8(i8 %a, i8 %b) nounwind { +; CHECK-LABEL: nor_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: nor $r4, $r4, $r5 +; CHECK-NEXT: jr $ra + %or = or i8 %a, %b + %neg = xor i8 %or, -1 + ret i8 %neg +} + +define i16 @nor_i16(i16 %a, i16 %b) nounwind { +; CHECK-LABEL: nor_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: nor $r4, $r4, $r5 +; CHECK-NEXT: jr $ra + %or = or i16 %a, %b + %neg = xor i16 %or, -1 + ret i16 %neg +} + +define i32 @nor_i32(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: nor_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: nor $r4, $r4, $r5 +; CHECK-NEXT: jr $ra + %or = or i32 %a, %b + %neg = xor i32 %or, -1 + ret i32 %neg +} + +define i8 @nor_zero_i8(i8 %a) nounwind { +; CHECK-LABEL: nor_zero_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: nor $r4, 
$zero, $r4 +; CHECK-NEXT: jr $ra + %neg = xor i8 %a, -1 + ret i8 %neg +} + +define i16 @nor_zero_i16(i16 %a) nounwind { +; CHECK-LABEL: nor_zero_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: nor $r4, $zero, $r4 +; CHECK-NEXT: jr $ra + %neg = xor i16 %a, -1 + ret i16 %neg +} + +define i32 @nor_zero_i32(i32 %a) nounwind { +; CHECK-LABEL: nor_zero_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: nor $r4, $zero, $r4 +; CHECK-NEXT: jr $ra + %neg = xor i32 %a, -1 + ret i32 %neg +} + +define i8 @orn_i8(i8 %a, i8 %b) nounwind { +; CHECK-LABEL: orn_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: orn $r4, $r4, $r5 +; CHECK-NEXT: jr $ra + %neg = xor i8 %b, -1 + %or = or i8 %neg, %a + ret i8 %or +} + +define i16 @orn_i16(i16 %a, i16 %b) nounwind { +; CHECK-LABEL: orn_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: orn $r4, $r4, $r5 +; CHECK-NEXT: jr $ra + %neg = xor i16 %b, -1 + %or = or i16 %neg, %a + ret i16 %or +} + +define i32 @orn_i32(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: orn_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: orn $r4, $r4, $r5 +; CHECK-NEXT: jr $ra + %neg = xor i32 %b, -1 + %or = or i32 %neg, %a + ret i32 %or +} + +define i8 @andn_i8(i8 %a, i8 %b) nounwind { +; CHECK-LABEL: andn_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: andn $r4, $r4, $r5 +; CHECK-NEXT: jr $ra + %neg = xor i8 %b, -1 + %and = and i8 %neg, %a + ret i8 %and +} + +define i16 @andn_i16(i16 %a, i16 %b) nounwind { +; CHECK-LABEL: andn_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: andn $r4, $r4, $r5 +; CHECK-NEXT: jr $ra + %neg = xor i16 %b, -1 + %and = and i16 %neg, %a + ret i16 %and +} + +define i32 @andn_i32(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: andn_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: andn $r4, $r4, $r5 +; CHECK-NEXT: jr $ra + %neg = xor i32 %b, -1 + %and = and i32 %neg, %a + ret i32 %and +} diff --git a/test/CodeGen/LoongArch/peephole-load-store-addi.ll b/test/CodeGen/LoongArch/peephole-load-store-addi.ll new file mode 100644 index 00000000..b65838db --- /dev/null +++ b/test/CodeGen/LoongArch/peephole-load-store-addi.ll @@ -0,0 +1,101 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s + +define i8 @load_i8() nounwind { +; CHECK-LABEL: load_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.bu $r4, $zero, 40 +; CHECK-NEXT: jr $ra + %a = load i8, i8* inttoptr (i64 40 to i8*), align 8 + ret i8 %a +} +define signext i8 @load_i8_sext() nounwind { +; CHECK-LABEL: load_i8_sext: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.b $r4, $zero, 40 +; CHECK-NEXT: jr $ra + %a = load i8, i8* inttoptr (i64 40 to i8*), align 8 + ret i8 %a +} + +define i16 @load_i16() nounwind { +; CHECK-LABEL: load_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.hu $r4, $zero, 40 +; CHECK-NEXT: jr $ra + %a = load i16, i16* inttoptr (i64 40 to i16*), align 8 + ret i16 %a +} + +define signext i16 @load_i16_sext() nounwind { +; CHECK-LABEL: load_i16_sext: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.h $r4, $zero, 40 +; CHECK-NEXT: jr $ra + %a = load i16, i16* inttoptr (i64 40 to i16*), align 8 + ret i16 %a +} + +define i32 @load_i32() nounwind { +; CHECK-LABEL: load_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.w $r4, $zero, 40 +; CHECK-NEXT: jr $ra + %a = load i32, i32* inttoptr (i64 40 to i32*), align 8 + ret i32 %a +} + +define signext i32 @load_i32_sext() nounwind { +; CHECK-LABEL: load_i32_sext: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.w $r4, $zero, 40 +; CHECK-NEXT: jr $ra + %a = load i32, i32* inttoptr (i64 40 to i32*), align 8 + ret i32 %a +} + +define i64 @load_i64() nounwind { +; CHECK-LABEL: load_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.d $r4, 
$zero, 40 +; CHECK-NEXT: jr $ra + %a = load i64, i64* inttoptr (i64 40 to i64*), align 8 + ret i64 %a +} + +define void @store_i8(i8 %v) nounwind { +; CHECK-LABEL: store_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: st.b $r4, $zero, 40 +; CHECK-NEXT: jr $ra + store i8 %v, i8* inttoptr (i64 40 to i8*), align 8 + ret void +} + +define void @store_i16(i16 %v) nounwind { +; CHECK-LABEL: store_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: st.h $r4, $zero, 40 +; CHECK-NEXT: jr $ra + store i16 %v, i16* inttoptr (i64 40 to i16*), align 8 + ret void +} + +define void @store_i32(i32 %v) nounwind { +; CHECK-LABEL: store_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r4, $r4, 0 +; CHECK-NEXT: st.w $r4, $zero, 40 +; CHECK-NEXT: jr $ra + store i32 %v, i32* inttoptr (i64 40 to i32*), align 8 + ret void +} + +define void @store_i64(i64 %v) nounwind { +; CHECK-LABEL: store_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: st.d $r4, $zero, 40 +; CHECK-NEXT: jr $ra + store i64 %v, i64* inttoptr (i64 40 to i64*), align 8 + ret void +} diff --git a/test/CodeGen/LoongArch/signext.ll b/test/CodeGen/LoongArch/signext.ll new file mode 100644 index 00000000..13c710d1 --- /dev/null +++ b/test/CodeGen/LoongArch/signext.ll @@ -0,0 +1,37 @@ +; RUN: llc -march=loongarch64 < %s | FileCheck %s + +define i32 @foo(i32 signext %a) { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r4, $r4, 0 +; CHECK-NEXT: jr $ra + ret i32 %a +} + +define signext i32 @foo1() { +; CHECK-LABEL: foo1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ori $r4, $zero, 0 +; CHECK-NEXT: ori $r5, $zero, 896 +; CHECK-NEXT: move $r6, $r4 +; CHECK-NEXT: .LBB1_1: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: add.w $r4, $r4, $r6 +; CHECK-NEXT: addi.w $r6, $r6, 1 +; CHECK-NEXT: bne $r6, $r5, .LBB1_1 +; CHECK-NEXT: # %bb.2: # %for.end +; CHECK-NEXT: jr $ra +entry: + br label %for.body + +for.body: + %sum.013 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %i.010 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %add = add i32 %sum.013, %i.010 + %inc = add nuw nsw i32 %i.010, 1 + %exitcond = icmp eq i32 %inc, 896 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret i32 %add +} diff --git a/test/CodeGen/LoongArch/stptr.ll b/test/CodeGen/LoongArch/stptr.ll new file mode 100644 index 00000000..0a54e0f8 --- /dev/null +++ b/test/CodeGen/LoongArch/stptr.ll @@ -0,0 +1,52 @@ +; Check whether st.w/st.d/stptr.w/stptr.d/stx.w/stx.d instructions are properly generated +; RUN: llc -march=loongarch64 -o - %s | FileCheck %s + +define void @st_w(i32* %p, i32 signext %val) { +; CHECK: st.w $r5, $r4, 2044 +; CHECK: jr $ra + %addr = getelementptr inbounds i32, i32* %p, i64 511 + store i32 %val, i32* %addr, align 4 + ret void +} + +define void @stptr_w(i32* %p, i32 signext %val) { +; CHECK: stptr.w $r5, $r4, 2048 +; CHECK: jr $ra + %addr = getelementptr inbounds i32, i32* %p, i64 512 + store i32 %val, i32* %addr, align 4 + ret void +} + +define void @stx_w(i32* %p, i32 signext %val) { +; CHECK: lu12i.w $r[[REG:[0-9]+]], 8 +; CHECK: stx.w $r5, $r4, $r[[REG:[0-9]+]] +; CHECK: jr $ra + %addr = getelementptr inbounds i32, i32* %p, i64 8192 + store i32 %val, i32* %addr, align 4 + ret void +} + +define void @st_d(i64* %p, i64 %val) { +; CHECK: st.d $r5, $r4, 2040 +; CHECK: jr $ra + %addr = getelementptr inbounds i64, i64* %p, i64 255 + store i64 %val, i64* %addr, align 8 + ret void +} + +define void @stptr_d(i64* %p, i64 %val) { +; CHECK: stptr.d $r5, $r4, 2048 +; CHECK: jr $ra + %addr = getelementptr inbounds i64, i64* %p, i64 256 + store i64 %val, i64* 
%addr, align 8 + ret void +} + +define void @stx_d(i64* %p, i64 %val) { +; CHECK: lu12i.w $r[[REG:[0-9]+]], 8 +; CHECK: stx.d $r5, $r4, $r[[REG:[0-9]+]] +; CHECK: jr $ra + %addr = getelementptr inbounds i64, i64* %p, i64 4096 + store i64 %val, i64* %addr, align 8 + ret void +} diff --git a/test/CodeGen/LoongArch/tailcall-R.ll b/test/CodeGen/LoongArch/tailcall-R.ll new file mode 100644 index 00000000..2445e32e --- /dev/null +++ b/test/CodeGen/LoongArch/tailcall-R.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 -relocation-model=pic < %s | FileCheck %s + +@errors = external local_unnamed_addr global i32, align 4 + +define signext i32 @compare(i8* %x, i8* %y) { +; CHECK-LABEL: compare: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -32 +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r23, $sp, 16 # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset 1, -8 +; CHECK-NEXT: .cfi_offset 23, -16 +; CHECK-NEXT: ld.w $r23, $r5, 0 +; CHECK-NEXT: ld.d $r6, $r4, 8 +; CHECK-NEXT: beqz $r23, .LBB0_3 +; CHECK-NEXT: # %bb.1: # %land.lhs.true +; CHECK-NEXT: ld.w $r4, $r4, 0 +; CHECK-NEXT: st.d $r6, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: ld.d $r5, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: jirl $ra, $r5, 0 +; CHECK-NEXT: ld.d $r6, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: beqz $r4, .LBB0_3 +; CHECK-NEXT: # %bb.2: # %if.then +; CHECK-NEXT: la.got $r4, errors +; CHECK-NEXT: # la expanded slot +; CHECK-NEXT: ld.w $r5, $r4, 0 +; CHECK-NEXT: addi.w $r5, $r5, 1 +; CHECK-NEXT: st.w $r5, $r4, 0 +; CHECK-NEXT: .LBB0_3: # %if.end +; CHECK-NEXT: move $r4, $r23 +; CHECK-NEXT: ld.d $r23, $sp, 16 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 32 +; CHECK-NEXT: jr $r6 +entry: + %compare = getelementptr inbounds i8, i8* %x, i64 8 + %0 = bitcast i8* %compare to i32 (i32)** + %1 = load i32 (i32)*, i32 (i32)** %0, align 8 + %elt = bitcast i8* %y to i32* + %2 = load i32, i32* %elt, align 8 + %cmp = icmp eq i32 %2, 0 + br i1 %cmp, label %if.end, label %land.lhs.true + +land.lhs.true: ; preds = %entry + %elt3 = bitcast i8* %x to i32* + %3 = load i32, i32* %elt3, align 8 + %call4 = tail call signext i32 %1(i32 signext %3) + %cmp5 = icmp eq i32 %call4, 0 + br i1 %cmp5, label %if.end, label %if.then + +if.then: ; preds = %land.lhs.true + %4 = load i32, i32* @errors, align 4 + %inc = add nsw i32 %4, 1 + store i32 %inc, i32* @errors, align 4 + br label %if.end + +if.end: ; preds = %if.then, %land.lhs.true, %entry + %call6 = tail call signext i32 %1(i32 signext %2) + ret i32 %call6 +} diff --git a/test/CodeGen/LoongArch/tailcall-check.ll b/test/CodeGen/LoongArch/tailcall-check.ll new file mode 100644 index 00000000..2b5902d6 --- /dev/null +++ b/test/CodeGen/LoongArch/tailcall-check.ll @@ -0,0 +1,155 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 -relocation-model=pic < %s | FileCheck %s + +; Perform tail call optimization for global address. +declare i32 @callee_tail(i32 %i) +define i32 @caller_tail(i32 %i) { +; CHECK-LABEL: caller_tail: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: b callee_tail +entry: + %r = tail call i32 @callee_tail(i32 %i) + ret i32 %r +} + + +; Do not tail call optimize functions with varargs. +declare i32 @callee_varargs(i32, ...) 
+define void @caller_varargs(i32 %a, i32 %b) { +; CHECK-LABEL: caller_varargs: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset 1, -8 +; CHECK-NEXT: move $r6, $r5 +; CHECK-NEXT: move $r7, $r4 +; CHECK-NEXT: bl callee_varargs +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: jr $ra +entry: + %call = tail call i32 (i32, ...) @callee_varargs(i32 %a, i32 %b, i32 %b, i32 %a) + ret void +} + + +; Do not tail call optimize if stack is used to pass parameters. +declare i32 @callee_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n) +define i32 @caller_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n) { +; CHECK-LABEL: caller_args: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -64 +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset 1, -8 +; CHECK-NEXT: ld.d $r12, $sp, 64 +; CHECK-NEXT: ld.d $r13, $sp, 72 +; CHECK-NEXT: ld.d $r14, $sp, 80 +; CHECK-NEXT: ld.d $r15, $sp, 88 +; CHECK-NEXT: ld.d $r16, $sp, 96 +; CHECK-NEXT: ld.d $r17, $sp, 104 +; CHECK-NEXT: st.d $r17, $sp, 40 +; CHECK-NEXT: st.d $r16, $sp, 32 +; CHECK-NEXT: st.d $r15, $sp, 24 +; CHECK-NEXT: st.d $r14, $sp, 16 +; CHECK-NEXT: st.d $r13, $sp, 8 +; CHECK-NEXT: st.d $r12, $sp, 0 +; CHECK-NEXT: bl callee_args +; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 64 +; CHECK-NEXT: jr $ra +entry: + %r = tail call i32 @callee_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n) + ret i32 %r +} + + +; Do not tail call optimize for exception-handling functions. +declare void @callee_interrupt() +define void @caller_interrupt() #0 { +; CHECK-LABEL: caller_interrupt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset 1, -8 +; CHECK-NEXT: bl callee_interrupt +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: jr $ra +entry: + tail call void @callee_interrupt() + ret void +} +attributes #0 = { "interrupt"="machine" } + + +; Do not tail call optimize functions with byval parameters. +declare i32 @callee_byval(i32** byval(i32*) %a) +define i32 @caller_byval() { +; CHECK-LABEL: caller_byval: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -32 +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset 1, -8 +; CHECK-NEXT: ld.d $r4, $sp, 16 +; CHECK-NEXT: st.d $r4, $sp, 0 +; CHECK-NEXT: bl callee_byval +; CHECK-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 32 +; CHECK-NEXT: jr $ra +entry: + %a = alloca i32* + %r = tail call i32 @callee_byval(i32** byval(i32*) %a) + ret i32 %r +} + + +; Do not tail call optimize if callee uses structret semantics. 
+%struct.A = type { i32 } +@a = global %struct.A zeroinitializer + +declare void @callee_struct(%struct.A* sret(%struct.A) %a) +define void @caller_nostruct() { +; CHECK-LABEL: caller_nostruct: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset 1, -8 +; CHECK-NEXT: la.got $r4, a +; CHECK-NEXT: # la expanded slot +; CHECK-NEXT: bl callee_struct +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: jr $ra +entry: + tail call void @callee_struct(%struct.A* sret(%struct.A) @a) + ret void +} + + +; Do not tail call optimize if caller uses structret semantics. +declare void @callee_nostruct() +define void @caller_struct(%struct.A* sret(%struct.A) %a) { +; CHECK-LABEL: caller_struct: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r23, $sp, 0 # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset 1, -8 +; CHECK-NEXT: .cfi_offset 23, -16 +; CHECK-NEXT: move $r23, $r4 +; CHECK-NEXT: bl callee_nostruct +; CHECK-NEXT: move $r4, $r23 +; CHECK-NEXT: ld.d $r23, $sp, 0 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: jr $ra +entry: + tail call void @callee_nostruct() + ret void +} diff --git a/test/CodeGen/LoongArch/tailcall-mem.ll b/test/CodeGen/LoongArch/tailcall-mem.ll new file mode 100644 index 00000000..68ddaa89 --- /dev/null +++ b/test/CodeGen/LoongArch/tailcall-mem.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 -relocation-model=pic < %s | FileCheck %s + + +define void @tail_memcpy(i8* %p, i8* %q, i32 %n) { +; CHECK-LABEL: tail_memcpy: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: b memcpy +entry: + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i1 false) + ret void +} + +define void @tail_memmove(i8* %p, i8* %q, i32 %n) { +; CHECK-LABEL: tail_memmove: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: b memmove +entry: + tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i1 false) + ret void +} + +define void @tail_memset(i8* %p, i8 %c, i32 %n) { +; CHECK-LABEL: tail_memset: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: b memset +entry: + tail call void @llvm.memset.p0i8.i32(i8* %p, i8 %c, i32 %n, i1 false) + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) +declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) + diff --git a/test/CodeGen/LoongArch/tailcall.ll b/test/CodeGen/LoongArch/tailcall.ll new file mode 100644 index 00000000..984df2cb --- /dev/null +++ b/test/CodeGen/LoongArch/tailcall.ll @@ -0,0 +1,13 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 -relocation-model=pic < %s | FileCheck %s + +define void @f() { +; CHECK-LABEL: f: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: b foo +entry: + tail call void bitcast (void (...)* @foo to void ()*)() + ret void +} + +declare void @foo(...) 
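+; Note on maintenance: the tests in this directory that carry the
+; "Assertions have been autogenerated" header can have their CHECK lines
+; refreshed with the named script instead of being edited by hand. A minimal
+; sketch, assuming a build tree with the LoongArch target enabled and an llc
+; binary at build/bin/llc (both paths are illustrative):
+;
+;   utils/update_llc_test_checks.py --llc-binary build/bin/llc \
+;       test/CodeGen/LoongArch/tailcall.ll
+;
+; The script re-runs each RUN line and rewrites only the autogenerated CHECK
+; blocks, leaving hand-written comments such as the tail-call notes in
+; tailcall-check.ll intact.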
diff --git a/test/CodeGen/LoongArch/target-feature-double.ll b/test/CodeGen/LoongArch/target-feature-double.ll new file mode 100644 index 00000000..814d1307 --- /dev/null +++ b/test/CodeGen/LoongArch/target-feature-double.ll @@ -0,0 +1,31 @@ +; RUN: llc -march=loongarch64 -target-abi lp64d --relocation-model=pic < %s \ +; RUN: | FileCheck -check-prefix=ATTRN-F-DNF %s +; RUN: llc -march=loongarch64 -target-abi lp64d -mattr=+d < %s \ +; RUN: | FileCheck -check-prefix=ATTRD-DF-FD-NFD %s +; RUN: llc -march=loongarch64 -target-abi lp64d -mattr=+f --relocation-model=pic < %s \ +; RUN: | FileCheck -check-prefix=ATTRN-F-DNF %s +; RUN: llc -march=loongarch64 -target-abi lp64d -mattr=+d,+f < %s \ +; RUN: | FileCheck -check-prefix=ATTRD-DF-FD-NFD %s +; RUN: llc -march=loongarch64 -target-abi lp64d -mattr=+f,+d < %s \ +; RUN: | FileCheck -check-prefix=ATTRD-DF-FD-NFD %s +; RUN: llc -march=loongarch64 -target-abi lp64d -mattr=+d,-f --relocation-model=pic < %s \ +; RUN: | FileCheck -check-prefix=ATTRN-F-DNF %s +; RUN: llc -march=loongarch64 -target-abi lp64d -mattr=-f,+d < %s \ +; RUN: | FileCheck -check-prefix=ATTRD-DF-FD-NFD %s + +define double @test(double %a, double %b) { +; ATTRN-F-DNF-LABEL: test: +; ATTRN-F-DNF: # %bb.0: # %entry +; ATTRN-F-DNF: addi.d $sp, $sp, -16 +; ATTRN-F-DNF: bl __adddf3 +; ATTRN-F-DNF: addi.d $sp, $sp, 16 +; ATTRN-F-DNF: jr $ra +; +; ATTRD-DF-FD-NFD-LABEL: test: +; ATTRD-DF-FD-NFD: # %bb.0: # %entry +; ATTRD-DF-FD-NFD: fadd.d $f0, $f0, $f1 +; ATTRD-DF-FD-NFD: jr $ra +entry: + %add = fadd double %a, %b + ret double %add +} diff --git a/test/CodeGen/LoongArch/target-feature-float.ll b/test/CodeGen/LoongArch/target-feature-float.ll new file mode 100644 index 00000000..d6c2497b --- /dev/null +++ b/test/CodeGen/LoongArch/target-feature-float.ll @@ -0,0 +1,33 @@ +; RUN: llc -march=loongarch64 -target-abi lp64d --relocation-model=pic < %s \ +; RUN: | FileCheck -check-prefix=ATTRN-DNF %s +; RUN: llc -march=loongarch64 -target-abi lp64d -mattr=+d < %s \ +; RUN: | FileCheck -check-prefix=ATTRD-F-DF-FD-NFD %s +; RUN: llc -march=loongarch64 -target-abi lp64d -mattr=+f < %s \ +; RUN: | FileCheck -check-prefix=ATTRD-F-DF-FD-NFD %s +; RUN: llc -march=loongarch64 -target-abi lp64d -mattr=+d,+f < %s \ +; RUN: | FileCheck -check-prefix=ATTRD-F-DF-FD-NFD %s +; RUN: llc -march=loongarch64 -target-abi lp64d -mattr=+f,+d < %s \ +; RUN: | FileCheck -check-prefix=ATTRD-F-DF-FD-NFD %s +; RUN: llc -march=loongarch64 -target-abi lp64d -mattr=+d,-f --relocation-model=pic < %s \ +; RUN: | FileCheck -check-prefix=ATTRN-DNF %s +; RUN: llc -march=loongarch64 -target-abi lp64d -mattr=-f,+d < %s \ +; RUN: | FileCheck -check-prefix=ATTRD-F-DF-FD-NFD %s + +define float @test(float %a, float %b) { +; ATTRN-DNF-LABEL: test: +; ATTRN-DNF: # %bb.0: # %entry +; ATTRN-DNF: addi.d $sp, $sp, -16 +; ATTRN-DNF: st.d $ra, $sp, 8 # 8-byte Folded Spill +; ATTRN-DNF: bl __addsf3 +; ATTRN-DNF: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; ATTRN-DNF: addi.d $sp, $sp, 16 +; ATTRN-DNF: jr $ra +; +; ATTRD-F-DF-FD-NFD-LABEL: test: +; ATTRD-F-DF-FD-NFD: # %bb.0: # %entry +; ATTRD-F-DF-FD-NFD: fadd.s $f0, $f0, $f1 +; ATTRD-F-DF-FD-NFD: jr $ra +entry: + %add = fadd float %a, %b + ret float %add +} diff --git a/test/CodeGen/LoongArch/thread-pointer.ll b/test/CodeGen/LoongArch/thread-pointer.ll new file mode 100644 index 00000000..06a5886c --- /dev/null +++ b/test/CodeGen/LoongArch/thread-pointer.ll @@ -0,0 +1,9 @@ +; RUN: llc -march=loongarch64 < %s | FileCheck %s + +declare i8* @llvm.thread.pointer() nounwind readnone + +define i8* 
@thread_pointer() { +; CHECK: move $r4, $tp + %1 = tail call i8* @llvm.thread.pointer() + ret i8* %1 +} diff --git a/test/CodeGen/LoongArch/trunc.ll b/test/CodeGen/LoongArch/trunc.ll new file mode 100644 index 00000000..d1b5a3a1 --- /dev/null +++ b/test/CodeGen/LoongArch/trunc.ll @@ -0,0 +1,108 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 < %s | FileCheck %s + +define signext i32 @foo1(i64 %a, i64 %b) { +; CHECK-LABEL: foo1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: add.w $r4, $r5, $r4 +; CHECK-NEXT: jr $ra +entry: + %conv = trunc i64 %a to i32 + %conv1 = trunc i64 %b to i32 + %add = add nsw i32 %conv1, %conv + ret i32 %add +} + +define signext i32 @foo2(i64 %a, i64 %b) { +; CHECK-LABEL: foo2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sub.w $r4, $r4, $r5 +; CHECK-NEXT: jr $ra +entry: + %conv = trunc i64 %a to i32 + %conv1 = trunc i64 %b to i32 + %sub = sub nsw i32 %conv, %conv1 + ret i32 %sub +} + +define signext i32 @foo3(i64 %a, i64 %b) { +; CHECK-LABEL: foo3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sll.w $r4, $r4, $r5 +; CHECK-NEXT: jr $ra +entry: + %conv = trunc i64 %a to i32 + %conv1 = trunc i64 %b to i32 + %shl = shl i32 %conv, %conv1 + ret i32 %shl +} + +define signext i32 @foo4(i64 %a, i64 %b) { +; CHECK-LABEL: foo4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: srl.w $r4, $r4, $r5 +; CHECK-NEXT: jr $ra +entry: + %conv = trunc i64 %a to i32 + %conv1 = trunc i64 %b to i32 + %shr = lshr i32 %conv, %conv1 + ret i32 %shr +} + +define signext i32 @foo5(i64 %a, i64 %b) { +; CHECK-LABEL: foo5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mul.w $r4, $r5, $r4 +; CHECK-NEXT: jr $ra +entry: + %conv = trunc i64 %a to i32 + %conv1 = trunc i64 %b to i32 + %mul = mul nsw i32 %conv1, %conv + ret i32 %mul +} + +define signext i32 @foo6(i64 %a, i64 %b) { +; CHECK-LABEL: foo6: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sra.w $r4, $r4, $r5 +; CHECK-NEXT: jr $ra +entry: + %conv = trunc i64 %a to i32 + %conv1 = trunc i64 %b to i32 + %shr = ashr i32 %conv, %conv1 + ret i32 %shr +} + +define signext i32 @sdiv(i64 %a, i64 %b) { +; CHECK-LABEL: sdiv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: slli.w $r4, $r4, 0 +; CHECK-NEXT: div.w $r4, $r4, $r5 +; CHECK-NEXT: bne $r5, $zero, 8 +; CHECK-NEXT: break 7 +; CHECK-NEXT: slli.w $r4, $r4, 0 +; CHECK-NEXT: jr $ra +entry: + %conv = trunc i64 %a to i32 + %conv1 = trunc i64 %b to i32 + %div = sdiv i32 %conv, %conv1 + ret i32 %div +} + +define signext i32 @udiv(i64 %a, i64 %b) { +; CHECK-LABEL: udiv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: slli.w $r4, $r4, 0 +; CHECK-NEXT: div.wu $r4, $r4, $r5 +; CHECK-NEXT: bne $r5, $zero, 8 +; CHECK-NEXT: break 7 +; CHECK-NEXT: slli.w $r4, $r4, 0 +; CHECK-NEXT: jr $ra +entry: + %conv = trunc i64 %a to i32 + %conv1 = trunc i64 %b to i32 + %div = udiv i32 %conv, %conv1 + ret i32 %div +} diff --git a/test/CodeGen/LoongArch/unalignment.ll b/test/CodeGen/LoongArch/unalignment.ll new file mode 100644 index 00000000..d468a361 --- /dev/null +++ b/test/CodeGen/LoongArch/unalignment.ll @@ -0,0 +1,72 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 -o - %s | FileCheck -check-prefix=UNALIGNED %s +; RUN: llc -march=loongarch64 -mattr=+unaligned-access -o - %s | FileCheck -check-prefix=UNALIGNED %s +; RUN: llc -march=loongarch64 -mattr=-unaligned-access -o - %s | FileCheck -check-prefix=ALIGNED %s + +define i32 @i32_load(i32* %p) { +; 
UNALIGNED-LABEL: i32_load: +; UNALIGNED: # %bb.0: +; UNALIGNED-NEXT: ld.w $r4, $r4, 0 +; UNALIGNED-NEXT: jr $ra +; +; ALIGNED-LABEL: i32_load: +; ALIGNED: # %bb.0: +; ALIGNED-NEXT: ld.hu $r5, $r4, 0 +; ALIGNED-NEXT: ld.hu $r4, $r4, 2 +; ALIGNED-NEXT: slli.w $r4, $r4, 16 +; ALIGNED-NEXT: or $r4, $r4, $r5 +; ALIGNED-NEXT: jr $ra + %tmp = load i32, i32* %p, align 2 + ret i32 %tmp +} + +define signext i32 @i32_sextload(i32* %p) { +; UNALIGNED-LABEL: i32_sextload: +; UNALIGNED: # %bb.0: +; UNALIGNED-NEXT: ld.w $r4, $r4, 0 +; UNALIGNED-NEXT: jr $ra +; +; ALIGNED-LABEL: i32_sextload: +; ALIGNED: # %bb.0: +; ALIGNED-NEXT: ld.hu $r5, $r4, 0 +; ALIGNED-NEXT: ld.h $r4, $r4, 2 +; ALIGNED-NEXT: slli.d $r4, $r4, 16 +; ALIGNED-NEXT: or $r4, $r4, $r5 +; ALIGNED-NEXT: jr $ra + %tmp = load i32, i32* %p, align 2 + ret i32 %tmp +} + +define zeroext i32 @i32_zextload(i32* %p) { +; UNALIGNED-LABEL: i32_zextload: +; UNALIGNED: # %bb.0: +; UNALIGNED-NEXT: ld.wu $r4, $r4, 0 +; UNALIGNED-NEXT: jr $ra +; +; ALIGNED-LABEL: i32_zextload: +; ALIGNED: # %bb.0: +; ALIGNED-NEXT: ld.hu $r5, $r4, 0 +; ALIGNED-NEXT: ld.hu $r4, $r4, 2 +; ALIGNED-NEXT: slli.d $r4, $r4, 16 +; ALIGNED-NEXT: or $r4, $r4, $r5 +; ALIGNED-NEXT: jr $ra + %tmp = load i32, i32* %p, align 2 + ret i32 %tmp +} + +define i64 @i64_load(i64* %p) { +; UNALIGNED-LABEL: i64_load: +; UNALIGNED: # %bb.0: +; UNALIGNED-NEXT: ld.d $r4, $r4, 0 +; UNALIGNED-NEXT: jr $ra +; +; ALIGNED-LABEL: i64_load: +; ALIGNED: # %bb.0: +; ALIGNED-NEXT: ld.wu $r5, $r4, 0 +; ALIGNED-NEXT: ld.wu $r4, $r4, 4 +; ALIGNED-NEXT: slli.d $r4, $r4, 32 +; ALIGNED-NEXT: or $r4, $r4, $r5 +; ALIGNED-NEXT: jr $ra + %tmp = load i64, i64* %p, align 4 + ret i64 %tmp +} diff --git a/test/MC/Disassembler/LoongArch/lit.local.cfg b/test/MC/Disassembler/LoongArch/lit.local.cfg new file mode 100644 index 00000000..6223fc69 --- /dev/null +++ b/test/MC/Disassembler/LoongArch/lit.local.cfg @@ -0,0 +1,3 @@ +if not 'LoongArch' in config.root.targets: + config.unsupported = True + diff --git a/test/MC/Disassembler/LoongArch/simd.txt b/test/MC/Disassembler/LoongArch/simd.txt new file mode 100644 index 00000000..90da1700 --- /dev/null +++ b/test/MC/Disassembler/LoongArch/simd.txt @@ -0,0 +1,1361 @@ +# RUN: llvm-mc --disassemble %s -triple=loongarch64-unknown-linux -mattr=+lsx,+lasx | FileCheck %s + +0xcf 0x2a 0x19 0x09 # CHECK: vfmadd.s $vr15, $vr22, $vr10, $vr18 +0x01 0x30 0x25 0x09 # CHECK: vfmadd.d $vr1, $vr0, $vr12, $vr10 +0x50 0x36 0x54 0x09 # CHECK: vfmsub.s $vr16, $vr18, $vr13, $vr8 +0xb9 0x05 0x6a 0x09 # CHECK: vfmsub.d $vr25, $vr13, $vr1, $vr20 +0x56 0x44 0x9b 0x09 # CHECK: vfnmadd.s $vr22, $vr2, $vr17, $vr22 +0xbc 0x0b 0xa7 0x09 # CHECK: vfnmadd.d $vr28, $vr29, $vr2, $vr14 +0x93 0x44 0xdc 0x09 # CHECK: vfnmsub.s $vr19, $vr4, $vr17, $vr24 +0xd8 0x72 0xef 0x09 # CHECK: vfnmsub.d $vr24, $vr22, $vr28, $vr30 +0x8f 0xa7 0x17 0x0a # CHECK: xvfmadd.s $xr15, $xr28, $xr9, $xr15 +0x05 0x33 0x25 0x0a # CHECK: xvfmadd.d $xr5, $xr24, $xr12, $xr10 +0x14 0x6c 0x5d 0x0a # CHECK: xvfmsub.s $xr20, $xr0, $xr27, $xr26 +0x0d 0x65 0x6d 0x0a # CHECK: xvfmsub.d $xr13, $xr8, $xr25, $xr26 +0xce 0x59 0x94 0x0a # CHECK: xvfnmadd.s $xr14, $xr14, $xr22, $xr8 +0x39 0x02 0xa2 0x0a # CHECK: xvfnmadd.d $xr25, $xr17, $xr0, $xr4 +0x6b 0x80 0xd5 0x0a # CHECK: xvfnmsub.s $xr11, $xr3, $xr0, $xr11 +0x62 0x60 0xeb 0x0a # CHECK: xvfnmsub.d $xr2, $xr3, $xr24, $xr22 +0xfa 0x6d 0x52 0x0c # CHECK: vfcmp.ceq.s $vr26, $vr15, $vr27 +0xb5 0x06 0x62 0x0c # CHECK: vfcmp.ceq.d $vr21, $vr21, $vr1 +0x28 0x4d 0x92 0x0c # CHECK: xvfcmp.ceq.s $xr8, $xr9, $xr19 
+0x19 0x72 0xa2 0x0c # CHECK: xvfcmp.ceq.d $xr25, $xr16, $xr28 +0xf4 0xf6 0x14 0x0d # CHECK: vbitsel.v $vr20, $vr23, $vr29, $vr9 +0x47 0xf3 0x2b 0x0d # CHECK: xvbitsel.v $xr7, $xr26, $xr28, $xr23 +0x8b 0x9c 0x54 0x0d # CHECK: vshuf.b $vr11, $vr4, $vr7, $vr9 +0xb0 0x2a 0x66 0x0d # CHECK: xvshuf.b $xr16, $xr21, $xr10, $xr12 +0x3c 0x0b 0x38 0x2c # CHECK: vld $vr28, $r25, -510 +0xdc 0x3d 0x48 0x2c # CHECK: vst $vr28, $r14, 527 +0xcb 0x00 0x88 0x2c # CHECK: xvld $xr11, $r6, 512 +0xed 0xfc 0xd2 0x2c # CHECK: xvst $xr13, $r7, 1215 +0x28 0xfd 0x14 0x30 # CHECK: vldrepl.d $vr8, $r9, -1544 +0x22 0xd9 0x2e 0x30 # CHECK: vldrepl.w $vr2, $r9, -296 +0xfc 0xfa 0x41 0x30 # CHECK: vldrepl.h $vr28, $r23, 252 +0x25 0xad 0xb4 0x30 # CHECK: vldrepl.b $vr5, $r9, -725 +0x57 0x57 0x15 0x31 # CHECK: vstelm.d $vr23, $r26, 680, 1 +0xfe 0x8e 0x26 0x31 # CHECK: vstelm.w $vr30, $r23, -372, 1 +0xcb 0x3c 0x5c 0x31 # CHECK: vstelm.h $vr11, $r6, 30, 7 +0xe3 0xb1 0xb8 0x31 # CHECK: vstelm.b $vr3, $r15, 44, 14 +0x18 0xa5 0x11 0x32 # CHECK: xvldrepl.d $xr24, $r8, 840 +0x0e 0xef 0x21 0x32 # CHECK: xvldrepl.w $xr14, $r24, 492 +0x32 0x49 0x46 0x32 # CHECK: xvldrepl.h $xr18, $r9, 804 +0xa6 0xaf 0x8c 0x32 # CHECK: xvldrepl.b $xr6, $r29, 811 +0x75 0x94 0x13 0x33 # CHECK: xvstelm.d $xr21, $sp, -216, 0 +0xbf 0xab 0x21 0x33 # CHECK: xvstelm.w $xr31, $r29, 424, 0 +0xee 0xb4 0x50 0x33 # CHECK: xvstelm.h $xr14, $r7, 90, 4 +0x15 0xef 0xa3 0x33 # CHECK: xvstelm.b $xr21, $r24, -5, 8 +0x9d 0x78 0x40 0x38 # CHECK: vldx $vr29, $r4, $r30 +0x9f 0x77 0x44 0x38 # CHECK: vstx $vr31, $r28, $r29 +0xc8 0x63 0x48 0x38 # CHECK: xvldx $xr8, $r30, $r24 +0x22 0x75 0x4c 0x38 # CHECK: xvstx $xr2, $r9, $r29 +0x5c 0x5f 0x00 0x70 # CHECK: vseq.b $vr28, $vr26, $vr23 +0x2a 0x94 0x00 0x70 # CHECK: vseq.h $vr10, $vr1, $vr5 +0x63 0x47 0x01 0x70 # CHECK: vseq.w $vr3, $vr27, $vr17 +0x65 0x8c 0x01 0x70 # CHECK: vseq.d $vr5, $vr3, $vr3 +0x3d 0x1d 0x02 0x70 # CHECK: vsle.b $vr29, $vr9, $vr7 +0x05 0xa7 0x02 0x70 # CHECK: vsle.h $vr5, $vr24, $vr9 +0xd1 0x53 0x03 0x70 # CHECK: vsle.w $vr17, $vr30, $vr20 +0xdb 0xb4 0x03 0x70 # CHECK: vsle.d $vr27, $vr6, $vr13 +0x7e 0x29 0x04 0x70 # CHECK: vsle.bu $vr30, $vr11, $vr10 +0xb3 0xff 0x04 0x70 # CHECK: vsle.hu $vr19, $vr29, $vr31 +0x50 0x52 0x05 0x70 # CHECK: vsle.wu $vr16, $vr18, $vr20 +0x3f 0xa2 0x05 0x70 # CHECK: vsle.du $vr31, $vr17, $vr8 +0xfa 0x14 0x06 0x70 # CHECK: vslt.b $vr26, $vr7, $vr5 +0x4e 0xd0 0x06 0x70 # CHECK: vslt.h $vr14, $vr2, $vr20 +0xae 0x64 0x07 0x70 # CHECK: vslt.w $vr14, $vr5, $vr25 +0x3a 0xe5 0x07 0x70 # CHECK: vslt.d $vr26, $vr9, $vr25 +0x5f 0x3a 0x08 0x70 # CHECK: vslt.bu $vr31, $vr18, $vr14 +0xe5 0x95 0x08 0x70 # CHECK: vslt.hu $vr5, $vr15, $vr5 +0x9f 0x37 0x09 0x70 # CHECK: vslt.wu $vr31, $vr28, $vr13 +0x6b 0xda 0x09 0x70 # CHECK: vslt.du $vr11, $vr19, $vr22 +0x9a 0x7e 0x0a 0x70 # CHECK: vadd.b $vr26, $vr20, $vr31 +0x2b 0xf7 0x0a 0x70 # CHECK: vadd.h $vr11, $vr25, $vr29 +0x27 0x37 0x0b 0x70 # CHECK: vadd.w $vr7, $vr25, $vr13 +0xb0 0xc1 0x0b 0x70 # CHECK: vadd.d $vr16, $vr13, $vr16 +0x6c 0x54 0x0c 0x70 # CHECK: vsub.b $vr12, $vr3, $vr21 +0xaf 0xe5 0x0c 0x70 # CHECK: vsub.h $vr15, $vr13, $vr25 +0x14 0x66 0x0d 0x70 # CHECK: vsub.w $vr20, $vr16, $vr25 +0x73 0x9c 0x0d 0x70 # CHECK: vsub.d $vr19, $vr3, $vr7 +0xce 0x17 0x46 0x70 # CHECK: vsadd.b $vr14, $vr30, $vr5 +0x2a 0xbc 0x46 0x70 # CHECK: vsadd.h $vr10, $vr1, $vr15 +0xf3 0x2b 0x47 0x70 # CHECK: vsadd.w $vr19, $vr31, $vr10 +0x7a 0xf2 0x47 0x70 # CHECK: vsadd.d $vr26, $vr19, $vr28 +0x78 0x1c 0x48 0x70 # CHECK: vssub.b $vr24, $vr3, $vr7 +0x9f 0xe0 0x48 0x70 
# CHECK: vssub.h $vr31, $vr4, $vr24 +0x7d 0x33 0x49 0x70 # CHECK: vssub.w $vr29, $vr27, $vr12 +0x17 0xa6 0x49 0x70 # CHECK: vssub.d $vr23, $vr16, $vr9 +0xba 0x13 0x4a 0x70 # CHECK: vsadd.bu $vr26, $vr29, $vr4 +0xef 0xa4 0x4a 0x70 # CHECK: vsadd.hu $vr15, $vr7, $vr9 +0x4d 0x42 0x4b 0x70 # CHECK: vsadd.wu $vr13, $vr18, $vr16 +0xa4 0x80 0x4b 0x70 # CHECK: vsadd.du $vr4, $vr5, $vr0 +0x3b 0x36 0x4c 0x70 # CHECK: vssub.bu $vr27, $vr17, $vr13 +0x05 0x85 0x4c 0x70 # CHECK: vssub.hu $vr5, $vr8, $vr1 +0x0e 0x59 0x4d 0x70 # CHECK: vssub.wu $vr14, $vr8, $vr22 +0x31 0xa1 0x4d 0x70 # CHECK: vssub.du $vr17, $vr9, $vr8 +0x77 0x0a 0x54 0x70 # CHECK: vhaddw.h.b $vr23, $vr19, $vr2 +0x1a 0xea 0x54 0x70 # CHECK: vhaddw.w.h $vr26, $vr16, $vr26 +0xe0 0x6f 0x55 0x70 # CHECK: vhaddw.d.w $vr0, $vr31, $vr27 +0xb9 0xe5 0x55 0x70 # CHECK: vhaddw.q.d $vr25, $vr13, $vr25 +0xe9 0x16 0x56 0x70 # CHECK: vhsubw.h.b $vr9, $vr23, $vr5 +0xaf 0xeb 0x56 0x70 # CHECK: vhsubw.w.h $vr15, $vr29, $vr26 +0x80 0x4b 0x57 0x70 # CHECK: vhsubw.d.w $vr0, $vr28, $vr18 +0x2e 0xa3 0x57 0x70 # CHECK: vhsubw.q.d $vr14, $vr25, $vr8 +0x01 0x56 0x58 0x70 # CHECK: vhaddw.hu.bu $vr1, $vr16, $vr21 +0xbc 0xf6 0x58 0x70 # CHECK: vhaddw.wu.hu $vr28, $vr21, $vr29 +0x9d 0x42 0x59 0x70 # CHECK: vhaddw.du.wu $vr29, $vr20, $vr16 +0x42 0xf1 0x59 0x70 # CHECK: vhaddw.qu.du $vr2, $vr10, $vr28 +0x7f 0x78 0x5a 0x70 # CHECK: vhsubw.hu.bu $vr31, $vr3, $vr30 +0x25 0xad 0x5a 0x70 # CHECK: vhsubw.wu.hu $vr5, $vr9, $vr11 +0xf7 0x5b 0x5b 0x70 # CHECK: vhsubw.du.wu $vr23, $vr31, $vr22 +0x84 0xcb 0x5b 0x70 # CHECK: vhsubw.qu.du $vr4, $vr28, $vr18 +0xb2 0x2d 0x5c 0x70 # CHECK: vadda.b $vr18, $vr13, $vr11 +0xd1 0xb1 0x5c 0x70 # CHECK: vadda.h $vr17, $vr14, $vr12 +0x76 0x0d 0x5d 0x70 # CHECK: vadda.w $vr22, $vr11, $vr3 +0x18 0xbf 0x5d 0x70 # CHECK: vadda.d $vr24, $vr24, $vr15 +0x77 0x46 0x60 0x70 # CHECK: vabsd.b $vr23, $vr19, $vr17 +0xee 0xb7 0x60 0x70 # CHECK: vabsd.h $vr14, $vr31, $vr13 +0x38 0x24 0x61 0x70 # CHECK: vabsd.w $vr24, $vr1, $vr9 +0x9f 0x82 0x61 0x70 # CHECK: vabsd.d $vr31, $vr20, $vr0 +0x97 0x75 0x62 0x70 # CHECK: vabsd.bu $vr23, $vr12, $vr29 +0x72 0x86 0x62 0x70 # CHECK: vabsd.hu $vr18, $vr19, $vr1 +0xad 0x72 0x63 0x70 # CHECK: vabsd.wu $vr13, $vr21, $vr28 +0x50 0xaf 0x63 0x70 # CHECK: vabsd.du $vr16, $vr26, $vr11 +0xa1 0x6e 0x64 0x70 # CHECK: vavg.b $vr1, $vr21, $vr27 +0x54 0xbf 0x64 0x70 # CHECK: vavg.h $vr20, $vr26, $vr15 +0x5d 0x0e 0x65 0x70 # CHECK: vavg.w $vr29, $vr18, $vr3 +0xf3 0xfd 0x65 0x70 # CHECK: vavg.d $vr19, $vr15, $vr31 +0x6b 0x45 0x66 0x70 # CHECK: vavg.bu $vr11, $vr11, $vr17 +0x9e 0xb7 0x66 0x70 # CHECK: vavg.hu $vr30, $vr28, $vr13 +0xe7 0x28 0x67 0x70 # CHECK: vavg.wu $vr7, $vr7, $vr10 +0xf9 0xb0 0x67 0x70 # CHECK: vavg.du $vr25, $vr7, $vr12 +0xbd 0x1d 0x68 0x70 # CHECK: vavgr.b $vr29, $vr13, $vr7 +0x85 0xcf 0x68 0x70 # CHECK: vavgr.h $vr5, $vr28, $vr19 +0xf3 0x39 0x69 0x70 # CHECK: vavgr.w $vr19, $vr15, $vr14 +0x03 0x88 0x69 0x70 # CHECK: vavgr.d $vr3, $vr0, $vr2 +0x77 0x7d 0x6a 0x70 # CHECK: vavgr.bu $vr23, $vr11, $vr31 +0x79 0xa2 0x6a 0x70 # CHECK: vavgr.hu $vr25, $vr19, $vr8 +0x3e 0x33 0x6b 0x70 # CHECK: vavgr.wu $vr30, $vr25, $vr12 +0x99 0xe6 0x6b 0x70 # CHECK: vavgr.du $vr25, $vr20, $vr25 +0x5c 0x6b 0x70 0x70 # CHECK: vmax.b $vr28, $vr26, $vr26 +0xa8 0xad 0x70 0x70 # CHECK: vmax.h $vr8, $vr13, $vr11 +0x95 0x7f 0x71 0x70 # CHECK: vmax.w $vr21, $vr28, $vr31 +0xc1 0xeb 0x71 0x70 # CHECK: vmax.d $vr1, $vr30, $vr26 +0xca 0x25 0x72 0x70 # CHECK: vmin.b $vr10, $vr14, $vr9 +0x6a 0xd5 0x72 0x70 # CHECK: vmin.h $vr10, $vr11, $vr21 +0x1a 0x30 
0x73 0x70 # CHECK: vmin.w $vr26, $vr0, $vr12 +0x53 0x82 0x73 0x70 # CHECK: vmin.d $vr19, $vr18, $vr0 +0x22 0x73 0x74 0x70 # CHECK: vmax.bu $vr2, $vr25, $vr28 +0xc9 0xfa 0x74 0x70 # CHECK: vmax.hu $vr9, $vr22, $vr30 +0x35 0x6f 0x75 0x70 # CHECK: vmax.wu $vr21, $vr25, $vr27 +0xc3 0xe5 0x75 0x70 # CHECK: vmax.du $vr3, $vr14, $vr25 +0xf8 0x6c 0x76 0x70 # CHECK: vmin.bu $vr24, $vr7, $vr27 +0x92 0xf7 0x76 0x70 # CHECK: vmin.hu $vr18, $vr28, $vr29 +0x9a 0x08 0x77 0x70 # CHECK: vmin.wu $vr26, $vr4, $vr2 +0x0d 0x90 0x77 0x70 # CHECK: vmin.du $vr13, $vr0, $vr4 +0xa1 0x5e 0x84 0x70 # CHECK: vmul.b $vr1, $vr21, $vr23 +0xa9 0xe6 0x84 0x70 # CHECK: vmul.h $vr9, $vr21, $vr25 +0x10 0x71 0x85 0x70 # CHECK: vmul.w $vr16, $vr8, $vr28 +0x24 0xae 0x85 0x70 # CHECK: vmul.d $vr4, $vr17, $vr11 +0x0c 0x23 0x86 0x70 # CHECK: vmuh.b $vr12, $vr24, $vr8 +0xa6 0xe2 0x86 0x70 # CHECK: vmuh.h $vr6, $vr21, $vr24 +0xab 0x7b 0x87 0x70 # CHECK: vmuh.w $vr11, $vr29, $vr30 +0x21 0xe6 0x87 0x70 # CHECK: vmuh.d $vr1, $vr17, $vr25 +0xbd 0x2b 0x88 0x70 # CHECK: vmuh.bu $vr29, $vr29, $vr10 +0x38 0xd5 0x88 0x70 # CHECK: vmuh.hu $vr24, $vr9, $vr21 +0x8f 0x4e 0x89 0x70 # CHECK: vmuh.wu $vr15, $vr20, $vr19 +0x80 0x87 0x89 0x70 # CHECK: vmuh.du $vr0, $vr28, $vr1 +0x1b 0x10 0xa8 0x70 # CHECK: vmadd.b $vr27, $vr0, $vr4 +0x93 0xf2 0xa8 0x70 # CHECK: vmadd.h $vr19, $vr20, $vr28 +0xef 0x0c 0xa9 0x70 # CHECK: vmadd.w $vr15, $vr7, $vr3 +0x39 0xfb 0xa9 0x70 # CHECK: vmadd.d $vr25, $vr25, $vr30 +0x38 0x6b 0xaa 0x70 # CHECK: vmsub.b $vr24, $vr25, $vr26 +0x0c 0xb4 0xaa 0x70 # CHECK: vmsub.h $vr12, $vr0, $vr13 +0x1a 0x62 0xab 0x70 # CHECK: vmsub.w $vr26, $vr16, $vr24 +0x4d 0xa1 0xab 0x70 # CHECK: vmsub.d $vr13, $vr10, $vr8 +0x92 0x57 0xe0 0x70 # CHECK: vdiv.b $vr18, $vr28, $vr21 +0x11 0x87 0xe0 0x70 # CHECK: vdiv.h $vr17, $vr24, $vr1 +0x43 0x59 0xe1 0x70 # CHECK: vdiv.w $vr3, $vr10, $vr22 +0xaf 0xa1 0xe1 0x70 # CHECK: vdiv.d $vr15, $vr13, $vr8 +0x33 0x53 0xe2 0x70 # CHECK: vmod.b $vr19, $vr25, $vr20 +0x02 0xdb 0xe2 0x70 # CHECK: vmod.h $vr2, $vr24, $vr22 +0x5f 0x02 0xe3 0x70 # CHECK: vmod.w $vr31, $vr18, $vr0 +0x1f 0x88 0xe3 0x70 # CHECK: vmod.d $vr31, $vr0, $vr2 +0x8f 0x0c 0xe4 0x70 # CHECK: vdiv.bu $vr15, $vr4, $vr3 +0xf1 0xf4 0xe4 0x70 # CHECK: vdiv.hu $vr17, $vr7, $vr29 +0x5b 0x0d 0xe5 0x70 # CHECK: vdiv.wu $vr27, $vr10, $vr3 +0x08 0xeb 0xe5 0x70 # CHECK: vdiv.du $vr8, $vr24, $vr26 +0xca 0x62 0xe6 0x70 # CHECK: vmod.bu $vr10, $vr22, $vr24 +0xf3 0xe3 0xe6 0x70 # CHECK: vmod.hu $vr19, $vr31, $vr24 +0x1a 0x37 0xe7 0x70 # CHECK: vmod.wu $vr26, $vr24, $vr13 +0x74 0xaa 0xe7 0x70 # CHECK: vmod.du $vr20, $vr19, $vr10 +0x5c 0x7a 0xe8 0x70 # CHECK: vsll.b $vr28, $vr18, $vr30 +0x96 0xf8 0xe8 0x70 # CHECK: vsll.h $vr22, $vr4, $vr30 +0x21 0x23 0xe9 0x70 # CHECK: vsll.w $vr1, $vr25, $vr8 +0x5f 0xbe 0xe9 0x70 # CHECK: vsll.d $vr31, $vr18, $vr15 +0x85 0x41 0xea 0x70 # CHECK: vsrl.b $vr5, $vr12, $vr16 +0xa9 0xf0 0xea 0x70 # CHECK: vsrl.h $vr9, $vr5, $vr28 +0x1e 0x06 0xeb 0x70 # CHECK: vsrl.w $vr30, $vr16, $vr1 +0xfc 0xee 0xeb 0x70 # CHECK: vsrl.d $vr28, $vr23, $vr27 +0x2f 0x66 0xec 0x70 # CHECK: vsra.b $vr15, $vr17, $vr25 +0x00 0x95 0xec 0x70 # CHECK: vsra.h $vr0, $vr8, $vr5 +0x3d 0x1d 0xed 0x70 # CHECK: vsra.w $vr29, $vr9, $vr7 +0x76 0xcc 0xed 0x70 # CHECK: vsra.d $vr22, $vr3, $vr19 +0x08 0x22 0xee 0x70 # CHECK: vrotr.b $vr8, $vr16, $vr8 +0xae 0xac 0xee 0x70 # CHECK: vrotr.h $vr14, $vr5, $vr11 +0x91 0x67 0xef 0x70 # CHECK: vrotr.w $vr17, $vr28, $vr25 +0x92 0xcf 0xef 0x70 # CHECK: vrotr.d $vr18, $vr28, $vr19 +0x61 0x47 0xf0 0x70 # CHECK: vsrlr.b $vr1, $vr27, 
$vr17 +0xda 0xa9 0xf0 0x70 # CHECK: vsrlr.h $vr26, $vr14, $vr10 +0xa3 0x63 0xf1 0x70 # CHECK: vsrlr.w $vr3, $vr29, $vr24 +0x97 0xa8 0xf1 0x70 # CHECK: vsrlr.d $vr23, $vr4, $vr10 +0x59 0x54 0xf2 0x70 # CHECK: vsrar.b $vr25, $vr2, $vr21 +0x64 0xd1 0xf2 0x70 # CHECK: vsrar.h $vr4, $vr11, $vr20 +0xab 0x76 0xf3 0x70 # CHECK: vsrar.w $vr11, $vr21, $vr29 +0xbd 0x88 0xf3 0x70 # CHECK: vsrar.d $vr29, $vr5, $vr2 +0xd8 0xf5 0xf4 0x70 # CHECK: vsrln.b.h $vr24, $vr14, $vr29 +0xda 0x42 0xf5 0x70 # CHECK: vsrln.h.w $vr26, $vr22, $vr16 +0xf1 0x8b 0xf5 0x70 # CHECK: vsrln.w.d $vr17, $vr31, $vr2 +0x1f 0xdc 0xf6 0x70 # CHECK: vsran.b.h $vr31, $vr0, $vr23 +0x94 0x75 0xf7 0x70 # CHECK: vsran.h.w $vr20, $vr12, $vr29 +0x22 0x88 0xf7 0x70 # CHECK: vsran.w.d $vr2, $vr1, $vr2 +0x93 0x83 0xf8 0x70 # CHECK: vsrlrn.b.h $vr19, $vr28, $vr0 +0xb7 0x3b 0xf9 0x70 # CHECK: vsrlrn.h.w $vr23, $vr29, $vr14 +0x45 0x97 0xf9 0x70 # CHECK: vsrlrn.w.d $vr5, $vr26, $vr5 +0xf1 0x9d 0xfa 0x70 # CHECK: vsrarn.b.h $vr17, $vr15, $vr7 +0x4c 0x75 0xfb 0x70 # CHECK: vsrarn.h.w $vr12, $vr10, $vr29 +0x58 0xef 0xfb 0x70 # CHECK: vsrarn.w.d $vr24, $vr26, $vr27 +0x81 0xb9 0xfc 0x70 # CHECK: vssrln.b.h $vr1, $vr12, $vr14 +0x0b 0x49 0xfd 0x70 # CHECK: vssrln.h.w $vr11, $vr8, $vr18 +0xff 0x99 0xfd 0x70 # CHECK: vssrln.w.d $vr31, $vr15, $vr6 +0xad 0xe0 0xfe 0x70 # CHECK: vssran.b.h $vr13, $vr5, $vr24 +0x44 0x1f 0xff 0x70 # CHECK: vssran.h.w $vr4, $vr26, $vr7 +0x59 0x99 0xff 0x70 # CHECK: vssran.w.d $vr25, $vr10, $vr6 +0x9c 0x9b 0x00 0x71 # CHECK: vssrlrn.b.h $vr28, $vr28, $vr6 +0xef 0x46 0x01 0x71 # CHECK: vssrlrn.h.w $vr15, $vr23, $vr17 +0x2c 0x89 0x01 0x71 # CHECK: vssrlrn.w.d $vr12, $vr9, $vr2 +0x21 0xc7 0x02 0x71 # CHECK: vssrarn.b.h $vr1, $vr25, $vr17 +0x23 0x5d 0x03 0x71 # CHECK: vssrarn.h.w $vr3, $vr9, $vr23 +0x2e 0xed 0x03 0x71 # CHECK: vssrarn.w.d $vr14, $vr9, $vr27 +0x10 0xbf 0x04 0x71 # CHECK: vssrln.bu.h $vr16, $vr24, $vr15 +0xf5 0x7a 0x05 0x71 # CHECK: vssrln.hu.w $vr21, $vr23, $vr30 +0x0c 0xf9 0x05 0x71 # CHECK: vssrln.wu.d $vr12, $vr8, $vr30 +0x45 0xb2 0x06 0x71 # CHECK: vssran.bu.h $vr5, $vr18, $vr12 +0xe0 0x70 0x07 0x71 # CHECK: vssran.hu.w $vr0, $vr7, $vr28 +0x65 0xa1 0x07 0x71 # CHECK: vssran.wu.d $vr5, $vr11, $vr8 +0x32 0x8f 0x08 0x71 # CHECK: vssrlrn.bu.h $vr18, $vr25, $vr3 +0x33 0x50 0x09 0x71 # CHECK: vssrlrn.hu.w $vr19, $vr1, $vr20 +0xc6 0xcb 0x09 0x71 # CHECK: vssrlrn.wu.d $vr6, $vr30, $vr18 +0xac 0x8d 0x0a 0x71 # CHECK: vssrarn.bu.h $vr12, $vr13, $vr3 +0xb2 0x50 0x0b 0x71 # CHECK: vssrarn.hu.w $vr18, $vr5, $vr20 +0x17 0xd5 0x0b 0x71 # CHECK: vssrarn.wu.d $vr23, $vr8, $vr21 +0x4e 0x7c 0x0c 0x71 # CHECK: vbitclr.b $vr14, $vr2, $vr31 +0x31 0xa3 0x0c 0x71 # CHECK: vbitclr.h $vr17, $vr25, $vr8 +0x72 0x0d 0x0d 0x71 # CHECK: vbitclr.w $vr18, $vr11, $vr3 +0xff 0xf5 0x0d 0x71 # CHECK: vbitclr.d $vr31, $vr15, $vr29 +0xa8 0x43 0x0e 0x71 # CHECK: vbitset.b $vr8, $vr29, $vr16 +0x25 0xc6 0x0e 0x71 # CHECK: vbitset.h $vr5, $vr17, $vr17 +0x65 0x16 0x0f 0x71 # CHECK: vbitset.w $vr5, $vr19, $vr5 +0x65 0xab 0x0f 0x71 # CHECK: vbitset.d $vr5, $vr27, $vr10 +0xb0 0x20 0x10 0x71 # CHECK: vbitrev.b $vr16, $vr5, $vr8 +0xac 0xb3 0x10 0x71 # CHECK: vbitrev.h $vr12, $vr29, $vr12 +0xc3 0x39 0x11 0x71 # CHECK: vbitrev.w $vr3, $vr14, $vr14 +0x7f 0xbb 0x11 0x71 # CHECK: vbitrev.d $vr31, $vr27, $vr14 +0x16 0x4f 0x16 0x71 # CHECK: vpackev.b $vr22, $vr24, $vr19 +0x5c 0xc8 0x16 0x71 # CHECK: vpackev.h $vr28, $vr2, $vr18 +0x75 0x10 0x17 0x71 # CHECK: vpackev.w $vr21, $vr3, $vr4 +0xb8 0xae 0x17 0x71 # CHECK: vpackev.d $vr24, $vr21, $vr11 +0xec 0x6b 0x18 0x71 # 
CHECK: vpackod.b $vr12, $vr31, $vr26 +0x79 0xc0 0x18 0x71 # CHECK: vpackod.h $vr25, $vr3, $vr16 +0x55 0x3e 0x19 0x71 # CHECK: vpackod.w $vr21, $vr18, $vr15 +0x62 0x80 0x19 0x71 # CHECK: vpackod.d $vr2, $vr3, $vr0 +0x08 0x71 0x1a 0x71 # CHECK: vilvl.b $vr8, $vr8, $vr28 +0x14 0xfc 0x1a 0x71 # CHECK: vilvl.h $vr20, $vr0, $vr31 +0x4b 0x45 0x1b 0x71 # CHECK: vilvl.w $vr11, $vr10, $vr17 +0xe7 0x84 0x1b 0x71 # CHECK: vilvl.d $vr7, $vr7, $vr1 +0x6b 0x05 0x1c 0x71 # CHECK: vilvh.b $vr11, $vr11, $vr1 +0xe0 0xb7 0x1c 0x71 # CHECK: vilvh.h $vr0, $vr31, $vr13 +0xbc 0x1e 0x1d 0x71 # CHECK: vilvh.w $vr28, $vr21, $vr7 +0x77 0xcc 0x1d 0x71 # CHECK: vilvh.d $vr23, $vr3, $vr19 +0xa1 0x22 0x1e 0x71 # CHECK: vpickev.b $vr1, $vr21, $vr8 +0x30 0xa4 0x1e 0x71 # CHECK: vpickev.h $vr16, $vr1, $vr9 +0xad 0x11 0x1f 0x71 # CHECK: vpickev.w $vr13, $vr13, $vr4 +0xcb 0xfb 0x1f 0x71 # CHECK: vpickev.d $vr11, $vr30, $vr30 +0x67 0x35 0x20 0x71 # CHECK: vpickod.b $vr7, $vr11, $vr13 +0x72 0x84 0x20 0x71 # CHECK: vpickod.h $vr18, $vr3, $vr1 +0x03 0x4e 0x21 0x71 # CHECK: vpickod.w $vr3, $vr16, $vr19 +0xac 0xd5 0x21 0x71 # CHECK: vpickod.d $vr12, $vr13, $vr21 +0x2f 0x4e 0x22 0x71 # CHECK: vreplve.b $vr15, $vr17, $r19 +0xee 0x92 0x22 0x71 # CHECK: vreplve.h $vr14, $vr23, $r4 +0x7d 0x6e 0x23 0x71 # CHECK: vreplve.w $vr29, $vr19, $r27 +0x8d 0xd2 0x23 0x71 # CHECK: vreplve.d $vr13, $vr20, $r20 +0x59 0x54 0x26 0x71 # CHECK: vand.v $vr25, $vr2, $vr21 +0x64 0xc3 0x26 0x71 # CHECK: vor.v $vr4, $vr27, $vr16 +0x3e 0x13 0x27 0x71 # CHECK: vxor.v $vr30, $vr25, $vr4 +0x49 0xd8 0x27 0x71 # CHECK: vnor.v $vr9, $vr2, $vr22 +0x54 0x13 0x28 0x71 # CHECK: vandn.v $vr20, $vr26, $vr4 +0xa6 0xfa 0x28 0x71 # CHECK: vorn.v $vr6, $vr21, $vr30 +0x2b 0x35 0x2b 0x71 # CHECK: vfrstp.b $vr11, $vr9, $vr13 +0x55 0xdb 0x2b 0x71 # CHECK: vfrstp.h $vr21, $vr26, $vr22 +0xe9 0x40 0x2d 0x71 # CHECK: vadd.q $vr9, $vr7, $vr16 +0x22 0xc0 0x2d 0x71 # CHECK: vsub.q $vr2, $vr1, $vr16 +0x42 0x38 0x2e 0x71 # CHECK: vsigncov.b $vr2, $vr2, $vr14 +0xb5 0xb6 0x2e 0x71 # CHECK: vsigncov.h $vr21, $vr21, $vr13 +0xf5 0x14 0x2f 0x71 # CHECK: vsigncov.w $vr21, $vr7, $vr5 +0x4a 0x8d 0x2f 0x71 # CHECK: vsigncov.d $vr10, $vr10, $vr3 +0x8a 0x84 0x30 0x71 # CHECK: vfadd.s $vr10, $vr4, $vr1 +0x6f 0x0b 0x31 0x71 # CHECK: vfadd.d $vr15, $vr27, $vr2 +0x0e 0xa6 0x32 0x71 # CHECK: vfsub.s $vr14, $vr16, $vr9 +0x24 0x20 0x33 0x71 # CHECK: vfsub.d $vr4, $vr1, $vr8 +0x40 0x9a 0x38 0x71 # CHECK: vfmul.s $vr0, $vr18, $vr6 +0xfb 0x7b 0x39 0x71 # CHECK: vfmul.d $vr27, $vr31, $vr30 +0xe3 0x98 0x3a 0x71 # CHECK: vfdiv.s $vr3, $vr7, $vr6 +0xd0 0x78 0x3b 0x71 # CHECK: vfdiv.d $vr16, $vr6, $vr30 +0xd2 0xa3 0x3c 0x71 # CHECK: vfmax.s $vr18, $vr30, $vr8 +0x13 0x61 0x3d 0x71 # CHECK: vfmax.d $vr19, $vr8, $vr24 +0x58 0x9b 0x3e 0x71 # CHECK: vfmin.s $vr24, $vr26, $vr6 +0x30 0x07 0x3f 0x71 # CHECK: vfmin.d $vr16, $vr25, $vr1 +0xe8 0xb8 0x40 0x71 # CHECK: vfmaxa.s $vr8, $vr7, $vr14 +0x0a 0x11 0x41 0x71 # CHECK: vfmaxa.d $vr10, $vr8, $vr4 +0xd0 0xc8 0x42 0x71 # CHECK: vfmina.s $vr16, $vr6, $vr18 +0xfa 0x38 0x43 0x71 # CHECK: vfmina.d $vr26, $vr7, $vr14 +0x9e 0x60 0x46 0x71 # CHECK: vfcvt.h.s $vr30, $vr4, $vr24 +0x30 0x92 0x46 0x71 # CHECK: vfcvt.s.d $vr16, $vr17, $vr4 +0xf9 0x2a 0x48 0x71 # CHECK: vffint.s.l $vr25, $vr23, $vr10 +0xc9 0xee 0x49 0x71 # CHECK: vftint.w.d $vr9, $vr22, $vr27 +0x5f 0x75 0x4a 0x71 # CHECK: vftintrm.w.d $vr31, $vr10, $vr29 +0xb7 0xbd 0x4a 0x71 # CHECK: vftintrp.w.d $vr23, $vr13, $vr15 +0x32 0x19 0x4b 0x71 # CHECK: vftintrz.w.d $vr18, $vr9, $vr6 +0x95 0xf9 0x4b 0x71 # CHECK: vftintrne.w.d 
$vr21, $vr12, $vr30 +0x63 0x89 0x7a 0x71 # CHECK: vshuf.h $vr3, $vr11, $vr2 +0x95 0x74 0x7b 0x71 # CHECK: vshuf.w $vr21, $vr4, $vr29 +0xeb 0xca 0x7b 0x71 # CHECK: vshuf.d $vr11, $vr23, $vr18 +0xdb 0x1d 0x80 0x72 # CHECK: vseqi.b $vr27, $vr14, 7 +0x77 0xeb 0x80 0x72 # CHECK: vseqi.h $vr23, $vr27, -6 +0x08 0x41 0x81 0x72 # CHECK: vseqi.w $vr8, $vr8, -16 +0xab 0x94 0x81 0x72 # CHECK: vseqi.d $vr11, $vr5, 5 +0x68 0x1f 0x82 0x72 # CHECK: vslei.b $vr8, $vr27, 7 +0xbb 0xef 0x82 0x72 # CHECK: vslei.h $vr27, $vr29, -5 +0xb7 0x75 0x83 0x72 # CHECK: vslei.w $vr23, $vr13, -3 +0xe5 0xe1 0x83 0x72 # CHECK: vslei.d $vr5, $vr15, -8 +0x5d 0x25 0x84 0x72 # CHECK: vslei.bu $vr29, $vr10, 9 +0x5d 0xae 0x84 0x72 # CHECK: vslei.hu $vr29, $vr18, 11 +0x28 0x08 0x85 0x72 # CHECK: vslei.wu $vr8, $vr1, 2 +0xb0 0xa8 0x85 0x72 # CHECK: vslei.du $vr16, $vr5, 10 +0x88 0x78 0x86 0x72 # CHECK: vslti.b $vr8, $vr4, -2 +0xfa 0xc8 0x86 0x72 # CHECK: vslti.h $vr26, $vr7, -14 +0x1c 0x31 0x87 0x72 # CHECK: vslti.w $vr28, $vr8, 12 +0x64 0xa7 0x87 0x72 # CHECK: vslti.d $vr4, $vr27, 9 +0xca 0x49 0x88 0x72 # CHECK: vslti.bu $vr10, $vr14, 18 +0x9c 0xfb 0x88 0x72 # CHECK: vslti.hu $vr28, $vr28, 30 +0x6f 0x6f 0x89 0x72 # CHECK: vslti.wu $vr15, $vr27, 27 +0x3e 0xce 0x89 0x72 # CHECK: vslti.du $vr30, $vr17, 19 +0x26 0x48 0x8a 0x72 # CHECK: vaddi.bu $vr6, $vr1, 18 +0xcc 0x95 0x8a 0x72 # CHECK: vaddi.hu $vr12, $vr14, 5 +0x1c 0x68 0x8b 0x72 # CHECK: vaddi.wu $vr28, $vr0, 26 +0xaa 0x88 0x8b 0x72 # CHECK: vaddi.du $vr10, $vr5, 2 +0x96 0x0b 0x8c 0x72 # CHECK: vsubi.bu $vr22, $vr28, 2 +0xc0 0xfe 0x8c 0x72 # CHECK: vsubi.hu $vr0, $vr22, 31 +0xd4 0x14 0x8d 0x72 # CHECK: vsubi.wu $vr20, $vr6, 5 +0x72 0x85 0x8d 0x72 # CHECK: vsubi.du $vr18, $vr11, 1 +0x44 0x13 0x8e 0x72 # CHECK: vbsll.v $vr4, $vr26, 4 +0xe7 0xbf 0x8e 0x72 # CHECK: vbsrl.v $vr7, $vr31, 15 +0xf3 0x39 0x90 0x72 # CHECK: vmaxi.b $vr19, $vr15, 14 +0x79 0xd0 0x90 0x72 # CHECK: vmaxi.h $vr25, $vr3, -12 +0x34 0x17 0x91 0x72 # CHECK: vmaxi.w $vr20, $vr25, 5 +0x49 0xb1 0x91 0x72 # CHECK: vmaxi.d $vr9, $vr10, 12 +0xbe 0x72 0x92 0x72 # CHECK: vmini.b $vr30, $vr21, -4 +0x8b 0xf7 0x92 0x72 # CHECK: vmini.h $vr11, $vr28, -3 +0x26 0x5f 0x93 0x72 # CHECK: vmini.w $vr6, $vr25, -9 +0x1c 0x89 0x93 0x72 # CHECK: vmini.d $vr28, $vr8, 2 +0x0d 0x4f 0x94 0x72 # CHECK: vmaxi.bu $vr13, $vr24, 19 +0x23 0xd8 0x94 0x72 # CHECK: vmaxi.hu $vr3, $vr1, 22 +0x61 0x5c 0x95 0x72 # CHECK: vmaxi.wu $vr1, $vr3, 23 +0x46 0xd6 0x95 0x72 # CHECK: vmaxi.du $vr6, $vr18, 21 +0x4a 0x50 0x96 0x72 # CHECK: vmini.bu $vr10, $vr2, 20 +0x31 0xbe 0x96 0x72 # CHECK: vmini.hu $vr17, $vr17, 15 +0x7a 0x5f 0x97 0x72 # CHECK: vmini.wu $vr26, $vr27, 23 +0x6c 0xa3 0x97 0x72 # CHECK: vmini.du $vr12, $vr27, 8 +0x1a 0x25 0x9a 0x72 # CHECK: vfrstpi.b $vr26, $vr8, 9 +0x50 0xd0 0x9a 0x72 # CHECK: vfrstpi.h $vr16, $vr2, 20 +0x25 0x02 0x9c 0x72 # CHECK: vclo.b $vr5, $vr17 +0x88 0x04 0x9c 0x72 # CHECK: vclo.h $vr8, $vr4 +0xa1 0x09 0x9c 0x72 # CHECK: vclo.w $vr1, $vr13 +0xe0 0x0e 0x9c 0x72 # CHECK: vclo.d $vr0, $vr23 +0x24 0x13 0x9c 0x72 # CHECK: vclz.b $vr4, $vr25 +0x21 0x17 0x9c 0x72 # CHECK: vclz.h $vr1, $vr25 +0xa1 0x18 0x9c 0x72 # CHECK: vclz.w $vr1, $vr5 +0x30 0x1e 0x9c 0x72 # CHECK: vclz.d $vr16, $vr17 +0x64 0x20 0x9c 0x72 # CHECK: vpcnt.b $vr4, $vr3 +0x2f 0x26 0x9c 0x72 # CHECK: vpcnt.h $vr15, $vr17 +0x0d 0x29 0x9c 0x72 # CHECK: vpcnt.w $vr13, $vr8 +0x00 0x2d 0x9c 0x72 # CHECK: vpcnt.d $vr0, $vr8 +0x0e 0x33 0x9c 0x72 # CHECK: vneg.b $vr14, $vr24 +0xf8 0x34 0x9c 0x72 # CHECK: vneg.h $vr24, $vr7 +0xb3 0x38 0x9c 0x72 # CHECK: vneg.w $vr19, $vr5 
+0x83 0x3f 0x9c 0x72 # CHECK: vneg.d $vr3, $vr28 +0x3f 0x43 0x9c 0x72 # CHECK: vmskltz.b $vr31, $vr25 +0x89 0x46 0x9c 0x72 # CHECK: vmskltz.h $vr9, $vr20 +0x56 0x4b 0x9c 0x72 # CHECK: vmskltz.w $vr22, $vr26 +0x5c 0x4d 0x9c 0x72 # CHECK: vmskltz.d $vr28, $vr10 +0xa7 0x50 0x9c 0x72 # CHECK: vmskgez.b $vr7, $vr5 +0x94 0x61 0x9c 0x72 # CHECK: vmsknz.b $vr20, $vr12 +0xc5 0x99 0x9c 0x72 # CHECK: vseteqz.v $fcc5, $vr14 +0x02 0x9d 0x9c 0x72 # CHECK: vsetnez.v $fcc2, $vr8 +0x80 0xa2 0x9c 0x72 # CHECK: vsetanyeqz.b $fcc0, $vr20 +0x04 0xa6 0x9c 0x72 # CHECK: vsetanyeqz.h $fcc4, $vr16 +0x47 0xa8 0x9c 0x72 # CHECK: vsetanyeqz.w $fcc7, $vr2 +0x84 0xad 0x9c 0x72 # CHECK: vsetanyeqz.d $fcc4, $vr12 +0x07 0xb0 0x9c 0x72 # CHECK: vsetallnez.b $fcc7, $vr0 +0x62 0xb5 0x9c 0x72 # CHECK: vsetallnez.h $fcc2, $vr11 +0x26 0xbb 0x9c 0x72 # CHECK: vsetallnez.w $fcc6, $vr25 +0xe7 0xbf 0x9c 0x72 # CHECK: vsetallnez.d $fcc7, $vr31 +0x8e 0xc7 0x9c 0x72 # CHECK: vflogb.s $vr14, $vr28 +0x3d 0xc9 0x9c 0x72 # CHECK: vflogb.d $vr29, $vr9 +0xa3 0xd5 0x9c 0x72 # CHECK: vfclass.s $vr3, $vr13 +0xe5 0xd9 0x9c 0x72 # CHECK: vfclass.d $vr5, $vr15 +0x73 0xe7 0x9c 0x72 # CHECK: vfsqrt.s $vr19, $vr27 +0x7f 0xe8 0x9c 0x72 # CHECK: vfsqrt.d $vr31, $vr3 +0x18 0xf6 0x9c 0x72 # CHECK: vfrecip.s $vr24, $vr16 +0x77 0xfa 0x9c 0x72 # CHECK: vfrecip.d $vr23, $vr19 +0xf2 0x05 0x9d 0x72 # CHECK: vfrsqrt.s $vr18, $vr15 +0xf2 0x0b 0x9d 0x72 # CHECK: vfrsqrt.d $vr18, $vr31 +0x7a 0x35 0x9d 0x72 # CHECK: vfrint.s $vr26, $vr11 +0x58 0x3a 0x9d 0x72 # CHECK: vfrint.d $vr24, $vr18 +0x65 0x44 0x9d 0x72 # CHECK: vfrintrm.s $vr5, $vr3 +0x57 0x49 0x9d 0x72 # CHECK: vfrintrm.d $vr23, $vr10 +0x54 0x54 0x9d 0x72 # CHECK: vfrintrp.s $vr20, $vr2 +0x3e 0x5a 0x9d 0x72 # CHECK: vfrintrp.d $vr30, $vr17 +0xd3 0x64 0x9d 0x72 # CHECK: vfrintrz.s $vr19, $vr6 +0x50 0x6a 0x9d 0x72 # CHECK: vfrintrz.d $vr16, $vr18 +0x08 0x77 0x9d 0x72 # CHECK: vfrintrne.s $vr8, $vr24 +0xa6 0x78 0x9d 0x72 # CHECK: vfrintrne.d $vr6, $vr5 +0xc4 0xe8 0x9d 0x72 # CHECK: vfcvtl.s.h $vr4, $vr6 +0xf0 0xec 0x9d 0x72 # CHECK: vfcvth.s.h $vr16, $vr7 +0x50 0xf1 0x9d 0x72 # CHECK: vfcvtl.d.s $vr16, $vr10 +0x3c 0xf7 0x9d 0x72 # CHECK: vfcvth.d.s $vr28, $vr25 +0x1c 0x02 0x9e 0x72 # CHECK: vffint.s.w $vr28, $vr16 +0xe4 0x07 0x9e 0x72 # CHECK: vffint.s.wu $vr4, $vr31 +0x32 0x0b 0x9e 0x72 # CHECK: vffint.d.l $vr18, $vr25 +0x38 0x0e 0x9e 0x72 # CHECK: vffint.d.lu $vr24, $vr17 +0x62 0x13 0x9e 0x72 # CHECK: vffintl.d.w $vr2, $vr27 +0x04 0x16 0x9e 0x72 # CHECK: vffinth.d.w $vr4, $vr16 +0x11 0x30 0x9e 0x72 # CHECK: vftint.w.s $vr17, $vr0 +0x57 0x36 0x9e 0x72 # CHECK: vftint.l.d $vr23, $vr18 +0x97 0x38 0x9e 0x72 # CHECK: vftintrm.w.s $vr23, $vr4 +0xde 0x3d 0x9e 0x72 # CHECK: vftintrm.l.d $vr30, $vr14 +0x07 0x40 0x9e 0x72 # CHECK: vftintrp.w.s $vr7, $vr0 +0x9c 0x46 0x9e 0x72 # CHECK: vftintrp.l.d $vr28, $vr20 +0xfc 0x4b 0x9e 0x72 # CHECK: vftintrz.w.s $vr28, $vr31 +0x12 0x4c 0x9e 0x72 # CHECK: vftintrz.l.d $vr18, $vr0 +0x2e 0x52 0x9e 0x72 # CHECK: vftintrne.w.s $vr14, $vr17 +0x56 0x56 0x9e 0x72 # CHECK: vftintrne.l.d $vr22, $vr18 +0x3a 0x5b 0x9e 0x72 # CHECK: vftint.wu.s $vr26, $vr25 +0x69 0x5f 0x9e 0x72 # CHECK: vftint.lu.d $vr9, $vr27 +0xda 0x72 0x9e 0x72 # CHECK: vftintrz.wu.s $vr26, $vr22 +0x9d 0x76 0x9e 0x72 # CHECK: vftintrz.lu.d $vr29, $vr20 +0x36 0x80 0x9e 0x72 # CHECK: vftintl.l.s $vr22, $vr1 +0x0d 0x87 0x9e 0x72 # CHECK: vftinth.l.s $vr13, $vr24 +0x68 0x8b 0x9e 0x72 # CHECK: vftintrml.l.s $vr8, $vr27 +0x92 0x8f 0x9e 0x72 # CHECK: vftintrmh.l.s $vr18, $vr28 +0x9b 0x93 0x9e 0x72 # CHECK: vftintrpl.l.s $vr27, 
$vr28 +0xf4 0x94 0x9e 0x72 # CHECK: vftintrph.l.s $vr20, $vr7 +0x46 0x98 0x9e 0x72 # CHECK: vftintrzl.l.s $vr6, $vr2 +0xd5 0x9c 0x9e 0x72 # CHECK: vftintrzh.l.s $vr21, $vr6 +0x79 0xa0 0x9e 0x72 # CHECK: vftintrnel.l.s $vr25, $vr3 +0xa7 0xa4 0x9e 0x72 # CHECK: vftintrneh.l.s $vr7, $vr5 +0x49 0xe0 0x9e 0x72 # CHECK: vexth.h.b $vr9, $vr2 +0x64 0xe7 0x9e 0x72 # CHECK: vexth.w.h $vr4, $vr27 +0x37 0xe8 0x9e 0x72 # CHECK: vexth.d.w $vr23, $vr1 +0xcf 0xec 0x9e 0x72 # CHECK: vexth.q.d $vr15, $vr6 +0x43 0xf0 0x9e 0x72 # CHECK: vexth.hu.bu $vr3, $vr2 +0x5f 0xf7 0x9e 0x72 # CHECK: vexth.wu.hu $vr31, $vr26 +0xea 0xfb 0x9e 0x72 # CHECK: vexth.du.wu $vr10, $vr31 +0x1c 0xfd 0x9e 0x72 # CHECK: vexth.qu.du $vr28, $vr8 +0x6f 0x00 0x9f 0x72 # CHECK: vreplgr2vr.b $vr15, $sp +0xea 0x06 0x9f 0x72 # CHECK: vreplgr2vr.h $vr10, $r23 +0x19 0x0a 0x9f 0x72 # CHECK: vreplgr2vr.w $vr25, $r16 +0xfb 0x0c 0x9f 0x72 # CHECK: vreplgr2vr.d $vr27, $r7 +0x18 0x3f 0xa0 0x72 # CHECK: vrotri.b $vr24, $vr24, 7 +0xa1 0x40 0xa0 0x72 # CHECK: vrotri.h $vr1, $vr5, 0 +0x0a 0xb1 0xa0 0x72 # CHECK: vrotri.w $vr10, $vr8, 12 +0xbe 0xab 0xa1 0x72 # CHECK: vrotri.d $vr30, $vr29, 42 +0x01 0x2e 0xa4 0x72 # CHECK: vsrlri.b $vr1, $vr16, 3 +0xbc 0x46 0xa4 0x72 # CHECK: vsrlri.h $vr28, $vr21, 1 +0x92 0xbc 0xa4 0x72 # CHECK: vsrlri.w $vr18, $vr4, 15 +0x7e 0x4c 0xa5 0x72 # CHECK: vsrlri.d $vr30, $vr3, 19 +0x0d 0x3c 0xa8 0x72 # CHECK: vsrari.b $vr13, $vr0, 7 +0x31 0x59 0xa8 0x72 # CHECK: vsrari.h $vr17, $vr9, 6 +0x83 0x9b 0xa8 0x72 # CHECK: vsrari.w $vr3, $vr28, 6 +0x44 0x88 0xa9 0x72 # CHECK: vsrari.d $vr4, $vr2, 34 +0x08 0x91 0xeb 0x72 # CHECK: vinsgr2vr.b $vr8, $r8, 4 +0xed 0xc4 0xeb 0x72 # CHECK: vinsgr2vr.h $vr13, $r7, 1 +0xc4 0xec 0xeb 0x72 # CHECK: vinsgr2vr.w $vr4, $r6, 3 +0xf7 0xf3 0xeb 0x72 # CHECK: vinsgr2vr.d $vr23, $r31, 0 +0x18 0xaa 0xef 0x72 # CHECK: vpickve2gr.b $r24, $vr16, 10 +0x31 0xcf 0xef 0x72 # CHECK: vpickve2gr.h $r17, $vr25, 3 +0x9e 0xeb 0xef 0x72 # CHECK: vpickve2gr.w $r30, $vr28, 2 +0x39 0xf5 0xef 0x72 # CHECK: vpickve2gr.d $r25, $vr9, 1 +0xdf 0x89 0xf3 0x72 # CHECK: vpickve2gr.bu $r31, $vr14, 2 +0x2c 0xd8 0xf3 0x72 # CHECK: vpickve2gr.hu $r12, $vr1, 6 +0x2a 0xe6 0xf3 0x72 # CHECK: vpickve2gr.wu $r10, $vr17, 1 +0x1a 0xf5 0xf3 0x72 # CHECK: vpickve2gr.du $r26, $vr8, 1 +0xc3 0xb0 0xf7 0x72 # CHECK: vreplvei.b $vr3, $vr6, 12 +0xb6 0xdf 0xf7 0x72 # CHECK: vreplvei.h $vr22, $vr29, 7 +0x51 0xe7 0xf7 0x72 # CHECK: vreplvei.w $vr17, $vr26, 1 +0x20 0xf6 0xf7 0x72 # CHECK: vreplvei.d $vr0, $vr17, 1 +0xd9 0x29 0x08 0x73 # CHECK: vsllwil.h.b $vr25, $vr14, 2 +0xb8 0x44 0x08 0x73 # CHECK: vsllwil.w.h $vr24, $vr5, 1 +0xd9 0xa1 0x08 0x73 # CHECK: vsllwil.d.w $vr25, $vr14, 8 +0xc3 0x02 0x09 0x73 # CHECK: vextl.q.d $vr3, $vr22 +0x2b 0x2f 0x0c 0x73 # CHECK: vsllwil.hu.bu $vr11, $vr25, 3 +0x42 0x6b 0x0c 0x73 # CHECK: vsllwil.wu.hu $vr2, $vr26, 10 +0x32 0xf1 0x0c 0x73 # CHECK: vsllwil.du.wu $vr18, $vr9, 28 +0x2d 0x03 0x0d 0x73 # CHECK: vextl.qu.du $vr13, $vr25 +0x1d 0x3b 0x10 0x73 # CHECK: vbitclri.b $vr29, $vr24, 6 +0xfb 0x55 0x10 0x73 # CHECK: vbitclri.h $vr27, $vr15, 5 +0x4b 0xa1 0x10 0x73 # CHECK: vbitclri.w $vr11, $vr10, 8 +0xe4 0x3c 0x11 0x73 # CHECK: vbitclri.d $vr4, $vr7, 15 +0x98 0x2e 0x14 0x73 # CHECK: vbitseti.b $vr24, $vr20, 3 +0x06 0x61 0x14 0x73 # CHECK: vbitseti.h $vr6, $vr8, 8 +0x35 0xe1 0x14 0x73 # CHECK: vbitseti.w $vr21, $vr9, 24 +0x5c 0x7a 0x15 0x73 # CHECK: vbitseti.d $vr28, $vr18, 30 +0xf3 0x23 0x18 0x73 # CHECK: vbitrevi.b $vr19, $vr31, 0 +0x32 0x40 0x18 0x73 # CHECK: vbitrevi.h $vr18, $vr1, 0 +0xd9 0xc8 0x18 0x73 # CHECK: 
vbitrevi.w $vr25, $vr6, 18 +0x68 0x5b 0x19 0x73 # CHECK: vbitrevi.d $vr8, $vr27, 22 +0x95 0x2b 0x24 0x73 # CHECK: vsat.b $vr21, $vr28, 2 +0xa6 0x70 0x24 0x73 # CHECK: vsat.h $vr6, $vr5, 12 +0xc3 0xc3 0x24 0x73 # CHECK: vsat.w $vr3, $vr30, 16 +0xe0 0x63 0x25 0x73 # CHECK: vsat.d $vr0, $vr31, 24 +0x94 0x2a 0x28 0x73 # CHECK: vsat.bu $vr20, $vr20, 2 +0xc8 0x70 0x28 0x73 # CHECK: vsat.hu $vr8, $vr6, 12 +0x92 0xea 0x28 0x73 # CHECK: vsat.wu $vr18, $vr20, 26 +0xca 0x84 0x29 0x73 # CHECK: vsat.du $vr10, $vr6, 33 +0x64 0x2e 0x2c 0x73 # CHECK: vslli.b $vr4, $vr19, 3 +0xe3 0x7a 0x2c 0x73 # CHECK: vslli.h $vr3, $vr23, 14 +0xb6 0x9a 0x2c 0x73 # CHECK: vslli.w $vr22, $vr21, 6 +0xf7 0x91 0x2d 0x73 # CHECK: vslli.d $vr23, $vr15, 36 +0x25 0x33 0x30 0x73 # CHECK: vsrli.b $vr5, $vr25, 4 +0xc9 0x65 0x30 0x73 # CHECK: vsrli.h $vr9, $vr14, 9 +0x07 0xb3 0x30 0x73 # CHECK: vsrli.w $vr7, $vr24, 12 +0x4f 0xfe 0x31 0x73 # CHECK: vsrli.d $vr15, $vr18, 63 +0x26 0x2c 0x34 0x73 # CHECK: vsrai.b $vr6, $vr1, 3 +0xa7 0x4f 0x34 0x73 # CHECK: vsrai.h $vr7, $vr29, 3 +0x7f 0xf7 0x34 0x73 # CHECK: vsrai.w $vr31, $vr27, 29 +0xdc 0xe3 0x35 0x73 # CHECK: vsrai.d $vr28, $vr30, 56 +0x42 0x4b 0x40 0x73 # CHECK: vsrlni.b.h $vr2, $vr26, 2 +0xdf 0x8d 0x40 0x73 # CHECK: vsrlni.h.w $vr31, $vr14, 3 +0x93 0x84 0x41 0x73 # CHECK: vsrlni.w.d $vr19, $vr4, 33 +0x7f 0xfc 0x42 0x73 # CHECK: vsrlni.d.q $vr31, $vr3, 63 +0x5a 0x42 0x44 0x73 # CHECK: vsrlrni.b.h $vr26, $vr18, 0 +0xd2 0x96 0x44 0x73 # CHECK: vsrlrni.h.w $vr18, $vr22, 5 +0x78 0x55 0x45 0x73 # CHECK: vsrlrni.w.d $vr24, $vr11, 21 +0x66 0x95 0x46 0x73 # CHECK: vsrlrni.d.q $vr6, $vr11, 37 +0xa3 0x56 0x48 0x73 # CHECK: vssrlni.b.h $vr3, $vr21, 5 +0x26 0xc0 0x48 0x73 # CHECK: vssrlni.h.w $vr6, $vr1, 16 +0xa4 0x6e 0x49 0x73 # CHECK: vssrlni.w.d $vr4, $vr21, 27 +0x48 0x7a 0x4b 0x73 # CHECK: vssrlni.d.q $vr8, $vr18, 94 +0x46 0x54 0x4c 0x73 # CHECK: vssrlni.bu.h $vr6, $vr2, 5 +0xbd 0x8b 0x4c 0x73 # CHECK: vssrlni.hu.w $vr29, $vr29, 2 +0x9c 0xbe 0x4d 0x73 # CHECK: vssrlni.wu.d $vr28, $vr20, 47 +0x56 0x49 0x4f 0x73 # CHECK: vssrlni.du.q $vr22, $vr10, 82 +0x31 0x6b 0x50 0x73 # CHECK: vssrlrni.b.h $vr17, $vr25, 10 +0xb5 0x83 0x50 0x73 # CHECK: vssrlrni.h.w $vr21, $vr29, 0 +0xe9 0xfd 0x51 0x73 # CHECK: vssrlrni.w.d $vr9, $vr15, 63 +0x24 0xd4 0x53 0x73 # CHECK: vssrlrni.d.q $vr4, $vr1, 117 +0xb9 0x4d 0x54 0x73 # CHECK: vssrlrni.bu.h $vr25, $vr13, 3 +0x9e 0x9f 0x54 0x73 # CHECK: vssrlrni.hu.w $vr30, $vr28, 7 +0x70 0x2f 0x55 0x73 # CHECK: vssrlrni.wu.d $vr16, $vr27, 11 +0xb4 0xfd 0x56 0x73 # CHECK: vssrlrni.du.q $vr20, $vr13, 63 +0x23 0x53 0x58 0x73 # CHECK: vsrani.b.h $vr3, $vr25, 4 +0xac 0xc5 0x58 0x73 # CHECK: vsrani.h.w $vr12, $vr13, 17 +0xc2 0x64 0x59 0x73 # CHECK: vsrani.w.d $vr2, $vr6, 25 +0x0c 0xa5 0x5b 0x73 # CHECK: vsrani.d.q $vr12, $vr8, 105 +0xbb 0x4a 0x5c 0x73 # CHECK: vsrarni.b.h $vr27, $vr21, 2 +0x6d 0x80 0x5c 0x73 # CHECK: vsrarni.h.w $vr13, $vr3, 0 +0xe9 0xab 0x5d 0x73 # CHECK: vsrarni.w.d $vr9, $vr31, 42 +0xb9 0xec 0x5e 0x73 # CHECK: vsrarni.d.q $vr25, $vr5, 59 +0xe8 0x70 0x60 0x73 # CHECK: vssrani.b.h $vr8, $vr7, 12 +0x55 0xfa 0x60 0x73 # CHECK: vssrani.h.w $vr21, $vr18, 30 +0xf7 0xcc 0x61 0x73 # CHECK: vssrani.w.d $vr23, $vr7, 51 +0xcc 0x21 0x62 0x73 # CHECK: vssrani.d.q $vr12, $vr14, 8 +0xb3 0x70 0x64 0x73 # CHECK: vssrani.bu.h $vr19, $vr5, 12 +0x3b 0xbf 0x64 0x73 # CHECK: vssrani.hu.w $vr27, $vr25, 15 +0x98 0xab 0x65 0x73 # CHECK: vssrani.wu.d $vr24, $vr28, 42 +0xe4 0xfe 0x66 0x73 # CHECK: vssrani.du.q $vr4, $vr23, 63 +0x1a 0x41 0x68 0x73 # CHECK: vssrarni.b.h $vr26, $vr8, 0 +0x64 
0xe4 0x68 0x73 # CHECK: vssrarni.h.w $vr4, $vr3, 25 +0x20 0x4f 0x69 0x73 # CHECK: vssrarni.w.d $vr0, $vr25, 19 +0x74 0xa9 0x6b 0x73 # CHECK: vssrarni.d.q $vr20, $vr11, 106 +0x99 0x67 0x6c 0x73 # CHECK: vssrarni.bu.h $vr25, $vr28, 9 +0xf4 0xb2 0x6c 0x73 # CHECK: vssrarni.hu.w $vr20, $vr23, 12 +0xfc 0xea 0x6d 0x73 # CHECK: vssrarni.wu.d $vr28, $vr23, 58 +0xc1 0x75 0x6f 0x73 # CHECK: vssrarni.du.q $vr1, $vr14, 93 +0x6f 0x1f 0x80 0x73 # CHECK: vextrins.d $vr15, $vr27, 7 +0x13 0x4c 0x86 0x73 # CHECK: vextrins.w $vr19, $vr0, 147 +0x3d 0x15 0x89 0x73 # CHECK: vextrins.h $vr29, $vr9, 69 +0xa0 0x5e 0x8c 0x73 # CHECK: vextrins.b $vr0, $vr21, 23 +0x53 0xf1 0x92 0x73 # CHECK: vshuf4i.b $vr19, $vr10, 188 +0x2f 0x2c 0x96 0x73 # CHECK: vshuf4i.h $vr15, $vr1, 139 +0xa3 0x08 0x9a 0x73 # CHECK: vshuf4i.w $vr3, $vr5, 130 +0xa8 0x0f 0x9e 0x73 # CHECK: vshuf4i.d $vr8, $vr29, 131 +0x30 0xa3 0xc6 0x73 # CHECK: vbitseli.b $vr16, $vr25, 168 +0xe4 0xe6 0xd1 0x73 # CHECK: vandi.b $vr4, $vr23, 121 +0x47 0xf1 0xd6 0x73 # CHECK: vori.b $vr7, $vr10, 188 +0x49 0x63 0xdb 0x73 # CHECK: vxori.b $vr9, $vr26, 216 +0x84 0x6f 0xdf 0x73 # CHECK: vnori.b $vr4, $vr28, 219 +0x56 0x2c 0xe2 0x73 # CHECK: vldi $vr22, -3742 +0xae 0x43 0xe4 0x73 # CHECK: vpermi.w $vr14, $vr29, 16 +0xeb 0x56 0x00 0x74 # CHECK: xvseq.b $xr11, $xr23, $xr21 +0x46 0xed 0x00 0x74 # CHECK: xvseq.h $xr6, $xr10, $xr27 +0x73 0x57 0x01 0x74 # CHECK: xvseq.w $xr19, $xr27, $xr21 +0x92 0x88 0x01 0x74 # CHECK: xvseq.d $xr18, $xr4, $xr2 +0x53 0x15 0x02 0x74 # CHECK: xvsle.b $xr19, $xr10, $xr5 +0x2a 0xbb 0x02 0x74 # CHECK: xvsle.h $xr10, $xr25, $xr14 +0xf1 0x4a 0x03 0x74 # CHECK: xvsle.w $xr17, $xr23, $xr18 +0xef 0xa4 0x03 0x74 # CHECK: xvsle.d $xr15, $xr7, $xr9 +0xc5 0x3d 0x04 0x74 # CHECK: xvsle.bu $xr5, $xr14, $xr15 +0x29 0xe7 0x04 0x74 # CHECK: xvsle.hu $xr9, $xr25, $xr25 +0xfc 0x43 0x05 0x74 # CHECK: xvsle.wu $xr28, $xr31, $xr16 +0x11 0xe3 0x05 0x74 # CHECK: xvsle.du $xr17, $xr24, $xr24 +0x92 0x67 0x06 0x74 # CHECK: xvslt.b $xr18, $xr28, $xr25 +0xdd 0x88 0x06 0x74 # CHECK: xvslt.h $xr29, $xr6, $xr2 +0x4e 0x15 0x07 0x74 # CHECK: xvslt.w $xr14, $xr10, $xr5 +0xd3 0xbf 0x07 0x74 # CHECK: xvslt.d $xr19, $xr30, $xr15 +0xce 0x6c 0x08 0x74 # CHECK: xvslt.bu $xr14, $xr6, $xr27 +0x5b 0x97 0x08 0x74 # CHECK: xvslt.hu $xr27, $xr26, $xr5 +0x26 0x29 0x09 0x74 # CHECK: xvslt.wu $xr6, $xr9, $xr10 +0x8d 0xf1 0x09 0x74 # CHECK: xvslt.du $xr13, $xr12, $xr28 +0xc0 0x0c 0x0a 0x74 # CHECK: xvadd.b $xr0, $xr6, $xr3 +0x68 0xa9 0x0a 0x74 # CHECK: xvadd.h $xr8, $xr11, $xr10 +0xc5 0x54 0x0b 0x74 # CHECK: xvadd.w $xr5, $xr6, $xr21 +0xa4 0xaa 0x0b 0x74 # CHECK: xvadd.d $xr4, $xr21, $xr10 +0x10 0x78 0x0c 0x74 # CHECK: xvsub.b $xr16, $xr0, $xr30 +0x7c 0xc9 0x0c 0x74 # CHECK: xvsub.h $xr28, $xr11, $xr18 +0x4d 0x34 0x0d 0x74 # CHECK: xvsub.w $xr13, $xr2, $xr13 +0x20 0xd7 0x0d 0x74 # CHECK: xvsub.d $xr0, $xr25, $xr21 +0xc8 0x2f 0x1e 0x74 # CHECK: xvaddwev.h.b $xr8, $xr30, $xr11 +0xca 0x97 0x1e 0x74 # CHECK: xvaddwev.w.h $xr10, $xr30, $xr5 +0x34 0x07 0x1f 0x74 # CHECK: xvaddwev.d.w $xr20, $xr25, $xr1 +0x16 0xe3 0x1f 0x74 # CHECK: xvaddwev.q.d $xr22, $xr24, $xr24 +0x21 0x07 0x20 0x74 # CHECK: xvsubwev.h.b $xr1, $xr25, $xr1 +0xc4 0xaf 0x20 0x74 # CHECK: xvsubwev.w.h $xr4, $xr30, $xr11 +0x46 0x48 0x21 0x74 # CHECK: xvsubwev.d.w $xr6, $xr2, $xr18 +0x60 0xfd 0x21 0x74 # CHECK: xvsubwev.q.d $xr0, $xr11, $xr31 +0x84 0x64 0x22 0x74 # CHECK: xvaddwod.h.b $xr4, $xr4, $xr25 +0x2c 0xf7 0x22 0x74 # CHECK: xvaddwod.w.h $xr12, $xr25, $xr29 +0xd0 0x4e 0x23 0x74 # CHECK: xvaddwod.d.w $xr16, $xr22, $xr19 +0x37 0xbb 
0x23 0x74 # CHECK: xvaddwod.q.d $xr23, $xr25, $xr14 +0x01 0x22 0x24 0x74 # CHECK: xvsubwod.h.b $xr1, $xr16, $xr8 +0x65 0xa1 0x24 0x74 # CHECK: xvsubwod.w.h $xr5, $xr11, $xr8 +0xf4 0x00 0x25 0x74 # CHECK: xvsubwod.d.w $xr20, $xr7, $xr0 +0xf1 0xd2 0x25 0x74 # CHECK: xvsubwod.q.d $xr17, $xr23, $xr20 +0x4f 0x7d 0x2e 0x74 # CHECK: xvaddwev.h.bu $xr15, $xr10, $xr31 +0x15 0xf3 0x2e 0x74 # CHECK: xvaddwev.w.hu $xr21, $xr24, $xr28 +0xe9 0x3b 0x2f 0x74 # CHECK: xvaddwev.d.wu $xr9, $xr31, $xr14 +0x39 0xa0 0x2f 0x74 # CHECK: xvaddwev.q.du $xr25, $xr1, $xr8 +0xfe 0x37 0x30 0x74 # CHECK: xvsubwev.h.bu $xr30, $xr31, $xr13 +0x81 0x87 0x30 0x74 # CHECK: xvsubwev.w.hu $xr1, $xr28, $xr1 +0xfd 0x76 0x31 0x74 # CHECK: xvsubwev.d.wu $xr29, $xr23, $xr29 +0x0d 0xee 0x31 0x74 # CHECK: xvsubwev.q.du $xr13, $xr16, $xr27 +0xad 0x0b 0x32 0x74 # CHECK: xvaddwod.h.bu $xr13, $xr29, $xr2 +0x4e 0xb5 0x32 0x74 # CHECK: xvaddwod.w.hu $xr14, $xr10, $xr13 +0x5e 0x2b 0x33 0x74 # CHECK: xvaddwod.d.wu $xr30, $xr26, $xr10 +0xa2 0x81 0x33 0x74 # CHECK: xvaddwod.q.du $xr2, $xr13, $xr0 +0xc6 0x16 0x34 0x74 # CHECK: xvsubwod.h.bu $xr6, $xr22, $xr5 +0xb3 0xa2 0x34 0x74 # CHECK: xvsubwod.w.hu $xr19, $xr21, $xr8 +0x70 0x79 0x35 0x74 # CHECK: xvsubwod.d.wu $xr16, $xr11, $xr30 +0x41 0xa7 0x35 0x74 # CHECK: xvsubwod.q.du $xr1, $xr26, $xr9 +0xa5 0x09 0x3e 0x74 # CHECK: xvaddwev.h.bu.b $xr5, $xr13, $xr2 +0xb1 0xd2 0x3e 0x74 # CHECK: xvaddwev.w.hu.h $xr17, $xr21, $xr20 +0x6b 0x4f 0x3f 0x74 # CHECK: xvaddwev.d.wu.w $xr11, $xr27, $xr19 +0xb4 0xf6 0x3f 0x74 # CHECK: xvaddwev.q.du.d $xr20, $xr21, $xr29 +0xc1 0x38 0x40 0x74 # CHECK: xvaddwod.h.bu.b $xr1, $xr6, $xr14 +0xa7 0xaf 0x40 0x74 # CHECK: xvaddwod.w.hu.h $xr7, $xr29, $xr11 +0x50 0x39 0x41 0x74 # CHECK: xvaddwod.d.wu.w $xr16, $xr10, $xr14 +0x6a 0xdd 0x41 0x74 # CHECK: xvaddwod.q.du.d $xr10, $xr11, $xr23 +0x58 0x71 0x46 0x74 # CHECK: xvsadd.b $xr24, $xr10, $xr28 +0x53 0xc6 0x46 0x74 # CHECK: xvsadd.h $xr19, $xr18, $xr17 +0xc2 0x30 0x47 0x74 # CHECK: xvsadd.w $xr2, $xr6, $xr12 +0x4f 0xf6 0x47 0x74 # CHECK: xvsadd.d $xr15, $xr18, $xr29 +0xaf 0x43 0x48 0x74 # CHECK: xvssub.b $xr15, $xr29, $xr16 +0x7c 0xa4 0x48 0x74 # CHECK: xvssub.h $xr28, $xr3, $xr9 +0x88 0x3e 0x49 0x74 # CHECK: xvssub.w $xr8, $xr20, $xr15 +0x17 0xcd 0x49 0x74 # CHECK: xvssub.d $xr23, $xr8, $xr19 +0x8c 0x40 0x4a 0x74 # CHECK: xvsadd.bu $xr12, $xr4, $xr16 +0x49 0xd3 0x4a 0x74 # CHECK: xvsadd.hu $xr9, $xr26, $xr20 +0xfe 0x71 0x4b 0x74 # CHECK: xvsadd.wu $xr30, $xr15, $xr28 +0xaf 0xf1 0x4b 0x74 # CHECK: xvsadd.du $xr15, $xr13, $xr28 +0x6a 0x3c 0x4c 0x74 # CHECK: xvssub.bu $xr10, $xr3, $xr15 +0x80 0x89 0x4c 0x74 # CHECK: xvssub.hu $xr0, $xr12, $xr2 +0x5e 0x5d 0x4d 0x74 # CHECK: xvssub.wu $xr30, $xr10, $xr23 +0xc9 0xbb 0x4d 0x74 # CHECK: xvssub.du $xr9, $xr30, $xr14 +0xb9 0x48 0x54 0x74 # CHECK: xvhaddw.h.b $xr25, $xr5, $xr18 +0x87 0xce 0x54 0x74 # CHECK: xvhaddw.w.h $xr7, $xr20, $xr19 +0xb7 0x10 0x55 0x74 # CHECK: xvhaddw.d.w $xr23, $xr5, $xr4 +0xf1 0xe4 0x55 0x74 # CHECK: xvhaddw.q.d $xr17, $xr7, $xr25 +0x5d 0x4e 0x56 0x74 # CHECK: xvhsubw.h.b $xr29, $xr18, $xr19 +0x9e 0x8f 0x56 0x74 # CHECK: xvhsubw.w.h $xr30, $xr28, $xr3 +0x25 0x35 0x57 0x74 # CHECK: xvhsubw.d.w $xr5, $xr9, $xr13 +0x94 0xf5 0x57 0x74 # CHECK: xvhsubw.q.d $xr20, $xr12, $xr29 +0x4b 0x1d 0x58 0x74 # CHECK: xvhaddw.hu.bu $xr11, $xr10, $xr7 +0xb0 0xd6 0x58 0x74 # CHECK: xvhaddw.wu.hu $xr16, $xr21, $xr21 +0xf1 0x23 0x59 0x74 # CHECK: xvhaddw.du.wu $xr17, $xr31, $xr8 +0x82 0xac 0x59 0x74 # CHECK: xvhaddw.qu.du $xr2, $xr4, $xr11 +0xd5 0x21 0x5a 0x74 # CHECK: 
xvhsubw.hu.bu $xr21, $xr14, $xr8 +0x19 0xec 0x5a 0x74 # CHECK: xvhsubw.wu.hu $xr25, $xr0, $xr27 +0x04 0x7a 0x5b 0x74 # CHECK: xvhsubw.du.wu $xr4, $xr16, $xr30 +0x2b 0x99 0x5b 0x74 # CHECK: xvhsubw.qu.du $xr11, $xr9, $xr6 +0xae 0x6a 0x5c 0x74 # CHECK: xvadda.b $xr14, $xr21, $xr26 +0xd5 0xd7 0x5c 0x74 # CHECK: xvadda.h $xr21, $xr30, $xr21 +0x7f 0x4e 0x5d 0x74 # CHECK: xvadda.w $xr31, $xr19, $xr19 +0x89 0xfc 0x5d 0x74 # CHECK: xvadda.d $xr9, $xr4, $xr31 +0x74 0x36 0x60 0x74 # CHECK: xvabsd.b $xr20, $xr19, $xr13 +0xf4 0xa8 0x60 0x74 # CHECK: xvabsd.h $xr20, $xr7, $xr10 +0xf7 0x03 0x61 0x74 # CHECK: xvabsd.w $xr23, $xr31, $xr0 +0x27 0xba 0x61 0x74 # CHECK: xvabsd.d $xr7, $xr17, $xr14 +0xec 0x1a 0x62 0x74 # CHECK: xvabsd.bu $xr12, $xr23, $xr6 +0xd0 0xcf 0x62 0x74 # CHECK: xvabsd.hu $xr16, $xr30, $xr19 +0xb3 0x68 0x63 0x74 # CHECK: xvabsd.wu $xr19, $xr5, $xr26 +0x80 0x9d 0x63 0x74 # CHECK: xvabsd.du $xr0, $xr12, $xr7 +0xf7 0x67 0x64 0x74 # CHECK: xvavg.b $xr23, $xr31, $xr25 +0x5b 0xec 0x64 0x74 # CHECK: xvavg.h $xr27, $xr2, $xr27 +0x14 0x40 0x65 0x74 # CHECK: xvavg.w $xr20, $xr0, $xr16 +0x2d 0xa9 0x65 0x74 # CHECK: xvavg.d $xr13, $xr9, $xr10 +0xdf 0x13 0x66 0x74 # CHECK: xvavg.bu $xr31, $xr30, $xr4 +0x36 0x96 0x66 0x74 # CHECK: xvavg.hu $xr22, $xr17, $xr5 +0xb5 0x47 0x67 0x74 # CHECK: xvavg.wu $xr21, $xr29, $xr17 +0xab 0xf4 0x67 0x74 # CHECK: xvavg.du $xr11, $xr5, $xr29 +0xb7 0x35 0x68 0x74 # CHECK: xvavgr.b $xr23, $xr13, $xr13 +0x9e 0xfe 0x68 0x74 # CHECK: xvavgr.h $xr30, $xr20, $xr31 +0x9d 0x27 0x69 0x74 # CHECK: xvavgr.w $xr29, $xr28, $xr9 +0x95 0xa2 0x69 0x74 # CHECK: xvavgr.d $xr21, $xr20, $xr8 +0x20 0x11 0x6a 0x74 # CHECK: xvavgr.bu $xr0, $xr9, $xr4 +0x03 0xec 0x6a 0x74 # CHECK: xvavgr.hu $xr3, $xr0, $xr27 +0xc2 0x57 0x6b 0x74 # CHECK: xvavgr.wu $xr2, $xr30, $xr21 +0xb6 0xc6 0x6b 0x74 # CHECK: xvavgr.du $xr22, $xr21, $xr17 +0x81 0x4e 0x70 0x74 # CHECK: xvmax.b $xr1, $xr20, $xr19 +0x20 0xba 0x70 0x74 # CHECK: xvmax.h $xr0, $xr17, $xr14 +0x00 0x41 0x71 0x74 # CHECK: xvmax.w $xr0, $xr8, $xr16 +0xf0 0xc2 0x71 0x74 # CHECK: xvmax.d $xr16, $xr23, $xr16 +0xd4 0x38 0x72 0x74 # CHECK: xvmin.b $xr20, $xr6, $xr14 +0x64 0xe0 0x72 0x74 # CHECK: xvmin.h $xr4, $xr3, $xr24 +0x45 0x5c 0x73 0x74 # CHECK: xvmin.w $xr5, $xr2, $xr23 +0xff 0xea 0x73 0x74 # CHECK: xvmin.d $xr31, $xr23, $xr26 +0xae 0x0d 0x74 0x74 # CHECK: xvmax.bu $xr14, $xr13, $xr3 +0x36 0x92 0x74 0x74 # CHECK: xvmax.hu $xr22, $xr17, $xr4 +0xb1 0x75 0x75 0x74 # CHECK: xvmax.wu $xr17, $xr13, $xr29 +0x4d 0x80 0x75 0x74 # CHECK: xvmax.du $xr13, $xr2, $xr0 +0xf2 0x6f 0x76 0x74 # CHECK: xvmin.bu $xr18, $xr31, $xr27 +0x42 0xb9 0x76 0x74 # CHECK: xvmin.hu $xr2, $xr10, $xr14 +0x1f 0x69 0x77 0x74 # CHECK: xvmin.wu $xr31, $xr8, $xr26 +0x4c 0xa7 0x77 0x74 # CHECK: xvmin.du $xr12, $xr26, $xr9 +0x5a 0x0c 0x84 0x74 # CHECK: xvmul.b $xr26, $xr2, $xr3 +0xb0 0x97 0x84 0x74 # CHECK: xvmul.h $xr16, $xr29, $xr5 +0x33 0x0c 0x85 0x74 # CHECK: xvmul.w $xr19, $xr1, $xr3 +0xef 0x81 0x85 0x74 # CHECK: xvmul.d $xr15, $xr15, $xr0 +0x89 0x25 0x86 0x74 # CHECK: xvmuh.b $xr9, $xr12, $xr9 +0xe8 0xc2 0x86 0x74 # CHECK: xvmuh.h $xr8, $xr23, $xr16 +0xdd 0x2c 0x87 0x74 # CHECK: xvmuh.w $xr29, $xr6, $xr11 +0x43 0x9e 0x87 0x74 # CHECK: xvmuh.d $xr3, $xr18, $xr7 +0xe3 0x4c 0x88 0x74 # CHECK: xvmuh.bu $xr3, $xr7, $xr19 +0x2d 0xc8 0x88 0x74 # CHECK: xvmuh.hu $xr13, $xr1, $xr18 +0xaf 0x42 0x89 0x74 # CHECK: xvmuh.wu $xr15, $xr21, $xr16 +0x4b 0xcd 0x89 0x74 # CHECK: xvmuh.du $xr11, $xr10, $xr19 +0x84 0x25 0x90 0x74 # CHECK: xvmulwev.h.b $xr4, $xr12, $xr9 +0x6a 0xd0 0x90 0x74 # CHECK: 
xvmulwev.w.h $xr10, $xr3, $xr20 +0xc4 0x4a 0x91 0x74 # CHECK: xvmulwev.d.w $xr4, $xr22, $xr18 +0xb4 0xee 0x91 0x74 # CHECK: xvmulwev.q.d $xr20, $xr21, $xr27 +0xe5 0x00 0x92 0x74 # CHECK: xvmulwod.h.b $xr5, $xr7, $xr0 +0x93 0xaf 0x92 0x74 # CHECK: xvmulwod.w.h $xr19, $xr28, $xr11 +0xf3 0x40 0x93 0x74 # CHECK: xvmulwod.d.w $xr19, $xr7, $xr16 +0x8b 0xb5 0x93 0x74 # CHECK: xvmulwod.q.d $xr11, $xr12, $xr13 +0x56 0x04 0x98 0x74 # CHECK: xvmulwev.h.bu $xr22, $xr2, $xr1 +0x62 0x90 0x98 0x74 # CHECK: xvmulwev.w.hu $xr2, $xr3, $xr4 +0x82 0x65 0x99 0x74 # CHECK: xvmulwev.d.wu $xr2, $xr12, $xr25 +0xb6 0xc7 0x99 0x74 # CHECK: xvmulwev.q.du $xr22, $xr29, $xr17 +0x29 0x01 0x9a 0x74 # CHECK: xvmulwod.h.bu $xr9, $xr9, $xr0 +0x54 0xc0 0x9a 0x74 # CHECK: xvmulwod.w.hu $xr20, $xr2, $xr16 +0x61 0x61 0x9b 0x74 # CHECK: xvmulwod.d.wu $xr1, $xr11, $xr24 +0x53 0xd8 0x9b 0x74 # CHECK: xvmulwod.q.du $xr19, $xr2, $xr22 +0xb6 0x63 0xa0 0x74 # CHECK: xvmulwev.h.bu.b $xr22, $xr29, $xr24 +0xc1 0xae 0xa0 0x74 # CHECK: xvmulwev.w.hu.h $xr1, $xr22, $xr11 +0x8c 0x31 0xa1 0x74 # CHECK: xvmulwev.d.wu.w $xr12, $xr12, $xr12 +0x20 0xde 0xa1 0x74 # CHECK: xvmulwev.q.du.d $xr0, $xr17, $xr23 +0x1a 0x5e 0xa2 0x74 # CHECK: xvmulwod.h.bu.b $xr26, $xr16, $xr23 +0x9f 0xa5 0xa2 0x74 # CHECK: xvmulwod.w.hu.h $xr31, $xr12, $xr9 +0x75 0x4f 0xa3 0x74 # CHECK: xvmulwod.d.wu.w $xr21, $xr27, $xr19 +0xa7 0xac 0xa3 0x74 # CHECK: xvmulwod.q.du.d $xr7, $xr5, $xr11 +0x76 0x3d 0xa8 0x74 # CHECK: xvmadd.b $xr22, $xr11, $xr15 +0xc3 0xe7 0xa8 0x74 # CHECK: xvmadd.h $xr3, $xr30, $xr25 +0x41 0x16 0xa9 0x74 # CHECK: xvmadd.w $xr1, $xr18, $xr5 +0xb0 0xae 0xa9 0x74 # CHECK: xvmadd.d $xr16, $xr21, $xr11 +0x8b 0x29 0xaa 0x74 # CHECK: xvmsub.b $xr11, $xr12, $xr10 +0x70 0x85 0xaa 0x74 # CHECK: xvmsub.h $xr16, $xr11, $xr1 +0xaf 0x56 0xab 0x74 # CHECK: xvmsub.w $xr15, $xr21, $xr21 +0x6c 0x91 0xab 0x74 # CHECK: xvmsub.d $xr12, $xr11, $xr4 +0xf5 0x18 0xac 0x74 # CHECK: xvmaddwev.h.b $xr21, $xr7, $xr6 +0xb0 0xb7 0xac 0x74 # CHECK: xvmaddwev.w.h $xr16, $xr29, $xr13 +0x27 0x7b 0xad 0x74 # CHECK: xvmaddwev.d.w $xr7, $xr25, $xr30 +0x73 0xa0 0xad 0x74 # CHECK: xvmaddwev.q.d $xr19, $xr3, $xr8 +0x74 0x33 0xae 0x74 # CHECK: xvmaddwod.h.b $xr20, $xr27, $xr12 +0xa0 0xb6 0xae 0x74 # CHECK: xvmaddwod.w.h $xr0, $xr21, $xr13 +0xb9 0x7d 0xaf 0x74 # CHECK: xvmaddwod.d.w $xr25, $xr13, $xr31 +0x5a 0xc3 0xaf 0x74 # CHECK: xvmaddwod.q.d $xr26, $xr26, $xr16 +0x52 0x57 0xb4 0x74 # CHECK: xvmaddwev.h.bu $xr18, $xr26, $xr21 +0x0e 0x96 0xb4 0x74 # CHECK: xvmaddwev.w.hu $xr14, $xr16, $xr5 +0xb3 0x53 0xb5 0x74 # CHECK: xvmaddwev.d.wu $xr19, $xr29, $xr20 +0xaf 0xc7 0xb5 0x74 # CHECK: xvmaddwev.q.du $xr15, $xr29, $xr17 +0x4d 0x07 0xb6 0x74 # CHECK: xvmaddwod.h.bu $xr13, $xr26, $xr1 +0x2f 0xc3 0xb6 0x74 # CHECK: xvmaddwod.w.hu $xr15, $xr25, $xr16 +0x97 0x24 0xb7 0x74 # CHECK: xvmaddwod.d.wu $xr23, $xr4, $xr9 +0xdd 0xc6 0xb7 0x74 # CHECK: xvmaddwod.q.du $xr29, $xr22, $xr17 +0x37 0x18 0xbc 0x74 # CHECK: xvmaddwev.h.bu.b $xr23, $xr1, $xr6 +0x64 0xb3 0xbc 0x74 # CHECK: xvmaddwev.w.hu.h $xr4, $xr27, $xr12 +0x40 0x14 0xbd 0x74 # CHECK: xvmaddwev.d.wu.w $xr0, $xr2, $xr5 +0xe9 0x87 0xbd 0x74 # CHECK: xvmaddwev.q.du.d $xr9, $xr31, $xr1 +0x69 0x52 0xbe 0x74 # CHECK: xvmaddwod.h.bu.b $xr9, $xr19, $xr20 +0xa7 0xb4 0xbe 0x74 # CHECK: xvmaddwod.w.hu.h $xr7, $xr5, $xr13 +0x6a 0x07 0xbf 0x74 # CHECK: xvmaddwod.d.wu.w $xr10, $xr27, $xr1 +0x79 0x82 0xbf 0x74 # CHECK: xvmaddwod.q.du.d $xr25, $xr19, $xr0 +0xe3 0x0b 0xe0 0x74 # CHECK: xvdiv.b $xr3, $xr31, $xr2 +0x81 0xc5 0xe0 0x74 # CHECK: xvdiv.h $xr1, $xr12, $xr17 
+0x0d 0x30 0xe1 0x74 # CHECK: xvdiv.w $xr13, $xr0, $xr12 +0xb1 0xac 0xe1 0x74 # CHECK: xvdiv.d $xr17, $xr5, $xr11 +0x36 0x06 0xe2 0x74 # CHECK: xvmod.b $xr22, $xr17, $xr1 +0xbc 0xb0 0xe2 0x74 # CHECK: xvmod.h $xr28, $xr5, $xr12 +0x7d 0x3a 0xe3 0x74 # CHECK: xvmod.w $xr29, $xr19, $xr14 +0x11 0x99 0xe3 0x74 # CHECK: xvmod.d $xr17, $xr8, $xr6 +0xd7 0x08 0xe4 0x74 # CHECK: xvdiv.bu $xr23, $xr6, $xr2 +0xe9 0x83 0xe4 0x74 # CHECK: xvdiv.hu $xr9, $xr31, $xr0 +0x2f 0x10 0xe5 0x74 # CHECK: xvdiv.wu $xr15, $xr1, $xr4 +0xae 0xaf 0xe5 0x74 # CHECK: xvdiv.du $xr14, $xr29, $xr11 +0x84 0x7d 0xe6 0x74 # CHECK: xvmod.bu $xr4, $xr12, $xr31 +0x96 0xad 0xe6 0x74 # CHECK: xvmod.hu $xr22, $xr12, $xr11 +0xf5 0x2a 0xe7 0x74 # CHECK: xvmod.wu $xr21, $xr23, $xr10 +0xb5 0xfe 0xe7 0x74 # CHECK: xvmod.du $xr21, $xr21, $xr31 +0x50 0x2d 0xe8 0x74 # CHECK: xvsll.b $xr16, $xr10, $xr11 +0x4c 0xed 0xe8 0x74 # CHECK: xvsll.h $xr12, $xr10, $xr27 +0x5e 0x68 0xe9 0x74 # CHECK: xvsll.w $xr30, $xr2, $xr26 +0xa8 0xc6 0xe9 0x74 # CHECK: xvsll.d $xr8, $xr21, $xr17 +0x1b 0x4b 0xea 0x74 # CHECK: xvsrl.b $xr27, $xr24, $xr18 +0xf1 0xe3 0xea 0x74 # CHECK: xvsrl.h $xr17, $xr31, $xr24 +0x65 0x10 0xeb 0x74 # CHECK: xvsrl.w $xr5, $xr3, $xr4 +0xd5 0xa0 0xeb 0x74 # CHECK: xvsrl.d $xr21, $xr6, $xr8 +0x9c 0x57 0xec 0x74 # CHECK: xvsra.b $xr28, $xr28, $xr21 +0x93 0xe8 0xec 0x74 # CHECK: xvsra.h $xr19, $xr4, $xr26 +0x8d 0x06 0xed 0x74 # CHECK: xvsra.w $xr13, $xr20, $xr1 +0x00 0xc9 0xed 0x74 # CHECK: xvsra.d $xr0, $xr8, $xr18 +0xc8 0x73 0xee 0x74 # CHECK: xvrotr.b $xr8, $xr30, $xr28 +0x71 0x82 0xee 0x74 # CHECK: xvrotr.h $xr17, $xr19, $xr0 +0x8f 0x5f 0xef 0x74 # CHECK: xvrotr.w $xr15, $xr28, $xr23 +0x5f 0xd4 0xef 0x74 # CHECK: xvrotr.d $xr31, $xr2, $xr21 +0x54 0x2f 0xf0 0x74 # CHECK: xvsrlr.b $xr20, $xr26, $xr11 +0x4d 0x9e 0xf0 0x74 # CHECK: xvsrlr.h $xr13, $xr18, $xr7 +0x3c 0x0c 0xf1 0x74 # CHECK: xvsrlr.w $xr28, $xr1, $xr3 +0x66 0xb8 0xf1 0x74 # CHECK: xvsrlr.d $xr6, $xr3, $xr14 +0x0a 0x45 0xf2 0x74 # CHECK: xvsrar.b $xr10, $xr8, $xr17 +0x5f 0xac 0xf2 0x74 # CHECK: xvsrar.h $xr31, $xr2, $xr11 +0x0d 0x15 0xf3 0x74 # CHECK: xvsrar.w $xr13, $xr8, $xr5 +0x4c 0x82 0xf3 0x74 # CHECK: xvsrar.d $xr12, $xr18, $xr0 +0xcf 0xbc 0xf4 0x74 # CHECK: xvsrln.b.h $xr15, $xr6, $xr15 +0x76 0x46 0xf5 0x74 # CHECK: xvsrln.h.w $xr22, $xr19, $xr17 +0xe4 0x94 0xf5 0x74 # CHECK: xvsrln.w.d $xr4, $xr7, $xr5 +0x63 0xde 0xf6 0x74 # CHECK: xvsran.b.h $xr3, $xr19, $xr23 +0xd0 0x04 0xf7 0x74 # CHECK: xvsran.h.w $xr16, $xr6, $xr1 +0x1b 0x82 0xf7 0x74 # CHECK: xvsran.w.d $xr27, $xr16, $xr0 +0x22 0xa5 0xf8 0x74 # CHECK: xvsrlrn.b.h $xr2, $xr9, $xr9 +0x70 0x4d 0xf9 0x74 # CHECK: xvsrlrn.h.w $xr16, $xr11, $xr19 +0x3d 0xbf 0xf9 0x74 # CHECK: xvsrlrn.w.d $xr29, $xr25, $xr15 +0x8d 0xb6 0xfa 0x74 # CHECK: xvsrarn.b.h $xr13, $xr20, $xr13 +0xcd 0x06 0xfb 0x74 # CHECK: xvsrarn.h.w $xr13, $xr22, $xr1 +0x8d 0x89 0xfb 0x74 # CHECK: xvsrarn.w.d $xr13, $xr12, $xr2 +0x73 0xaa 0xfc 0x74 # CHECK: xvssrln.b.h $xr19, $xr19, $xr10 +0x0c 0x47 0xfd 0x74 # CHECK: xvssrln.h.w $xr12, $xr24, $xr17 +0xc7 0xbb 0xfd 0x74 # CHECK: xvssrln.w.d $xr7, $xr30, $xr14 +0x26 0xdd 0xfe 0x74 # CHECK: xvssran.b.h $xr6, $xr9, $xr23 +0x2d 0x09 0xff 0x74 # CHECK: xvssran.h.w $xr13, $xr9, $xr2 +0x52 0x87 0xff 0x74 # CHECK: xvssran.w.d $xr18, $xr26, $xr1 +0x38 0xde 0x00 0x75 # CHECK: xvssrlrn.b.h $xr24, $xr17, $xr23 +0x8a 0x21 0x01 0x75 # CHECK: xvssrlrn.h.w $xr10, $xr12, $xr8 +0x7e 0x9b 0x01 0x75 # CHECK: xvssrlrn.w.d $xr30, $xr27, $xr6 +0x74 0xff 0x02 0x75 # CHECK: xvssrarn.b.h $xr20, $xr27, $xr31 +0xf8 0x5e 0x03 0x75 # 
CHECK: xvssrarn.h.w $xr24, $xr23, $xr23 +0xa8 0xe7 0x03 0x75 # CHECK: xvssrarn.w.d $xr8, $xr29, $xr25 +0x8e 0xc4 0x04 0x75 # CHECK: xvssrln.bu.h $xr14, $xr4, $xr17 +0x9c 0x2a 0x05 0x75 # CHECK: xvssrln.hu.w $xr28, $xr20, $xr10 +0x0a 0xd1 0x05 0x75 # CHECK: xvssrln.wu.d $xr10, $xr8, $xr20 +0x92 0xdf 0x06 0x75 # CHECK: xvssran.bu.h $xr18, $xr28, $xr23 +0x79 0x62 0x07 0x75 # CHECK: xvssran.hu.w $xr25, $xr19, $xr24 +0xb0 0xcb 0x07 0x75 # CHECK: xvssran.wu.d $xr16, $xr29, $xr18 +0x62 0xba 0x08 0x75 # CHECK: xvssrlrn.bu.h $xr2, $xr19, $xr14 +0x06 0x48 0x09 0x75 # CHECK: xvssrlrn.hu.w $xr6, $xr0, $xr18 +0x9e 0xfc 0x09 0x75 # CHECK: xvssrlrn.wu.d $xr30, $xr4, $xr31 +0x90 0xa3 0x0a 0x75 # CHECK: xvssrarn.bu.h $xr16, $xr28, $xr8 +0x4b 0x18 0x0b 0x75 # CHECK: xvssrarn.hu.w $xr11, $xr2, $xr6 +0xd6 0xb0 0x0b 0x75 # CHECK: xvssrarn.wu.d $xr22, $xr6, $xr12 +0x04 0x42 0x0c 0x75 # CHECK: xvbitclr.b $xr4, $xr16, $xr16 +0xf0 0xeb 0x0c 0x75 # CHECK: xvbitclr.h $xr16, $xr31, $xr26 +0x58 0x50 0x0d 0x75 # CHECK: xvbitclr.w $xr24, $xr2, $xr20 +0x92 0xf9 0x0d 0x75 # CHECK: xvbitclr.d $xr18, $xr12, $xr30 +0x7a 0x5f 0x0e 0x75 # CHECK: xvbitset.b $xr26, $xr27, $xr23 +0x73 0xae 0x0e 0x75 # CHECK: xvbitset.h $xr19, $xr19, $xr11 +0x27 0x49 0x0f 0x75 # CHECK: xvbitset.w $xr7, $xr9, $xr18 +0xc6 0x8f 0x0f 0x75 # CHECK: xvbitset.d $xr6, $xr30, $xr3 +0xbe 0x1d 0x10 0x75 # CHECK: xvbitrev.b $xr30, $xr13, $xr7 +0x6c 0xa0 0x10 0x75 # CHECK: xvbitrev.h $xr12, $xr3, $xr8 +0x88 0x52 0x11 0x75 # CHECK: xvbitrev.w $xr8, $xr20, $xr20 +0xfc 0xc4 0x11 0x75 # CHECK: xvbitrev.d $xr28, $xr7, $xr17 +0x5d 0x32 0x16 0x75 # CHECK: xvpackev.b $xr29, $xr18, $xr12 +0x66 0xc5 0x16 0x75 # CHECK: xvpackev.h $xr6, $xr11, $xr17 +0x42 0x78 0x17 0x75 # CHECK: xvpackev.w $xr2, $xr2, $xr30 +0xfa 0xd5 0x17 0x75 # CHECK: xvpackev.d $xr26, $xr15, $xr21 +0x33 0x46 0x18 0x75 # CHECK: xvpackod.b $xr19, $xr17, $xr17 +0x0f 0x8d 0x18 0x75 # CHECK: xvpackod.h $xr15, $xr8, $xr3 +0xed 0x31 0x19 0x75 # CHECK: xvpackod.w $xr13, $xr15, $xr12 +0x65 0xe8 0x19 0x75 # CHECK: xvpackod.d $xr5, $xr3, $xr26 +0x3b 0x05 0x1a 0x75 # CHECK: xvilvl.b $xr27, $xr9, $xr1 +0x1d 0x85 0x1a 0x75 # CHECK: xvilvl.h $xr29, $xr8, $xr1 +0x09 0x1d 0x1b 0x75 # CHECK: xvilvl.w $xr9, $xr8, $xr7 +0xf9 0xc8 0x1b 0x75 # CHECK: xvilvl.d $xr25, $xr7, $xr18 +0x07 0x6b 0x1c 0x75 # CHECK: xvilvh.b $xr7, $xr24, $xr26 +0x86 0xf2 0x1c 0x75 # CHECK: xvilvh.h $xr6, $xr20, $xr28 +0xad 0x30 0x1d 0x75 # CHECK: xvilvh.w $xr13, $xr5, $xr12 +0xa1 0xfe 0x1d 0x75 # CHECK: xvilvh.d $xr1, $xr21, $xr31 +0xb1 0x7d 0x1e 0x75 # CHECK: xvpickev.b $xr17, $xr13, $xr31 +0x04 0xb9 0x1e 0x75 # CHECK: xvpickev.h $xr4, $xr8, $xr14 +0x0a 0x2d 0x1f 0x75 # CHECK: xvpickev.w $xr10, $xr8, $xr11 +0x9a 0xa2 0x1f 0x75 # CHECK: xvpickev.d $xr26, $xr20, $xr8 +0xb3 0x6e 0x20 0x75 # CHECK: xvpickod.b $xr19, $xr21, $xr27 +0xbc 0xcc 0x20 0x75 # CHECK: xvpickod.h $xr28, $xr5, $xr19 +0x55 0x5a 0x21 0x75 # CHECK: xvpickod.w $xr21, $xr18, $xr22 +0xfc 0xc8 0x21 0x75 # CHECK: xvpickod.d $xr28, $xr7, $xr18 +0x86 0x66 0x22 0x75 # CHECK: xvreplve.b $xr6, $xr20, $r25 +0xfb 0xb8 0x22 0x75 # CHECK: xvreplve.h $xr27, $xr7, $r14 +0x81 0x3c 0x23 0x75 # CHECK: xvreplve.w $xr1, $xr4, $r15 +0x8c 0xc1 0x23 0x75 # CHECK: xvreplve.d $xr12, $xr12, $r16 +0x61 0x74 0x26 0x75 # CHECK: xvand.v $xr1, $xr3, $xr29 +0x77 0xd1 0x26 0x75 # CHECK: xvor.v $xr23, $xr11, $xr20 +0x3f 0x78 0x27 0x75 # CHECK: xvxor.v $xr31, $xr1, $xr30 +0x5d 0xb7 0x27 0x75 # CHECK: xvnor.v $xr29, $xr26, $xr13 +0xc9 0x01 0x28 0x75 # CHECK: xvandn.v $xr9, $xr14, $xr0 +0x19 0xb1 0x28 0x75 # CHECK: 
xvorn.v $xr25, $xr8, $xr12 +0x55 0x6b 0x2b 0x75 # CHECK: xvfrstp.b $xr21, $xr26, $xr26 +0x24 0x8a 0x2b 0x75 # CHECK: xvfrstp.h $xr4, $xr17, $xr2 +0x9d 0x47 0x2d 0x75 # CHECK: xvadd.q $xr29, $xr28, $xr17 +0x5d 0xec 0x2d 0x75 # CHECK: xvsub.q $xr29, $xr2, $xr27 +0x92 0x1f 0x2e 0x75 # CHECK: xvsigncov.b $xr18, $xr28, $xr7 +0x92 0xc5 0x2e 0x75 # CHECK: xvsigncov.h $xr18, $xr12, $xr17 +0x3a 0x00 0x2f 0x75 # CHECK: xvsigncov.w $xr26, $xr1, $xr0 +0x6a 0xbb 0x2f 0x75 # CHECK: xvsigncov.d $xr10, $xr27, $xr14 +0x2f 0xa3 0x30 0x75 # CHECK: xvfadd.s $xr15, $xr25, $xr8 +0xd3 0x54 0x31 0x75 # CHECK: xvfadd.d $xr19, $xr6, $xr21 +0xda 0x98 0x32 0x75 # CHECK: xvfsub.s $xr26, $xr6, $xr6 +0x09 0x54 0x33 0x75 # CHECK: xvfsub.d $xr9, $xr0, $xr21 +0x06 0xb9 0x38 0x75 # CHECK: xvfmul.s $xr6, $xr8, $xr14 +0xab 0x6a 0x39 0x75 # CHECK: xvfmul.d $xr11, $xr21, $xr26 +0xeb 0x98 0x3a 0x75 # CHECK: xvfdiv.s $xr11, $xr7, $xr6 +0x40 0x13 0x3b 0x75 # CHECK: xvfdiv.d $xr0, $xr26, $xr4 +0x27 0x91 0x3c 0x75 # CHECK: xvfmax.s $xr7, $xr9, $xr4 +0x40 0x53 0x3d 0x75 # CHECK: xvfmax.d $xr0, $xr26, $xr20 +0x48 0xe9 0x3e 0x75 # CHECK: xvfmin.s $xr8, $xr10, $xr26 +0xc2 0x66 0x3f 0x75 # CHECK: xvfmin.d $xr2, $xr22, $xr25 +0x91 0x84 0x40 0x75 # CHECK: xvfmaxa.s $xr17, $xr4, $xr1 +0xfb 0x26 0x41 0x75 # CHECK: xvfmaxa.d $xr27, $xr23, $xr9 +0x75 0xec 0x42 0x75 # CHECK: xvfmina.s $xr21, $xr3, $xr27 +0xc7 0x10 0x43 0x75 # CHECK: xvfmina.d $xr7, $xr6, $xr4 +0x49 0x51 0x46 0x75 # CHECK: xvfcvt.h.s $xr9, $xr10, $xr20 +0xe5 0xd6 0x46 0x75 # CHECK: xvfcvt.s.d $xr5, $xr23, $xr21 +0x1c 0x2b 0x48 0x75 # CHECK: xvffint.s.l $xr28, $xr24, $xr10 +0x06 0x87 0x49 0x75 # CHECK: xvftint.w.d $xr6, $xr24, $xr1 +0x5b 0x7b 0x4a 0x75 # CHECK: xvftintrm.w.d $xr27, $xr26, $xr30 +0x9f 0x85 0x4a 0x75 # CHECK: xvftintrp.w.d $xr31, $xr12, $xr1 +0xab 0x56 0x4b 0x75 # CHECK: xvftintrz.w.d $xr11, $xr21, $xr21 +0x0f 0xf1 0x4b 0x75 # CHECK: xvftintrne.w.d $xr15, $xr8, $xr28 +0xb4 0x8e 0x7a 0x75 # CHECK: xvshuf.h $xr20, $xr21, $xr3 +0x56 0x7c 0x7b 0x75 # CHECK: xvshuf.w $xr22, $xr2, $xr31 +0x6f 0xe8 0x7b 0x75 # CHECK: xvshuf.d $xr15, $xr3, $xr26 +0xf5 0x62 0x7d 0x75 # CHECK: xvperm.w $xr21, $xr23, $xr24 +0xbc 0x04 0x80 0x76 # CHECK: xvseqi.b $xr28, $xr5, 1 +0x33 0xed 0x80 0x76 # CHECK: xvseqi.h $xr19, $xr9, -5 +0x48 0x7a 0x81 0x76 # CHECK: xvseqi.w $xr8, $xr18, -2 +0xc2 0xf2 0x81 0x76 # CHECK: xvseqi.d $xr2, $xr22, -4 +0xa4 0x5a 0x82 0x76 # CHECK: xvslei.b $xr4, $xr21, -10 +0x91 0xd2 0x82 0x76 # CHECK: xvslei.h $xr17, $xr20, -12 +0x89 0x66 0x83 0x76 # CHECK: xvslei.w $xr9, $xr20, -7 +0xd3 0xab 0x83 0x76 # CHECK: xvslei.d $xr19, $xr30, 10 +0x44 0x07 0x84 0x76 # CHECK: xvslei.bu $xr4, $xr26, 1 +0x0b 0x91 0x84 0x76 # CHECK: xvslei.hu $xr11, $xr8, 4 +0x92 0x7d 0x85 0x76 # CHECK: xvslei.wu $xr18, $xr12, 31 +0xfe 0xe8 0x85 0x76 # CHECK: xvslei.du $xr30, $xr7, 26 +0xab 0x0b 0x86 0x76 # CHECK: xvslti.b $xr11, $xr29, 2 +0x66 0xa3 0x86 0x76 # CHECK: xvslti.h $xr6, $xr27, 8 +0xf5 0x06 0x87 0x76 # CHECK: xvslti.w $xr21, $xr23, 1 +0xf2 0xef 0x87 0x76 # CHECK: xvslti.d $xr18, $xr31, -5 +0x9b 0x45 0x88 0x76 # CHECK: xvslti.bu $xr27, $xr12, 17 +0xd2 0xb1 0x88 0x76 # CHECK: xvslti.hu $xr18, $xr14, 12 +0x84 0x39 0x89 0x76 # CHECK: xvslti.wu $xr4, $xr12, 14 +0x1a 0xe0 0x89 0x76 # CHECK: xvslti.du $xr26, $xr0, 24 +0x5e 0x14 0x8a 0x76 # CHECK: xvaddi.bu $xr30, $xr2, 5 +0x36 0xa6 0x8a 0x76 # CHECK: xvaddi.hu $xr22, $xr17, 9 +0x43 0x77 0x8b 0x76 # CHECK: xvaddi.wu $xr3, $xr26, 29 +0x80 0xfa 0x8b 0x76 # CHECK: xvaddi.du $xr0, $xr20, 30 +0x80 0x1e 0x8c 0x76 # CHECK: xvsubi.bu $xr0, $xr20, 7 +0x04 
0xcb 0x8c 0x76 # CHECK: xvsubi.hu $xr4, $xr24, 18 +0x41 0x6b 0x8d 0x76 # CHECK: xvsubi.wu $xr1, $xr26, 26 +0x89 0xa3 0x8d 0x76 # CHECK: xvsubi.du $xr9, $xr28, 8 +0xa0 0x22 0x8e 0x76 # CHECK: xvbsll.v $xr0, $xr21, 8 +0x04 0xf1 0x8e 0x76 # CHECK: xvbsrl.v $xr4, $xr8, 28 +0x28 0x48 0x90 0x76 # CHECK: xvmaxi.b $xr8, $xr1, -14 +0x93 0xc1 0x90 0x76 # CHECK: xvmaxi.h $xr19, $xr12, -16 +0x3b 0x14 0x91 0x76 # CHECK: xvmaxi.w $xr27, $xr1, 5 +0xe6 0x8c 0x91 0x76 # CHECK: xvmaxi.d $xr6, $xr7, 3 +0xca 0x14 0x92 0x76 # CHECK: xvmini.b $xr10, $xr6, 5 +0x48 0xd2 0x92 0x76 # CHECK: xvmini.h $xr8, $xr18, -12 +0xbf 0x65 0x93 0x76 # CHECK: xvmini.w $xr31, $xr13, -7 +0x6f 0xa7 0x93 0x76 # CHECK: xvmini.d $xr15, $xr27, 9 +0x25 0x5a 0x94 0x76 # CHECK: xvmaxi.bu $xr5, $xr17, 22 +0x66 0x90 0x94 0x76 # CHECK: xvmaxi.hu $xr6, $xr3, 4 +0x9a 0x45 0x95 0x76 # CHECK: xvmaxi.wu $xr26, $xr12, 17 +0x7e 0xf9 0x95 0x76 # CHECK: xvmaxi.du $xr30, $xr11, 30 +0x0f 0x1d 0x96 0x76 # CHECK: xvmini.bu $xr15, $xr8, 7 +0x32 0x87 0x96 0x76 # CHECK: xvmini.hu $xr18, $xr25, 1 +0x90 0x03 0x97 0x76 # CHECK: xvmini.wu $xr16, $xr28, 0 +0x6a 0xf6 0x97 0x76 # CHECK: xvmini.du $xr10, $xr19, 29 +0x28 0x0b 0x9a 0x76 # CHECK: xvfrstpi.b $xr8, $xr25, 2 +0x7c 0xea 0x9a 0x76 # CHECK: xvfrstpi.h $xr28, $xr19, 26 +0x02 0x01 0x9c 0x76 # CHECK: xvclo.b $xr2, $xr8 +0x2a 0x05 0x9c 0x76 # CHECK: xvclo.h $xr10, $xr9 +0xe2 0x0b 0x9c 0x76 # CHECK: xvclo.w $xr2, $xr31 +0x15 0x0f 0x9c 0x76 # CHECK: xvclo.d $xr21, $xr24 +0x0d 0x13 0x9c 0x76 # CHECK: xvclz.b $xr13, $xr24 +0xe4 0x17 0x9c 0x76 # CHECK: xvclz.h $xr4, $xr31 +0x27 0x18 0x9c 0x76 # CHECK: xvclz.w $xr7, $xr1 +0xcd 0x1e 0x9c 0x76 # CHECK: xvclz.d $xr13, $xr22 +0x49 0x23 0x9c 0x76 # CHECK: xvpcnt.b $xr9, $xr26 +0x6a 0x24 0x9c 0x76 # CHECK: xvpcnt.h $xr10, $xr3 +0xf8 0x28 0x9c 0x76 # CHECK: xvpcnt.w $xr24, $xr7 +0x05 0x2d 0x9c 0x76 # CHECK: xvpcnt.d $xr5, $xr8 +0x73 0x31 0x9c 0x76 # CHECK: xvneg.b $xr19, $xr11 +0xb5 0x36 0x9c 0x76 # CHECK: xvneg.h $xr21, $xr21 +0x33 0x3a 0x9c 0x76 # CHECK: xvneg.w $xr19, $xr17 +0xbf 0x3f 0x9c 0x76 # CHECK: xvneg.d $xr31, $xr29 +0x76 0x43 0x9c 0x76 # CHECK: xvmskltz.b $xr22, $xr27 +0x05 0x44 0x9c 0x76 # CHECK: xvmskltz.h $xr5, $xr0 +0x98 0x4b 0x9c 0x76 # CHECK: xvmskltz.w $xr24, $xr28 +0x59 0x4c 0x9c 0x76 # CHECK: xvmskltz.d $xr25, $xr2 +0xde 0x53 0x9c 0x76 # CHECK: xvmskgez.b $xr30, $xr30 +0x85 0x62 0x9c 0x76 # CHECK: xvmsknz.b $xr5, $xr20 +0x21 0x9b 0x9c 0x76 # CHECK: xvseteqz.v $fcc1, $xr25 +0xa5 0x9d 0x9c 0x76 # CHECK: xvsetnez.v $fcc5, $xr13 +0x80 0xa0 0x9c 0x76 # CHECK: xvsetanyeqz.b $fcc0, $xr4 +0xe0 0xa7 0x9c 0x76 # CHECK: xvsetanyeqz.h $fcc0, $xr31 +0xc2 0xab 0x9c 0x76 # CHECK: xvsetanyeqz.w $fcc2, $xr30 +0xe3 0xaf 0x9c 0x76 # CHECK: xvsetanyeqz.d $fcc3, $xr31 +0xa1 0xb2 0x9c 0x76 # CHECK: xvsetallnez.b $fcc1, $xr21 +0xa0 0xb6 0x9c 0x76 # CHECK: xvsetallnez.h $fcc0, $xr21 +0x00 0xb8 0x9c 0x76 # CHECK: xvsetallnez.w $fcc0, $xr0 +0xe1 0xbf 0x9c 0x76 # CHECK: xvsetallnez.d $fcc1, $xr31 +0x95 0xc4 0x9c 0x76 # CHECK: xvflogb.s $xr21, $xr4 +0x88 0xca 0x9c 0x76 # CHECK: xvflogb.d $xr8, $xr20 +0xaf 0xd7 0x9c 0x76 # CHECK: xvfclass.s $xr15, $xr29 +0xc7 0xd9 0x9c 0x76 # CHECK: xvfclass.d $xr7, $xr14 +0x7c 0xe6 0x9c 0x76 # CHECK: xvfsqrt.s $xr28, $xr19 +0xeb 0xeb 0x9c 0x76 # CHECK: xvfsqrt.d $xr11, $xr31 +0xe6 0xf6 0x9c 0x76 # CHECK: xvfrecip.s $xr6, $xr23 +0x00 0xfb 0x9c 0x76 # CHECK: xvfrecip.d $xr0, $xr24 +0x08 0x06 0x9d 0x76 # CHECK: xvfrsqrt.s $xr8, $xr16 +0x2f 0x0a 0x9d 0x76 # CHECK: xvfrsqrt.d $xr15, $xr17 +0x24 0x37 0x9d 0x76 # CHECK: xvfrint.s $xr4, $xr25 +0x81 0x3a 
0x9d 0x76 # CHECK: xvfrint.d $xr1, $xr20 +0x1d 0x46 0x9d 0x76 # CHECK: xvfrintrm.s $xr29, $xr16 +0x44 0x49 0x9d 0x76 # CHECK: xvfrintrm.d $xr4, $xr10 +0xed 0x57 0x9d 0x76 # CHECK: xvfrintrp.s $xr13, $xr31 +0x74 0x59 0x9d 0x76 # CHECK: xvfrintrp.d $xr20, $xr11 +0xbb 0x65 0x9d 0x76 # CHECK: xvfrintrz.s $xr27, $xr13 +0x31 0x6b 0x9d 0x76 # CHECK: xvfrintrz.d $xr17, $xr25 +0x0e 0x75 0x9d 0x76 # CHECK: xvfrintrne.s $xr14, $xr8 +0x57 0x7b 0x9d 0x76 # CHECK: xvfrintrne.d $xr23, $xr26 +0xe4 0xea 0x9d 0x76 # CHECK: xvfcvtl.s.h $xr4, $xr23 +0x6e 0xed 0x9d 0x76 # CHECK: xvfcvth.s.h $xr14, $xr11 +0xfa 0xf3 0x9d 0x76 # CHECK: xvfcvtl.d.s $xr26, $xr31 +0x8d 0xf7 0x9d 0x76 # CHECK: xvfcvth.d.s $xr13, $xr28 +0x8e 0x03 0x9e 0x76 # CHECK: xvffint.s.w $xr14, $xr28 +0x00 0x05 0x9e 0x76 # CHECK: xvffint.s.wu $xr0, $xr8 +0x65 0x0b 0x9e 0x76 # CHECK: xvffint.d.l $xr5, $xr27 +0x5d 0x0e 0x9e 0x76 # CHECK: xvffint.d.lu $xr29, $xr18 +0x89 0x12 0x9e 0x76 # CHECK: xvffintl.d.w $xr9, $xr20 +0xab 0x15 0x9e 0x76 # CHECK: xvffinth.d.w $xr11, $xr13 +0x86 0x30 0x9e 0x76 # CHECK: xvftint.w.s $xr6, $xr4 +0xcb 0x36 0x9e 0x76 # CHECK: xvftint.l.d $xr11, $xr22 +0xb4 0x3a 0x9e 0x76 # CHECK: xvftintrm.w.s $xr20, $xr21 +0x7c 0x3f 0x9e 0x76 # CHECK: xvftintrm.l.d $xr28, $xr27 +0x0e 0x42 0x9e 0x76 # CHECK: xvftintrp.w.s $xr14, $xr16 +0x2e 0x47 0x9e 0x76 # CHECK: xvftintrp.l.d $xr14, $xr25 +0xc5 0x4b 0x9e 0x76 # CHECK: xvftintrz.w.s $xr5, $xr30 +0x6b 0x4e 0x9e 0x76 # CHECK: xvftintrz.l.d $xr11, $xr19 +0xfb 0x52 0x9e 0x76 # CHECK: xvftintrne.w.s $xr27, $xr23 +0xbb 0x55 0x9e 0x76 # CHECK: xvftintrne.l.d $xr27, $xr13 +0x5c 0x58 0x9e 0x76 # CHECK: xvftint.wu.s $xr28, $xr2 +0x9b 0x5d 0x9e 0x76 # CHECK: xvftint.lu.d $xr27, $xr12 +0xb5 0x73 0x9e 0x76 # CHECK: xvftintrz.wu.s $xr21, $xr29 +0x53 0x74 0x9e 0x76 # CHECK: xvftintrz.lu.d $xr19, $xr2 +0x42 0x82 0x9e 0x76 # CHECK: xvftintl.l.s $xr2, $xr18 +0xc8 0x87 0x9e 0x76 # CHECK: xvftinth.l.s $xr8, $xr30 +0x2d 0x8a 0x9e 0x76 # CHECK: xvftintrml.l.s $xr13, $xr17 +0x5e 0x8f 0x9e 0x76 # CHECK: xvftintrmh.l.s $xr30, $xr26 +0x4b 0x93 0x9e 0x76 # CHECK: xvftintrpl.l.s $xr11, $xr26 +0x7e 0x95 0x9e 0x76 # CHECK: xvftintrph.l.s $xr30, $xr11 +0xf9 0x98 0x9e 0x76 # CHECK: xvftintrzl.l.s $xr25, $xr7 +0xac 0x9c 0x9e 0x76 # CHECK: xvftintrzh.l.s $xr12, $xr5 +0x08 0xa3 0x9e 0x76 # CHECK: xvftintrnel.l.s $xr8, $xr24 +0x19 0xa7 0x9e 0x76 # CHECK: xvftintrneh.l.s $xr25, $xr24 +0xb7 0xe0 0x9e 0x76 # CHECK: xvexth.h.b $xr23, $xr5 +0xd9 0xe4 0x9e 0x76 # CHECK: xvexth.w.h $xr25, $xr6 +0x67 0xeb 0x9e 0x76 # CHECK: xvexth.d.w $xr7, $xr27 +0x4e 0xed 0x9e 0x76 # CHECK: xvexth.q.d $xr14, $xr10 +0xa0 0xf2 0x9e 0x76 # CHECK: xvexth.hu.bu $xr0, $xr21 +0xcf 0xf6 0x9e 0x76 # CHECK: xvexth.wu.hu $xr15, $xr22 +0xf8 0xf9 0x9e 0x76 # CHECK: xvexth.du.wu $xr24, $xr15 +0x44 0xfc 0x9e 0x76 # CHECK: xvexth.qu.du $xr4, $xr2 +0xd5 0x00 0x9f 0x76 # CHECK: xvreplgr2vr.b $xr21, $r6 +0x2b 0x04 0x9f 0x76 # CHECK: xvreplgr2vr.h $xr11, $ra +0xcd 0x0a 0x9f 0x76 # CHECK: xvreplgr2vr.w $xr13, $r22 +0x29 0x0e 0x9f 0x76 # CHECK: xvreplgr2vr.d $xr9, $r17 +0x12 0x12 0x9f 0x76 # CHECK: vext2xv.h.b $xr18, $xr16 +0xe3 0x16 0x9f 0x76 # CHECK: vext2xv.w.b $xr3, $xr23 +0x1e 0x1a 0x9f 0x76 # CHECK: vext2xv.d.b $xr30, $xr16 +0xfc 0x1e 0x9f 0x76 # CHECK: vext2xv.w.h $xr28, $xr23 +0x24 0x20 0x9f 0x76 # CHECK: vext2xv.d.h $xr4, $xr1 +0x97 0x25 0x9f 0x76 # CHECK: vext2xv.d.w $xr23, $xr12 +0xa0 0x28 0x9f 0x76 # CHECK: vext2xv.hu.bu $xr0, $xr5 +0x81 0x2c 0x9f 0x76 # CHECK: vext2xv.wu.bu $xr1, $xr4 +0x71 0x31 0x9f 0x76 # CHECK: vext2xv.du.bu $xr17, $xr11 +0x1c 0x34 
0x9f 0x76 # CHECK: vext2xv.wu.hu $xr28, $xr0 +0x3a 0x3b 0x9f 0x76 # CHECK: vext2xv.du.hu $xr26, $xr25 +0xdd 0x3d 0x9f 0x76 # CHECK: vext2xv.du.wu $xr29, $xr14 +0xc3 0xb6 0x9f 0x76 # CHECK: xvhseli.d $xr3, $xr22, 13 +0xc0 0x29 0xa0 0x76 # CHECK: xvrotri.b $xr0, $xr14, 2 +0xe0 0x6c 0xa0 0x76 # CHECK: xvrotri.h $xr0, $xr7, 11 +0x38 0x8c 0xa0 0x76 # CHECK: xvrotri.w $xr24, $xr1, 3 +0xff 0x40 0xa1 0x76 # CHECK: xvrotri.d $xr31, $xr7, 16 +0x74 0x26 0xa4 0x76 # CHECK: xvsrlri.b $xr20, $xr19, 1 +0x3c 0x6c 0xa4 0x76 # CHECK: xvsrlri.h $xr28, $xr1, 11 +0x59 0xec 0xa4 0x76 # CHECK: xvsrlri.w $xr25, $xr2, 27 +0x3d 0x19 0xa5 0x76 # CHECK: xvsrlri.d $xr29, $xr9, 6 +0xa7 0x28 0xa8 0x76 # CHECK: xvsrari.b $xr7, $xr5, 2 +0x40 0x65 0xa8 0x76 # CHECK: xvsrari.h $xr0, $xr10, 9 +0x11 0xab 0xa8 0x76 # CHECK: xvsrari.w $xr17, $xr24, 10 +0xc7 0x99 0xa9 0x76 # CHECK: xvsrari.d $xr7, $xr14, 38 +0xe5 0xc7 0xeb 0x76 # CHECK: xvinsgr2vr.w $xr5, $r31, 1 +0x45 0xe7 0xeb 0x76 # CHECK: xvinsgr2vr.d $xr5, $r26, 1 +0x92 0xcb 0xef 0x76 # CHECK: xvpickve2gr.w $r18, $xr28, 2 +0x54 0xe5 0xef 0x76 # CHECK: xvpickve2gr.d $r20, $xr10, 1 +0x89 0xd9 0xf3 0x76 # CHECK: xvpickve2gr.wu $r9, $xr12, 6 +0xa9 0xe9 0xf3 0x76 # CHECK: xvpickve2gr.du $r9, $xr13, 2 +0xc1 0x97 0xf7 0x76 # CHECK: xvrepl128vei.b $xr1, $xr30, 5 +0xad 0xdd 0xf7 0x76 # CHECK: xvrepl128vei.h $xr13, $xr13, 7 +0xa7 0xe9 0xf7 0x76 # CHECK: xvrepl128vei.w $xr7, $xr13, 2 +0xe2 0xf7 0xf7 0x76 # CHECK: xvrepl128vei.d $xr2, $xr31, 1 +0xa4 0xcd 0xff 0x76 # CHECK: xvinsve0.w $xr4, $xr13, 3 +0x3b 0xe3 0xff 0x76 # CHECK: xvinsve0.d $xr27, $xr25, 0 +0x7d 0xde 0x03 0x77 # CHECK: xvpickve.w $xr29, $xr19, 7 +0x13 0xee 0x03 0x77 # CHECK: xvpickve.d $xr19, $xr16, 3 +0xa5 0x00 0x07 0x77 # CHECK: xvreplve0.b $xr5, $xr5 +0x0e 0x83 0x07 0x77 # CHECK: xvreplve0.h $xr14, $xr24 +0xaf 0xc1 0x07 0x77 # CHECK: xvreplve0.w $xr15, $xr13 +0x94 0xe2 0x07 0x77 # CHECK: xvreplve0.d $xr20, $xr20 +0x45 0xf1 0x07 0x77 # CHECK: xvreplve0.q $xr5, $xr10 +0x1f 0x2c 0x08 0x77 # CHECK: xvsllwil.h.b $xr31, $xr0, 3 +0x15 0x5f 0x08 0x77 # CHECK: xvsllwil.w.h $xr21, $xr24, 7 +0x1a 0xcb 0x08 0x77 # CHECK: xvsllwil.d.w $xr26, $xr24, 18 +0xc5 0x00 0x09 0x77 # CHECK: xvextl.q.d $xr5, $xr6 +0xed 0x3b 0x0c 0x77 # CHECK: xvsllwil.hu.bu $xr13, $xr31, 6 +0x93 0x62 0x0c 0x77 # CHECK: xvsllwil.wu.hu $xr19, $xr20, 8 +0xae 0x89 0x0c 0x77 # CHECK: xvsllwil.du.wu $xr14, $xr13, 2 +0xea 0x00 0x0d 0x77 # CHECK: xvextl.qu.du $xr10, $xr7 +0xbf 0x36 0x10 0x77 # CHECK: xvbitclri.b $xr31, $xr21, 5 +0x9a 0x48 0x10 0x77 # CHECK: xvbitclri.h $xr26, $xr4, 2 +0x35 0xbf 0x10 0x77 # CHECK: xvbitclri.w $xr21, $xr25, 15 +0x0e 0xfc 0x11 0x77 # CHECK: xvbitclri.d $xr14, $xr0, 63 +0x30 0x34 0x14 0x77 # CHECK: xvbitseti.b $xr16, $xr1, 5 +0xd3 0x4f 0x14 0x77 # CHECK: xvbitseti.h $xr19, $xr30, 3 +0xd2 0xee 0x14 0x77 # CHECK: xvbitseti.w $xr18, $xr22, 27 +0x2f 0xa0 0x15 0x77 # CHECK: xvbitseti.d $xr15, $xr1, 40 +0xb7 0x20 0x18 0x77 # CHECK: xvbitrevi.b $xr23, $xr5, 0 +0x45 0x5c 0x18 0x77 # CHECK: xvbitrevi.h $xr5, $xr2, 7 +0xd7 0xb0 0x18 0x77 # CHECK: xvbitrevi.w $xr23, $xr6, 12 +0xd2 0x85 0x19 0x77 # CHECK: xvbitrevi.d $xr18, $xr14, 33 +0x5b 0x33 0x24 0x77 # CHECK: xvsat.b $xr27, $xr26, 4 +0xa4 0x56 0x24 0x77 # CHECK: xvsat.h $xr4, $xr21, 5 +0x7d 0xab 0x24 0x77 # CHECK: xvsat.w $xr29, $xr27, 10 +0x0e 0xf0 0x25 0x77 # CHECK: xvsat.d $xr14, $xr0, 60 +0x3f 0x2f 0x28 0x77 # CHECK: xvsat.bu $xr31, $xr25, 3 +0x91 0x78 0x28 0x77 # CHECK: xvsat.hu $xr17, $xr4, 14 +0x31 0x92 0x28 0x77 # CHECK: xvsat.wu $xr17, $xr17, 4 +0x0b 0xac 0x29 0x77 # CHECK: xvsat.du 
$xr11, $xr0, 43 +0x18 0x2b 0x2c 0x77 # CHECK: xvslli.b $xr24, $xr24, 2 +0x37 0x5d 0x2c 0x77 # CHECK: xvslli.h $xr23, $xr9, 7 +0x8d 0xc1 0x2c 0x77 # CHECK: xvslli.w $xr13, $xr12, 16 +0xcb 0x46 0x2d 0x77 # CHECK: xvslli.d $xr11, $xr22, 17 +0xc9 0x25 0x30 0x77 # CHECK: xvsrli.b $xr9, $xr14, 1 +0x96 0x7e 0x30 0x77 # CHECK: xvsrli.h $xr22, $xr20, 15 +0xc5 0xd3 0x30 0x77 # CHECK: xvsrli.w $xr5, $xr30, 20 +0x01 0xea 0x31 0x77 # CHECK: xvsrli.d $xr1, $xr16, 58 +0xd2 0x28 0x34 0x77 # CHECK: xvsrai.b $xr18, $xr6, 2 +0x15 0x72 0x34 0x77 # CHECK: xvsrai.h $xr21, $xr16, 12 +0x2d 0xc6 0x34 0x77 # CHECK: xvsrai.w $xr13, $xr17, 17 +0x83 0xcd 0x35 0x77 # CHECK: xvsrai.d $xr3, $xr12, 51 +0xe1 0x50 0x40 0x77 # CHECK: xvsrlni.b.h $xr1, $xr7, 4 +0xb0 0xe6 0x40 0x77 # CHECK: xvsrlni.h.w $xr16, $xr21, 25 +0x4d 0xc1 0x41 0x77 # CHECK: xvsrlni.w.d $xr13, $xr10, 48 +0x91 0xf9 0x43 0x77 # CHECK: xvsrlni.d.q $xr17, $xr12, 126 +0x71 0x7e 0x44 0x77 # CHECK: xvsrlrni.b.h $xr17, $xr19, 15 +0x15 0xbb 0x44 0x77 # CHECK: xvsrlrni.h.w $xr21, $xr24, 14 +0xf4 0x0f 0x45 0x77 # CHECK: xvsrlrni.w.d $xr20, $xr31, 3 +0x1c 0x33 0x47 0x77 # CHECK: xvsrlrni.d.q $xr28, $xr24, 76 +0xfa 0x5c 0x48 0x77 # CHECK: xvssrlni.b.h $xr26, $xr7, 7 +0x9b 0xe7 0x48 0x77 # CHECK: xvssrlni.h.w $xr27, $xr28, 25 +0x04 0x41 0x49 0x77 # CHECK: xvssrlni.w.d $xr4, $xr8, 16 +0x2e 0x52 0x4b 0x77 # CHECK: xvssrlni.d.q $xr14, $xr17, 84 +0xd1 0x48 0x4c 0x77 # CHECK: xvssrlni.bu.h $xr17, $xr6, 2 +0x46 0x8f 0x4c 0x77 # CHECK: xvssrlni.hu.w $xr6, $xr26, 3 +0x4a 0xda 0x4d 0x77 # CHECK: xvssrlni.wu.d $xr10, $xr18, 54 +0x5d 0x1b 0x4f 0x77 # CHECK: xvssrlni.du.q $xr29, $xr26, 70 +0x26 0x59 0x50 0x77 # CHECK: xvssrlrni.b.h $xr6, $xr9, 6 +0x16 0x85 0x50 0x77 # CHECK: xvssrlrni.h.w $xr22, $xr8, 1 +0x3c 0x71 0x51 0x77 # CHECK: xvssrlrni.w.d $xr28, $xr9, 28 +0x74 0xa3 0x53 0x77 # CHECK: xvssrlrni.d.q $xr20, $xr27, 104 +0x99 0x70 0x54 0x77 # CHECK: xvssrlrni.bu.h $xr25, $xr4, 12 +0xb5 0x97 0x54 0x77 # CHECK: xvssrlrni.hu.w $xr21, $xr29, 5 +0x01 0xda 0x55 0x77 # CHECK: xvssrlrni.wu.d $xr1, $xr16, 54 +0xfd 0x64 0x56 0x77 # CHECK: xvssrlrni.du.q $xr29, $xr7, 25 +0x30 0x53 0x58 0x77 # CHECK: xvsrani.b.h $xr16, $xr25, 4 +0x4d 0x99 0x58 0x77 # CHECK: xvsrani.h.w $xr13, $xr10, 6 +0xa7 0xd6 0x59 0x77 # CHECK: xvsrani.w.d $xr7, $xr21, 53 +0x5a 0xde 0x5a 0x77 # CHECK: xvsrani.d.q $xr26, $xr18, 55 +0xb1 0x6e 0x5c 0x77 # CHECK: xvsrarni.b.h $xr17, $xr21, 11 +0xcf 0x8b 0x5c 0x77 # CHECK: xvsrarni.h.w $xr15, $xr30, 2 +0x77 0x7d 0x5d 0x77 # CHECK: xvsrarni.w.d $xr23, $xr11, 31 +0x36 0x43 0x5e 0x77 # CHECK: xvsrarni.d.q $xr22, $xr25, 16 +0x93 0x6a 0x60 0x77 # CHECK: xvssrani.b.h $xr19, $xr20, 10 +0x39 0xd9 0x60 0x77 # CHECK: xvssrani.h.w $xr25, $xr9, 22 +0x57 0x1c 0x61 0x77 # CHECK: xvssrani.w.d $xr23, $xr2, 7 +0x06 0xfd 0x63 0x77 # CHECK: xvssrani.d.q $xr6, $xr8, 127 +0xdb 0x55 0x64 0x77 # CHECK: xvssrani.bu.h $xr27, $xr14, 5 +0x2e 0xd0 0x64 0x77 # CHECK: xvssrani.hu.w $xr14, $xr1, 20 +0x8a 0xec 0x65 0x77 # CHECK: xvssrani.wu.d $xr10, $xr4, 59 +0x31 0x48 0x67 0x77 # CHECK: xvssrani.du.q $xr17, $xr1, 82 +0x5b 0x7e 0x68 0x77 # CHECK: xvssrarni.b.h $xr27, $xr18, 15 +0x70 0xbc 0x68 0x77 # CHECK: xvssrarni.h.w $xr16, $xr3, 15 +0x3a 0x4b 0x69 0x77 # CHECK: xvssrarni.w.d $xr26, $xr25, 18 +0x3c 0x03 0x6a 0x77 # CHECK: xvssrarni.d.q $xr28, $xr25, 0 +0x81 0x61 0x6c 0x77 # CHECK: xvssrarni.bu.h $xr1, $xr12, 8 +0x63 0xff 0x6c 0x77 # CHECK: xvssrarni.hu.w $xr3, $xr27, 31 +0x78 0xd3 0x6d 0x77 # CHECK: xvssrarni.wu.d $xr24, $xr27, 52 +0x65 0xc0 0x6f 0x77 # CHECK: xvssrarni.du.q $xr5, $xr3, 112 +0x35 0x8f 
0x82 0x77 # CHECK: xvextrins.d $xr21, $xr25, 163 +0x33 0x72 0x84 0x77 # CHECK: xvextrins.w $xr19, $xr17, 28 +0xfe 0x3c 0x89 0x77 # CHECK: xvextrins.h $xr30, $xr7, 79 +0xe1 0x4b 0x8f 0x77 # CHECK: xvextrins.b $xr1, $xr31, 210 +0xc3 0x52 0x92 0x77 # CHECK: xvshuf4i.b $xr3, $xr22, 148 +0xc2 0x8a 0x94 0x77 # CHECK: xvshuf4i.h $xr2, $xr22, 34 +0x7f 0x96 0x9a 0x77 # CHECK: xvshuf4i.w $xr31, $xr19, 165 +0x3f 0x3a 0x9c 0x77 # CHECK: xvshuf4i.d $xr31, $xr17, 14 +0x1b 0x40 0xc5 0x77 # CHECK: xvbitseli.b $xr27, $xr0, 80 +0x57 0x64 0xd2 0x77 # CHECK: xvandi.b $xr23, $xr2, 153 +0x9b 0xf3 0xd6 0x77 # CHECK: xvori.b $xr27, $xr28, 188 +0x3c 0xf8 0xdb 0x77 # CHECK: xvxori.b $xr28, $xr1, 254 +0x44 0x90 0xdc 0x77 # CHECK: xvnori.b $xr4, $xr2, 36 +0x1a 0xc2 0xe2 0x77 # CHECK: xvldi $xr26, -2544 +0x16 0xa3 0xe6 0x77 # CHECK: xvpermi.w $xr22, $xr24, 168 +0xee 0x23 0xea 0x77 # CHECK: xvpermi.d $xr14, $xr31, 136 +0xdc 0x4d 0xef 0x77 # CHECK: xvpermi.q $xr28, $xr14, 211 +0xe0 0x7f 0x1e 0x70 # CHECK: vaddwev.h.b $vr0, $vr31, $vr31 +0x83 0xdc 0x1e 0x70 # CHECK: vaddwev.w.h $vr3, $vr4, $vr23 +0x5e 0x2f 0x1f 0x70 # CHECK: vaddwev.d.w $vr30, $vr26, $vr11 +0xb9 0xb7 0x1f 0x70 # CHECK: vaddwev.q.d $vr25, $vr29, $vr13 +0x8b 0x07 0x20 0x70 # CHECK: vsubwev.h.b $vr11, $vr28, $vr1 +0xe9 0x95 0x20 0x70 # CHECK: vsubwev.w.h $vr9, $vr15, $vr5 +0x31 0x29 0x21 0x70 # CHECK: vsubwev.d.w $vr17, $vr9, $vr10 +0x5a 0xae 0x21 0x70 # CHECK: vsubwev.q.d $vr26, $vr18, $vr11 +0x67 0x49 0x22 0x70 # CHECK: vaddwod.h.b $vr7, $vr11, $vr18 +0xe0 0xb0 0x22 0x70 # CHECK: vaddwod.w.h $vr0, $vr7, $vr12 +0x7e 0x43 0x23 0x70 # CHECK: vaddwod.d.w $vr30, $vr27, $vr16 +0x82 0xf6 0x23 0x70 # CHECK: vaddwod.q.d $vr2, $vr20, $vr29 +0xfa 0x4c 0x24 0x70 # CHECK: vsubwod.h.b $vr26, $vr7, $vr19 +0x73 0xac 0x24 0x70 # CHECK: vsubwod.w.h $vr19, $vr3, $vr11 +0x9f 0x33 0x25 0x70 # CHECK: vsubwod.d.w $vr31, $vr28, $vr12 +0x01 0xc3 0x25 0x70 # CHECK: vsubwod.q.d $vr1, $vr24, $vr16 +0xa3 0x77 0x2e 0x70 # CHECK: vaddwev.h.bu $vr3, $vr29, $vr29 +0xea 0xa9 0x2e 0x70 # CHECK: vaddwev.w.hu $vr10, $vr15, $vr10 +0xb8 0x13 0x2f 0x70 # CHECK: vaddwev.d.wu $vr24, $vr29, $vr4 +0xf1 0x82 0x2f 0x70 # CHECK: vaddwev.q.du $vr17, $vr23, $vr0 +0x79 0x51 0x30 0x70 # CHECK: vsubwev.h.bu $vr25, $vr11, $vr20 +0xf1 0xd1 0x30 0x70 # CHECK: vsubwev.w.hu $vr17, $vr15, $vr20 +0x2a 0x17 0x31 0x70 # CHECK: vsubwev.d.wu $vr10, $vr25, $vr5 +0x7d 0xa0 0x31 0x70 # CHECK: vsubwev.q.du $vr29, $vr3, $vr8 +0x0a 0x64 0x32 0x70 # CHECK: vaddwod.h.bu $vr10, $vr0, $vr25 +0x62 0xdf 0x32 0x70 # CHECK: vaddwod.w.hu $vr2, $vr27, $vr23 +0x02 0x58 0x33 0x70 # CHECK: vaddwod.d.wu $vr2, $vr0, $vr22 +0x40 0x8c 0x33 0x70 # CHECK: vaddwod.q.du $vr0, $vr2, $vr3 +0xee 0x0f 0x34 0x70 # CHECK: vsubwod.h.bu $vr14, $vr31, $vr3 +0x55 0x9c 0x34 0x70 # CHECK: vsubwod.w.hu $vr21, $vr2, $vr7 +0x0b 0x49 0x35 0x70 # CHECK: vsubwod.d.wu $vr11, $vr8, $vr18 +0x9e 0x82 0x35 0x70 # CHECK: vsubwod.q.du $vr30, $vr20, $vr0 +0x93 0x47 0x3e 0x70 # CHECK: vaddwev.h.bu.b $vr19, $vr28, $vr17 +0xee 0xf9 0x3e 0x70 # CHECK: vaddwev.w.hu.h $vr14, $vr15, $vr30 +0xef 0x28 0x3f 0x70 # CHECK: vaddwev.d.wu.w $vr15, $vr7, $vr10 +0xd3 0xf9 0x3f 0x70 # CHECK: vaddwev.q.du.d $vr19, $vr14, $vr30 +0x4f 0x22 0x40 0x70 # CHECK: vaddwod.h.bu.b $vr15, $vr18, $vr8 +0x73 0x9b 0x40 0x70 # CHECK: vaddwod.w.hu.h $vr19, $vr27, $vr6 +0x67 0x3d 0x41 0x70 # CHECK: vaddwod.d.wu.w $vr7, $vr11, $vr15 +0x00 0xe8 0x41 0x70 # CHECK: vaddwod.q.du.d $vr0, $vr0, $vr26 +0x78 0x56 0x90 0x70 # CHECK: vmulwev.h.b $vr24, $vr19, $vr21 +0xcd 0xca 0x90 0x70 # CHECK: vmulwev.w.h $vr13, 
$vr22, $vr18 +0xd8 0x36 0x91 0x70 # CHECK: vmulwev.d.w $vr24, $vr22, $vr13 +0xc4 0xfa 0x91 0x70 # CHECK: vmulwev.q.d $vr4, $vr22, $vr30 +0x56 0x63 0x92 0x70 # CHECK: vmulwod.h.b $vr22, $vr26, $vr24 +0x91 0x91 0x92 0x70 # CHECK: vmulwod.w.h $vr17, $vr12, $vr4 +0xf0 0x69 0x93 0x70 # CHECK: vmulwod.d.w $vr16, $vr15, $vr26 +0x03 0x96 0x93 0x70 # CHECK: vmulwod.q.d $vr3, $vr16, $vr5 +0x7f 0x4e 0x98 0x70 # CHECK: vmulwev.h.bu $vr31, $vr19, $vr19 +0xf6 0x97 0x98 0x70 # CHECK: vmulwev.w.hu $vr22, $vr31, $vr5 +0x80 0x78 0x99 0x70 # CHECK: vmulwev.d.wu $vr0, $vr4, $vr30 +0x7f 0xd0 0x99 0x70 # CHECK: vmulwev.q.du $vr31, $vr3, $vr20 +0xf9 0x34 0x9a 0x70 # CHECK: vmulwod.h.bu $vr25, $vr7, $vr13 +0x81 0xb1 0x9a 0x70 # CHECK: vmulwod.w.hu $vr1, $vr12, $vr12 +0xef 0x79 0x9b 0x70 # CHECK: vmulwod.d.wu $vr15, $vr15, $vr30 +0x8d 0x9b 0x9b 0x70 # CHECK: vmulwod.q.du $vr13, $vr28, $vr6 +0x48 0x0f 0xa0 0x70 # CHECK: vmulwev.h.bu.b $vr8, $vr26, $vr3 +0x2a 0x87 0xa0 0x70 # CHECK: vmulwev.w.hu.h $vr10, $vr25, $vr1 +0x09 0x4c 0xa1 0x70 # CHECK: vmulwev.d.wu.w $vr9, $vr0, $vr19 +0x0d 0xdf 0xa1 0x70 # CHECK: vmulwev.q.du.d $vr13, $vr24, $vr23 +0x14 0x38 0xa2 0x70 # CHECK: vmulwod.h.bu.b $vr20, $vr0, $vr14 +0x90 0x8e 0xa2 0x70 # CHECK: vmulwod.w.hu.h $vr16, $vr20, $vr3 +0xe5 0x6e 0xa3 0x70 # CHECK: vmulwod.d.wu.w $vr5, $vr23, $vr27 +0xde 0xf7 0xa3 0x70 # CHECK: vmulwod.q.du.d $vr30, $vr30, $vr29 +0x12 0x20 0xac 0x70 # CHECK: vmaddwev.h.b $vr18, $vr0, $vr8 +0xdd 0x9e 0xac 0x70 # CHECK: vmaddwev.w.h $vr29, $vr22, $vr7 +0xbc 0x7d 0xad 0x70 # CHECK: vmaddwev.d.w $vr28, $vr13, $vr31 +0x65 0xb4 0xad 0x70 # CHECK: vmaddwev.q.d $vr5, $vr3, $vr13 +0x24 0x24 0xae 0x70 # CHECK: vmaddwod.h.b $vr4, $vr1, $vr9 +0x3a 0xe1 0xae 0x70 # CHECK: vmaddwod.w.h $vr26, $vr9, $vr24 +0x7e 0x34 0xaf 0x70 # CHECK: vmaddwod.d.w $vr30, $vr3, $vr13 +0xaf 0xf5 0xaf 0x70 # CHECK: vmaddwod.q.d $vr15, $vr13, $vr29 +0x98 0x16 0xb4 0x70 # CHECK: vmaddwev.h.bu $vr24, $vr20, $vr5 +0x83 0xa0 0xb4 0x70 # CHECK: vmaddwev.w.hu $vr3, $vr4, $vr8 +0x7b 0x12 0xb5 0x70 # CHECK: vmaddwev.d.wu $vr27, $vr19, $vr4 +0x7c 0xf7 0xb5 0x70 # CHECK: vmaddwev.q.du $vr28, $vr27, $vr29 +0x85 0x6a 0xb6 0x70 # CHECK: vmaddwod.h.bu $vr5, $vr20, $vr26 +0xd5 0xab 0xb6 0x70 # CHECK: vmaddwod.w.hu $vr21, $vr30, $vr10 +0x67 0x51 0xb7 0x70 # CHECK: vmaddwod.d.wu $vr7, $vr11, $vr20 +0x5e 0xe2 0xb7 0x70 # CHECK: vmaddwod.q.du $vr30, $vr18, $vr24 +0x24 0x10 0xbc 0x70 # CHECK: vmaddwev.h.bu.b $vr4, $vr1, $vr4 +0x79 0xbd 0xbc 0x70 # CHECK: vmaddwev.w.hu.h $vr25, $vr11, $vr15 +0x0a 0x52 0xbd 0x70 # CHECK: vmaddwev.d.wu.w $vr10, $vr16, $vr20 +0x96 0xde 0xbd 0x70 # CHECK: vmaddwev.q.du.d $vr22, $vr20, $vr23 +0x3f 0x6f 0xbe 0x70 # CHECK: vmaddwod.h.bu.b $vr31, $vr25, $vr27 +0x48 0xe2 0xbe 0x70 # CHECK: vmaddwod.w.hu.h $vr8, $vr18, $vr24 +0xb2 0x29 0xbf 0x70 # CHECK: vmaddwod.d.wu.w $vr18, $vr13, $vr10 +0xaa 0xbc 0xbf 0x70 # CHECK: vmaddwod.q.du.d $vr10, $vr5, $vr15 diff --git a/test/MC/LoongArch/aligned-nops.s b/test/MC/LoongArch/aligned-nops.s new file mode 100644 index 00000000..2ef26ac4 --- /dev/null +++ b/test/MC/LoongArch/aligned-nops.s @@ -0,0 +1,25 @@ +# RUN: llvm-mc -filetype=obj -triple loongarch64 < %s \ +# RUN: | llvm-objdump -d - | FileCheck -check-prefix=CHECK-INST %s + +# alpha and main are both aligned to 8 bytes, +# but alpha's body is only 4 bytes long, +# so the assembler will insert a nop to keep main 8-byte aligned.
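+# (Illustrative sketch of the expected layout, not checked output: alpha's
+# only instruction occupies bytes 0x0-0x3, so the assembler pads bytes
+# 0x4-0x7 with a single 4-byte nop and main lands on the 8-byte boundary 0x8.)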
+ + .text + .p2align 3 + .type alpha,@function +alpha: +# BB#0: + addi.d $sp, $sp, -16 +# CHECK-INST: nop +.Lfunc_end0: + .size alpha, .Lfunc_end0-alpha + # -- End function + .globl main + .p2align 3 + .type main,@function +main: # @main +# BB#0: +.Lfunc_end1: + .size main, .Lfunc_end1-main + # -- End function diff --git a/test/MC/LoongArch/atomic-error.s b/test/MC/LoongArch/atomic-error.s new file mode 100644 index 00000000..7e61a5ba --- /dev/null +++ b/test/MC/LoongArch/atomic-error.s @@ -0,0 +1,7 @@ +# RUN: not llvm-mc --triple=loongarch64 %s 2>&1 | FileCheck %s + +# CHECK: error: expected memory with constant 0 offset +amadd_db.d $a1, $t5, $s6, 1 + +# CHECK: error: unexpected token in argument list +amadd_db.d $a1, $t5, $s6, a diff --git a/test/MC/LoongArch/atomic.s b/test/MC/LoongArch/atomic.s new file mode 100644 index 00000000..10a40655 --- /dev/null +++ b/test/MC/LoongArch/atomic.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc %s --triple=loongarch64 --show-encoding | \ +# RUN: FileCheck --check-prefixes=ASM,ASM-AND-OBJ %s +# RUN: llvm-mc %s --triple=loongarch64 --filetype=obj | llvm-objdump -d - | \ +# RUN: FileCheck --check-prefixes=ASM-AND-OBJ %s + +# ASM-AND-OBJ: amadd_db.d $r5, $r17, $r29, 0 +# ASM: encoding: [0xa5,0xc7,0x6a,0x38] +amadd_db.d $a1, $t5, $s6, 0 + +# ASM-AND-OBJ: amadd_db.d $r5, $r17, $r29, 0 +# ASM: encoding: [0xa5,0xc7,0x6a,0x38] +amadd_db.d $a1, $t5, $s6 diff --git a/test/MC/LoongArch/fixups-expr.s b/test/MC/LoongArch/fixups-expr.s new file mode 100644 index 00000000..b3c4c74f --- /dev/null +++ b/test/MC/LoongArch/fixups-expr.s @@ -0,0 +1,40 @@ +# RUN: llvm-mc --filetype=obj --triple=loongarch64 %s \ +# RUN: | llvm-readobj -r - | FileCheck %s + +# Check that subtraction expressions are emitted as two relocations + +.globl G1 +.globl G2 +.L1: +G1: +nop +.L2: +G2: + +.data +.8byte .L2-.L1 # CHECK: 0x0 R_LARCH_ADD64 .text 0x4 + # CHECK: 0x0 R_LARCH_SUB64 .text 0x0 +.8byte G2-G1 # CHECK: 0x8 R_LARCH_ADD64 G2 0x0 + # CHECK: 0x8 R_LARCH_SUB64 G1 0x0 +.4byte .L2-.L1 # CHECK: 0x10 R_LARCH_ADD32 .text 0x4 + # CHECK: 0x10 R_LARCH_SUB32 .text 0x0 +.4byte G2-G1 # CHECK: 0x14 R_LARCH_ADD32 G2 0x0 + # CHECK: 0x14 R_LARCH_SUB32 G1 0x0 +.2byte .L2-.L1 # CHECK: 0x18 R_LARCH_ADD16 .text 0x4 + # CHECK: 0x18 R_LARCH_SUB16 .text 0x0 +.2byte G2-G1 # CHECK: 0x1A R_LARCH_ADD16 G2 0x0 + # CHECK: 0x1A R_LARCH_SUB16 G1 0x0 +.byte .L2-.L1 # CHECK: 0x1C R_LARCH_ADD8 .text 0x4 + # CHECK: 0x1C R_LARCH_SUB8 .text 0x0 +.byte G2-G1 # CHECK: 0x1D R_LARCH_ADD8 G2 0x0 + # CHECK: 0x1D R_LARCH_SUB8 G1 0x0 + +.section .rodata.str.1 +.L.str: +.asciz "string" + +.rodata +.Lreltable: +.word .L.str-.Lreltable # CHECK: 0x0 R_LARCH_ADD32 .rodata.str.1 0x0 + # CHECK: 0x0 R_LARCH_SUB32 .rodata 0x0 + diff --git a/test/MC/LoongArch/invalid.s b/test/MC/LoongArch/invalid.s new file mode 100644 index 00000000..e0fc7ce4 --- /dev/null +++ b/test/MC/LoongArch/invalid.s @@ -0,0 +1,50 @@ +# RUN: not llvm-mc %s -triple=loongarch64-unknown-linux-gnu 2>&1 | FileCheck %s +.text +csrxchg $r6, $r0, 214 # CHECK: :[[@LINE]]:1: error: invalid operand ($zero) for instruction +csrxchg $r6, $r1, 214 # CHECK: :[[@LINE]]:1: error: invalid operand ($r1) for instruction + +## out-of-bound immediate +### simm16 << 2 +beq $r10, $r7, -0x20000-4 # CHECK: :[[@LINE]]:1: error: branch target out of range +beq $r10, $r7, 0x1FFFC+4 # CHECK: :[[@LINE]]:1: error: branch target out of range +bne $r10, $r7, -0x20000-4 # CHECK: :[[@LINE]]:1: error: branch target out of range +bne $r10, $r7, 0x1FFFC+4 # CHECK: :[[@LINE]]:1: error: branch target out of range 
+blt $r10, $r7, -0x20000-4 # CHECK: :[[@LINE]]:1: error: branch target out of range +blt $r10, $r7, 0x1FFFC+4 # CHECK: :[[@LINE]]:1: error: branch target out of range +bge $r10, $r7, -0x20000-4 # CHECK: :[[@LINE]]:1: error: branch target out of range +bge $r10, $r7, 0x1FFFC+4 # CHECK: :[[@LINE]]:1: error: branch target out of range +bltu $r10, $r7, -0x20000-4 # CHECK: :[[@LINE]]:1: error: branch target out of range +bltu $r10, $r7, 0x1FFFC+4 # CHECK: :[[@LINE]]:1: error: branch target out of range +bgeu $r10, $r7, -0x20000-4 # CHECK: :[[@LINE]]:1: error: branch target out of range +bgeu $r10, $r7, 0x1FFFC+4 # CHECK: :[[@LINE]]:1: error: branch target out of range +### simm21 << 2 +beqz $r9, -0x400000-4 # CHECK: :[[@LINE]]:1: error: branch target out of range +beqz $r9, 0x3FFFFC+4 # CHECK: :[[@LINE]]:1: error: branch target out of range +bnez $r9, -0x400000-4 # CHECK: :[[@LINE]]:1: error: branch target out of range +bnez $r9, 0x3FFFFC+4 # CHECK: :[[@LINE]]:1: error: branch target out of range +bceqz $fcc6, -0x400000-4 # CHECK: :[[@LINE]]:1: error: branch target out of range +bceqz $fcc6, 0x3FFFFC+4 # CHECK: :[[@LINE]]:1: error: branch target out of range +bcnez $fcc6, -0x400000-4 # CHECK: :[[@LINE]]:1: error: branch target out of range +bcnez $fcc6, 0x3FFFFC+4 # CHECK: :[[@LINE]]:1: error: branch target out of range +### simm26 << 2 +b -0x8000000-4 # CHECK: :[[@LINE]]:1: error: branch target out of range +b 0x7FFFFFC+4 # CHECK: :[[@LINE]]:1: error: branch target out of range +bl -0x8000000-4 # CHECK: :[[@LINE]]:1: error: branch target out of range +bl 0x7FFFFFC+4 # CHECK: :[[@LINE]]:1: error: branch target out of range + +## unaligned immediate +### simm16 << 2 +beq $r10, $r7, 0x1FFFC+1 # CHECK: :[[@LINE]]:1: error: branch to misaligned address +bne $r10, $r7, 0x1FFFC+1 # CHECK: :[[@LINE]]:1: error: branch to misaligned address +blt $r10, $r7, 0x1FFFC+1 # CHECK: :[[@LINE]]:1: error: branch to misaligned address +bge $r10, $r7, 0x1FFFC+1 # CHECK: :[[@LINE]]:1: error: branch to misaligned address +bltu $r10, $r7, 0x1FFFC+1 # CHECK: :[[@LINE]]:1: error: branch to misaligned address +bgeu $r10, $r7, 0x1FFFC+1 # CHECK: :[[@LINE]]:1: error: branch to misaligned address +### simm21 << 2 +beqz $r9, 0x3FFFFC+1 # CHECK: :[[@LINE]]:1: error: branch to misaligned address +bnez $r9, 0x3FFFFC+1 # CHECK: :[[@LINE]]:1: error: branch to misaligned address +bceqz $fcc6, 0x3FFFFC+1 # CHECK: :[[@LINE]]:1: error: branch to misaligned address +bcnez $fcc6, 0x3FFFFC+1 # CHECK: :[[@LINE]]:1: error: branch to misaligned address +### simm26 << 2 +b 0x7FFFFFC+1 # CHECK: :[[@LINE]]:1: error: branch to misaligned address +bl 0x7FFFFFC+1 # CHECK: :[[@LINE]]:1: error: branch to misaligned address diff --git a/test/MC/LoongArch/lit.local.cfg b/test/MC/LoongArch/lit.local.cfg new file mode 100644 index 00000000..6223fc69 --- /dev/null +++ b/test/MC/LoongArch/lit.local.cfg @@ -0,0 +1,3 @@ +if not 'LoongArch' in config.root.targets: + config.unsupported = True + diff --git a/test/MC/LoongArch/macro-la.s b/test/MC/LoongArch/macro-la.s new file mode 100644 index 00000000..945c529e --- /dev/null +++ b/test/MC/LoongArch/macro-la.s @@ -0,0 +1,35 @@ +# RUN: llvm-mc %s -triple=loongarch64-unknown-linux-gnu -show-encoding | FileCheck %s + +# CHECK: # fixup A - offset: 0, value: symbol, kind: fixup_loongarch_pcala_hi20 +# CHECK: # fixup A - offset: 0, value: symbol, kind: fixup_loongarch_pcala_lo12 +la.local $a0, symbol + +# CHECK: # fixup A - offset: 0, value: symbol, kind: fixup_loongarch_got_pc_hi20 +# CHECK: # fixup A - offset: 
0, value: symbol, kind: fixup_loongarch_got_pc_lo12 +la.global $a0, symbol + +# CHECK: # fixup A - offset: 0, value: symbol, kind: fixup_loongarch_pcala_hi20 +# CHECK: # fixup A - offset: 0, value: symbol, kind: fixup_loongarch_pcala_lo12 +la.pcrel $a0, symbol + +# CHECK: # fixup A - offset: 0, value: symbol, kind: fixup_loongarch_got_pc_hi20 +# CHECK: # fixup A - offset: 0, value: symbol, kind: fixup_loongarch_got_pc_lo12 +la.got $a0, symbol + +# CHECK: # fixup A - offset: 0, value: symbol, kind: fixup_loongarch_tls_le_hi20 +# CHECK: # fixup A - offset: 0, value: symbol, kind: fixup_loongarch_tls_le_lo12 +# CHECK: # fixup A - offset: 0, value: symbol, kind: fixup_loongarch_tls_le64_lo20 +# CHECK: # fixup A - offset: 0, value: symbol, kind: fixup_loongarch_tls_le64_hi12 +la.tls.le $a0, symbol + +# CHECK: # fixup A - offset: 0, value: symbol, kind: fixup_loongarch_tls_ie_pc_hi20 +# CHECK: # fixup A - offset: 0, value: symbol, kind: fixup_loongarch_tls_ie_pc_lo12 +la.tls.ie $a0, symbol + +# CHECK: # fixup A - offset: 0, value: symbol, kind: fixup_loongarch_tls_gd_pc_hi20 +# CHECK: # fixup A - offset: 0, value: symbol, kind: fixup_loongarch_got_pc_lo12 +la.tls.ld $a0, symbol + +# CHECK: # fixup A - offset: 0, value: symbol, kind: fixup_loongarch_tls_gd_pc_hi20 +# CHECK: # fixup A - offset: 0, value: symbol, kind: fixup_loongarch_got_pc_lo12 +la.tls.gd $a0, symbol diff --git a/test/MC/LoongArch/macro-li.s b/test/MC/LoongArch/macro-li.s new file mode 100644 index 00000000..b1a7c58b --- /dev/null +++ b/test/MC/LoongArch/macro-li.s @@ -0,0 +1,773 @@ +# RUN: llvm-mc %s -triple=loongarch64-unknown-linux-gnu | FileCheck %s +li.w $a0, 0x00000000 # CHECK: ori $r4, $zero, 0 +li.w $a0, 0x000007ff # CHECK: ori $r4, $zero, 2047 +li.w $a0, 0x00000800 # CHECK: ori $r4, $zero, 2048 +li.w $a0, 0x00000fff # CHECK: ori $r4, $zero, 4095 +li.w $a0, 0x7ffff000 # CHECK: lu12i.w $r4, 524287 +li.w $a0, 0x7ffff7ff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2047 +li.w $a0, 0x7ffff800 # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2048 +li.w $a0, 0x7fffffff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 4095 +li.w $a0, 0x80000000 # CHECK: lu12i.w $r4, -524288 +li.w $a0, 0x800007ff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2047 +li.w $a0, 0x80000800 # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2048 +li.w $a0, 0x80000fff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 4095 +li.w $a0, 0xfffff000 # CHECK: lu12i.w $r4, -1 +li.w $a0, 0xfffff7ff # CHECK: lu12i.w $r4, -1 + # CHECK: ori $r4, $r4, 2047 +li.w $a0, 0xfffff800 # CHECK: addi.w $r4, $zero, -2048 +li.w $a0, 0xffffffff # CHECK: addi.w $r4, $zero, -1 +li.d $a0, 0x0000000000000000 # CHECK: addi.d $r4, $zero, 0 +li.d $a0, 0x00000000000007ff # CHECK: addi.d $r4, $zero, 2047 +li.d $a0, 0x0000000000000800 # CHECK: ori $r4, $zero, 2048 +li.d $a0, 0x0000000000000fff # CHECK: ori $r4, $zero, 4095 +li.d $a0, 0x000000007ffff000 # CHECK: lu12i.w $r4, 524287 +li.d $a0, 0x000000007ffff7ff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2047 +li.d $a0, 0x000000007ffff800 # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2048 +li.d $a0, 0x000000007fffffff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 4095 +li.d $a0, 0x0000000080000000 # CHECK: lu12i.w $r4, -524288 + # CHECK: lu32i.d $r4, 0 +li.d $a0, 0x00000000800007ff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, 0 +li.d $a0, 0x0000000080000800 # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d 
$r4, 0 +li.d $a0, 0x0000000080000fff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, 0 +li.d $a0, 0x00000000fffff000 # CHECK: lu12i.w $r4, -1 + # CHECK: lu32i.d $r4, 0 +li.d $a0, 0x00000000fffff7ff # CHECK: lu12i.w $r4, -1 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, 0 +li.d $a0, 0x00000000fffff800 # CHECK: addi.w $r4, $zero, -2048 + # CHECK: lu32i.d $r4, 0 +li.d $a0, 0x00000000ffffffff # CHECK: addi.w $r4, $zero, -1 + # CHECK: lu32i.d $r4, 0 +li.d $a0, 0x0007ffff00000000 # CHECK: ori $r4, $zero, 0 + # CHECK: lu32i.d $r4, 524287 +li.d $a0, 0x0007ffff000007ff # CHECK: ori $r4, $zero, 2047 + # CHECK: lu32i.d $r4, 524287 +li.d $a0, 0x0007ffff00000800 # CHECK: ori $r4, $zero, 2048 + # CHECK: lu32i.d $r4, 524287 +li.d $a0, 0x0007ffff00000fff # CHECK: ori $r4, $zero, 4095 + # CHECK: lu32i.d $r4, 524287 +li.d $a0, 0x0007ffff7ffff000 # CHECK: lu12i.w $r4, 524287 + # CHECK: lu32i.d $r4, 524287 +li.d $a0, 0x0007ffff7ffff7ff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, 524287 +li.d $a0, 0x0007ffff7ffff800 # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, 524287 +li.d $a0, 0x0007ffff7fffffff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, 524287 +li.d $a0, 0x0007ffff80000000 # CHECK: lu12i.w $r4, -524288 + # CHECK: lu32i.d $r4, 524287 +li.d $a0, 0x0007ffff800007ff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, 524287 +li.d $a0, 0x0007ffff80000800 # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, 524287 +li.d $a0, 0x0007ffff80000fff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, 524287 +li.d $a0, 0x0007fffffffff000 # CHECK: lu12i.w $r4, -1 + # CHECK: lu32i.d $r4, 524287 +li.d $a0, 0x0007fffffffff7ff # CHECK: lu12i.w $r4, -1 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, 524287 +li.d $a0, 0x0007fffffffff800 # CHECK: addi.w $r4, $zero, -2048 + # CHECK: lu32i.d $r4, 524287 +li.d $a0, 0x0007ffffffffffff # CHECK: addi.w $r4, $zero, -1 + # CHECK: lu32i.d $r4, 524287 +li.d $a0, 0x0008000000000000 # CHECK: ori $r4, $zero, 0 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x00080000000007ff # CHECK: ori $r4, $zero, 2047 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x0008000000000800 # CHECK: ori $r4, $zero, 2048 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x0008000000000fff # CHECK: ori $r4, $zero, 4095 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x000800007ffff000 # CHECK: lu12i.w $r4, 524287 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x000800007ffff7ff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x000800007ffff800 # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x000800007fffffff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x0008000080000000 # CHECK: lu12i.w $r4, -524288 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x00080000800007ff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x0008000080000800 # CHECK: lu12i.w $r4, -524288 + # 
CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x0008000080000fff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x00080000fffff000 # CHECK: lu12i.w $r4, -1 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x00080000fffff7ff # CHECK: lu12i.w $r4, -1 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x00080000fffff800 # CHECK: addi.w $r4, $zero, -2048 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x00080000ffffffff # CHECK: addi.w $r4, $zero, -1 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x000fffff00000000 # CHECK: ori $r4, $zero, 0 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x000fffff000007ff # CHECK: ori $r4, $zero, 2047 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x000fffff00000800 # CHECK: ori $r4, $zero, 2048 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x000fffff00000fff # CHECK: ori $r4, $zero, 4095 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x000fffff7ffff000 # CHECK: lu12i.w $r4, 524287 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x000fffff7ffff7ff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x000fffff7ffff800 # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x000fffff7fffffff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x000fffff80000000 # CHECK: lu12i.w $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x000fffff800007ff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x000fffff80000800 # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x000fffff80000fff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x000ffffffffff000 # CHECK: lu12i.w $r4, -1 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x000ffffffffff7ff # CHECK: lu12i.w $r4, -1 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x000ffffffffff800 # CHECK: addi.w $r4, $zero, -2048 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x000fffffffffffff # CHECK: addi.w $r4, $zero, -1 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x7ff0000000000000 # CHECK: lu52i.d $r4, $zero, 2047 +li.d $a0, 0x7ff00000000007ff # CHECK: ori $r4, $zero, 2047 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff0000000000800 # CHECK: ori $r4, $zero, 2048 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff0000000000fff # CHECK: ori $r4, $zero, 4095 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff000007ffff000 # CHECK: lu12i.w $r4, 524287 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff000007ffff7ff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff000007ffff800 # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff000007fffffff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff0000080000000 # CHECK: lu12i.w $r4, -524288 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, 
2047 +li.d $a0, 0x7ff00000800007ff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff0000080000800 # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff0000080000fff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff00000fffff000 # CHECK: lu12i.w $r4, -1 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff00000fffff7ff # CHECK: lu12i.w $r4, -1 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff00000fffff800 # CHECK: addi.w $r4, $zero, -2048 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff00000ffffffff # CHECK: addi.w $r4, $zero, -1 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff7ffff00000000 # CHECK: ori $r4, $zero, 0 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff7ffff000007ff # CHECK: ori $r4, $zero, 2047 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff7ffff00000800 # CHECK: ori $r4, $zero, 2048 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff7ffff00000fff # CHECK: ori $r4, $zero, 4095 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff7ffff7ffff000 # CHECK: lu12i.w $r4, 524287 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff7ffff7ffff7ff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff7ffff7ffff800 # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff7ffff7fffffff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff7ffff80000000 # CHECK: lu12i.w $r4, -524288 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff7ffff800007ff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff7ffff80000800 # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff7ffff80000fff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff7fffffffff000 # CHECK: lu12i.w $r4, -1 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff7fffffffff7ff # CHECK: lu12i.w $r4, -1 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff7fffffffff800 # CHECK: addi.w $r4, $zero, -2048 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff7ffffffffffff # CHECK: addi.w $r4, $zero, -1 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff8000000000000 # CHECK: ori $r4, $zero, 0 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff80000000007ff # CHECK: ori $r4, $zero, 2047 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff8000000000800 # CHECK: ori $r4, $zero, 2048 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 
0x7ff8000000000fff # CHECK: ori $r4, $zero, 4095 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff800007ffff000 # CHECK: lu12i.w $r4, 524287 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff800007ffff7ff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff800007ffff800 # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff800007fffffff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff8000080000000 # CHECK: lu12i.w $r4, -524288 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff80000800007ff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff8000080000800 # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff8000080000fff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff80000fffff000 # CHECK: lu12i.w $r4, -1 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff80000fffff7ff # CHECK: lu12i.w $r4, -1 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff80000fffff800 # CHECK: addi.w $r4, $zero, -2048 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff80000ffffffff # CHECK: addi.w $r4, $zero, -1 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7fffffff00000000 # CHECK: ori $r4, $zero, 0 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7fffffff000007ff # CHECK: ori $r4, $zero, 2047 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7fffffff00000800 # CHECK: ori $r4, $zero, 2048 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7fffffff00000fff # CHECK: ori $r4, $zero, 4095 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7fffffff7ffff000 # CHECK: lu12i.w $r4, 524287 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7fffffff7ffff7ff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7fffffff7ffff800 # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7fffffff7fffffff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7fffffff80000000 # CHECK: lu12i.w $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7fffffff800007ff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7fffffff80000800 # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7fffffff80000fff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ffffffffffff000 # CHECK: lu12i.w $r4, -1 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ffffffffffff7ff # CHECK: lu12i.w $r4, -1 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu52i.d $r4, $r4, 2047 
+li.d $a0, 0x7ffffffffffff800 # CHECK: addi.w $r4, $zero, -2048 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7fffffffffffffff # CHECK: addi.w $r4, $zero, -1 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x8000000000000000 # CHECK: lu52i.d $r4, $zero, -2048 +li.d $a0, 0x80000000000007ff # CHECK: ori $r4, $zero, 2047 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8000000000000800 # CHECK: ori $r4, $zero, 2048 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8000000000000fff # CHECK: ori $r4, $zero, 4095 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800000007ffff000 # CHECK: lu12i.w $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800000007ffff7ff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800000007ffff800 # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800000007fffffff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8000000080000000 # CHECK: lu12i.w $r4, -524288 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x80000000800007ff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8000000080000800 # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8000000080000fff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x80000000fffff000 # CHECK: lu12i.w $r4, -1 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x80000000fffff7ff # CHECK: lu12i.w $r4, -1 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x80000000fffff800 # CHECK: addi.w $r4, $zero, -2048 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x80000000ffffffff # CHECK: addi.w $r4, $zero, -1 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8007ffff00000000 # CHECK: ori $r4, $zero, 0 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8007ffff000007ff # CHECK: ori $r4, $zero, 2047 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8007ffff00000800 # CHECK: ori $r4, $zero, 2048 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8007ffff00000fff # CHECK: ori $r4, $zero, 4095 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8007ffff7ffff000 # CHECK: lu12i.w $r4, 524287 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8007ffff7ffff7ff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8007ffff7ffff800 # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8007ffff7fffffff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8007ffff80000000 # CHECK: lu12i.w $r4, -524288 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8007ffff800007ff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8007ffff80000800 # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2048 
+ # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8007ffff80000fff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8007fffffffff000 # CHECK: lu12i.w $r4, -1 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8007fffffffff7ff # CHECK: lu12i.w $r4, -1 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8007fffffffff800 # CHECK: addi.w $r4, $zero, -2048 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8007ffffffffffff # CHECK: addi.w $r4, $zero, -1 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8008000000000000 # CHECK: ori $r4, $zero, 0 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x80080000000007ff # CHECK: ori $r4, $zero, 2047 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8008000000000800 # CHECK: ori $r4, $zero, 2048 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8008000000000fff # CHECK: ori $r4, $zero, 4095 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800800007ffff000 # CHECK: lu12i.w $r4, 524287 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800800007ffff7ff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800800007ffff800 # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800800007fffffff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8008000080000000 # CHECK: lu12i.w $r4, -524288 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x80080000800007ff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8008000080000800 # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8008000080000fff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x80080000fffff000 # CHECK: lu12i.w $r4, -1 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x80080000fffff7ff # CHECK: lu12i.w $r4, -1 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x80080000fffff800 # CHECK: addi.w $r4, $zero, -2048 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x80080000ffffffff # CHECK: addi.w $r4, $zero, -1 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800fffff00000000 # CHECK: ori $r4, $zero, 0 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800fffff000007ff # CHECK: ori $r4, $zero, 2047 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800fffff00000800 # CHECK: ori $r4, $zero, 2048 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800fffff00000fff # CHECK: ori $r4, $zero, 4095 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800fffff7ffff000 # CHECK: lu12i.w $r4, 524287 + # CHECK: lu32i.d $r4, -1 + 
# CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800fffff7ffff7ff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800fffff7ffff800 # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800fffff7fffffff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800fffff80000000 # CHECK: lu12i.w $r4, -524288 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800fffff800007ff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800fffff80000800 # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800fffff80000fff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800ffffffffff000 # CHECK: lu12i.w $r4, -1 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800ffffffffff7ff # CHECK: lu12i.w $r4, -1 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800ffffffffff800 # CHECK: addi.w $r4, $zero, -2048 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800fffffffffffff # CHECK: addi.w $r4, $zero, -1 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0xfff0000000000000 # CHECK: lu52i.d $r4, $zero, -1 +li.d $a0, 0xfff00000000007ff # CHECK: ori $r4, $zero, 2047 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff0000000000800 # CHECK: ori $r4, $zero, 2048 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff0000000000fff # CHECK: ori $r4, $zero, 4095 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff000007ffff000 # CHECK: lu12i.w $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff000007ffff7ff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff000007ffff800 # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff000007fffffff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff0000080000000 # CHECK: lu12i.w $r4, -524288 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff00000800007ff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff0000080000800 # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff0000080000fff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff00000fffff000 # CHECK: lu12i.w $r4, -1 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff00000fffff7ff # CHECK: lu12i.w $r4, -1 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff00000fffff800 # CHECK: addi.w $r4, $zero, -2048 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff00000ffffffff # CHECK: addi.w $r4, $zero, -1 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff7ffff00000000 # CHECK: ori $r4, $zero, 0 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff7ffff000007ff # CHECK: ori $r4, $zero, 2047 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff7ffff00000800 # CHECK: ori $r4, $zero, 2048 + # CHECK: lu32i.d $r4, 524287 + # 
CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff7ffff00000fff # CHECK: ori $r4, $zero, 4095 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff7ffff7ffff000 # CHECK: lu12i.w $r4, 524287 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff7ffff7ffff7ff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff7ffff7ffff800 # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff7ffff7fffffff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff7ffff80000000 # CHECK: lu12i.w $r4, -524288 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff7ffff800007ff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff7ffff80000800 # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff7ffff80000fff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff7fffffffff000 # CHECK: lu12i.w $r4, -1 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff7fffffffff7ff # CHECK: lu12i.w $r4, -1 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff7fffffffff800 # CHECK: addi.w $r4, $zero, -2048 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff7ffffffffffff # CHECK: addi.w $r4, $zero, -1 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff8000000000000 # CHECK: ori $r4, $zero, 0 + # CHECK: lu32i.d $r4, -524288 +li.d $a0, 0xfff80000000007ff # CHECK: ori $r4, $zero, 2047 + # CHECK: lu32i.d $r4, -524288 +li.d $a0, 0xfff8000000000800 # CHECK: ori $r4, $zero, 2048 + # CHECK: lu32i.d $r4, -524288 +li.d $a0, 0xfff8000000000fff # CHECK: ori $r4, $zero, 4095 + # CHECK: lu32i.d $r4, -524288 +li.d $a0, 0xfff800007ffff000 # CHECK: lu12i.w $r4, 524287 + # CHECK: lu32i.d $r4, -524288 +li.d $a0, 0xfff800007ffff7ff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, -524288 +li.d $a0, 0xfff800007ffff800 # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, -524288 +li.d $a0, 0xfff800007fffffff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, -524288 +li.d $a0, 0xfff8000080000000 # CHECK: lu12i.w $r4, -524288 + # CHECK: lu32i.d $r4, -524288 +li.d $a0, 0xfff80000800007ff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, -524288 +li.d $a0, 0xfff8000080000800 # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, -524288 +li.d $a0, 0xfff8000080000fff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, -524288 +li.d $a0, 0xfff80000fffff000 # CHECK: lu12i.w $r4, -1 + # CHECK: lu32i.d $r4, -524288 +li.d $a0, 0xfff80000fffff7ff # CHECK: lu12i.w $r4, -1 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, -524288 +li.d $a0, 0xfff80000fffff800 # CHECK: addi.w $r4, $zero, -2048 + # CHECK: lu32i.d $r4, -524288 +li.d $a0, 0xfff80000ffffffff # CHECK: addi.w $r4, $zero, -1 + # CHECK: lu32i.d $r4, -524288 +li.d $a0, 0xffffffff00000000 # CHECK: ori $r4, $zero, 0 
+ # CHECK: lu32i.d $r4, -1 +li.d $a0, 0xffffffff000007ff # CHECK: ori $r4, $zero, 2047 + # CHECK: lu32i.d $r4, -1 +li.d $a0, 0xffffffff00000800 # CHECK: ori $r4, $zero, 2048 + # CHECK: lu32i.d $r4, -1 +li.d $a0, 0xffffffff00000fff # CHECK: ori $r4, $zero, 4095 + # CHECK: lu32i.d $r4, -1 +li.d $a0, 0xffffffff7ffff000 # CHECK: lu12i.w $r4, 524287 + # CHECK: lu32i.d $r4, -1 +li.d $a0, 0xffffffff7ffff7ff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, -1 +li.d $a0, 0xffffffff7ffff800 # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, -1 +li.d $a0, 0xffffffff7fffffff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, -1 +li.d $a0, 0xffffffff80000000 # CHECK: lu12i.w $r4, -524288 +li.d $a0, 0xffffffff800007ff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2047 +li.d $a0, 0xffffffff80000800 # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2048 +li.d $a0, 0xffffffff80000fff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 4095 +li.d $a0, 0xfffffffffffff000 # CHECK: lu12i.w $r4, -1 +li.d $a0, 0xfffffffffffff7ff # CHECK: lu12i.w $r4, -1 + # CHECK: ori $r4, $r4, 2047 +li.d $a0, 0xfffffffffffff800 # CHECK: addi.d $r4, $zero, -2048 +li.d $a0, 0xffffffffffffffff # CHECK: addi.d $r4, $zero, -1 diff --git a/test/MC/LoongArch/target-abi-valid.s b/test/MC/LoongArch/target-abi-valid.s new file mode 100644 index 00000000..7aa43b08 --- /dev/null +++ b/test/MC/LoongArch/target-abi-valid.s @@ -0,0 +1,28 @@ +# RUN: llvm-mc -triple loongarch64 -filetype=obj < %s \ +# RUN: | llvm-readelf -h - \ +# RUN: | FileCheck -check-prefix=CHECK-NONE %s + +# RUN: llvm-mc -triple loongarch64 -target-abi lp64s -filetype=obj < %s \ +# RUN: | llvm-readelf -h - \ +# RUN: | FileCheck -check-prefix=CHECK-LP64S %s + +# RUN: llvm-mc -triple loongarch64 -target-abi lp64f -filetype=obj < %s \ +# RUN: | llvm-readelf -h - \ +# RUN: | FileCheck -check-prefix=CHECK-LP64F %s + +# RUN: llvm-mc -triple loongarch64 -target-abi lp64d -filetype=obj < %s \ +# RUN: | llvm-readelf -h - \ +# RUN: | FileCheck -check-prefix=CHECK-LP64D %s + +# CHECK-NONE: Class: ELF64 +# CHECK-NONE: Flags: 0x43, DOUBLE-FLOAT, OBJ-v1 + +# CHECK-LP64S: Class: ELF64 +# CHECK-LP64S: Flags: 0x41, SOFT-FLOAT, OBJ-v1 + +# CHECK-LP64F: Class: ELF64 +# CHECK-LP64F: Flags: 0x42, SINGLE-FLOAT, OBJ-v1 + +# CHECK-LP64D: Class: ELF64 +# CHECK-LP64D: Flags: 0x43, DOUBLE-FLOAT, OBJ-v1 + diff --git a/test/MC/LoongArch/unaligned-nops.s b/test/MC/LoongArch/unaligned-nops.s new file mode 100644 index 00000000..453e2cdc --- /dev/null +++ b/test/MC/LoongArch/unaligned-nops.s @@ -0,0 +1,5 @@ +# RUN: not --crash llvm-mc -filetype=obj -triple=loongarch64 %s -o %t +.byte 1 +# CHECK: LLVM ERROR: unable to write nop sequence of 3 bytes +.p2align 2 +foo: diff --git a/test/MC/LoongArch/valid_12imm.s b/test/MC/LoongArch/valid_12imm.s new file mode 100644 index 00000000..ed44180b --- /dev/null +++ b/test/MC/LoongArch/valid_12imm.s @@ -0,0 +1,33 @@ +# RUN: llvm-mc %s -triple=loongarch64-unknown-linux-gnu -show-encoding | FileCheck %s +# CHECK: slti $r27, $ra, 235 +# CHECK: encoding: [0x3b,0xac,0x03,0x02] +slti $r27, $ra, 235 + +# CHECK: sltui $zero, $r8, 162 +# CHECK: encoding: [0x00,0x89,0x42,0x02] +sltui $zero, $r8, 162 + +# CHECK: addi.w $r5, $r7, 246 +# CHECK: encoding: [0xe5,0xd8,0x83,0x02] +addi.w $r5, $r7, 246 + +# CHECK: addi.d $r28, $r6, 75 +# CHECK: encoding: [0xdc,0x2c,0xc1,0x02] +addi.d $r28, $r6, 75 + +# CHECK: lu52i.d $r13, $r4, 195 +# CHECK: encoding: 
[0x8d,0x0c,0x03,0x03] +lu52i.d $r13, $r4, 195 + +# CHECK: andi $r25, $zero, 106 +# CHECK: encoding: [0x19,0xa8,0x41,0x03] +andi $r25, $zero, 106 + +# CHECK: ori $r17, $r5, 47 +# CHECK: encoding: [0xb1,0xbc,0x80,0x03] +ori $r17, $r5, 47 + +# CHECK: xori $r18, $r23, 99 +# CHECK: encoding: [0xf2,0x8e,0xc1,0x03] +xori $r18, $r23, 99 + diff --git a/test/MC/LoongArch/valid_4operands.s b/test/MC/LoongArch/valid_4operands.s new file mode 100644 index 00000000..1418bb67 --- /dev/null +++ b/test/MC/LoongArch/valid_4operands.s @@ -0,0 +1,53 @@ +# RUN: llvm-mc %s -triple=loongarch64-unknown-linux-gnu -show-encoding | FileCheck %s +# CHECK: fmadd.s $f3, $f16, $f3, $f15 +# CHECK: encoding: [0x03,0x8e,0x17,0x08] +fmadd.s $f3, $f16, $f3, $f15 + +# CHECK: fmadd.d $f21, $f24, $f28, $f24 +# CHECK: encoding: [0x15,0x73,0x2c,0x08] +fmadd.d $f21, $f24, $f28, $f24 + +# CHECK: fmsub.s $f23, $f11, $f21, $f4 +# CHECK: encoding: [0x77,0x55,0x52,0x08] +fmsub.s $f23, $f11, $f21, $f4 + +# CHECK: fmsub.d $f6, $f18, $f20, $f27 +# CHECK: encoding: [0x46,0xd2,0x6d,0x08] +fmsub.d $f6, $f18, $f20, $f27 + +# CHECK: fnmadd.s $f29, $f1, $f24, $f20 +# CHECK: encoding: [0x3d,0x60,0x9a,0x08] +fnmadd.s $f29, $f1, $f24, $f20 + +# CHECK: fnmadd.d $f25, $f13, $f19, $f30 +# CHECK: encoding: [0xb9,0x4d,0xaf,0x08] +fnmadd.d $f25, $f13, $f19, $f30 + +# CHECK: fnmsub.s $f8, $f4, $f24, $f25 +# CHECK: encoding: [0x88,0xe0,0xdc,0x08] +fnmsub.s $f8, $f4, $f24, $f25 + +# CHECK: fnmsub.d $f30, $f26, $f7, $f24 +# CHECK: encoding: [0x5e,0x1f,0xec,0x08] +fnmsub.d $f30, $f26, $f7, $f24 + +# CHECK: fcmp.ceq.s $fcc7, $f17, $f29 +# CHECK: encoding: [0x27,0x76,0x12,0x0c] +fcmp.ceq.s $fcc7, $f17, $f29 + +# CHECK: fcmp.ceq.d $fcc4, $f12, $f9 +# CHECK: encoding: [0x84,0x25,0x22,0x0c] +fcmp.ceq.d $fcc4, $f12, $f9 + +# CHECK: fcmp.cult.s $fcc0, $f0, $f1 +# CHECK: encoding: [0x00,0x04,0x15,0x0c] +fcmp.cult.s $fcc0, $f0, $f1 + +# CHECK: fcmp.cult.d $fcc2, $f3, $f4 +# CHECK: encoding: [0x62,0x10,0x25,0x0c] +fcmp.cult.d $fcc2, $f3, $f4 + +# CHECK: fsel $f18, $f20, $f21, $fcc4 +# CHECK: encoding: [0x92,0x56,0x02,0x0d] +fsel $f18, $f20, $f21, $fcc4 + diff --git a/test/MC/LoongArch/valid_bigimm.s b/test/MC/LoongArch/valid_bigimm.s new file mode 100644 index 00000000..d7b3bbb7 --- /dev/null +++ b/test/MC/LoongArch/valid_bigimm.s @@ -0,0 +1,33 @@ +# RUN: llvm-mc %s -triple=loongarch64-unknown-linux-gnu -show-encoding | FileCheck %s +# CHECK: addu16i.d $r9, $r23, 23 +# CHECK: encoding: [0xe9,0x5e,0x00,0x10] +addu16i.d $r9, $r23, 23 + +# CHECK: lu12i.w $r16, 49 +# CHECK: encoding: [0x30,0x06,0x00,0x14] +lu12i.w $r16, 49 + +# CHECK: lu12i.w $r4, -1 +# CHECK: encoding: [0xe4,0xff,0xff,0x15] +lu12i.w $r4, -1 + +# CHECK: lu32i.d $sp, 196 +# CHECK: encoding: [0x83,0x18,0x00,0x16] +lu32i.d $sp, 196 + +# CHECK: pcaddi $r9, 187 +# CHECK: encoding: [0x69,0x17,0x00,0x18] +pcaddi $r9, 187 + +# CHECK: pcalau12i $r10, 89 +# CHECK: encoding: [0x2a,0x0b,0x00,0x1a] +pcalau12i $r10, 89 + +# CHECK: pcaddu12i $zero, 37 +# CHECK: encoding: [0xa0,0x04,0x00,0x1c] +pcaddu12i $zero, 37 + +# CHECK: pcaddu18i $r12, 26 +# CHECK: encoding: [0x4c,0x03,0x00,0x1e] +pcaddu18i $r12, 26 + diff --git a/test/MC/LoongArch/valid_branch.s b/test/MC/LoongArch/valid_branch.s new file mode 100644 index 00000000..256e70b6 --- /dev/null +++ b/test/MC/LoongArch/valid_branch.s @@ -0,0 +1,155 @@ +# RUN: llvm-mc %s -triple=loongarch64-unknown-linux-gnu -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM-AND-OBJ,CHECK-ASM %s +# RUN: llvm-mc %s -triple=loongarch64-unknown-linux-gnu -filetype=obj \ +# RUN: | 
llvm-objdump -d - | FileCheck -check-prefix=CHECK-ASM-AND-OBJ %s + +## random operands +# CHECK-ASM-AND-OBJ: beqz $r9, 96 +# CHECK-ASM: encoding: [0x20,0x61,0x00,0x40] +beqz $r9, 96 + +# CHECK-ASM-AND-OBJ: bnez $sp, 212 +# CHECK-ASM: encoding: [0x60,0xd4,0x00,0x44] +bnez $sp, 212 + +# CHECK-ASM-AND-OBJ: bceqz $fcc6, 12 +# CHECK-ASM: encoding: [0xc0,0x0c,0x00,0x48] +bceqz $fcc6, 12 + +# CHECK-ASM-AND-OBJ: bcnez $fcc6, 72 +# CHECK-ASM: encoding: [0xc0,0x49,0x00,0x48] +bcnez $fcc6, 72 + +# CHECK-ASM-AND-OBJ: b 248 +# CHECK-ASM: encoding: [0x00,0xf8,0x00,0x50] +b 248 + +# CHECK-ASM-AND-OBJ: bl 236 +# CHECK-ASM: encoding: [0x00,0xec,0x00,0x54] +bl 236 + +# CHECK-ASM-AND-OBJ: beq $r10, $r7, 176 +# CHECK-ASM: encoding: [0x47,0xb1,0x00,0x58] +beq $r10, $r7, 176 + +# CHECK-ASM-AND-OBJ: bne $r25, $ra, 136 +# CHECK-ASM: encoding: [0x21,0x8b,0x00,0x5c] +bne $r25, $ra, 136 + +# CHECK-ASM-AND-OBJ: blt $r15, $r30, 168 +# CHECK-ASM: encoding: [0xfe,0xa9,0x00,0x60] +blt $r15, $r30, 168 + +# CHECK-ASM-AND-OBJ: bge $r12, $r15, 148 +# CHECK-ASM: encoding: [0x8f,0x95,0x00,0x64] +bge $r12, $r15, 148 + +# CHECK-ASM-AND-OBJ: bltu $r17, $r5, 4 +# CHECK-ASM: encoding: [0x25,0x06,0x00,0x68] +bltu $r17, $r5, 4 + +# CHECK-ASM-AND-OBJ: bgeu $r6, $r23, 140 +# CHECK-ASM: encoding: [0xd7,0x8c,0x00,0x6c] +bgeu $r6, $r23, 140 + + +## immediate lower/upper boundary +### simm16 << 2 +# CHECK-ASM-AND-OBJ: beq $r10, $r7, -131072 +# CHECK-ASM: encoding: [0x47,0x01,0x00,0x5a] +beq $r10, $r7, -0x20000 + +# CHECK-ASM-AND-OBJ: beq $r10, $r7, 131068 +# CHECK-ASM: encoding: [0x47,0xfd,0xff,0x59] +beq $r10, $r7, 0x1FFFC + +# CHECK-ASM-AND-OBJ: bne $r10, $r7, -131072 +# CHECK-ASM: encoding: [0x47,0x01,0x00,0x5e] +bne $r10, $r7, -0x20000 + +# CHECK-ASM-AND-OBJ: bne $r10, $r7, 131068 +# CHECK-ASM: encoding: [0x47,0xfd,0xff,0x5d] +bne $r10, $r7, 0x1FFFC + +# CHECK-ASM-AND-OBJ: blt $r10, $r7, -131072 +# CHECK-ASM: encoding: [0x47,0x01,0x00,0x62] +blt $r10, $r7, -0x20000 + +# CHECK-ASM-AND-OBJ: blt $r10, $r7, 131068 +# CHECK-ASM: encoding: [0x47,0xfd,0xff,0x61] +blt $r10, $r7, 0x1FFFC + +# CHECK-ASM-AND-OBJ: bge $r10, $r7, -131072 +# CHECK-ASM: encoding: [0x47,0x01,0x00,0x66] +bge $r10, $r7, -0x20000 + +# CHECK-ASM-AND-OBJ: bge $r10, $r7, 131068 +# CHECK-ASM: encoding: [0x47,0xfd,0xff,0x65] +bge $r10, $r7, 0x1FFFC + +# CHECK-ASM-AND-OBJ: bltu $r10, $r7, -131072 +# CHECK-ASM: encoding: [0x47,0x01,0x00,0x6a] +bltu $r10, $r7, -0x20000 + +# CHECK-ASM-AND-OBJ: bltu $r10, $r7, 131068 +# CHECK-ASM: encoding: [0x47,0xfd,0xff,0x69] +bltu $r10, $r7, 0x1FFFC + +# CHECK-ASM-AND-OBJ: bgeu $r10, $r7, -131072 +# CHECK-ASM: encoding: [0x47,0x01,0x00,0x6e] +bgeu $r10, $r7, -0x20000 + +# CHECK-ASM-AND-OBJ: bgeu $r10, $r7, 131068 +# CHECK-ASM: encoding: [0x47,0xfd,0xff,0x6d] +bgeu $r10, $r7, 0x1FFFC + +### simm21 << 2 +# CHECK-ASM-AND-OBJ: beqz $r9, -4194304 +# CHECK-ASM: encoding: [0x30,0x01,0x00,0x40] +beqz $r9, -0x400000 + +# CHECK-ASM-AND-OBJ: beqz $r9, 4194300 +# CHECK-ASM: encoding: [0x2f,0xfd,0xff,0x43] +beqz $r9, 0x3FFFFC + +# CHECK-ASM-AND-OBJ: bnez $r9, -4194304 +# CHECK-ASM: encoding: [0x30,0x01,0x00,0x44] +bnez $r9, -0x400000 + +# CHECK-ASM-AND-OBJ: bnez $r9, 4194300 +# CHECK-ASM: encoding: [0x2f,0xfd,0xff,0x47] +bnez $r9, 0x3FFFFC + +# CHECK-ASM-AND-OBJ: bceqz $fcc6, -4194304 +# CHECK-ASM: encoding: [0xd0,0x00,0x00,0x48] +bceqz $fcc6, -0x400000 + +# CHECK-ASM-AND-OBJ: bceqz $fcc6, 4194300 +# CHECK-ASM: encoding: [0xcf,0xfc,0xff,0x4b] +bceqz $fcc6, 0x3FFFFC + +# CHECK-ASM-AND-OBJ: bcnez $fcc6, -4194304 +# CHECK-ASM: encoding: [0xd0,0x01,0x00,0x48] 
+bcnez $fcc6, -0x400000 + +# CHECK-ASM-AND-OBJ: bcnez $fcc6, 4194300 +# CHECK-ASM: encoding: [0xcf,0xfd,0xff,0x4b] +bcnez $fcc6, 0x3FFFFC + +### simm26 << 2 +# CHECK-ASM-AND-OBJ: b -134217728 +# CHECK-ASM: encoding: [0x00,0x02,0x00,0x50] +b -0x8000000 + +# CHECK-ASM-AND-OBJ: b 134217724 +# CHECK-ASM: encoding: [0xff,0xfd,0xff,0x53] +b 0x7FFFFFC + +# CHECK-ASM-AND-OBJ: bl -134217728 +# CHECK-ASM: encoding: [0x00,0x02,0x00,0x54] +bl -0x8000000 + +# CHECK-ASM-AND-OBJ: bl 134217724 +# CHECK-ASM: encoding: [0xff,0xfd,0xff,0x57] +bl 0x7FFFFFC + diff --git a/test/MC/LoongArch/valid_float.s b/test/MC/LoongArch/valid_float.s new file mode 100644 index 00000000..05ecefdc --- /dev/null +++ b/test/MC/LoongArch/valid_float.s @@ -0,0 +1,297 @@ +# RUN: llvm-mc %s -triple=loongarch64-unknown-linux-gnu -show-encoding | FileCheck %s +# CHECK: fadd.s $f29, $f15, $f25 +# CHECK: encoding: [0xfd,0xe5,0x00,0x01] +fadd.s $f29, $f15, $f25 + +# CHECK: fadd.d $f25, $f7, $f13 +# CHECK: encoding: [0xf9,0x34,0x01,0x01] +fadd.d $f25, $f7, $f13 + +# CHECK: fsub.s $f14, $f6, $f31 +# CHECK: encoding: [0xce,0xfc,0x02,0x01] +fsub.s $f14, $f6, $f31 + +# CHECK: fsub.d $f29, $f1, $f18 +# CHECK: encoding: [0x3d,0x48,0x03,0x01] +fsub.d $f29, $f1, $f18 + +# CHECK: fmul.s $f0, $f7, $f17 +# CHECK: encoding: [0xe0,0xc4,0x04,0x01] +fmul.s $f0, $f7, $f17 + +# CHECK: fmul.d $f4, $f30, $f7 +# CHECK: encoding: [0xc4,0x1f,0x05,0x01] +fmul.d $f4, $f30, $f7 + +# CHECK: fdiv.s $f20, $f24, $f19 +# CHECK: encoding: [0x14,0xcf,0x06,0x01] +fdiv.s $f20, $f24, $f19 + +# CHECK: fdiv.d $f3, $f25, $f28 +# CHECK: encoding: [0x23,0x73,0x07,0x01] +fdiv.d $f3, $f25, $f28 + +# CHECK: fmax.s $f22, $f6, $f27 +# CHECK: encoding: [0xd6,0xec,0x08,0x01] +fmax.s $f22, $f6, $f27 + +# CHECK: fmax.d $f11, $f26, $f13 +# CHECK: encoding: [0x4b,0x37,0x09,0x01] +fmax.d $f11, $f26, $f13 + +# CHECK: fmin.s $f14, $f10, $f19 +# CHECK: encoding: [0x4e,0xcd,0x0a,0x01] +fmin.s $f14, $f10, $f19 + +# CHECK: fmin.d $f1, $f13, $f27 +# CHECK: encoding: [0xa1,0x6d,0x0b,0x01] +fmin.d $f1, $f13, $f27 + +# CHECK: fmaxa.s $f9, $f27, $f31 +# CHECK: encoding: [0x69,0xff,0x0c,0x01] +fmaxa.s $f9, $f27, $f31 + +# CHECK: fmaxa.d $f24, $f13, $f4 +# CHECK: encoding: [0xb8,0x11,0x0d,0x01] +fmaxa.d $f24, $f13, $f4 + +# CHECK: fmina.s $f15, $f18, $f1 +# CHECK: encoding: [0x4f,0x86,0x0e,0x01] +fmina.s $f15, $f18, $f1 + +# CHECK: fmina.d $f18, $f10, $f0 +# CHECK: encoding: [0x52,0x01,0x0f,0x01] +fmina.d $f18, $f10, $f0 + +# CHECK: fscaleb.s $f21, $f23, $f6 +# CHECK: encoding: [0xf5,0x9a,0x10,0x01] +fscaleb.s $f21, $f23, $f6 + +# CHECK: fscaleb.d $f12, $f14, $f26 +# CHECK: encoding: [0xcc,0x69,0x11,0x01] +fscaleb.d $f12, $f14, $f26 + +# CHECK: fcopysign.s $f13, $f24, $f23 +# CHECK: encoding: [0x0d,0xdf,0x12,0x01] +fcopysign.s $f13, $f24, $f23 + +# CHECK: fcopysign.d $f16, $f26, $f6 +# CHECK: encoding: [0x50,0x1b,0x13,0x01] +fcopysign.d $f16, $f26, $f6 + +# CHECK: fabs.s $f28, $f12 +# CHECK: encoding: [0x9c,0x05,0x14,0x01] +fabs.s $f28, $f12 + +# CHECK: fabs.d $f23, $f3 +# CHECK: encoding: [0x77,0x08,0x14,0x01] +fabs.d $f23, $f3 + +# CHECK: fneg.s $f21, $f24 +# CHECK: encoding: [0x15,0x17,0x14,0x01] +fneg.s $f21, $f24 + +# CHECK: fneg.d $f11, $f26 +# CHECK: encoding: [0x4b,0x1b,0x14,0x01] +fneg.d $f11, $f26 + +# CHECK: flogb.s $f31, $f23 +# CHECK: encoding: [0xff,0x26,0x14,0x01] +flogb.s $f31, $f23 + +# CHECK: flogb.d $f21, $f29 +# CHECK: encoding: [0xb5,0x2b,0x14,0x01] +flogb.d $f21, $f29 + +# CHECK: fclass.s $f20, $f9 +# CHECK: encoding: [0x34,0x35,0x14,0x01] +fclass.s $f20, $f9 + +# CHECK: fclass.d 
$f19, $f2 +# CHECK: encoding: [0x53,0x38,0x14,0x01] +fclass.d $f19, $f2 + +# CHECK: fsqrt.s $f27, $f18 +# CHECK: encoding: [0x5b,0x46,0x14,0x01] +fsqrt.s $f27, $f18 + +# CHECK: fsqrt.d $f2, $f11 +# CHECK: encoding: [0x62,0x49,0x14,0x01] +fsqrt.d $f2, $f11 + +# CHECK: frecip.s $f17, $f27 +# CHECK: encoding: [0x71,0x57,0x14,0x01] +frecip.s $f17, $f27 + +# CHECK: frecip.d $f27, $f27 +# CHECK: encoding: [0x7b,0x5b,0x14,0x01] +frecip.d $f27, $f27 + +# CHECK: frsqrt.s $f25, $f12 +# CHECK: encoding: [0x99,0x65,0x14,0x01] +frsqrt.s $f25, $f12 + +# CHECK: frsqrt.d $f22, $f3 +# CHECK: encoding: [0x76,0x68,0x14,0x01] +frsqrt.d $f22, $f3 + +# CHECK: fmov.s $f13, $f23 +# CHECK: encoding: [0xed,0x96,0x14,0x01] +fmov.s $f13, $f23 + +# CHECK: fmov.d $f30, $f9 +# CHECK: encoding: [0x3e,0x99,0x14,0x01] +fmov.d $f30, $f9 + +# CHECK: movgr2fr.w $f6, $tp +# CHECK: encoding: [0x46,0xa4,0x14,0x01] +movgr2fr.w $f6, $tp + +# CHECK: movgr2fr.d $f30, $r11 +# CHECK: encoding: [0x7e,0xa9,0x14,0x01] +movgr2fr.d $f30, $r11 + +# CHECK: movgr2frh.w $f23, $r26 +# CHECK: encoding: [0x57,0xaf,0x14,0x01] +movgr2frh.w $f23, $r26 + +# CHECK: movfr2gr.s $r10, $f22 +# CHECK: encoding: [0xca,0xb6,0x14,0x01] +movfr2gr.s $r10, $f22 + +# CHECK: movfr2gr.d $r26, $f17 +# CHECK: encoding: [0x3a,0xba,0x14,0x01] +movfr2gr.d $r26, $f17 + +# CHECK: movfrh2gr.s $sp, $f26 +# CHECK: encoding: [0x43,0xbf,0x14,0x01] +movfrh2gr.s $sp, $f26 + +# CHECK: movfr2cf $fcc4, $f11 +# CHECK: encoding: [0x64,0xd1,0x14,0x01] +movfr2cf $fcc4, $f11 + +# CHECK: movcf2fr $f16, $fcc0 +# CHECK: encoding: [0x10,0xd4,0x14,0x01] +movcf2fr $f16, $fcc0 + +# CHECK: movgr2cf $fcc5, $ra +# CHECK: encoding: [0x25,0xd8,0x14,0x01] +movgr2cf $fcc5, $ra + +# CHECK: movcf2gr $r21, $fcc7 +# CHECK: encoding: [0xf5,0xdc,0x14,0x01] +movcf2gr $r21, $fcc7 + +# CHECK: fcvt.s.d $f12, $f19 +# CHECK: encoding: [0x6c,0x1a,0x19,0x01] +fcvt.s.d $f12, $f19 + +# CHECK: fcvt.d.s $f10, $f6 +# CHECK: encoding: [0xca,0x24,0x19,0x01] +fcvt.d.s $f10, $f6 + +# CHECK: ftintrm.w.s $f16, $f16 +# CHECK: encoding: [0x10,0x06,0x1a,0x01] +ftintrm.w.s $f16, $f16 + +# CHECK: ftintrm.w.d $f7, $f8 +# CHECK: encoding: [0x07,0x09,0x1a,0x01] +ftintrm.w.d $f7, $f8 + +# CHECK: ftintrm.l.s $f24, $f10 +# CHECK: encoding: [0x58,0x25,0x1a,0x01] +ftintrm.l.s $f24, $f10 + +# CHECK: ftintrm.l.d $f9, $f9 +# CHECK: encoding: [0x29,0x29,0x1a,0x01] +ftintrm.l.d $f9, $f9 + +# CHECK: ftintrp.w.s $f14, $f31 +# CHECK: encoding: [0xee,0x47,0x1a,0x01] +ftintrp.w.s $f14, $f31 + +# CHECK: ftintrp.w.d $f12, $f3 +# CHECK: encoding: [0x6c,0x48,0x1a,0x01] +ftintrp.w.d $f12, $f3 + +# CHECK: ftintrp.l.s $f0, $f16 +# CHECK: encoding: [0x00,0x66,0x1a,0x01] +ftintrp.l.s $f0, $f16 + +# CHECK: ftintrp.l.d $f4, $f29 +# CHECK: encoding: [0xa4,0x6b,0x1a,0x01] +ftintrp.l.d $f4, $f29 + +# CHECK: ftintrz.w.s $f4, $f29 +# CHECK: encoding: [0xa4,0x87,0x1a,0x01] +ftintrz.w.s $f4, $f29 + +# CHECK: ftintrz.w.d $f25, $f24 +# CHECK: encoding: [0x19,0x8b,0x1a,0x01] +ftintrz.w.d $f25, $f24 + +# CHECK: ftintrz.l.s $f23, $f5 +# CHECK: encoding: [0xb7,0xa4,0x1a,0x01] +ftintrz.l.s $f23, $f5 + +# CHECK: ftintrz.l.d $f3, $f10 +# CHECK: encoding: [0x43,0xa9,0x1a,0x01] +ftintrz.l.d $f3, $f10 + +# CHECK: ftintrne.w.s $f4, $f17 +# CHECK: encoding: [0x24,0xc6,0x1a,0x01] +ftintrne.w.s $f4, $f17 + +# CHECK: ftintrne.w.d $f31, $f12 +# CHECK: encoding: [0x9f,0xc9,0x1a,0x01] +ftintrne.w.d $f31, $f12 + +# CHECK: ftintrne.l.s $f22, $f27 +# CHECK: encoding: [0x76,0xe7,0x1a,0x01] +ftintrne.l.s $f22, $f27 + +# CHECK: ftintrne.l.d $f28, $f6 +# CHECK: encoding: 
[0xdc,0xe8,0x1a,0x01] +ftintrne.l.d $f28, $f6 + +# CHECK: ftint.w.s $f21, $f13 +# CHECK: encoding: [0xb5,0x05,0x1b,0x01] +ftint.w.s $f21, $f13 + +# CHECK: ftint.w.d $f3, $f14 +# CHECK: encoding: [0xc3,0x09,0x1b,0x01] +ftint.w.d $f3, $f14 + +# CHECK: ftint.l.s $f31, $f24 +# CHECK: encoding: [0x1f,0x27,0x1b,0x01] +ftint.l.s $f31, $f24 + +# CHECK: ftint.l.d $f16, $f24 +# CHECK: encoding: [0x10,0x2b,0x1b,0x01] +ftint.l.d $f16, $f24 + +# CHECK: ffint.s.w $f30, $f5 +# CHECK: encoding: [0xbe,0x10,0x1d,0x01] +ffint.s.w $f30, $f5 + +# CHECK: ffint.s.l $f6, $f5 +# CHECK: encoding: [0xa6,0x18,0x1d,0x01] +ffint.s.l $f6, $f5 + +# CHECK: ffint.d.w $f24, $f18 +# CHECK: encoding: [0x58,0x22,0x1d,0x01] +ffint.d.w $f24, $f18 + +# CHECK: ffint.d.l $f23, $f26 +# CHECK: encoding: [0x57,0x2b,0x1d,0x01] +ffint.d.l $f23, $f26 + +# CHECK: frint.s $f5, $f17 +# CHECK: encoding: [0x25,0x46,0x1e,0x01] +frint.s $f5, $f17 + +# CHECK: frint.d $f29, $f2 +# CHECK: encoding: [0x5d,0x48,0x1e,0x01] +frint.d $f29, $f2 + diff --git a/test/MC/LoongArch/valid_integer.s b/test/MC/LoongArch/valid_integer.s new file mode 100644 index 00000000..cc78662d --- /dev/null +++ b/test/MC/LoongArch/valid_integer.s @@ -0,0 +1,369 @@ +# RUN: llvm-mc %s -triple=loongarch64-unknown-linux-gnu -show-encoding | FileCheck %s +# CHECK: clo.w $ra, $sp +# CHECK: encoding: [0x61,0x10,0x00,0x00] +clo.w $ra, $sp + +# CHECK: clz.w $r7, $r10 +# CHECK: encoding: [0x47,0x15,0x00,0x00] +clz.w $r7, $r10 + +# CHECK: cto.w $tp, $r6 +# CHECK: encoding: [0xc2,0x18,0x00,0x00] +cto.w $tp, $r6 + +# CHECK: ctz.w $r5, $r22 +# CHECK: encoding: [0xc5,0x1e,0x00,0x00] +ctz.w $r5, $r22 + +# CHECK: clo.d $r29, $ra +# CHECK: encoding: [0x3d,0x20,0x00,0x00] +clo.d $r29, $ra + +# CHECK: clz.d $r26, $r26 +# CHECK: encoding: [0x5a,0x27,0x00,0x00] +clz.d $r26, $r26 + +# CHECK: cto.d $r18, $r20 +# CHECK: encoding: [0x92,0x2a,0x00,0x00] +cto.d $r18, $r20 + +# CHECK: ctz.d $r17, $r10 +# CHECK: encoding: [0x51,0x2d,0x00,0x00] +ctz.d $r17, $r10 + +# CHECK: revb.2h $r20, $r11 +# CHECK: encoding: [0x74,0x31,0x00,0x00] +revb.2h $r20, $r11 + +# CHECK: revb.4h $r13, $r19 +# CHECK: encoding: [0x6d,0x36,0x00,0x00] +revb.4h $r13, $r19 + +# CHECK: revb.2w $r28, $r27 +# CHECK: encoding: [0x7c,0x3b,0x00,0x00] +revb.2w $r28, $r27 + +# CHECK: revb.d $zero, $r23 +# CHECK: encoding: [0xe0,0x3e,0x00,0x00] +revb.d $zero, $r23 + +# CHECK: revh.2w $r28, $r10 +# CHECK: encoding: [0x5c,0x41,0x00,0x00] +revh.2w $r28, $r10 + +# CHECK: revh.d $r9, $r7 +# CHECK: encoding: [0xe9,0x44,0x00,0x00] +revh.d $r9, $r7 + +# CHECK: bitrev.4b $r21, $r27 +# CHECK: encoding: [0x75,0x4b,0x00,0x00] +bitrev.4b $r21, $r27 + +# CHECK: bitrev.8b $r13, $r25 +# CHECK: encoding: [0x2d,0x4f,0x00,0x00] +bitrev.8b $r13, $r25 + +# CHECK: bitrev.w $r25, $r5 +# CHECK: encoding: [0xb9,0x50,0x00,0x00] +bitrev.w $r25, $r5 + +# CHECK: bitrev.d $r19, $r23 +# CHECK: encoding: [0xf3,0x56,0x00,0x00] +bitrev.d $r19, $r23 + +# CHECK: ext.w.h $r23, $r23 +# CHECK: encoding: [0xf7,0x5a,0x00,0x00] +ext.w.h $r23, $r23 + +# CHECK: ext.w.b $r20, $r18 +# CHECK: encoding: [0x54,0x5e,0x00,0x00] +ext.w.b $r20, $r18 + +# CHECK: rdtimel.w $r24, $r4 +# CHECK: encoding: [0x98,0x60,0x00,0x00] +rdtimel.w $r24, $r4 + +# CHECK: rdtimeh.w $r11, $r5 +# CHECK: encoding: [0xab,0x64,0x00,0x00] +rdtimeh.w $r11, $r5 + +# CHECK: rdtime.d $tp, $ra +# CHECK: encoding: [0x22,0x68,0x00,0x00] +rdtime.d $tp, $ra + +# CHECK: cpucfg $sp, $ra +# CHECK: encoding: [0x23,0x6c,0x00,0x00] +cpucfg $sp, $ra + +# CHECK: asrtle.d $r21, $r19 +# CHECK: encoding: [0xa0,0x4e,0x01,0x00] +asrtle.d 
$r21, $r19 + +# CHECK: asrtgt.d $ra, $r19 +# CHECK: encoding: [0x20,0xcc,0x01,0x00] +asrtgt.d $ra, $r19 + +# CHECK: alsl.w $tp, $r17, $tp, 4 +# CHECK: encoding: [0x22,0x8a,0x05,0x00] +alsl.w $tp, $r17, $tp, 4 + +# CHECK: bytepick.w $r29, $zero, $r16, 0 +# CHECK: encoding: [0x1d,0x40,0x08,0x00] +bytepick.w $r29, $zero, $r16, 0 + +# CHECK: bytepick.d $r15, $r17, $r20, 4 +# CHECK: encoding: [0x2f,0x52,0x0e,0x00] +bytepick.d $r15, $r17, $r20, 4 + +# CHECK: add.w $r9, $ra, $r31 +# CHECK: encoding: [0x29,0x7c,0x10,0x00] +add.w $r9, $ra, $r31 + +# CHECK: add.d $tp, $r18, $r27 +# CHECK: encoding: [0x42,0xee,0x10,0x00] +add.d $tp, $r18, $r27 + +# CHECK: sub.w $r21, $r25, $r19 +# CHECK: encoding: [0x35,0x4f,0x11,0x00] +sub.w $r21, $r25, $r19 + +# CHECK: sub.d $r7, $r12, $r7 +# CHECK: encoding: [0x87,0x9d,0x11,0x00] +sub.d $r7, $r12, $r7 + +# CHECK: slt $r29, $r26, $tp +# CHECK: encoding: [0x5d,0x0b,0x12,0x00] +slt $r29, $r26, $tp + +# CHECK: sltu $r11, $r21, $r29 +# CHECK: encoding: [0xab,0xf6,0x12,0x00] +sltu $r11, $r21, $r29 + +# CHECK: maskeqz $r20, $r11, $r18 +# CHECK: encoding: [0x74,0x49,0x13,0x00] +maskeqz $r20, $r11, $r18 + +# CHECK: masknez $r20, $r13, $r26 +# CHECK: encoding: [0xb4,0xe9,0x13,0x00] +masknez $r20, $r13, $r26 + +# CHECK: nor $r5, $r18, $r5 +# CHECK: encoding: [0x45,0x16,0x14,0x00] +nor $r5, $r18, $r5 + +# CHECK: and $r19, $r31, $ra +# CHECK: encoding: [0xf3,0x87,0x14,0x00] +and $r19, $r31, $ra + +# CHECK: or $r17, $r16, $r30 +# CHECK: encoding: [0x11,0x7a,0x15,0x00] +or $r17, $r16, $r30 + +# CHECK: xor $r15, $r19, $r8 +# CHECK: encoding: [0x6f,0xa2,0x15,0x00] +xor $r15, $r19, $r8 + +# CHECK: orn $tp, $sp, $r25 +# CHECK: encoding: [0x62,0x64,0x16,0x00] +orn $tp, $sp, $r25 + +# CHECK: andn $r28, $r25, $r5 +# CHECK: encoding: [0x3c,0x97,0x16,0x00] +andn $r28, $r25, $r5 + +# CHECK: sll.w $r24, $r27, $r23 +# CHECK: encoding: [0x78,0x5f,0x17,0x00] +sll.w $r24, $r27, $r23 + +# CHECK: srl.w $r31, $r17, $r7 +# CHECK: encoding: [0x3f,0x9e,0x17,0x00] +srl.w $r31, $r17, $r7 + +# CHECK: sra.w $r12, $r28, $r10 +# CHECK: encoding: [0x8c,0x2b,0x18,0x00] +sra.w $r12, $r28, $r10 + +# CHECK: sll.d $r20, $r15, $sp +# CHECK: encoding: [0xf4,0x8d,0x18,0x00] +sll.d $r20, $r15, $sp + +# CHECK: srl.d $r14, $r25, $zero +# CHECK: encoding: [0x2e,0x03,0x19,0x00] +srl.d $r14, $r25, $zero + +# CHECK: sra.d $r7, $r22, $r31 +# CHECK: encoding: [0xc7,0xfe,0x19,0x00] +sra.d $r7, $r22, $r31 + +# CHECK: rotr.w $ra, $r26, $r18 +# CHECK: encoding: [0x41,0x4b,0x1b,0x00] +rotr.w $ra, $r26, $r18 + +# CHECK: rotr.d $r31, $sp, $ra +# CHECK: encoding: [0x7f,0x84,0x1b,0x00] +rotr.d $r31, $sp, $ra + +# CHECK: mul.w $r4, $r18, $sp +# CHECK: encoding: [0x44,0x0e,0x1c,0x00] +mul.w $r4, $r18, $sp + +# CHECK: mulh.w $r27, $r23, $zero +# CHECK: encoding: [0xfb,0x82,0x1c,0x00] +mulh.w $r27, $r23, $zero + +# CHECK: mulh.wu $r10, $r17, $r24 +# CHECK: encoding: [0x2a,0x62,0x1d,0x00] +mulh.wu $r10, $r17, $r24 + +# CHECK: mul.d $ra, $r14, $r24 +# CHECK: encoding: [0xc1,0xe1,0x1d,0x00] +mul.d $ra, $r14, $r24 + +# CHECK: mulh.d $r28, $ra, $r27 +# CHECK: encoding: [0x3c,0x6c,0x1e,0x00] +mulh.d $r28, $ra, $r27 + +# CHECK: mulh.du $r13, $r27, $r29 +# CHECK: encoding: [0x6d,0xf7,0x1e,0x00] +mulh.du $r13, $r27, $r29 + +# CHECK: mulw.d.w $r27, $r6, $r17 +# CHECK: encoding: [0xdb,0x44,0x1f,0x00] +mulw.d.w $r27, $r6, $r17 + +# CHECK: mulw.d.wu $r17, $r22, $r30 +# CHECK: encoding: [0xd1,0xfa,0x1f,0x00] +mulw.d.wu $r17, $r22, $r30 + +# CHECK: div.w $r30, $r13, $r25 +# CHECK: encoding: [0xbe,0x65,0x20,0x00] +div.w $r30, $r13, $r25 + +# CHECK: 
mod.w $ra, $r26, $r10 +# CHECK: encoding: [0x41,0xab,0x20,0x00] +mod.w $ra, $r26, $r10 + +# CHECK: div.wu $r19, $r23, $zero +# CHECK: encoding: [0xf3,0x02,0x21,0x00] +div.wu $r19, $r23, $zero + +# CHECK: mod.wu $r27, $r9, $r17 +# CHECK: encoding: [0x3b,0xc5,0x21,0x00] +mod.wu $r27, $r9, $r17 + +# CHECK: div.d $r23, $r6, $r21 +# CHECK: encoding: [0xd7,0x54,0x22,0x00] +div.d $r23, $r6, $r21 + +# CHECK: mod.d $r16, $sp, $r15 +# CHECK: encoding: [0x70,0xbc,0x22,0x00] +mod.d $r16, $sp, $r15 + +# CHECK: div.du $r31, $r24, $r14 +# CHECK: encoding: [0x1f,0x3b,0x23,0x00] +div.du $r31, $r24, $r14 + +# CHECK: mod.du $r25, $r23, $r24 +# CHECK: encoding: [0xf9,0xe2,0x23,0x00] +mod.du $r25, $r23, $r24 + +# CHECK: crc.w.b.w $r24, $r7, $tp +# CHECK: encoding: [0xf8,0x08,0x24,0x00] +crc.w.b.w $r24, $r7, $tp + +# CHECK: crc.w.h.w $r31, $r10, $r18 +# CHECK: encoding: [0x5f,0xc9,0x24,0x00] +crc.w.h.w $r31, $r10, $r18 + +# CHECK: crc.w.w.w $r28, $r6, $r10 +# CHECK: encoding: [0xdc,0x28,0x25,0x00] +crc.w.w.w $r28, $r6, $r10 + +# CHECK: crc.w.d.w $r28, $r11, $r31 +# CHECK: encoding: [0x7c,0xfd,0x25,0x00] +crc.w.d.w $r28, $r11, $r31 + +# CHECK: crcc.w.b.w $r15, $r18, $sp +# CHECK: encoding: [0x4f,0x0e,0x26,0x00] +crcc.w.b.w $r15, $r18, $sp + +# CHECK: crcc.w.h.w $r21, $r29, $r18 +# CHECK: encoding: [0xb5,0xcb,0x26,0x00] +crcc.w.h.w $r21, $r29, $r18 + +# CHECK: crcc.w.w.w $r17, $r14, $r13 +# CHECK: encoding: [0xd1,0x35,0x27,0x00] +crcc.w.w.w $r17, $r14, $r13 + +# CHECK: crcc.w.d.w $r30, $r21, $r27 +# CHECK: encoding: [0xbe,0xee,0x27,0x00] +crcc.w.d.w $r30, $r21, $r27 + +# CHECK: break 23 +# CHECK: encoding: [0x17,0x00,0x2a,0x00] +break 23 + +# CHECK: syscall 2 +# CHECK: encoding: [0x02,0x00,0x2b,0x00] +syscall 2 + +# CHECK: alsl.d $r17, $r11, $r5, 3 +# CHECK: encoding: [0x71,0x15,0x2d,0x00] +alsl.d $r17, $r11, $r5, 3 + +# CHECK: slli.w $r26, $r18, 0 +# CHECK: encoding: [0x5a,0x82,0x40,0x00] +slli.w $r26, $r18, 0 + +# CHECK: slli.d $r10, $r31, 39 +# CHECK: encoding: [0xea,0x9f,0x41,0x00] +slli.d $r10, $r31, 39 + +# CHECK: srli.w $r10, $r14, 30 +# CHECK: encoding: [0xca,0xf9,0x44,0x00] +srli.w $r10, $r14, 30 + +# CHECK: srli.d $r31, $r22, 38 +# CHECK: encoding: [0xdf,0x9a,0x45,0x00] +srli.d $r31, $r22, 38 + +# CHECK: srai.w $r8, $r17, 24 +# CHECK: encoding: [0x28,0xe2,0x48,0x00] +srai.w $r8, $r17, 24 + +# CHECK: srai.d $r9, $r21, 27 +# CHECK: encoding: [0xa9,0x6e,0x49,0x00] +srai.d $r9, $r21, 27 + +# CHECK: rotri.w $r23, $r20, 23 +# CHECK: encoding: [0x97,0xde,0x4c,0x00] +rotri.w $r23, $r20, 23 + +# CHECK: rotri.d $r29, $zero, 7 +# CHECK: encoding: [0x1d,0x1c,0x4d,0x00] +rotri.d $r29, $zero, 7 + +# CHECK: bstrins.w $r8, $r11, 7, 2 +# CHECK: encoding: [0x68,0x09,0x67,0x00] +bstrins.w $r8, $r11, 7, 2 + +# CHECK: bstrins.d $r8, $r11, 7, 2 +# CHECK: encoding: [0x68,0x09,0x87,0x00] +bstrins.d $r8, $r11, 7, 2 + +# CHECK: bstrpick.w $ra, $r9, 10, 4 +# CHECK: encoding: [0x21,0x91,0x6a,0x00] +bstrpick.w $ra, $r9, 10, 4 + +# CHECK: bstrpick.d $r31, $r27, 39, 22 +# CHECK: encoding: [0x7f,0x5b,0xe7,0x00] +bstrpick.d $r31, $r27, 39, 22 + +# CHECK: cpucfg $sp, $r8 +# CHECK: encoding: [0x03,0x6d,0x00,0x00] +cpucfg $sp, $r8 + +# CHECK: alsl.wu $r19, $r8, $r25, 1 +# CHECK: encoding: [0x13,0x65,0x06,0x00] +alsl.wu $r19, $r8, $r25, 1 + diff --git a/test/MC/LoongArch/valid_memory.s b/test/MC/LoongArch/valid_memory.s new file mode 100644 index 00000000..30ea88c9 --- /dev/null +++ b/test/MC/LoongArch/valid_memory.s @@ -0,0 +1,405 @@ +# RUN: llvm-mc %s -triple=loongarch64-unknown-linux-gnu -show-encoding | FileCheck %s +# CHECK: dbar 0 +# 
CHECK: encoding: [0x00,0x00,0x72,0x38] +dbar 0 + +# CHECK: ibar 0 +# CHECK: encoding: [0x00,0x80,0x72,0x38] +ibar 0 + +# CHECK: ll.w $tp, $r27, 220 +# CHECK: encoding: [0x62,0xdf,0x00,0x20] +ll.w $tp, $r27, 220 + +# CHECK: sc.w $r19, $r14, 56 +# CHECK: encoding: [0xd3,0x39,0x00,0x21] +sc.w $r19, $r14, 56 + +# CHECK: ll.d $r25, $r27, 16 +# CHECK: encoding: [0x79,0x13,0x00,0x22] +ll.d $r25, $r27, 16 + +# CHECK: sc.d $r17, $r17, 244 +# CHECK: encoding: [0x31,0xf6,0x00,0x23] +sc.d $r17, $r17, 244 + +# CHECK: ldptr.w $r26, $r6, 60 +# CHECK: encoding: [0xda,0x3c,0x00,0x24] +ldptr.w $r26, $r6, 60 + +# CHECK: stptr.w $r28, $r5, 216 +# CHECK: encoding: [0xbc,0xd8,0x00,0x25] +stptr.w $r28, $r5, 216 + +# CHECK: ldptr.d $r5, $r29, 244 +# CHECK: encoding: [0xa5,0xf7,0x00,0x26] +ldptr.d $r5, $r29, 244 + +# CHECK: stptr.d $r14, $r24, 196 +# CHECK: encoding: [0x0e,0xc7,0x00,0x27] +stptr.d $r14, $r24, 196 + +# CHECK: ld.b $r24, $r8, 21 +# CHECK: encoding: [0x18,0x55,0x00,0x28] +ld.b $r24, $r8, 21 + +# CHECK: ld.h $r7, $r18, 80 +# CHECK: encoding: [0x47,0x42,0x41,0x28] +ld.h $r7, $r18, 80 + +# CHECK: ld.w $r18, $r26, 92 +# CHECK: encoding: [0x52,0x73,0x81,0x28] +ld.w $r18, $r26, 92 + +# CHECK: ld.d $r18, $r20, 159 +# CHECK: encoding: [0x92,0x7e,0xc2,0x28] +ld.d $r18, $r20, 159 + +# CHECK: st.b $sp, $r7, 95 +# CHECK: encoding: [0xe3,0x7c,0x01,0x29] +st.b $sp, $r7, 95 + +# CHECK: st.h $r25, $r16, 122 +# CHECK: encoding: [0x19,0xea,0x41,0x29] +st.h $r25, $r16, 122 + +# CHECK: st.w $r13, $r13, 175 +# CHECK: encoding: [0xad,0xbd,0x82,0x29] +st.w $r13, $r13, 175 + +# CHECK: st.d $r30, $r30, 60 +# CHECK: encoding: [0xde,0xf3,0xc0,0x29] +st.d $r30, $r30, 60 + +# CHECK: ld.bu $r13, $r13, 150 +# CHECK: encoding: [0xad,0x59,0x02,0x2a] +ld.bu $r13, $r13, 150 + +# CHECK: ld.hu $r18, $r29, 198 +# CHECK: encoding: [0xb2,0x1b,0x43,0x2a] +ld.hu $r18, $r29, 198 + +# CHECK: ld.wu $r14, $r19, 31 +# CHECK: encoding: [0x6e,0x7e,0x80,0x2a] +ld.wu $r14, $r19, 31 + +# CHECK: fld.s $f23, $r15, 250 +# CHECK: encoding: [0xf7,0xe9,0x03,0x2b] +fld.s $f23, $r15, 250 + +# CHECK: fst.s $f30, $r19, 230 +# CHECK: encoding: [0x7e,0x9a,0x43,0x2b] +fst.s $f30, $r19, 230 + +# CHECK: fld.d $f22, $r17, 114 +# CHECK: encoding: [0x36,0xca,0x81,0x2b] +fld.d $f22, $r17, 114 + +# CHECK: fst.d $f28, $r7, 198 +# CHECK: encoding: [0xfc,0x18,0xc3,0x2b] +fst.d $f28, $r7, 198 + +# CHECK: ldx.b $r24, $ra, $tp +# CHECK: encoding: [0x38,0x08,0x00,0x38] +ldx.b $r24, $ra, $tp + +# CHECK: ldx.h $r22, $r22, $r17 +# CHECK: encoding: [0xd6,0x46,0x04,0x38] +ldx.h $r22, $r22, $r17 + +# CHECK: ldx.w $r25, $r11, $r23 +# CHECK: encoding: [0x79,0x5d,0x08,0x38] +ldx.w $r25, $r11, $r23 + +# CHECK: ldx.d $r18, $r23, $r20 +# CHECK: encoding: [0xf2,0x52,0x0c,0x38] +ldx.d $r18, $r23, $r20 + +# CHECK: stx.b $r19, $ra, $sp +# CHECK: encoding: [0x33,0x0c,0x10,0x38] +stx.b $r19, $ra, $sp + +# CHECK: stx.h $zero, $r28, $r26 +# CHECK: encoding: [0x80,0x6b,0x14,0x38] +stx.h $zero, $r28, $r26 + +# CHECK: stx.w $r7, $r4, $r31 +# CHECK: encoding: [0x87,0x7c,0x18,0x38] +stx.w $r7, $r4, $r31 + +# CHECK: stx.d $r7, $r31, $r10 +# CHECK: encoding: [0xe7,0x2b,0x1c,0x38] +stx.d $r7, $r31, $r10 + +# CHECK: ldx.bu $r11, $r9, $r9 +# CHECK: encoding: [0x2b,0x25,0x20,0x38] +ldx.bu $r11, $r9, $r9 + +# CHECK: ldx.hu $r22, $r23, $r27 +# CHECK: encoding: [0xf6,0x6e,0x24,0x38] +ldx.hu $r22, $r23, $r27 + +# CHECK: ldx.wu $r8, $r24, $r28 +# CHECK: encoding: [0x08,0x73,0x28,0x38] +ldx.wu $r8, $r24, $r28 + +# CHECK: fldx.s $f1, $r15, $r19 +# CHECK: encoding: [0xe1,0x4d,0x30,0x38] +fldx.s $f1, $r15, $r19 + +# 
CHECK: fldx.d $f27, $r13, $r31 +# CHECK: encoding: [0xbb,0x7d,0x34,0x38] +fldx.d $f27, $r13, $r31 + +# CHECK: fstx.s $f26, $sp, $r22 +# CHECK: encoding: [0x7a,0x58,0x38,0x38] +fstx.s $f26, $sp, $r22 + +# CHECK: fstx.d $f6, $r15, $r17 +# CHECK: encoding: [0xe6,0x45,0x3c,0x38] +fstx.d $f6, $r15, $r17 + +# CHECK: amswap_db.w $r6, $r12, $r24, 0 +# CHECK: encoding: [0x06,0x33,0x69,0x38] +amswap_db.w $r6, $r12, $r24, 0 + +# CHECK: amswap_db.d $tp, $r14, $r22, 0 +# CHECK: encoding: [0xc2,0xba,0x69,0x38] +amswap_db.d $tp, $r14, $r22, 0 + +# CHECK: amadd_db.w $r8, $r12, $r21, 0 +# CHECK: encoding: [0xa8,0x32,0x6a,0x38] +amadd_db.w $r8, $r12, $r21, 0 + +# CHECK: amadd_db.d $r5, $r17, $r29, 0 +# CHECK: encoding: [0xa5,0xc7,0x6a,0x38] +amadd_db.d $r5, $r17, $r29, 0 + +# CHECK: amand_db.w $r4, $r19, $r22, 0 +# CHECK: encoding: [0xc4,0x4e,0x6b,0x38] +amand_db.w $r4, $r19, $r22, 0 + +# CHECK: amand_db.d $r10, $r18, $r29, 0 +# CHECK: encoding: [0xaa,0xcb,0x6b,0x38] +amand_db.d $r10, $r18, $r29, 0 + +# CHECK: amor_db.w $r6, $r16, $r23, 0 +# CHECK: encoding: [0xe6,0x42,0x6c,0x38] +amor_db.w $r6, $r16, $r23, 0 + +# CHECK: amor_db.d $sp, $r16, $r24, 0 +# CHECK: encoding: [0x03,0xc3,0x6c,0x38] +amor_db.d $sp, $r16, $r24, 0 + +# CHECK: amxor_db.w $tp, $r15, $r23, 0 +# CHECK: encoding: [0xe2,0x3e,0x6d,0x38] +amxor_db.w $tp, $r15, $r23, 0 + +# CHECK: amxor_db.d $r8, $r20, $r28, 0 +# CHECK: encoding: [0x88,0xd3,0x6d,0x38] +amxor_db.d $r8, $r20, $r28, 0 + +# CHECK: ammax_db.w $ra, $r11, $r23, 0 +# CHECK: encoding: [0xe1,0x2e,0x6e,0x38] +ammax_db.w $ra, $r11, $r23, 0 + +# CHECK: ammax_db.d $r9, $r20, $r27, 0 +# CHECK: encoding: [0x69,0xd3,0x6e,0x38] +ammax_db.d $r9, $r20, $r27, 0 + +# CHECK: ammin_db.w $r9, $r14, $r23, 0 +# CHECK: encoding: [0xe9,0x3a,0x6f,0x38] +ammin_db.w $r9, $r14, $r23, 0 + +# CHECK: ammin_db.d $r9, $r13, $r22, 0 +# CHECK: encoding: [0xc9,0xb6,0x6f,0x38] +ammin_db.d $r9, $r13, $r22, 0 + +# CHECK: ammax_db.wu $r9, $r11, $r22, 0 +# CHECK: encoding: [0xc9,0x2e,0x70,0x38] +ammax_db.wu $r9, $r11, $r22, 0 + +# CHECK: ammax_db.du $r6, $r16, $r25, 0 +# CHECK: encoding: [0x26,0xc3,0x70,0x38] +ammax_db.du $r6, $r16, $r25, 0 + +# CHECK: ammin_db.wu $r8, $r18, $r30, 0 +# CHECK: encoding: [0xc8,0x4b,0x71,0x38] +ammin_db.wu $r8, $r18, $r30, 0 + +# CHECK: ammin_db.du $r7, $r16, $r25, 0 +# CHECK: encoding: [0x27,0xc3,0x71,0x38] +ammin_db.du $r7, $r16, $r25, 0 + +# CHECK: amswap.w $r6, $r12, $r24, 0 +# CHECK: encoding: [0x06,0x33,0x60,0x38] +amswap.w $r6, $r12, $r24, 0 + +# CHECK: amswap.d $tp, $r14, $r22, 0 +# CHECK: encoding: [0xc2,0xba,0x60,0x38] +amswap.d $tp, $r14, $r22, 0 + +# CHECK: amadd.w $r8, $r12, $r21, 0 +# CHECK: encoding: [0xa8,0x32,0x61,0x38] +amadd.w $r8, $r12, $r21, 0 + +# CHECK: amadd.d $r5, $r17, $r29, 0 +# CHECK: encoding: [0xa5,0xc7,0x61,0x38] +amadd.d $r5, $r17, $r29, 0 + +# CHECK: amand.w $r4, $r19, $r22, 0 +# CHECK: encoding: [0xc4,0x4e,0x62,0x38] +amand.w $r4, $r19, $r22, 0 + +# CHECK: amand.d $r10, $r18, $r29, 0 +# CHECK: encoding: [0xaa,0xcb,0x62,0x38] +amand.d $r10, $r18, $r29, 0 + +# CHECK: amor.w $r6, $r16, $r23, 0 +# CHECK: encoding: [0xe6,0x42,0x63,0x38] +amor.w $r6, $r16, $r23, 0 + +# CHECK: amor.d $sp, $r16, $r24, 0 +# CHECK: encoding: [0x03,0xc3,0x63,0x38] +amor.d $sp, $r16, $r24, 0 + +# CHECK: amxor.w $tp, $r15, $r23, 0 +# CHECK: encoding: [0xe2,0x3e,0x64,0x38] +amxor.w $tp, $r15, $r23, 0 + +# CHECK: amxor.d $r8, $r20, $r28, 0 +# CHECK: encoding: [0x88,0xd3,0x64,0x38] +amxor.d $r8, $r20, $r28, 0 + +# CHECK: ammax.w $ra, $r11, $r23, 0 +# CHECK: encoding: [0xe1,0x2e,0x65,0x38] 
+ammax.w $ra, $r11, $r23, 0 + +# CHECK: ammax.d $r9, $r20, $r27, 0 +# CHECK: encoding: [0x69,0xd3,0x65,0x38] +ammax.d $r9, $r20, $r27, 0 + +# CHECK: ammin.w $r9, $r14, $r23, 0 +# CHECK: encoding: [0xe9,0x3a,0x66,0x38] +ammin.w $r9, $r14, $r23, 0 + +# CHECK: ammin.d $r9, $r13, $r22, 0 +# CHECK: encoding: [0xc9,0xb6,0x66,0x38] +ammin.d $r9, $r13, $r22, 0 + +# CHECK: ammax.wu $r9, $r11, $r22, 0 +# CHECK: encoding: [0xc9,0x2e,0x67,0x38] +ammax.wu $r9, $r11, $r22, 0 + +# CHECK: ammax.du $r6, $r16, $r25, 0 +# CHECK: encoding: [0x26,0xc3,0x67,0x38] +ammax.du $r6, $r16, $r25, 0 + +# CHECK: ammin.wu $r8, $r18, $r30, 0 +# CHECK: encoding: [0xc8,0x4b,0x68,0x38] +ammin.wu $r8, $r18, $r30, 0 + +# CHECK: ammin.du $r7, $r16, $r25, 0 +# CHECK: encoding: [0x27,0xc3,0x68,0x38] +ammin.du $r7, $r16, $r25, 0 + +# CHECK: fldgt.s $f3, $r27, $r13 +# CHECK: encoding: [0x63,0x37,0x74,0x38] +fldgt.s $f3, $r27, $r13 + +# CHECK: fldgt.d $f26, $r5, $r31 +# CHECK: encoding: [0xba,0xfc,0x74,0x38] +fldgt.d $f26, $r5, $r31 + +# CHECK: fldle.s $f24, $r29, $r17 +# CHECK: encoding: [0xb8,0x47,0x75,0x38] +fldle.s $f24, $r29, $r17 + +# CHECK: fldle.d $f3, $r15, $r22 +# CHECK: encoding: [0xe3,0xd9,0x75,0x38] +fldle.d $f3, $r15, $r22 + +# CHECK: fstgt.s $f31, $r13, $r30 +# CHECK: encoding: [0xbf,0x79,0x76,0x38] +fstgt.s $f31, $r13, $r30 + +# CHECK: fstgt.d $f13, $r11, $r26 +# CHECK: encoding: [0x6d,0xe9,0x76,0x38] +fstgt.d $f13, $r11, $r26 + +# CHECK: fstle.s $f13, $r13, $r7 +# CHECK: encoding: [0xad,0x1d,0x77,0x38] +fstle.s $f13, $r13, $r7 + +# CHECK: fstle.d $f18, $r9, $r13 +# CHECK: encoding: [0x32,0xb5,0x77,0x38] +fstle.d $f18, $r9, $r13 + +# CHECK: preld 10, $zero, 23 +# CHECK: encoding: [0x0a,0x5c,0xc0,0x2a] +preld 10, $zero, 23 + +# CHECK: ldgt.b $r6, $r6, $r29 +# CHECK: encoding: [0xc6,0x74,0x78,0x38] +ldgt.b $r6, $r6, $r29 + +# CHECK: ldgt.h $r5, $r31, $ra +# CHECK: encoding: [0xe5,0x87,0x78,0x38] +ldgt.h $r5, $r31, $ra + +# CHECK: ldgt.w $r15, $r26, $r8 +# CHECK: encoding: [0x4f,0x23,0x79,0x38] +ldgt.w $r15, $r26, $r8 + +# CHECK: ldgt.d $r23, $r25, $r31 +# CHECK: encoding: [0x37,0xff,0x79,0x38] +ldgt.d $r23, $r25, $r31 + +# CHECK: ldle.b $r9, $r12, $r15 +# CHECK: encoding: [0x89,0x3d,0x7a,0x38] +ldle.b $r9, $r12, $r15 + +# CHECK: ldle.h $r11, $r11, $r23 +# CHECK: encoding: [0x6b,0xdd,0x7a,0x38] +ldle.h $r11, $r11, $r23 + +# CHECK: ldle.w $r24, $tp, $tp +# CHECK: encoding: [0x58,0x08,0x7b,0x38] +ldle.w $r24, $tp, $tp + +# CHECK: ldle.d $r20, $r15, $r16 +# CHECK: encoding: [0xf4,0xc1,0x7b,0x38] +ldle.d $r20, $r15, $r16 + +# CHECK: stgt.b $r27, $r19, $r20 +# CHECK: encoding: [0x7b,0x52,0x7c,0x38] +stgt.b $r27, $r19, $r20 + +# CHECK: stgt.h $r16, $r4, $r6 +# CHECK: encoding: [0x90,0x98,0x7c,0x38] +stgt.h $r16, $r4, $r6 + +# CHECK: stgt.w $r31, $r28, $r14 +# CHECK: encoding: [0x9f,0x3b,0x7d,0x38] +stgt.w $r31, $r28, $r14 + +# CHECK: stgt.d $r30, $r21, $r24 +# CHECK: encoding: [0xbe,0xe2,0x7d,0x38] +stgt.d $r30, $r21, $r24 + +# CHECK: stle.b $r10, $r4, $r16 +# CHECK: encoding: [0x8a,0x40,0x7e,0x38] +stle.b $r10, $r4, $r16 + +# CHECK: stle.h $r17, $r17, $r21 +# CHECK: encoding: [0x31,0xd6,0x7e,0x38] +stle.h $r17, $r17, $r21 + +# CHECK: stle.w $r23, $r28, $r29 +# CHECK: encoding: [0x97,0x77,0x7f,0x38] +stle.w $r23, $r28, $r29 + +# CHECK: stle.d $r25, $r24, $r29 +# CHECK: encoding: [0x19,0xf7,0x7f,0x38] +stle.d $r25, $r24, $r29 + diff --git a/test/MC/LoongArch/valid_priv.s b/test/MC/LoongArch/valid_priv.s new file mode 100644 index 00000000..57a252a8 --- /dev/null +++ b/test/MC/LoongArch/valid_priv.s @@ -0,0 +1,125 @@ +# RUN: 
llvm-mc %s -triple=loongarch64-unknown-linux-gnu -show-encoding | FileCheck %s +# CHECK: csrrd $r26, 30 +# CHECK: encoding: [0x1a,0x78,0x00,0x04] +csrrd $r26, 30 + +# CHECK: csrwr $r24, 194 +# CHECK: encoding: [0x38,0x08,0x03,0x04] +csrwr $r24, 194 + +# CHECK: csrxchg $r6, $r27, 214 +# CHECK: encoding: [0x66,0x5b,0x03,0x04] +csrxchg $r6, $r27, 214 + +# CHECK: cacop 0, $r10, 27 +# CHECK: encoding: [0x40,0x6d,0x00,0x06] +cacop 0, $r10, 27 + +# CHECK: lddir $r12, $r30, 92 +# CHECK: encoding: [0xcc,0x73,0x41,0x06] +lddir $r12, $r30, 92 + +# CHECK: ldpte $r18, 200 +# CHECK: encoding: [0x40,0x22,0x47,0x06] +ldpte $r18, 200 + +# CHECK: iocsrrd.b $r26, $r24 +# CHECK: encoding: [0x1a,0x03,0x48,0x06] +iocsrrd.b $r26, $r24 + +# CHECK: iocsrrd.h $r5, $r27 +# CHECK: encoding: [0x65,0x07,0x48,0x06] +iocsrrd.h $r5, $r27 + +# CHECK: iocsrrd.w $r10, $r20 +# CHECK: encoding: [0x8a,0x0a,0x48,0x06] +iocsrrd.w $r10, $r20 + +# CHECK: iocsrrd.d $r17, $r25 +# CHECK: encoding: [0x31,0x0f,0x48,0x06] +iocsrrd.d $r17, $r25 + +# CHECK: iocsrwr.b $r4, $r23 +# CHECK: encoding: [0xe4,0x12,0x48,0x06] +iocsrwr.b $r4, $r23 + +# CHECK: iocsrwr.h $r11, $zero +# CHECK: encoding: [0x0b,0x14,0x48,0x06] +iocsrwr.h $r11, $zero + +# CHECK: iocsrwr.w $r20, $r26 +# CHECK: encoding: [0x54,0x1b,0x48,0x06] +iocsrwr.w $r20, $r26 + +# CHECK: iocsrwr.d $r20, $r7 +# CHECK: encoding: [0xf4,0x1c,0x48,0x06] +iocsrwr.d $r20, $r7 + +# CHECK: tlbclr +# CHECK: encoding: [0x00,0x20,0x48,0x06] +tlbclr + +# CHECK: tlbflush +# CHECK: encoding: [0x00,0x24,0x48,0x06] +tlbflush + +# CHECK: tlbsrch +# CHECK: encoding: [0x00,0x28,0x48,0x06] +tlbsrch + +# CHECK: tlbrd +# CHECK: encoding: [0x00,0x2c,0x48,0x06] +tlbrd + +# CHECK: tlbwr +# CHECK: encoding: [0x00,0x30,0x48,0x06] +tlbwr + +# CHECK: tlbfill +# CHECK: encoding: [0x00,0x34,0x48,0x06] +tlbfill + +# CHECK: ertn +# CHECK: encoding: [0x00,0x38,0x48,0x06] +ertn + +# CHECK: idle 204 +# CHECK: encoding: [0xcc,0x80,0x48,0x06] +idle 204 + +# CHECK: invtlb 16, $r29, $r25 +# CHECK: encoding: [0xb0,0xe7,0x49,0x06] +invtlb 16, $r29, $r25 + +# CHECK: rdtimel.w $r30, $r19 +# CHECK: encoding: [0x7e,0x62,0x00,0x00] +rdtimel.w $r30, $r19 + +# CHECK: rdtimeh.w $r19, $r14 +# CHECK: encoding: [0xd3,0x65,0x00,0x00] +rdtimeh.w $r19, $r14 + +# CHECK: rdtime.d $tp, $r15 +# CHECK: encoding: [0xe2,0x69,0x00,0x00] +rdtime.d $tp, $r15 + +# CHECK: asrtle.d $r12, $r17 +# CHECK: encoding: [0x80,0x45,0x01,0x00] +asrtle.d $r12, $r17 + +# CHECK: asrtgt.d $r20, $r20 +# CHECK: encoding: [0x80,0xd2,0x01,0x00] +asrtgt.d $r20, $r20 + +# CHECK: break 199 +# CHECK: encoding: [0xc7,0x00,0x2a,0x00] +break 199 + +# CHECK: dbcl 201 +# CHECK: encoding: [0xc9,0x80,0x2a,0x00] +dbcl 201 + +# CHECK: syscall 100 +# CHECK: encoding: [0x64,0x00,0x2b,0x00] +syscall 100 + diff --git a/test/Object/LoongArch/lit.local.cfg b/test/Object/LoongArch/lit.local.cfg new file mode 100644 index 00000000..2b5a4893 --- /dev/null +++ b/test/Object/LoongArch/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'LoongArch' in config.root.targets: + config.unsupported = True diff --git a/test/tools/llvm-readobj/ELF/loongarch-eflags.test b/test/tools/llvm-readobj/ELF/loongarch-eflags.test new file mode 100644 index 00000000..824dcb2c --- /dev/null +++ b/test/tools/llvm-readobj/ELF/loongarch-eflags.test @@ -0,0 +1,103 @@ +## Check llvm-readobj's ability to decode all possible LoongArch e_flags field +## values. + +## Not all combinations covered here exist in reality (such as the v0 ILP32* +## objects) but they are included nevertheless for completeness. 
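The flag layout exercised here is small: the low two bits of e_flags select the floating-point ABI, and bit 6 marks object-ABI v1. As a reading aid for the expected strings, the following Python sketch reproduces the decoding. It is an illustration derived only from the values asserted in the CHECK lines below, not from the llvm-readobj/llvm-readelf sources.

```python
# Illustrative decoder for the LoongArch e_flags checked by this test.
# Flag values are copied from the CHECK lines; treat the LLVM
# implementation, not this sketch, as authoritative.
FLOAT_ABI = {0x1: "SOFT-FLOAT", 0x2: "SINGLE-FLOAT", 0x3: "DOUBLE-FLOAT"}
EF_LOONGARCH_OBJABI_V1 = 0x40

def decode_eflags(flags: int) -> str:
    parts = [hex(flags)]
    modifier = flags & 0x3  # two-bit floating-point ABI field
    if modifier in FLOAT_ABI:
        parts.append(FLOAT_ABI[modifier])
    if flags & EF_LOONGARCH_OBJABI_V1:
        parts.append("OBJ-v1")
    return ", ".join(parts)

assert decode_eflags(0x43) == "0x43, DOUBLE-FLOAT, OBJ-v1"
assert decode_eflags(0x2) == "0x2, SINGLE-FLOAT"
```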
+
+# RUN: yaml2obj %s -o %t-lp64s -DCLASS=64 -DABI_MODIFIER=SOFT -DOBJABI_VER=0
+# RUN: llvm-readobj -h %t-lp64s | FileCheck --check-prefixes=READOBJ-LP64,READOBJ-SOFT-V0 %s
+# RUN: llvm-readelf -h %t-lp64s | FileCheck --check-prefixes=READELF-LP64,READELF-SOFT-V0 --match-full-lines %s
+
+# RUN: yaml2obj %s -o %t-lp64f -DCLASS=64 -DABI_MODIFIER=SINGLE -DOBJABI_VER=0
+# RUN: llvm-readobj -h %t-lp64f | FileCheck --check-prefixes=READOBJ-LP64,READOBJ-SINGLE-V0 %s
+# RUN: llvm-readelf -h %t-lp64f | FileCheck --check-prefixes=READELF-LP64,READELF-SINGLE-V0 --match-full-lines %s
+
+# RUN: yaml2obj %s -o %t-lp64d -DCLASS=64 -DABI_MODIFIER=DOUBLE -DOBJABI_VER=0
+# RUN: llvm-readobj -h %t-lp64d | FileCheck --check-prefixes=READOBJ-LP64,READOBJ-DOUBLE-V0 %s
+# RUN: llvm-readelf -h %t-lp64d | FileCheck --check-prefixes=READELF-LP64,READELF-DOUBLE-V0 --match-full-lines %s
+
+# RUN: yaml2obj %s -o %t-ilp32s -DCLASS=32 -DABI_MODIFIER=SOFT -DOBJABI_VER=0
+# RUN: llvm-readobj -h %t-ilp32s | FileCheck --check-prefixes=READOBJ-ILP32,READOBJ-SOFT-V0 %s
+# RUN: llvm-readelf -h %t-ilp32s | FileCheck --check-prefixes=READELF-ILP32,READELF-SOFT-V0 --match-full-lines %s
+
+# RUN: yaml2obj %s -o %t-ilp32f -DCLASS=32 -DABI_MODIFIER=SINGLE -DOBJABI_VER=0
+# RUN: llvm-readobj -h %t-ilp32f | FileCheck --check-prefixes=READOBJ-ILP32,READOBJ-SINGLE-V0 %s
+# RUN: llvm-readelf -h %t-ilp32f | FileCheck --check-prefixes=READELF-ILP32,READELF-SINGLE-V0 --match-full-lines %s
+
+# RUN: yaml2obj %s -o %t-ilp32d -DCLASS=32 -DABI_MODIFIER=DOUBLE -DOBJABI_VER=0
+# RUN: llvm-readobj -h %t-ilp32d | FileCheck --check-prefixes=READOBJ-ILP32,READOBJ-DOUBLE-V0 %s
+# RUN: llvm-readelf -h %t-ilp32d | FileCheck --check-prefixes=READELF-ILP32,READELF-DOUBLE-V0 --match-full-lines %s
+
+# RUN: yaml2obj %s -o %t-lp64s -DCLASS=64 -DABI_MODIFIER=SOFT -DOBJABI_VER=1
+# RUN: llvm-readobj -h %t-lp64s | FileCheck --check-prefixes=READOBJ-LP64,READOBJ-SOFT-V1 %s
+# RUN: llvm-readelf -h %t-lp64s | FileCheck --check-prefixes=READELF-LP64,READELF-SOFT-V1 --match-full-lines %s
+
+# RUN: yaml2obj %s -o %t-lp64f -DCLASS=64 -DABI_MODIFIER=SINGLE -DOBJABI_VER=1
+# RUN: llvm-readobj -h %t-lp64f | FileCheck --check-prefixes=READOBJ-LP64,READOBJ-SINGLE-V1 %s
+# RUN: llvm-readelf -h %t-lp64f | FileCheck --check-prefixes=READELF-LP64,READELF-SINGLE-V1 --match-full-lines %s
+
+# RUN: yaml2obj %s -o %t-lp64d -DCLASS=64 -DABI_MODIFIER=DOUBLE -DOBJABI_VER=1
+# RUN: llvm-readobj -h %t-lp64d | FileCheck --check-prefixes=READOBJ-LP64,READOBJ-DOUBLE-V1 %s
+# RUN: llvm-readelf -h %t-lp64d | FileCheck --check-prefixes=READELF-LP64,READELF-DOUBLE-V1 --match-full-lines %s
+
+# RUN: yaml2obj %s -o %t-ilp32s -DCLASS=32 -DABI_MODIFIER=SOFT -DOBJABI_VER=1
+# RUN: llvm-readobj -h %t-ilp32s | FileCheck --check-prefixes=READOBJ-ILP32,READOBJ-SOFT-V1 %s
+# RUN: llvm-readelf -h %t-ilp32s | FileCheck --check-prefixes=READELF-ILP32,READELF-SOFT-V1 --match-full-lines %s
+
+# RUN: yaml2obj %s -o %t-ilp32f -DCLASS=32 -DABI_MODIFIER=SINGLE -DOBJABI_VER=1
+# RUN: llvm-readobj -h %t-ilp32f | FileCheck --check-prefixes=READOBJ-ILP32,READOBJ-SINGLE-V1 %s
+# RUN: llvm-readelf -h %t-ilp32f | FileCheck --check-prefixes=READELF-ILP32,READELF-SINGLE-V1 --match-full-lines %s
+
+# RUN: yaml2obj %s -o %t-ilp32d -DCLASS=32 -DABI_MODIFIER=DOUBLE -DOBJABI_VER=1
+# RUN: llvm-readobj -h %t-ilp32d | FileCheck --check-prefixes=READOBJ-ILP32,READOBJ-DOUBLE-V1 %s
+# RUN: llvm-readelf -h %t-ilp32d | FileCheck --check-prefixes=READELF-ILP32,READELF-DOUBLE-V1 --match-full-lines %s
+
+# READOBJ-LP64: Class: 64-bit (0x2)
+# READELF-LP64: Class: ELF64
+# READOBJ-ILP32: Class: 32-bit (0x1)
+# READELF-ILP32: Class: ELF32
+
+# READOBJ-SOFT-V0: Flags [ (0x1)
+# READOBJ-SOFT-V0-NEXT: EF_LOONGARCH_ABI_SOFT_FLOAT (0x1)
+# READOBJ-SOFT-V0-NEXT: ]
+
+# READOBJ-SINGLE-V0: Flags [ (0x2)
+# READOBJ-SINGLE-V0-NEXT: EF_LOONGARCH_ABI_SINGLE_FLOAT (0x2)
+# READOBJ-SINGLE-V0-NEXT: ]
+
+# READOBJ-DOUBLE-V0: Flags [ (0x3)
+# READOBJ-DOUBLE-V0-NEXT: EF_LOONGARCH_ABI_DOUBLE_FLOAT (0x3)
+# READOBJ-DOUBLE-V0-NEXT: ]
+
+# READOBJ-SOFT-V1: Flags [ (0x41)
+# READOBJ-SOFT-V1-NEXT: EF_LOONGARCH_ABI_SOFT_FLOAT (0x1)
+# READOBJ-SOFT-V1-NEXT: EF_LOONGARCH_OBJABI_V1 (0x40)
+# READOBJ-SOFT-V1-NEXT: ]
+
+# READOBJ-SINGLE-V1: Flags [ (0x42)
+# READOBJ-SINGLE-V1-NEXT: EF_LOONGARCH_ABI_SINGLE_FLOAT (0x2)
+# READOBJ-SINGLE-V1-NEXT: EF_LOONGARCH_OBJABI_V1 (0x40)
+# READOBJ-SINGLE-V1-NEXT: ]
+
+# READOBJ-DOUBLE-V1: Flags [ (0x43)
+# READOBJ-DOUBLE-V1-NEXT: EF_LOONGARCH_ABI_DOUBLE_FLOAT (0x3)
+# READOBJ-DOUBLE-V1-NEXT: EF_LOONGARCH_OBJABI_V1 (0x40)
+# READOBJ-DOUBLE-V1-NEXT: ]
+
+# READELF-SOFT-V0: Flags: 0x1, SOFT-FLOAT
+# READELF-SINGLE-V0: Flags: 0x2, SINGLE-FLOAT
+# READELF-DOUBLE-V0: Flags: 0x3, DOUBLE-FLOAT
+# READELF-SOFT-V1: Flags: 0x41, SOFT-FLOAT, OBJ-v1
+# READELF-SINGLE-V1: Flags: 0x42, SINGLE-FLOAT, OBJ-v1
+# READELF-DOUBLE-V1: Flags: 0x43, DOUBLE-FLOAT, OBJ-v1
+
+--- !ELF
+FileHeader:
+  Class: ELFCLASS[[CLASS]]
+  Data: ELFDATA2LSB
+  Type: ET_EXEC
+  Machine: EM_LOONGARCH
+  Flags: [
+    EF_LOONGARCH_ABI_[[ABI_MODIFIER]]_FLOAT,
+    EF_LOONGARCH_OBJABI_V[[OBJABI_VER]],
+  ]
diff --git a/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test b/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test
new file mode 100644
index 00000000..c26fae7e
--- /dev/null
+++ b/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test
@@ -0,0 +1,195 @@
+## Test that llvm-readobj/llvm-readelf shows proper relocation type
+## names and values for the loongarch64 target.
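Every relocation in the YAML document below must render as an exact `NAME (VALUE)` pair. The Python spot-check below hand-copies a few entries from the CHECK lines that follow; the authoritative table is the R_LARCH_* enumeration in ELFRelocs/LoongArch.def. Note the reserved gaps in the numbering (13 through 19, and 59 through 63).

```python
# Excerpt of the LoongArch relocation numbering asserted by this test.
# Only a handful of entries are reproduced; values are from the CHECK
# lines below, not from the LLVM sources.
R_LARCH = {
    0: "R_LARCH_NONE",
    1: "R_LARCH_32",
    2: "R_LARCH_64",
    12: "R_LARCH_IRELATIVE",
    20: "R_LARCH_MARK_LA",    # numbering resumes after the 13-19 gap
    58: "R_LARCH_GNU_VTENTRY",
    64: "R_LARCH_B16",        # numbering resumes after the 59-63 gap
    99: "R_LARCH_32_PCREL",
    100: "R_LARCH_RELAX",
}

def render(value: int) -> str:
    # llvm-readobj --expand-relocs prints one "Type: NAME (VALUE)" line
    # per relocation entry.
    return "Type: %s (%d)" % (R_LARCH[value], value)

assert render(100) == "Type: R_LARCH_RELAX (100)"
assert render(64) == "Type: R_LARCH_B16 (64)"
```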
+ +# RUN: yaml2obj %s -o %t-loongarch64.o +# RUN: llvm-readobj -r --expand-relocs %t-loongarch64.o | FileCheck %s + +# CHECK: Type: R_LARCH_NONE (0) +# CHECK: Type: R_LARCH_32 (1) +# CHECK: Type: R_LARCH_64 (2) +# CHECK: Type: R_LARCH_RELATIVE (3) +# CHECK: Type: R_LARCH_COPY (4) +# CHECK: Type: R_LARCH_JUMP_SLOT (5) +# CHECK: Type: R_LARCH_TLS_DTPMOD32 (6) +# CHECK: Type: R_LARCH_TLS_DTPMOD64 (7) +# CHECK: Type: R_LARCH_TLS_DTPREL32 (8) +# CHECK: Type: R_LARCH_TLS_DTPREL64 (9) +# CHECK: Type: R_LARCH_TLS_TPREL32 (10) +# CHECK: Type: R_LARCH_TLS_TPREL64 (11) +# CHECK: Type: R_LARCH_IRELATIVE (12) +# CHECK: Type: R_LARCH_MARK_LA (20) +# CHECK: Type: R_LARCH_MARK_PCREL (21) +# CHECK: Type: R_LARCH_SOP_PUSH_PCREL (22) +# CHECK: Type: R_LARCH_SOP_PUSH_ABSOLUTE (23) +# CHECK: Type: R_LARCH_SOP_PUSH_DUP (24) +# CHECK: Type: R_LARCH_SOP_PUSH_GPREL (25) +# CHECK: Type: R_LARCH_SOP_PUSH_TLS_TPREL (26) +# CHECK: Type: R_LARCH_SOP_PUSH_TLS_GOT (27) +# CHECK: Type: R_LARCH_SOP_PUSH_TLS_GD (28) +# CHECK: Type: R_LARCH_SOP_PUSH_PLT_PCREL (29) +# CHECK: Type: R_LARCH_SOP_ASSERT (30) +# CHECK: Type: R_LARCH_SOP_NOT (31) +# CHECK: Type: R_LARCH_SOP_SUB (32) +# CHECK: Type: R_LARCH_SOP_SL (33) +# CHECK: Type: R_LARCH_SOP_SR (34) +# CHECK: Type: R_LARCH_SOP_ADD (35) +# CHECK: Type: R_LARCH_SOP_AND (36) +# CHECK: Type: R_LARCH_SOP_IF_ELSE (37) +# CHECK: Type: R_LARCH_SOP_POP_32_S_10_5 (38) +# CHECK: Type: R_LARCH_SOP_POP_32_U_10_12 (39) +# CHECK: Type: R_LARCH_SOP_POP_32_S_10_12 (40) +# CHECK: Type: R_LARCH_SOP_POP_32_S_10_16 (41) +# CHECK: Type: R_LARCH_SOP_POP_32_S_10_16_S2 (42) +# CHECK: Type: R_LARCH_SOP_POP_32_S_5_20 (43) +# CHECK: Type: R_LARCH_SOP_POP_32_S_0_5_10_16_S2 (44) +# CHECK: Type: R_LARCH_SOP_POP_32_S_0_10_10_16_S2 (45) +# CHECK: Type: R_LARCH_SOP_POP_32_U (46) +# CHECK: Type: R_LARCH_ADD8 (47) +# CHECK: Type: R_LARCH_ADD16 (48) +# CHECK: Type: R_LARCH_ADD24 (49) +# CHECK: Type: R_LARCH_ADD32 (50) +# CHECK: Type: R_LARCH_ADD64 (51) +# CHECK: Type: R_LARCH_SUB8 (52) +# CHECK: Type: R_LARCH_SUB16 (53) +# CHECK: Type: R_LARCH_SUB24 (54) +# CHECK: Type: R_LARCH_SUB32 (55) +# CHECK: Type: R_LARCH_SUB64 (56) +# CHECK: Type: R_LARCH_GNU_VTINHERIT (57) +# CHECK: Type: R_LARCH_GNU_VTENTRY (58) +# CHECK: Type: R_LARCH_B16 (64) +# CHECK: Type: R_LARCH_B21 (65) +# CHECK: Type: R_LARCH_B26 (66) +# CHECK: Type: R_LARCH_ABS_HI20 (67) +# CHECK: Type: R_LARCH_ABS_LO12 (68) +# CHECK: Type: R_LARCH_ABS64_LO20 (69) +# CHECK: Type: R_LARCH_ABS64_HI12 (70) +# CHECK: Type: R_LARCH_PCALA_HI20 (71) +# CHECK: Type: R_LARCH_PCALA_LO12 (72) +# CHECK: Type: R_LARCH_PCALA64_LO20 (73) +# CHECK: Type: R_LARCH_PCALA64_HI12 (74) +# CHECK: Type: R_LARCH_GOT_PC_HI20 (75) +# CHECK: Type: R_LARCH_GOT_PC_LO12 (76) +# CHECK: Type: R_LARCH_GOT64_PC_LO20 (77) +# CHECK: Type: R_LARCH_GOT64_PC_HI12 (78) +# CHECK: Type: R_LARCH_GOT_HI20 (79) +# CHECK: Type: R_LARCH_GOT_LO12 (80) +# CHECK: Type: R_LARCH_GOT64_LO20 (81) +# CHECK: Type: R_LARCH_GOT64_HI12 (82) +# CHECK: Type: R_LARCH_TLS_LE_HI20 (83) +# CHECK: Type: R_LARCH_TLS_LE_LO12 (84) +# CHECK: Type: R_LARCH_TLS_LE64_LO20 (85) +# CHECK: Type: R_LARCH_TLS_LE64_HI12 (86) +# CHECK: Type: R_LARCH_TLS_IE_PC_HI20 (87) +# CHECK: Type: R_LARCH_TLS_IE_PC_LO12 (88) +# CHECK: Type: R_LARCH_TLS_IE64_PC_LO20 (89) +# CHECK: Type: R_LARCH_TLS_IE64_PC_HI12 (90) +# CHECK: Type: R_LARCH_TLS_IE_HI20 (91) +# CHECK: Type: R_LARCH_TLS_IE_LO12 (92) +# CHECK: Type: R_LARCH_TLS_IE64_LO20 (93) +# CHECK: Type: R_LARCH_TLS_IE64_HI12 (94) +# CHECK: Type: R_LARCH_TLS_LD_PC_HI20 (95) +# CHECK: Type: 
R_LARCH_TLS_LD_HI20 (96) +# CHECK: Type: R_LARCH_TLS_GD_PC_HI20 (97) +# CHECK: Type: R_LARCH_TLS_GD_HI20 (98) +# CHECK: Type: R_LARCH_32_PCREL (99) +# CHECK: Type: R_LARCH_RELAX (100) + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_LOONGARCH +Sections: + - Name: .rela.text + Type: SHT_RELA + Relocations: + - Type: R_LARCH_NONE + - Type: R_LARCH_32 + - Type: R_LARCH_64 + - Type: R_LARCH_RELATIVE + - Type: R_LARCH_COPY + - Type: R_LARCH_JUMP_SLOT + - Type: R_LARCH_TLS_DTPMOD32 + - Type: R_LARCH_TLS_DTPMOD64 + - Type: R_LARCH_TLS_DTPREL32 + - Type: R_LARCH_TLS_DTPREL64 + - Type: R_LARCH_TLS_TPREL32 + - Type: R_LARCH_TLS_TPREL64 + - Type: R_LARCH_IRELATIVE + - Type: R_LARCH_MARK_LA + - Type: R_LARCH_MARK_PCREL + - Type: R_LARCH_SOP_PUSH_PCREL + - Type: R_LARCH_SOP_PUSH_ABSOLUTE + - Type: R_LARCH_SOP_PUSH_DUP + - Type: R_LARCH_SOP_PUSH_GPREL + - Type: R_LARCH_SOP_PUSH_TLS_TPREL + - Type: R_LARCH_SOP_PUSH_TLS_GOT + - Type: R_LARCH_SOP_PUSH_TLS_GD + - Type: R_LARCH_SOP_PUSH_PLT_PCREL + - Type: R_LARCH_SOP_ASSERT + - Type: R_LARCH_SOP_NOT + - Type: R_LARCH_SOP_SUB + - Type: R_LARCH_SOP_SL + - Type: R_LARCH_SOP_SR + - Type: R_LARCH_SOP_ADD + - Type: R_LARCH_SOP_AND + - Type: R_LARCH_SOP_IF_ELSE + - Type: R_LARCH_SOP_POP_32_S_10_5 + - Type: R_LARCH_SOP_POP_32_U_10_12 + - Type: R_LARCH_SOP_POP_32_S_10_12 + - Type: R_LARCH_SOP_POP_32_S_10_16 + - Type: R_LARCH_SOP_POP_32_S_10_16_S2 + - Type: R_LARCH_SOP_POP_32_S_5_20 + - Type: R_LARCH_SOP_POP_32_S_0_5_10_16_S2 + - Type: R_LARCH_SOP_POP_32_S_0_10_10_16_S2 + - Type: R_LARCH_SOP_POP_32_U + - Type: R_LARCH_ADD8 + - Type: R_LARCH_ADD16 + - Type: R_LARCH_ADD24 + - Type: R_LARCH_ADD32 + - Type: R_LARCH_ADD64 + - Type: R_LARCH_SUB8 + - Type: R_LARCH_SUB16 + - Type: R_LARCH_SUB24 + - Type: R_LARCH_SUB32 + - Type: R_LARCH_SUB64 + - Type: R_LARCH_GNU_VTINHERIT + - Type: R_LARCH_GNU_VTENTRY + - Type: R_LARCH_B16 + - Type: R_LARCH_B21 + - Type: R_LARCH_B26 + - Type: R_LARCH_ABS_HI20 + - Type: R_LARCH_ABS_LO12 + - Type: R_LARCH_ABS64_LO20 + - Type: R_LARCH_ABS64_HI12 + - Type: R_LARCH_PCALA_HI20 + - Type: R_LARCH_PCALA_LO12 + - Type: R_LARCH_PCALA64_LO20 + - Type: R_LARCH_PCALA64_HI12 + - Type: R_LARCH_GOT_PC_HI20 + - Type: R_LARCH_GOT_PC_LO12 + - Type: R_LARCH_GOT64_PC_LO20 + - Type: R_LARCH_GOT64_PC_HI12 + - Type: R_LARCH_GOT_HI20 + - Type: R_LARCH_GOT_LO12 + - Type: R_LARCH_GOT64_LO20 + - Type: R_LARCH_GOT64_HI12 + - Type: R_LARCH_TLS_LE_HI20 + - Type: R_LARCH_TLS_LE_LO12 + - Type: R_LARCH_TLS_LE64_LO20 + - Type: R_LARCH_TLS_LE64_HI12 + - Type: R_LARCH_TLS_IE_PC_HI20 + - Type: R_LARCH_TLS_IE_PC_LO12 + - Type: R_LARCH_TLS_IE64_PC_LO20 + - Type: R_LARCH_TLS_IE64_PC_HI12 + - Type: R_LARCH_TLS_IE_HI20 + - Type: R_LARCH_TLS_IE_LO12 + - Type: R_LARCH_TLS_IE64_LO20 + - Type: R_LARCH_TLS_IE64_HI12 + - Type: R_LARCH_TLS_LD_PC_HI20 + - Type: R_LARCH_TLS_LD_HI20 + - Type: R_LARCH_TLS_GD_PC_HI20 + - Type: R_LARCH_TLS_GD_HI20 + - Type: R_LARCH_32_PCREL + - Type: R_LARCH_RELAX diff --git a/test/tools/obj2yaml/ELF/loongarch-eflags.yaml b/test/tools/obj2yaml/ELF/loongarch-eflags.yaml new file mode 100644 index 00000000..2e4ee1da --- /dev/null +++ b/test/tools/obj2yaml/ELF/loongarch-eflags.yaml @@ -0,0 +1,51 @@ +## Check obj2yaml is able to decode all possible LoongArch e_flags field values. 
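This is the encode direction of the llvm-readelf test above: yaml2obj folds the symbolic flag names into an e_flags word, and obj2yaml must recover the same names. A minimal sketch of that composition, assuming the same flag values as before (an illustration, not the yaml2obj code):

```python
# Compose e_flags from the symbolic names used in the YAML below.
# EF_LOONGARCH_OBJABI_V0 is 0, so v0 objects carry only the float-ABI bits.
EFLAGS = {
    "EF_LOONGARCH_ABI_SOFT_FLOAT": 0x1,
    "EF_LOONGARCH_ABI_SINGLE_FLOAT": 0x2,
    "EF_LOONGARCH_ABI_DOUBLE_FLOAT": 0x3,
    "EF_LOONGARCH_OBJABI_V0": 0x0,
    "EF_LOONGARCH_OBJABI_V1": 0x40,
}

def compose(names):
    # The float ABI is a two-bit field, so exactly one ABI_*_FLOAT name
    # should be given; plain OR-ing suffices for the combinations used here.
    value = 0
    for name in names:
        value |= EFLAGS[name]
    return value

# Mirrors the -DABI_MODIFIER=DOUBLE -DOBJABI_VER=1 instantiation.
assert compose(["EF_LOONGARCH_ABI_DOUBLE_FLOAT", "EF_LOONGARCH_OBJABI_V1"]) == 0x43
assert compose(["EF_LOONGARCH_ABI_SOFT_FLOAT", "EF_LOONGARCH_OBJABI_V0"]) == 0x1
```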
+
+# RUN: yaml2obj %s -o %t-lp64s -DCLASS=64 -DABI_MODIFIER=SOFT -DOBJABI_VER=0
+# RUN: obj2yaml %t-lp64s | FileCheck -DCLASS=64 -DABI_MODIFIER=SOFT -DOBJABI_VER=0 %s
+
+# RUN: yaml2obj %s -o %t-lp64f -DCLASS=64 -DABI_MODIFIER=SINGLE -DOBJABI_VER=0
+# RUN: obj2yaml %t-lp64f | FileCheck -DCLASS=64 -DABI_MODIFIER=SINGLE -DOBJABI_VER=0 %s
+
+# RUN: yaml2obj %s -o %t-lp64d -DCLASS=64 -DABI_MODIFIER=DOUBLE -DOBJABI_VER=0
+# RUN: obj2yaml %t-lp64d | FileCheck -DCLASS=64 -DABI_MODIFIER=DOUBLE -DOBJABI_VER=0 %s
+
+# RUN: yaml2obj %s -o %t-ilp32s -DCLASS=32 -DABI_MODIFIER=SOFT -DOBJABI_VER=0
+# RUN: obj2yaml %t-ilp32s | FileCheck -DCLASS=32 -DABI_MODIFIER=SOFT -DOBJABI_VER=0 %s
+
+# RUN: yaml2obj %s -o %t-ilp32f -DCLASS=32 -DABI_MODIFIER=SINGLE -DOBJABI_VER=0
+# RUN: obj2yaml %t-ilp32f | FileCheck -DCLASS=32 -DABI_MODIFIER=SINGLE -DOBJABI_VER=0 %s
+
+# RUN: yaml2obj %s -o %t-ilp32d -DCLASS=32 -DABI_MODIFIER=DOUBLE -DOBJABI_VER=0
+# RUN: obj2yaml %t-ilp32d | FileCheck -DCLASS=32 -DABI_MODIFIER=DOUBLE -DOBJABI_VER=0 %s
+
+# RUN: yaml2obj %s -o %t-lp64s -DCLASS=64 -DABI_MODIFIER=SOFT -DOBJABI_VER=1
+# RUN: obj2yaml %t-lp64s | FileCheck -DCLASS=64 -DABI_MODIFIER=SOFT -DOBJABI_VER=1 %s
+
+# RUN: yaml2obj %s -o %t-lp64f -DCLASS=64 -DABI_MODIFIER=SINGLE -DOBJABI_VER=1
+# RUN: obj2yaml %t-lp64f | FileCheck -DCLASS=64 -DABI_MODIFIER=SINGLE -DOBJABI_VER=1 %s
+
+# RUN: yaml2obj %s -o %t-lp64d -DCLASS=64 -DABI_MODIFIER=DOUBLE -DOBJABI_VER=1
+# RUN: obj2yaml %t-lp64d | FileCheck -DCLASS=64 -DABI_MODIFIER=DOUBLE -DOBJABI_VER=1 %s
+
+# RUN: yaml2obj %s -o %t-ilp32s -DCLASS=32 -DABI_MODIFIER=SOFT -DOBJABI_VER=1
+# RUN: obj2yaml %t-ilp32s | FileCheck -DCLASS=32 -DABI_MODIFIER=SOFT -DOBJABI_VER=1 %s
+
+# RUN: yaml2obj %s -o %t-ilp32f -DCLASS=32 -DABI_MODIFIER=SINGLE -DOBJABI_VER=1
+# RUN: obj2yaml %t-ilp32f | FileCheck -DCLASS=32 -DABI_MODIFIER=SINGLE -DOBJABI_VER=1 %s
+
+# RUN: yaml2obj %s -o %t-ilp32d -DCLASS=32 -DABI_MODIFIER=DOUBLE -DOBJABI_VER=1
+# RUN: obj2yaml %t-ilp32d | FileCheck -DCLASS=32 -DABI_MODIFIER=DOUBLE -DOBJABI_VER=1 %s
+
+# CHECK: Class: ELFCLASS[[CLASS]]
+# CHECK: Flags: [ EF_LOONGARCH_ABI_[[ABI_MODIFIER]]_FLOAT, EF_LOONGARCH_OBJABI_V[[OBJABI_VER]] ]
+
+--- !ELF
+FileHeader:
+  Class: ELFCLASS[[CLASS]]
+  Data: ELFDATA2LSB
+  Type: ET_EXEC
+  Machine: EM_LOONGARCH
+  Flags: [
+    EF_LOONGARCH_ABI_[[ABI_MODIFIER]]_FLOAT,
+    EF_LOONGARCH_OBJABI_V[[OBJABI_VER]],
+  ]
diff --git a/utils/gn/secondary/llvm/lib/Target/LoongArch/AsmParser/BUILD.gn b/utils/gn/secondary/llvm/lib/Target/LoongArch/AsmParser/BUILD.gn
new file mode 100644
index 00000000..cc3bb49a
--- /dev/null
+++ b/utils/gn/secondary/llvm/lib/Target/LoongArch/AsmParser/BUILD.gn
@@ -0,0 +1,26 @@
+import("//llvm/utils/TableGen/tablegen.gni")
+
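+# LoongArchGenAsmMatcher.inc holds the tablegen-generated matcher tables that
+# LoongArchAsmParser.cpp includes.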
+ "LoongArchAsmParser.cpp", + ] +} diff --git a/utils/gn/secondary/llvm/lib/Target/LoongArch/BUILD.gn b/utils/gn/secondary/llvm/lib/Target/LoongArch/BUILD.gn new file mode 100644 index 00000000..e89db520 --- /dev/null +++ b/utils/gn/secondary/llvm/lib/Target/LoongArch/BUILD.gn @@ -0,0 +1,102 @@ +import("//llvm/utils/TableGen/tablegen.gni") + +tablegen("LoongArchGenCallingConv") { + visibility = [ ":LLVMLoongArchCodeGen" ] + args = [ "-gen-callingconv" ] + td_file = "LoongArch.td" +} + +tablegen("LoongArchGenDAGISel") { + visibility = [ ":LLVMLoongArchCodeGen" ] + args = [ "-gen-dag-isel" ] + td_file = "LoongArch.td" +} + +tablegen("LoongArchGenFastISel") { + visibility = [ ":LLVMLoongArchCodeGen" ] + args = [ "-gen-fast-isel" ] + td_file = "LoongArch.td" +} + +tablegen("LoongArchGenGlobalISel") { + visibility = [ ":LLVMLoongArchCodeGen" ] + args = [ "-gen-global-isel" ] + td_file = "LoongArch.td" +} + +tablegen("LoongArchGenMCPseudoLowering") { + visibility = [ ":LLVMLoongArchCodeGen" ] + args = [ "-gen-pseudo-lowering" ] + td_file = "LoongArch.td" +} + +tablegen("LoongArchGenRegisterBank") { + visibility = [ ":LLVMLoongArchCodeGen" ] + args = [ "-gen-register-bank" ] + td_file = "LoongArch.td" +} + +static_library("LLVMLoongArchCodeGen") { + deps = [ + ":LoongArchGenCallingConv", + ":LoongArchGenDAGISel", + ":LoongArchGenFastISel", + ":LoongArchGenGlobalISel", + ":LoongArchGenMCPseudoLowering", + ":LoongArchGenRegisterBank", + "MCTargetDesc", + "TargetInfo", + "//llvm/include/llvm/Config:llvm-config", + "//llvm/lib/Analysis", + "//llvm/lib/CodeGen", + "//llvm/lib/CodeGen/AsmPrinter", + "//llvm/lib/CodeGen/GlobalISel", + "//llvm/lib/CodeGen/SelectionDAG", + "//llvm/lib/IR", + "//llvm/lib/MC", + "//llvm/lib/Support", + "//llvm/lib/Target", + ] + include_dirs = [ "." ] + sources = [ + "LoongArchAnalyzeImmediate.cpp", + "LoongArchAsmPrinter.cpp", + "LoongArchCCState.cpp", + "LoongArchCallLowering.cpp", + "LoongArchConstantIslandPass.cpp", + "LoongArchDelaySlotFiller.cpp", + "LoongArchExpandPseudo.cpp", + "LoongArchFrameLowering.cpp", + "LoongArchISelDAGToDAG.cpp", + "LoongArchISelLowering.cpp", + "LoongArchInstrInfo.cpp", + "LoongArchInstructionSelector.cpp", + "LoongArchLegalizerInfo.cpp", + "LoongArchMCInstLower.cpp", + "LoongArchMachineFunction.cpp", + "LoongArchModuleISelDAGToDAG.cpp", + "LoongArchOptimizePICCall.cpp", + "LoongArchPreLegalizerCombiner.cpp", + "LoongArchRegisterBankInfo.cpp", + "LoongArchRegisterInfo.cpp", + "LoongArchSubtarget.cpp", + "LoongArchTargetMachine.cpp", + "LoongArchTargetObjectFile.cpp", + ] +} + +# This is a bit different from most build files: Due to this group +# having the directory's name, "//llvm/lib/Target/LoongArch" will refer to this +# target, which pulls in the code in this directory *and all subdirectories*. +# For most other directories, "//llvm/lib/Foo" only pulls in the code directly +# in "llvm/lib/Foo". The forwarding targets in //llvm/lib/Target expect this +# different behavior. 
+group("LoongArch") { + deps = [ + ":LLVMLoongArchCodeGen", + "AsmParser", + "Disassembler", + "MCTargetDesc", + "TargetInfo", + ] +} diff --git a/utils/gn/secondary/llvm/lib/Target/LoongArch/Disassembler/BUILD.gn b/utils/gn/secondary/llvm/lib/Target/LoongArch/Disassembler/BUILD.gn new file mode 100644 index 00000000..0a9b4cf5 --- /dev/null +++ b/utils/gn/secondary/llvm/lib/Target/LoongArch/Disassembler/BUILD.gn @@ -0,0 +1,23 @@ +import("//llvm/utils/TableGen/tablegen.gni") + +tablegen("LoongArchGenDisassemblerTables") { + visibility = [ ":Disassembler" ] + args = [ "-gen-disassembler" ] + td_file = "../LoongArch.td" +} + +static_library("Disassembler") { + output_name = "LLVMLoongArchDisassembler" + deps = [ + ":LoongArchGenDisassemblerTables", + "//llvm/lib/MC/MCDisassembler", + "//llvm/lib/Support", + "//llvm/lib/Target/LoongArch/MCTargetDesc", + "//llvm/lib/Target/LoongArch/TargetInfo", + ] + include_dirs = [ ".." ] + sources = [ + # Make `gn format` not collapse this, for sync_source_lists_from_cmake.py. + "LoongArchDisassembler.cpp", + ] +} diff --git a/utils/gn/secondary/llvm/lib/Target/LoongArch/MCTargetDesc/BUILD.gn b/utils/gn/secondary/llvm/lib/Target/LoongArch/MCTargetDesc/BUILD.gn new file mode 100644 index 00000000..f0b96c96 --- /dev/null +++ b/utils/gn/secondary/llvm/lib/Target/LoongArch/MCTargetDesc/BUILD.gn @@ -0,0 +1,74 @@ +import("//llvm/utils/TableGen/tablegen.gni") + +tablegen("LoongArchGenAsmWriter") { + visibility = [ ":MCTargetDesc" ] + args = [ "-gen-asm-writer" ] + td_file = "../LoongArch.td" +} + +tablegen("LoongArchGenInstrInfo") { + visibility = [ ":tablegen" ] + args = [ "-gen-instr-info" ] + td_file = "../LoongArch.td" +} + +tablegen("LoongArchGenMCCodeEmitter") { + visibility = [ ":MCTargetDesc" ] + args = [ "-gen-emitter" ] + td_file = "../LoongArch.td" +} + +tablegen("LoongArchGenRegisterInfo") { + visibility = [ ":tablegen" ] + args = [ "-gen-register-info" ] + td_file = "../LoongArch.td" +} + +tablegen("LoongArchGenSubtargetInfo") { + visibility = [ ":tablegen" ] + args = [ "-gen-subtarget" ] + td_file = "../LoongArch.td" +} + +# This should contain tablegen targets generating .inc files included +# by other targets. .inc files only used by .cpp files in this directory +# should be in deps on the static_library instead. +group("tablegen") { + visibility = [ + ":MCTargetDesc", + "../TargetInfo", + ] + public_deps = [ + ":LoongArchGenInstrInfo", + ":LoongArchGenRegisterInfo", + ":LoongArchGenSubtargetInfo", + ] +} + +static_library("MCTargetDesc") { + output_name = "LLVMLoongArchDesc" + public_deps = [ ":tablegen" ] + deps = [ + ":LoongArchGenAsmWriter", + ":LoongArchGenMCCodeEmitter", + "//llvm/lib/MC", + "//llvm/lib/Support", + "//llvm/lib/Target/LoongArch/TargetInfo", + ] + include_dirs = [ ".." 
+  sources = [
+    "LoongArchABIInfo.cpp",
+    "LoongArchAnalyzeImmediate.cpp",
+    "LoongArchAsmBackend.cpp",
+    "LoongArchELFObjectWriter.cpp",
+    "LoongArchELFStreamer.cpp",
+    "LoongArchInstPrinter.cpp",
+    "LoongArchMCAsmInfo.cpp",
+    "LoongArchMCCodeEmitter.cpp",
+    "LoongArchMCExpr.cpp",
+    "LoongArchMCTargetDesc.cpp",
+    "LoongArchTargetStreamer.cpp",
+  ]
+}
diff --git a/utils/gn/secondary/llvm/lib/Target/LoongArch/TargetInfo/BUILD.gn b/utils/gn/secondary/llvm/lib/Target/LoongArch/TargetInfo/BUILD.gn
new file mode 100644
index 00000000..a476bdd5
--- /dev/null
+++ b/utils/gn/secondary/llvm/lib/Target/LoongArch/TargetInfo/BUILD.gn
@@ -0,0 +1,9 @@
+static_library("TargetInfo") {
+  output_name = "LLVMLoongArchInfo"
+  deps = [ "//llvm/lib/Support" ]
+  include_dirs = [ ".." ]
+  sources = [
+    # Make `gn format` not collapse this, for sync_source_lists_from_cmake.py.
+    "LoongArchTargetInfo.cpp",
+  ]
+}
-- 
2.41.0