Syntax of gfx908 Instructions
Introduction
This document describes the syntax of instructions specific to gfx908.
For a description of other gfx908 instructions, see Syntax of Core GFX9 Instructions.
Notation
The notation used in this document is explained in the separate AMDGPU instruction notation reference.
Overview
An overview of the generic syntax and other features of AMDGPU instructions may be found in the general AMDGPU instruction syntax document.
Instructions
FLAT
INSTRUCTION                 SRC0      SRC1      SRC2       MODIFIERS
———————————————————————————————————————————————————————————————————————————————
global_atomic_add_f32       vaddr,    vdata,    saddr      offset13s slc
global_atomic_pk_add_f16    vaddr,    vdata,    saddr      offset13s slc
MUBUF
INSTRUCTION                 SRC0      SRC1      SRC2      SRC3        MODIFIERS
————————————————————————————————————————————————————————————————————————————————————————————————————
buffer_atomic_add_f32       vdata,    vaddr,    srsrc,    soffset     idxen offen offset12 slc
buffer_atomic_pk_add_f16    vdata,    vaddr,    srsrc,    soffset     idxen offen offset12 slc
VOP2
INSTRUCTION             DST       SRC0            SRC1            MODIFIERS
—————————————————————————————————————————————————————————————————————————————————————————————————————
v_dot2c_f32_f16         vdst,     src0:f16x2,     vsrc1:f16x2
v_dot2c_f32_f16_dpp     vdst,     vsrc0:f16x2,    vsrc1:f16x2     dpp_ctrl row_mask bank_mask bound_ctrl
v_dot2c_i32_i16         vdst,     src0:i16x2,     vsrc1:i16x2
v_dot2c_i32_i16_dpp     vdst,     vsrc0:i16x2,    vsrc1:i16x2     dpp_ctrl row_mask bank_mask bound_ctrl
v_dot4c_i32_i8          vdst,     src0:i8x4,      vsrc1:i8x4
v_dot4c_i32_i8_dpp      vdst,     vsrc0:i8x4,     vsrc1:i8x4      dpp_ctrl row_mask bank_mask bound_ctrl
v_dot8c_i32_i4          vdst,     src0:i4x8,      vsrc1:i4x8
v_dot8c_i32_i4_dpp      vdst,     vsrc0:i4x8,     vsrc1:i4x8      dpp_ctrl row_mask bank_mask bound_ctrl
v_fmac_f32              vdst,     src0,           vsrc1
v_fmac_f32_dpp          vdst,     vsrc0:m,        vsrc1:m         dpp_ctrl row_mask bank_mask bound_ctrl
v_pk_fmac_f16           vdst,     src0,           vsrc1
v_xnor_b32              vdst,     src0,           vsrc1
v_xnor_b32_dpp          vdst,     vsrc0,          vsrc1           dpp_ctrl row_mask bank_mask bound_ctrl
v_xnor_b32_sdwa         vdst,     src0:m,         src1:m          dst_sel dst_unused src0_sel src1_sel
VOP3
INSTRUCTION         DST       SRC0        SRC1        MODIFIERS
————————————————————————————————————————————————————————————————————————————
v_fmac_f32_e64      vdst,     src0:m,     src1:m      clamp omod
v_xnor_b32_e64      vdst,     src0,       src1
VOP3P
INSTRUCTION               DST           SRC0           SRC1           SRC2           MODIFIERS
—————————————————————————————————————————————————————————————————————————————————————————————————————————
v_accvgpr_read_b32        vdst,         vsrc
v_accvgpr_write_b32       vdst,         src
v_dot2_f32_f16            vdst,         src0:f16x2,    src1:f16x2,    src2:f32       neg_lo neg_hi clamp
v_dot2_i32_i16            vdst,         src0:i16x2,    src1:i16x2,    src2:i32       clamp
v_dot2_u32_u16            vdst,         src0:u16x2,    src1:u16x2,    src2:u32       clamp
v_dot4_i32_i8             vdst,         src0:i8x4,     src1:i8x4,     src2:i32       clamp
v_dot4_u32_u8             vdst,         src0:u8x4,     src1:u8x4,     src2:u32       clamp
v_dot8_i32_i4             vdst,         src0:i4x8,     src1:i4x8,     src2:i32       clamp
v_dot8_u32_u4             vdst,         src0:u4x8,     src1:u4x8,     src2:u32       clamp
v_fma_mix_f32             vdst,         src0:m:fx,     src1:m:fx,     src2:m:fx      m_op_sel m_op_sel_hi clamp
v_fma_mixhi_f16           vdst,         src0:m:fx,     src1:m:fx,     src2:m:fx      m_op_sel m_op_sel_hi clamp
v_fma_mixlo_f16           vdst,         src0:m:fx,     src1:m:fx,     src2:m:fx      m_op_sel m_op_sel_hi clamp
v_mfma_f32_16x16x16f16    vdst:f32x4,   vsrc0:f16x4,   vsrc1:f16x4,   vsrc2:f32x4    cbsz abid blgp
v_mfma_f32_16x16x1f32     vdst:f32x16,  vsrc0:f32,     vsrc1:f32,     vsrc2:f32x16   cbsz abid blgp
v_mfma_f32_16x16x2bf16    vdst:f32x16,  vsrc0:bf16x2,  vsrc1:bf16x2,  vsrc2:f32x16   cbsz abid blgp
v_mfma_f32_16x16x4f16     vdst:f32x16,  vsrc0:f16x4,   vsrc1:f16x4,   vsrc2:f32x16   cbsz abid blgp
v_mfma_f32_16x16x4f32     vdst:f32x4,   vsrc0:f32,     vsrc1:f32,     vsrc2:f32x4    cbsz abid blgp
v_mfma_f32_16x16x8bf16    vdst:f32x4,   vsrc0:bf16x2,  vsrc1:bf16x2,  vsrc2:f32x4    cbsz abid blgp
v_mfma_f32_32x32x1f32     vdst:f32x32,  vsrc0:f32,     vsrc1:f32,     vsrc2:f32x32   cbsz abid blgp
v_mfma_f32_32x32x2bf16    vdst:f32x32,  vsrc0:bf16x2,  vsrc1:bf16x2,  vsrc2:f32x32   cbsz abid blgp
v_mfma_f32_32x32x2f32     vdst:f32x16,  vsrc0:f32,     vsrc1:f32,     vsrc2:f32x16   cbsz abid blgp
v_mfma_f32_32x32x4bf16    vdst:f32x16,  vsrc0:bf16x2,  vsrc1:bf16x2,  vsrc2:f32x16   cbsz abid blgp
v_mfma_f32_32x32x4f16     vdst:f32x32,  vsrc0:f16x4,   vsrc1:f16x4,   vsrc2:f32x32   cbsz abid blgp
v_mfma_f32_32x32x8f16     vdst:f32x16,  vsrc0:f16x4,   vsrc1:f16x4,   vsrc2:f32x16   cbsz abid blgp
v_mfma_f32_4x4x1f32       vdst:f32x4,   vsrc0:f32,     vsrc1:f32,     vsrc2:f32x4    cbsz abid blgp
v_mfma_f32_4x4x2bf16      vdst:f32x4,   vsrc0:bf16x2,  vsrc1:bf16x2,  vsrc2:f32x4    cbsz abid blgp
v_mfma_f32_4x4x4f16       vdst:f32x4,   vsrc0:f16x4,   vsrc1:f16x4,   vsrc2:f32x4    cbsz abid blgp
v_mfma_i32_16x16x16i8     vdst:i32x4,   vsrc0:i8x4,    vsrc1:i8x4,    vsrc2:i32x4    cbsz abid blgp
v_mfma_i32_16x16x4i8      vdst:i32x16,  vsrc0:i8x4,    vsrc1:i8x4,    vsrc2:i32x16   cbsz abid blgp
v_mfma_i32_32x32x4i8      vdst:i32x32,  vsrc0:i8x4,    vsrc1:i8x4,    vsrc2:i32x32   cbsz abid blgp
v_mfma_i32_32x32x8i8      vdst:i32x16,  vsrc0:i8x4,    vsrc1:i8x4,    vsrc2:i32x16   cbsz abid blgp
v_mfma_i32_4x4x4i8        vdst:i32x4,   vsrc0:i8x4,    vsrc1:i8x4,    vsrc2:i32x4    cbsz abid blgp