mesa/src/freedreno/computerator/examples/stg_ldg_offset.asm
Danylo Piliaiev fdc0f489e0 ir3: add ldg.a,stg.a which allow complex in-place offset calculation
The full form for ldg.a/stg.a offset is (each shift is applied before the additions):
 g[reg_address + (reg_offset << (imm_shift + 2)) + (imm_offset << 2)]

where imm_shift is in [0, 3] and imm_offset is in [0, 3]

The a6xx blob was found to produce slightly simpler offset calculations
for TES/TCS shaders in GTA V:

 [c002000a_03c14215] ldg.a.f32 r2.z, g[r1.y+((r2.z+1)<<2)], 3;
 [c0020004_01c14609] ldg.a.f32 r1.x, g[r1.y+((r1.x+3)<<2)], 1;

Our new syntax:
 stg.a.u32 g[r2.x+(r1.x+1)<<2], r5.x, 1
 stg.a.u32 g[r2.x+r1.x<<4+3<<2], r5.x, 1
 ldg.a.f32 r1.w, g[r1.y+(r1.w+1)<<2], 3
 ldg.a.f32 r1.w, g[r1.y+r1.w<<5+2<<2], 3

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11431>
2021-06-25 15:39:51 +00:00

17 lines
346 B
NASM

; Exercises the stg.a / ldg.a in-place offset addressing.
; Each invocation stores 0xff to the buffer, loads it back through the very
; same address expression, adds 1, and stores the sum through a second
; address expression that differs only in the immediate offset (1 instead of 2).
;
; Address expressions below have the shape g[base + idx<<4 + imm<<2]:
;   base = r1.x, idx = r0.y (copy of the invocation id), imm = 2 or 1.
; NOTE(review): presumably byte addressing, i.e. a 16-byte stride per
; invocation with imm selecting a dword inside it - confirm against the ISA docs.
; NOTE(review): the trailing ", 1" operand of stg.a/ldg.a is presumably the
; number of components transferred - confirm.

; ---- kernel header ----
@localsize 16, 1, 1
; NOTE(review): the unit of the size 128 is not visible here (presumably
; dwords); c2.xy presumably receives the buffer address - confirm.
@buf 128 (c2.x) ; c2.xy
@invocationid(r0.x) ; r0.xyz

; ---- set up operands ----
; r0.y = invocation id (x), used as the register offset.
mov.u32u32 r0.y, r0.x
; r1.xy = c2.xy, used as the base of the g[] address expressions.
mov.u32u32 r1.x, c2.x
mov.u32u32 r1.y, c2.y
; r2.x = value written by the first store.
mov.u32u32 r2.x, 0xff
; NOTE(review): presumably delay slots so the movs above have landed before
; the store reads them - confirm required count.
(rpt5)nop

; ---- store, read back, increment, store again ----
; Store r2.x (0xff) at base + idx<<4 + 2<<2.
stg.a.u32 g[r1.x+r0.y<<4+2<<2], r2.x, 1
; NOTE(review): ir3 flags are normally written as prefixes (cf. "(rpt5)nop");
; "nop(sy)" may therefore be read as a plain nop with (sy) applying to the
; following instruction. Presumably either reading waits for the outstanding
; memory access - confirm with the assembler.
nop(sy)
; Load back from the same address expression into r4.x.
ldg.a.u32 r4.x, g[r1.x+r0.y<<4+2<<2], 1
; Same flag-placement note as above; presumably waits for the load result.
nop(sy)
; r4.x = loaded value + 1.
add.u r4.x, r4.x, 1
; NOTE(review): presumably delay slots for the add result - confirm.
(rpt3)nop
; Store the incremented value at base + idx<<4 + 1<<2.
stg.a.u32 g[r1.x+r0.y<<4+1<<2], r4.x, 1
end
nop