mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-25 08:18:11 +02:00
The full form for the ldg.a/stg.a offset is: g[reg_address + reg_offset << (imm_shift + 2) + imm_offset << 2], where imm_shift is in [0, 3] and imm_offset is in [0, 3]. The a6xx blob was found to produce slightly simpler offset calculations for TES/TCS shaders in GTA V: [c002000a_03c14215] ldg.a.f32 r2.z, g[r1.y+((r2.z+1)<<2)], 3; [c0020004_01c14609] ldg.a.f32 r1.x, g[r1.y+((r1.x+3)<<2)], 1. Our new syntax: stg.a.u32 g[r2.x+(r1.x+1)<<2], r5.x, 1; stg.a.u32 g[r2.x+r1.x<<4+3<<2], r5.x, 1; ldg.a.f32 r1.w, g[r1.y+(r1.w+1)<<2], 3; ldg.a.f32 r1.w, g[r1.y+r1.w<<5+2<<2], 3. Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11431>
17 lines
346 B
NASM
17 lines
346 B
NASM
; Round-trip exercise of the "full form" stg.a/ldg.a offset syntax
;   g[reg_address + reg_offset<<(imm_shift+2) + imm_offset<<2]
; Each invocation stores 0xff at one offset, loads it back from the same
; offset, adds 1, and stores the result at a second offset.
; NOTE(review): the `|` lines below look like page/table residue rather than
; shader source -- confirm against the upstream file before assembling.
@localsize 16, 1, 1 ; presumably 16x1x1 invocations per workgroup -- TODO confirm
|
|
@buf 128 (c2.x) ; c2.xy -- buffer of size 128 (units not visible here), referenced via c2.x/c2.y
|
|
@invocationid(r0.x) ; r0.xyz -- invocation id is delivered starting at r0.x
|
|
mov.u32u32 r0.y, r0.x   ; r0.y = r0.x; used below as the register offset (per-invocation index)
|
|
mov.u32u32 r1.x, c2.x   ; r1.x = c2.x; used below as the address register
|
|
mov.u32u32 r1.y, c2.y   ; r1.y = c2.y; presumably the upper half of the buffer address -- TODO confirm
|
|
mov.u32u32 r2.x, 0xff   ; r2.x = 0xff, the value that gets stored first
|
|
(rpt5)nop               ; nop with repeat flag 5; presumably lets the movs above land before use
|
|
stg.a.u32 g[r1.x+r0.y<<4+2<<2], r2.x, 1 ; store r2.x at r1.x + (r0.y<<4) + (2<<2): imm_shift=2, imm_offset=2; trailing 1 presumably a component count
|
|
nop(sy)                 ; NOTE(review): (sy) usually prefixes an instruction; presumably syncs on the store above -- confirm how the assembler binds this flag
|
|
ldg.a.u32 r4.x, g[r1.x+r0.y<<4+2<<2], 1 ; load into r4.x from the same address expression the store used
|
|
nop(sy)                 ; presumably waits for the load result before r4.x is read -- same caveat as above
|
|
add.u r4.x, r4.x, 1     ; r4.x = r4.x + 1
|
|
(rpt3)nop               ; nop with repeat flag 3; presumably covers the add's latency before the store reads r4.x
|
|
stg.a.u32 g[r1.x+r0.y<<4+1<<2], r4.x, 1 ; store the incremented value at r1.x + (r0.y<<4) + (1<<2): imm_offset=1, next to the first store
|
|
end                     ; terminates the shader
|
|
nop                     ; trailing nop after end; presumably padding -- TODO confirm
|