From e6529b54c04674db6a06f15db7eef4e93497b08a Mon Sep 17 00:00:00 2001 From: Job Noorman Date: Tue, 14 Apr 2026 14:41:20 +0200 Subject: [PATCH] ir3: add support for the ldg.k a1.x addressing mode We assumed a1.x addressing doesn't work. However, it turns out it actually does work but instead of taking the offset's high bits from a1.x and adding an immediate to the low bits, the full offset is stored in a1.x and the immediate offset is ignored. Signed-off-by: Job Noorman Part-of: --- src/freedreno/ir3/ir3_compiler_nir.c | 11 ++++++----- src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c | 16 ++-------------- src/freedreno/isa/ir3-cat6.xml | 3 +++ 3 files changed, 11 insertions(+), 19 deletions(-) diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index 3856088dd43..38d9db43782 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -1244,16 +1244,17 @@ emit_intrinsic_copy_global_to_uniform(struct ir3_context *ctx, unsigned size = nir_intrinsic_range(intr); unsigned dst = nir_intrinsic_range_base(intr); unsigned addr_offset = nir_intrinsic_base(intr); - unsigned dst_lo = dst & 0xff; - unsigned dst_hi = dst >> 8; struct ir3_instruction *a1 = NULL; - if (dst_hi) - a1 = ir3_create_addr1(&ctx->build, dst_hi << 8); + unsigned dst_imm = dst; + if (dst > 256) { + a1 = ir3_create_addr1(&ctx->build, dst); + dst_imm = 0; + } struct ir3_instruction *addr = ir3_collect(b, ir3_get_src_shared(ctx, &intr->src[0], true)[0]); - struct ir3_instruction *ldg = ir3_LDG_K(b, create_immed(b, dst_lo), 0, addr, 0, + struct ir3_instruction *ldg = ir3_LDG_K(b, create_immed(b, dst_imm), 0, addr, 0, create_immed(b, addr_offset), 0, create_immed(b, size), 0); ldg->barrier_class = ldg->barrier_conflict = IR3_BARRIER_CONST_W; diff --git a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c index 622f1d6f729..2457a4f5e77 100644 --- a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c +++ 
b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c @@ -408,20 +408,8 @@ copy_global_to_uniform(nir_shader *nir, struct ir3_ubo_analysis_state *state) unsigned size = (range->end - range->start); for (unsigned offset = 0; offset < size; offset += 16) { unsigned const_offset = range->offset / 4 + offset / 4; - if (const_offset < 256) { - nir_copy_global_to_uniform_ir3(b, base, - .base = start + offset, - .range_base = const_offset, - .range = 1); - } else { - /* It seems that the a1.x format doesn't work, so we need to - * decompose the ldg.k into ldg + stc. - */ - nir_def *load = - nir_load_global_ir3(b, 4, 32, base, - nir_imm_int(b, (start + offset) / 4)); - nir_store_const_ir3(b, load, .base = const_offset); - } + nir_copy_global_to_uniform_ir3(b, base, .base = start + offset, + .range_base = const_offset, .range = 1); } } diff --git a/src/freedreno/isa/ir3-cat6.xml b/src/freedreno/isa/ir3-cat6.xml index 44eda4104a0..fce0f624f84 100644 --- a/src/freedreno/isa/ir3-cat6.xml +++ b/src/freedreno/isa/ir3-cat6.xml @@ -220,6 +220,9 @@ TODO rename UAV src to "UAV" so disasm_field_cb can find it easily? LoaD Global Constants + + DST: offset into the const file as an immediate or value in a1.x. + The a1.x+offset form is not supported (i.e., the offset is ignored).