From 997c500cc4977e67dc7ecf40ce4f95126dbff309 Mon Sep 17 00:00:00 2001 From: Emma Anholt Date: Wed, 26 Nov 2025 16:51:39 -0800 Subject: [PATCH] ir3: Drop ir3_nir_lower_64b_intrinsics Our 64-bit memory load/stores are already split to 32 bits by nir_lower_mem_access_bit_sizes. Part-of: --- src/freedreno/ir3/ir3_nir.c | 1 - src/freedreno/ir3/ir3_nir.h | 1 - src/freedreno/ir3/ir3_nir_lower_64b.c | 158 -------------------------- 3 files changed, 160 deletions(-) diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index da0b82cb229..4ca00b0d21a 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -1225,7 +1225,6 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, progress |= OPT(s, ir3_nir_lower_64b_global); progress |= OPT(s, ir3_nir_lower_64b_undef); progress |= OPT(s, nir_lower_int64); - progress |= OPT(s, ir3_nir_lower_64b_intrinsics); progress |= OPT(s, nir_lower_64bit_phis); progress |= OPT(s, ir3_nir_opt_subgroups, so); diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h index 491a030af63..5093881407d 100644 --- a/src/freedreno/ir3/ir3_nir.h +++ b/src/freedreno/ir3/ir3_nir.h @@ -51,7 +51,6 @@ uint8_t ir3_nir_vectorize_filter(const nir_instr *instr, const void *data); /* * 64b related lowering: */ -bool ir3_nir_lower_64b_intrinsics(nir_shader *shader); bool ir3_nir_lower_64b_undef(nir_shader *shader); bool ir3_nir_lower_64b_global(nir_shader *shader); bool ir3_nir_lower_64b_regs(nir_shader *shader); diff --git a/src/freedreno/ir3/ir3_nir_lower_64b.c b/src/freedreno/ir3/ir3_nir_lower_64b.c index 6460a236e7b..32e4556aadb 100644 --- a/src/freedreno/ir3/ir3_nir_lower_64b.c +++ b/src/freedreno/ir3/ir3_nir_lower_64b.c @@ -9,164 +9,6 @@ #include "nir_builder_opcodes.h" #include "nir_intrinsics.h" -/* - * Lowering for 64b intrinsics generated with OpenCL or with - * VK_KHR_buffer_device_address. All our intrinsics from a hw - * standpoint are 32b, so we just need to combine in zero for - * the upper 32bits and let the other nir passes clean up the mess. - */ - -static bool -lower_64b_intrinsics_filter(const nir_instr *instr, const void *unused) -{ - (void)unused; - - if (instr->type != nir_instr_type_intrinsic) - return false; - - nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - - if (intr->intrinsic == nir_intrinsic_load_deref || - intr->intrinsic == nir_intrinsic_store_deref) - return false; - - if (is_intrinsic_store(intr->intrinsic)) - return nir_src_bit_size(intr->src[0]) == 64; - - /* skip over ssbo atomics, we'll lower them later */ - if (intr->intrinsic == nir_intrinsic_ssbo_atomic || - intr->intrinsic == nir_intrinsic_ssbo_atomic_swap || - intr->intrinsic == nir_intrinsic_global_atomic || - intr->intrinsic == nir_intrinsic_global_atomic_swap) - return false; - - if (nir_intrinsic_dest_components(intr) == 0) - return false; - - return intr->def.bit_size == 64; -} - -static nir_def * -lower_64b_intrinsics(nir_builder *b, nir_instr *instr, void *unused) -{ - (void)unused; - - nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - - /* We could be *slightly* more clever and, for ex, turn a 64b vec4 - * load into two 32b vec4 loads, rather than 4 32b vec2 loads. - */ - - if (is_intrinsic_store(intr->intrinsic)) { - unsigned offset_src_idx; - switch (intr->intrinsic) { - case nir_intrinsic_store_ssbo: - case nir_intrinsic_store_global_ir3: - case nir_intrinsic_store_per_view_output: - offset_src_idx = 2; - break; - default: - offset_src_idx = 1; - } - - unsigned num_comp = nir_intrinsic_src_components(intr, 0); - unsigned wrmask = nir_intrinsic_has_write_mask(intr) ? - nir_intrinsic_write_mask(intr) : BITSET_MASK(num_comp); - nir_def *val = intr->src[0].ssa; - nir_def *off = intr->src[offset_src_idx].ssa; - - for (unsigned i = 0; i < num_comp; i++) { - if (!(wrmask & BITFIELD_BIT(i))) - continue; - - nir_def *c64 = nir_channel(b, val, i); - nir_def *c32 = nir_unpack_64_2x32(b, c64); - - nir_intrinsic_instr *store = - nir_instr_as_intrinsic(nir_instr_clone(b->shader, &intr->instr)); - store->num_components = 2; - store->src[0] = nir_src_for_ssa(c32); - store->src[offset_src_idx] = nir_src_for_ssa(off); - - if (nir_intrinsic_has_write_mask(intr)) - nir_intrinsic_set_write_mask(store, 0x3); - nir_builder_instr_insert(b, &store->instr); - - off = nir_iadd_imm(b, off, 8); - } - - return NIR_LOWER_INSTR_PROGRESS_REPLACE; - } - - unsigned num_comp = nir_intrinsic_dest_components(intr); - - nir_def *def = &intr->def; - def->bit_size = 32; - - /* load_kernel_input is handled specially, lowering to two 32b inputs: - */ - if (intr->intrinsic == nir_intrinsic_load_kernel_input) { - assert(num_comp == 1); - - nir_def *offset = nir_iadd_imm(b, - intr->src[0].ssa, 4); - - nir_def *upper = nir_load_kernel_input(b, 1, 32, offset); - - return nir_pack_64_2x32_split(b, def, upper); - } - - nir_def *components[num_comp]; - - if (is_intrinsic_load(intr->intrinsic)) { - unsigned offset_src_idx; - switch(intr->intrinsic) { - case nir_intrinsic_load_ssbo: - case nir_intrinsic_load_ubo: - case nir_intrinsic_load_global_ir3: - case nir_intrinsic_load_per_view_output: - offset_src_idx = 1; - break; - default: - offset_src_idx = 0; - } - - nir_def *off = intr->src[offset_src_idx].ssa; - - for (unsigned i = 0; i < num_comp; i++) { - nir_intrinsic_instr *load = - nir_instr_as_intrinsic(nir_instr_clone(b->shader, &intr->instr)); - load->num_components = 2; - load->src[offset_src_idx] = nir_src_for_ssa(off); - - nir_def_init(&load->instr, &load->def, 2, 32); - nir_builder_instr_insert(b, &load->instr); - - components[i] = nir_pack_64_2x32(b, &load->def); - - off = nir_iadd_imm(b, off, 8); - } - } else { - /* The remaining (non load/store) intrinsics just get zero- - * extended from 32b to 64b: - */ - for (unsigned i = 0; i < num_comp; i++) { - nir_def *c = nir_channel(b, def, i); - components[i] = nir_pack_64_2x32_split(b, c, nir_imm_zero(b, 1, 32)); - } - } - - return nir_build_alu_src_arr(b, nir_op_vec(num_comp), components); -} - -bool -ir3_nir_lower_64b_intrinsics(nir_shader *shader) -{ - return nir_shader_lower_instructions( - shader, lower_64b_intrinsics_filter, - lower_64b_intrinsics, NULL); -} - /* * Lowering for 64b undef instructions, splitting into a two 32b undefs */