nir: remove manual nir_load_global

Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37959>
Georg Lehmann 2025-10-20 16:46:58 +02:00
parent 9ebda88e34
commit 77540cac8c
18 changed files with 60 additions and 76 deletions
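
Context for the change below: the manual wrapper in nir_builder.h took the address first and a bare align parameter, while the generated builder takes the component count and bit size first and carries alignment through the optional .align_mul index, falling back to the load's natural alignment (bit_size / 8) when it is omitted — consistent with the call sites below that drop a natural-alignment argument entirely. A minimal before/after sketch of the call-site migration (assuming a nir_builder *b and a 64-bit address def addr already in scope; the variable names are illustrative):

   /* Before: manual wrapper, address first, explicit align parameter. */
   nir_def *v_old = nir_load_global(b, addr, 4 /* align */, 1, 32);

   /* After: generated builder, component count and bit size first; alignment
    * goes through the optional .align_mul index and defaults to the natural
    * alignment (bit_size / 8 == 4 here) when omitted. */
   nir_def *v_new = nir_load_global(b, 1, 32, addr);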

View file

@@ -174,7 +174,7 @@ radv_build_printf_args(nir_builder *b, nir_def *cond, const char *format_string,
    offset = nir_read_first_invocation(b, nir_if_phi(b, offset, undef));
    nir_def *buffer_size = nir_load_global(
-      b, nir_imm_int64(b, device->printf.buffer_addr + offsetof(struct radv_printf_buffer_header, size)), 4, 1, 32);
+      b, 1, 32, nir_imm_int64(b, device->printf.buffer_addr + offsetof(struct radv_printf_buffer_header, size)));
    nir_push_if(b, nir_ige(b, buffer_size, nir_iadd(b, offset, size)));
    {

View file

@@ -859,7 +859,7 @@ dgc_load_shader_metadata(struct dgc_cmdbuf *cs, uint32_t bitsize, uint32_t field
       va = load_param64(b, params_addr);
    }
-   return nir_load_global(b, nir_iadd_imm(b, va, field_offset), 4, 1, bitsize);
+   return nir_load_global(b, 1, bitsize, nir_iadd_imm(b, va, field_offset), .align_mul = 4);
 }
 #define load_shader_metadata32(cs, field) \
@@ -875,7 +875,7 @@ dgc_load_vbo_metadata(struct dgc_cmdbuf *cs, uint32_t bitsize, nir_def *idx, uin
    nir_def *va = load_param64(b, params_addr);
    nir_def *offset = nir_iadd_imm(b, nir_imul_imm(b, idx, DGC_VBO_INFO_SIZE), field_offset);
-   return nir_load_global(b, nir_iadd(b, va, nir_u2u64(b, offset)), 4, 1, bitsize);
+   return nir_load_global(b, 1, bitsize, nir_iadd(b, va, nir_u2u64(b, offset)), .align_mul = 4);
 }
 #define load_vbo_metadata32(cs, idx, field) dgc_load_vbo_metadata(cs, 32, idx, offsetof(struct radv_vbo_info, field))
@@ -2615,7 +2615,7 @@ dgc_is_cond_render_enabled(nir_builder *b)
    nir_push_if(b, nir_ieq_imm(b, load_param8(b, predicating), 1));
    {
-      nir_def *val = nir_load_global(b, load_param64(b, predication_va), 4, 1, 32);
+      nir_def *val = nir_load_global(b, 1, 32, load_param64(b, predication_va));
       /* By default, all rendering commands are discarded if the 32-bit value is zero. If the
        * inverted flag is set, they are discarded if the value is non-zero.
        */

View file

@@ -419,9 +419,9 @@ load_store_formatted(nir_builder *b, nir_def *base, nir_def *index,
       nir_store_global(b, addr, blocksize_B, raw,
                        nir_component_mask(raw->num_components));
    } else {
-      nir_def *raw =
-         nir_load_global(b, addr, blocksize_B, DIV_ROUND_UP(blocksize_B, 4),
-                         MIN2(32, blocksize_B * 8));
+      nir_def *raw = nir_load_global(b, DIV_ROUND_UP(blocksize_B, 4),
+                                     MIN2(32, blocksize_B * 8), addr,
+                                     .align_mul = blocksize_B);
       return nir_format_unpack_rgba(b, raw, format);
    }

View file

@@ -2062,21 +2062,6 @@ nir_store_array_var_imm(nir_builder *build, nir_variable *var, int64_t index,
    nir_store_deref(build, deref, value, writemask);
 }
-#undef nir_load_global
-static inline nir_def *
-nir_load_global(nir_builder *build, nir_def *addr, unsigned align,
-                unsigned num_components, unsigned bit_size)
-{
-   nir_intrinsic_instr *load =
-      nir_intrinsic_instr_create(build->shader, nir_intrinsic_load_global);
-   load->num_components = (uint8_t)num_components;
-   load->src[0] = nir_src_for_ssa(addr);
-   nir_intrinsic_set_align(load, align, 0);
-   nir_def_init(&load->instr, &load->def, num_components, bit_size);
-   nir_builder_instr_insert(build, &load->instr);
-   return &load->def;
-}
 #undef nir_store_global
 static inline void
 nir_store_global(nir_builder *build, nir_def *addr, unsigned align,
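
The removed wrapper above is fully covered by the generated builder. Where a call site still wants the old argument order, a one-line adapter over the generated entry point suffices; a hypothetical sketch (this helper is not part of the commit, and the name is illustrative):

   #include "nir_builder.h"

   /* Hypothetical adapter (not in this commit) showing how the removed
    * wrapper's argument order maps onto the generated builder's. */
   static inline nir_def *
   load_global_old_style(nir_builder *b, nir_def *addr, unsigned align,
                         unsigned num_components, unsigned bit_size)
   {
      /* The generated builder orders (components, bit size, address) and
       * takes alignment through the optional .align_mul index. */
      return nir_load_global(b, num_components, bit_size, addr,
                             .align_mul = align);
   }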

View file

@@ -62,7 +62,7 @@ TEST_F(nir_minimize_call_live_states_test, life_intrinsics)
    nir_def *callee = nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0));
    nir_def *v1 = nir_load_push_constant(b, 1, 64, nir_imm_int(b, 8));
-   nir_def *v2 = nir_load_global(b, v1, 4, 3, 32);
+   nir_def *v2 = nir_load_global(b, 3, 32, v1);
    nir_def *v3 = nir_load_global_constant(b, v1, 4, 1, 32);
    nir_build_indirect_call(b, indirect_decl, callee, 0, NULL);

View file

@@ -300,8 +300,8 @@ TEST_F(unsigned_upper_bound_test, loop_phi_bcsel)
 TEST_F(ssa_def_bits_used_test, ubfe_ibfe)
 {
-   nir_def *load1 = nir_load_global(b, nir_undef(b, 1, 64), 4, 1, 32);
-   nir_def *load2 = nir_load_global(b, nir_undef(b, 1, 64), 4, 1, 32);
+   nir_def *load1 = nir_load_global(b, 1, 32, nir_undef(b, 1, 64));
+   nir_def *load2 = nir_load_global(b, 1, 32, nir_undef(b, 1, 64));
    nir_def *alu1 = nir_ubfe_imm(b, load1, 14, 3);
    nir_def *alu2 = nir_ibfe_imm(b, load2, 12, 7);
@@ -315,7 +315,7 @@ TEST_F(ssa_def_bits_used_test, ubfe_ibfe)
 TEST_F(ssa_def_bits_used_test, ibfe_iand)
 {
-   nir_def *load = nir_load_global(b, nir_undef(b, 1, 64), 4, 1, 32);
+   nir_def *load = nir_load_global(b, 1, 32, nir_undef(b, 1, 64));
    nir_def *alu = nir_iand_imm(b, nir_ibfe_imm(b, load, 14, 3), 0x80000000);
    nir_store_global(b, nir_undef(b, 1, 64), 4, alu, 0x1);
@@ -324,7 +324,7 @@ TEST_F(ssa_def_bits_used_test, ibfe_iand)
 TEST_F(ssa_def_bits_used_test, ubfe_iand)
 {
-   nir_def *load = nir_load_global(b, nir_undef(b, 1, 64), 4, 1, 32);
+   nir_def *load = nir_load_global(b, 1, 32, nir_undef(b, 1, 64));
    nir_def *alu = nir_iand_imm(b, nir_ubfe_imm(b, load, 14, 3), 0x2);
    nir_store_global(b, nir_undef(b, 1, 64), 4, alu, 0x1);
@@ -333,8 +333,8 @@ TEST_F(ssa_def_bits_used_test, ubfe_iand)
 TEST_F(ssa_def_bits_used_test, ishr_signed)
 {
-   nir_def *load1 = nir_load_global(b, nir_undef(b, 1, 64), 4, 1, 32);
-   nir_def *load2 = nir_load_global(b, nir_undef(b, 1, 64), 4, 1, 32);
+   nir_def *load1 = nir_load_global(b, 1, 32, nir_undef(b, 1, 64));
+   nir_def *load2 = nir_load_global(b, 1, 32, nir_undef(b, 1, 64));
    nir_def *alu1 = nir_iand_imm(b, nir_ishr_imm(b, load1, 13), 0x80000000);
    nir_def *alu2 = nir_iand_imm(b, nir_ishr_imm(b, load2, 13), 0x8000);
    nir_store_global(b, nir_undef(b, 1, 64), 4, alu1, 0x1);
@@ -346,9 +346,9 @@ TEST_F(ssa_def_bits_used_test, ishr_signed)
 TEST_F(ssa_def_bits_used_test, ushr_ishr_ishl)
 {
-   nir_def *load1 = nir_load_global(b, nir_undef(b, 1, 64), 4, 1, 32);
-   nir_def *load2 = nir_load_global(b, nir_undef(b, 1, 64), 4, 1, 32);
-   nir_def *load3 = nir_load_global(b, nir_undef(b, 1, 64), 4, 1, 32);
+   nir_def *load1 = nir_load_global(b, 1, 32, nir_undef(b, 1, 64));
+   nir_def *load2 = nir_load_global(b, 1, 32, nir_undef(b, 1, 64));
+   nir_def *load3 = nir_load_global(b, 1, 32, nir_undef(b, 1, 64));
    nir_def *alu1 = nir_ushr_imm(b, load1, 7);
    nir_def *alu2 = nir_ishr_imm(b, load2, 11);
@@ -378,7 +378,7 @@ TEST_F(ssa_def_bits_used_test, u2u_i2i_iand)
    nir_def *load[ARRAY_SIZE(ops)];
    for (unsigned i = 0; i < ARRAY_SIZE(ops); i++) {
-      load[i] = nir_load_global(b, nir_undef(b, 1, 64), 4, 1, 64);
+      load[i] = nir_load_global(b, 1, 64, nir_undef(b, 1, 64));
       nir_def *alu = nir_iand_imm(b, ops[i](b, load[i]), 0x1020304050607080ull);
       nir_store_global(b, nir_undef(b, 1, 64), 4, alu, 0x1);
    }
@@ -404,7 +404,7 @@ TEST_F(ssa_def_bits_used_test, u2u_i2i_upcast_bits)
    nir_def *load[ARRAY_SIZE(ops)];
    for (unsigned i = 0; i < ARRAY_SIZE(ops); i++) {
-      load[i] = nir_load_global(b, nir_undef(b, 1, 64), 4, 1, 8);
+      load[i] = nir_load_global(b, 1, 8, nir_undef(b, 1, 64));
       nir_def *upcast = ops[i](b, load[i]);
       /* Using one of the sign-extended bits implies using the last bit. */
       nir_def *alu = nir_iand_imm(b, upcast, BITFIELD64_BIT(upcast->bit_size - 1));
@@ -430,7 +430,7 @@ TEST_F(ssa_def_bits_used_test, iand_ior_ishl)
    nir_def *load[ARRAY_SIZE(ops)];
    for (unsigned i = 0; i < ARRAY_SIZE(ops); i++) {
-      load[i] = nir_load_global(b, nir_undef(b, 1, 64), 4, 1, 32);
+      load[i] = nir_load_global(b, 1, 32, nir_undef(b, 1, 64));
       nir_def *alu = nir_ishl_imm(b, ops[i](b, load[i], 0x12345678), 8);
       nir_store_global(b, nir_undef(b, 1, 64), 4, alu, 0x1);
    }
@@ -441,7 +441,7 @@ TEST_F(ssa_def_bits_used_test, iand_ior_ishl)
 TEST_F(ssa_def_bits_used_test, mov_iand)
 {
-   nir_def *load = nir_load_global(b, nir_undef(b, 1, 64), 4, 1, 32);
+   nir_def *load = nir_load_global(b, 1, 32, nir_undef(b, 1, 64));
    nir_def *alu = nir_iand_imm(b, nir_mov(b, load), 0x8);
    nir_store_global(b, nir_undef(b, 1, 64), 4, alu, 0x1);
@@ -450,9 +450,9 @@ TEST_F(ssa_def_bits_used_test, mov_iand)
 TEST_F(ssa_def_bits_used_test, bcsel_iand)
 {
-   nir_def *load1 = nir_i2b(b, nir_load_global(b, nir_undef(b, 1, 64), 4, 1, 32));
-   nir_def *load2 = nir_load_global(b, nir_undef(b, 1, 64), 4, 1, 32);
-   nir_def *load3 = nir_load_global(b, nir_undef(b, 1, 64), 4, 1, 32);
+   nir_def *load1 = nir_i2b(b, nir_load_global(b, 1, 32, nir_undef(b, 1, 64)));
+   nir_def *load2 = nir_load_global(b, 1, 32, nir_undef(b, 1, 64));
+   nir_def *load3 = nir_load_global(b, 1, 32, nir_undef(b, 1, 64));
    nir_def *alu = nir_iand_imm(b, nir_bcsel(b, load1, load2, load3), 0x8);
    nir_store_global(b, nir_undef(b, 1, 64), 4, alu, 0x1);

View file

@@ -64,8 +64,9 @@ static nir_def *
 read_afbc_header(nir_builder *b, nir_def *buf, nir_def *idx)
 {
    nir_def *offset = nir_imul_imm(b, idx, AFBC_HEADER_BYTES_PER_TILE);
-   return nir_load_global(b, nir_iadd(b, buf, nir_u2u64(b, offset)), 16,
-                          AFBC_HEADER_BYTES_PER_TILE / 4, 32);
+   return nir_load_global(b, AFBC_HEADER_BYTES_PER_TILE / 4, 32,
+                          nir_iadd(b, buf, nir_u2u64(b, offset)),
+                          .align_mul = 16);
 }
 static void
@@ -131,8 +132,8 @@ get_packed_offset(nir_builder *b, nir_def *layout, nir_def *idx,
    nir_def *layout_offset =
       nir_u2u64(b, nir_imul_imm(b, idx, sizeof(struct pan_afbc_payload_extent)));
    nir_def *range_ptr = nir_iadd(b, layout, layout_offset);
-   nir_def *entry = nir_load_global(b, range_ptr, 4,
-                                    sizeof(struct pan_afbc_payload_extent) / 4, 32);
+   nir_def *entry = nir_load_global(
+      b, sizeof(struct pan_afbc_payload_extent) / 4, 32, range_ptr);
    nir_def *offset =
       nir_channel(b, entry, offsetof(struct pan_afbc_payload_extent, offset) / 4);
@@ -179,7 +180,8 @@ copy_superblock(nir_builder *b, nir_def *dst, nir_def *hdr_sz, nir_def *src,
       nir_def *dst_line = nir_iadd(b, dst_bodyptr, nir_u2u64(b, offset));
       nir_store_global(
          b, dst_line, line_sz,
-         nir_load_global(b, src_line, line_sz, line_sz / 4, 32), ~0);
+         nir_load_global(b, line_sz / 4, 32, src_line, .align_mul = line_sz),
+         ~0);
       offset = nir_iadd_imm(b, offset, line_sz);
    }
    nir_store_var(b, offset_var, offset, 0x1);

View file

@@ -139,7 +139,7 @@ build_accept_ray(nir_builder *b)
    nir_def *ray_addr = brw_nir_rt_mem_hit_addr(b, false /* committed */);
    nir_def *flags_dw_addr = nir_iadd_imm(b, ray_addr, 12);
    nir_store_global(b, flags_dw_addr, 4,
-                    nir_ior(b, nir_load_global(b, flags_dw_addr, 4, 1, 32),
+                    nir_ior(b, nir_load_global(b, 1, 32, flags_dw_addr),
                             nir_imm_int(b, 1 << 16)), 0x1 /* write_mask */);
    nir_accept_ray_intersection(b);

View file

@@ -427,8 +427,7 @@ lower_ray_query_intrinsic(nir_builder *b,
    case nir_ray_query_value_intersection_geometry_index: {
       nir_def *geometry_index_dw =
-         nir_load_global(b, nir_iadd_imm(b, hit_in.prim_leaf_ptr, 4), 4,
-                         1, 32);
+         nir_load_global(b, 1, 32, nir_iadd_imm(b, hit_in.prim_leaf_ptr, 4));
       sysval = nir_iand_imm(b, geometry_index_dw, BITFIELD_MASK(24));
       break;
    }

View file

@@ -76,7 +76,7 @@ lower_rt_intrinsics_impl(nir_function_impl *impl,
    brw_nir_rt_load_globals(b, &globals, devinfo);
    nir_def *hotzone_addr = brw_nir_rt_sw_hotzone_addr(b, devinfo);
-   nir_def *hotzone = nir_load_global(b, hotzone_addr, 16, 4, 32);
+   nir_def *hotzone = nir_load_global(b, 4, 32, hotzone_addr, .align_mul = 16);
    mesa_shader_stage stage = b->shader->info.stage;
    struct brw_nir_rt_mem_ray_defs world_ray_in = {};
@@ -280,8 +280,7 @@ lower_rt_intrinsics_impl(nir_function_impl *impl,
    case nir_intrinsic_load_ray_geometry_index: {
       nir_def *geometry_index_dw =
-         nir_load_global(b, nir_iadd_imm(b, hit_in.prim_leaf_ptr, 4), 4,
-                         1, 32);
+         nir_load_global(b, 1, 32, nir_iadd_imm(b, hit_in.prim_leaf_ptr, 4));
       sysval = nir_iand_imm(b, geometry_index_dw, BITFIELD_MASK(24));
       break;
    }
@@ -369,8 +368,7 @@ lower_rt_intrinsics_impl(nir_function_impl *impl,
          sysval = hit_in.front_face;
       } else {
          nir_def *flags_dw =
-            nir_load_global(b, nir_iadd_imm(b, hit_in.prim_leaf_ptr, 4), 4,
-                            1, 32);
+            nir_load_global(b, 1, 32, nir_iadd_imm(b, hit_in.prim_leaf_ptr, 4));
          sysval = nir_i2b(b, nir_iand_imm(b, flags_dw, 1u << 30));
       }
       break;

View file

@@ -1144,17 +1144,15 @@ brw_nir_rt_load_primitive_id_from_hit(nir_builder *b,
       /* For procedural leafs, the index is in dw[3]. */
       nir_def *offset =
          nir_iadd_imm(b, nir_ishl_imm(b, defs->prim_leaf_index, 2), 12);
-      prim_id_proc = nir_load_global(b, nir_iadd(b, defs->prim_leaf_ptr,
-                                                 nir_u2u64(b, offset)),
-                                     4, /* align */ 1, 32);
+      prim_id_proc = nir_load_global(b, 1, 32, nir_iadd(b, defs->prim_leaf_ptr,
+                                                        nir_u2u64(b, offset)));
    }
    nir_push_else(b, NULL);
    {
       /* For quad leafs, the index is dw[2] and there is a 16bit additional
        * offset in dw[3].
        */
-      prim_id_quad = nir_load_global(b, nir_iadd_imm(b, defs->prim_leaf_ptr, 8),
-                                     4, /* align */ 1, 32);
+      prim_id_quad = nir_load_global(b, 1, 32, nir_iadd_imm(b, defs->prim_leaf_ptr, 8));
       prim_id_quad = nir_iadd(b,
                               prim_id_quad,
                               defs->prim_index_delta);

View file

@@ -114,7 +114,7 @@ build_exec_set_addr(nir_builder *b, struct process_cmd_in *in, nir_def *idx)
 static nir_def *
 load_global_dw(nir_builder *b, nir_def *addr, uint32_t offset_dw)
 {
-   return nir_load_global(b, nir_iadd_imm(b, addr, offset_dw * 4), 4, 1, 32);
+   return nir_load_global(b, 1, 32, nir_iadd_imm(b, addr, offset_dw * 4));
 }
 static void
static void

View file

@@ -237,7 +237,7 @@ midgard_nir_lower_global_load_instr(nir_builder *b, nir_intrinsic_instr *intr,
       nir_def *load;
       if (intr->intrinsic == nir_intrinsic_load_global) {
-         load = nir_load_global(b, addr, compsz / 8, loadncomps, compsz);
+         load = nir_load_global(b, loadncomps, compsz, addr);
       } else {
          assert(intr->intrinsic == nir_intrinsic_load_shared);
          nir_intrinsic_instr *shared_load =

View file

@@ -73,7 +73,7 @@ pan_lower_sample_pos_impl(struct nir_builder *b, nir_intrinsic_instr *intr,
                nir_u2u64(b, nir_imul_imm(b, nir_load_sample_id(b), 4)));
    /* Decode 8:8 fixed-point */
-   nir_def *raw = nir_load_global(b, addr, 2, 2, 16);
+   nir_def *raw = nir_load_global(b, 2, 16, addr);
    nir_def *decoded = nir_fmul_imm(b, nir_i2f16(b, raw), 1.0 / 256.0);
    /* Make NIR validator happy */

View file

@@ -100,8 +100,9 @@ set_to_table_copy(nir_builder *b, nir_def *set_ptr, nir_def *set_desc_count,
 {
    nir_def *src_offset =
       nir_u2u64(b, nir_imul_imm(b, src_desc_idx, PANVK_DESCRIPTOR_SIZE));
-   nir_def *desc = nir_load_global(b, nir_iadd(b, set_ptr, src_offset),
-                                   element_size, element_size / 4, 32);
+   nir_def *desc = nir_load_global(b, element_size / 4, 32,
+                                   nir_iadd(b, set_ptr, src_offset),
+                                   .align_mul = element_size);
    nir_store_global(b, nir_iadd(b, table_ptr, dst_offset), element_size,
                     desc, ~0);
 }
@@ -143,8 +144,9 @@ set_to_table_img_copy(nir_builder *b, nir_def *set_ptr, nir_def *set_desc_count,
       get_input_field(b, desc_copy.attrib_buf_idx_offset);
    nir_def *src_offset =
       nir_u2u64(b, nir_imul_imm(b, src_desc_idx, PANVK_DESCRIPTOR_SIZE));
-   nir_def *src_desc = nir_load_global(b, nir_iadd(b, set_ptr, src_offset),
-                                       element_size, element_size / 4, 32);
+   nir_def *src_desc = nir_load_global(b, element_size / 4, 32,
+                                       nir_iadd(b, set_ptr, src_offset),
+                                       .align_mul = element_size);
    nir_def *fmt = nir_iand_imm(b, nir_channel(b, src_desc, 2), 0xfffffc00);
    /* Each image descriptor takes two attribute buffer slots, and we need
@@ -198,7 +200,7 @@ single_desc_copy(nir_builder *b, nir_def *desc_copy_idx)
    nir_def *desc_copy_offset = nir_imul_imm(b, desc_copy_idx, sizeof(uint32_t));
    nir_def *desc_copy_ptr = nir_iadd(b, get_input_field(b, desc_copy.table),
                                      nir_u2u64(b, desc_copy_offset));
-   nir_def *src_copy_handle = nir_load_global(b, desc_copy_ptr, 4, 1, 32);
+   nir_def *src_copy_handle = nir_load_global(b, 1, 32, desc_copy_ptr);
    nir_def *set_idx, *src_desc_idx;
    extract_desc_info_from_handle(b, src_copy_handle, &set_idx, &src_desc_idx);

View file

@@ -412,8 +412,8 @@ build_buffer_addr_for_res_index(nir_builder *b, nir_def *res_index,
                : load_sysval_entry(b, graphics, 64, desc.sets, desc_table_index);
    nir_def *desc_addr = nir_iadd(b, base_addr, nir_u2u64(b, desc_offset));
-   nir_def *desc =
-      nir_load_global(b, desc_addr, PANVK_DESCRIPTOR_SIZE, 4, 32);
+   nir_def *desc = nir_load_global(b, 4, 32, desc_addr,
+                                   .align_mul = PANVK_DESCRIPTOR_SIZE);
    /* The offset in the descriptor is guaranteed to be zero when it's
     * written into the descriptor set. This lets us avoid some unnecessary
@@ -554,9 +554,9 @@ load_resource_deref_desc(nir_builder *b, nir_deref_instr *deref,
    unsigned desc_align = 1 << (ffs(PANVK_DESCRIPTOR_SIZE + desc_offset) - 1);
-   return nir_load_global(b,
+   return nir_load_global(b, num_components, bit_size,
       nir_iadd(b, set_base_addr, nir_u2u64(b, set_offset)),
-      desc_align, num_components, bit_size);
+      .align_mul = desc_align);
 #else
    /* note that user sets start from index 1 */
    return nir_load_ubo(

View file

@@ -651,9 +651,9 @@ move_push_constant(struct nir_builder *b, nir_intrinsic_instr *intr, void *data)
    /* We assume an alignment of 64-bit max for packed push-constants. */
    align = MIN2(align, FAU_WORD_SIZE);
-   nir_def *value =
-      nir_load_global(b, nir_iadd(b, push_const_buf, nir_u2u64(b, offset)),
-                      align, intr->def.num_components, intr->def.bit_size);
+   nir_def *value = nir_load_global(
+      b, intr->def.num_components, intr->def.bit_size,
+      nir_iadd(b, push_const_buf, nir_u2u64(b, offset)), .align_mul = align);
    nir_def_replace(&intr->def, value);
 }
}

View file

@@ -126,14 +126,14 @@ lower_tcs_impl(nir_builder *b, nir_intrinsic_instr *intr)
    case nir_intrinsic_load_output: {
       nir_def *addr = tcs_out_addr(b, intr, nir_undef(b, 1, 32));
-      return nir_load_global(b, addr, 4, intr->def.num_components,
-                             intr->def.bit_size);
+      return nir_load_global(b, intr->def.num_components, intr->def.bit_size,
+                             addr, .align_mul = 4);
    }
    case nir_intrinsic_load_per_vertex_output: {
       nir_def *addr = tcs_out_addr(b, intr, intr->src[0].ssa);
-      return nir_load_global(b, addr, 4, intr->def.num_components,
-                             intr->def.bit_size);
+      return nir_load_global(b, intr->def.num_components, intr->def.bit_size,
+                             addr, .align_mul = 4);
    }
    case nir_intrinsic_store_output: {