diff --git a/src/intel/compiler/brw_from_nir.cpp b/src/intel/compiler/brw_from_nir.cpp index ab3c50e77d5..0853281aa60 100644 --- a/src/intel/compiler/brw_from_nir.cpp +++ b/src/intel/compiler/brw_from_nir.cpp @@ -2012,6 +2012,7 @@ get_nir_def(nir_to_brw_state &ntb, const nir_def &def, bool all_sources_uniform) break; case nir_intrinsic_load_uniform: + case nir_intrinsic_load_push_constant: is_scalar = get_nir_src(ntb, instr->src[0], 0).is_scalar; break; @@ -6206,7 +6207,8 @@ brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb, break; } - case nir_intrinsic_load_uniform: { + case nir_intrinsic_load_uniform: + case nir_intrinsic_load_push_constant: { /* Offsets are in bytes but they should always aligned to * the type size */ diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index 14cead50f49..bf0b2896aa0 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -2449,20 +2449,21 @@ brw_nir_create_passthrough_tcs(void *mem_ctx, const struct brw_compiler *compile } nir_def * -brw_nir_load_global_const(nir_builder *b, nir_intrinsic_instr *load_uniform, +brw_nir_load_global_const(nir_builder *b, nir_intrinsic_instr *load, nir_def *base_addr, unsigned off) { - assert(load_uniform->intrinsic == nir_intrinsic_load_uniform); + assert(load->intrinsic == nir_intrinsic_load_push_constant || + load->intrinsic == nir_intrinsic_load_uniform); - unsigned bit_size = load_uniform->def.bit_size; + unsigned bit_size = load->def.bit_size; assert(bit_size >= 8 && bit_size % 8 == 0); unsigned byte_size = bit_size / 8; nir_def *sysval; - if (nir_src_is_const(load_uniform->src[0])) { + if (nir_src_is_const(load->src[0])) { uint64_t offset = off + - nir_intrinsic_base(load_uniform) + - nir_src_as_uint(load_uniform->src[0]); + nir_intrinsic_base(load) + + nir_src_as_uint(load->src[0]); /* Things should be component-aligned. */ assert(offset % byte_size == 0); @@ -2482,14 +2483,14 @@ brw_nir_load_global_const(nir_builder *b, nir_intrinsic_instr *load_uniform, } sysval = nir_extract_bits(b, data, 2, suboffset * 8, - load_uniform->num_components, bit_size); + load->num_components, bit_size); } else { nir_def *offset32 = - nir_iadd_imm(b, load_uniform->src[0].ssa, - off + nir_intrinsic_base(load_uniform)); + nir_iadd_imm(b, load->src[0].ssa, + off + nir_intrinsic_base(load)); nir_def *addr = nir_iadd(b, base_addr, nir_u2u64(b, offset32)); sysval = nir_load_global_constant(b, addr, byte_size, - load_uniform->num_components, bit_size); + load->num_components, bit_size); } return sysval; diff --git a/src/intel/compiler/brw_nir_lower_rt_intrinsics.c b/src/intel/compiler/brw_nir_lower_rt_intrinsics.c index b6c25867dc4..f81aecd5f22 100644 --- a/src/intel/compiler/brw_nir_lower_rt_intrinsics.c +++ b/src/intel/compiler/brw_nir_lower_rt_intrinsics.c @@ -153,7 +153,8 @@ lower_rt_intrinsics_impl(nir_function_impl *impl, nir_instr_remove(instr); break; - case nir_intrinsic_load_uniform: { + case nir_intrinsic_load_uniform: + case nir_intrinsic_load_push_constant: /* We don't want to lower this in the launch trampoline. * * Also if the driver chooses to use an inline push address, we @@ -169,7 +170,6 @@ lower_rt_intrinsics_impl(nir_function_impl *impl, BRW_RT_PUSH_CONST_OFFSET); break; - } case nir_intrinsic_load_ray_launch_id: sysval = nir_channels(b, hotzone, 0xe); diff --git a/src/intel/vulkan/anv_nir.h b/src/intel/vulkan/anv_nir.h index f1469be59c5..f012db1f140 100644 --- a/src/intel/vulkan/anv_nir.h +++ b/src/intel/vulkan/anv_nir.h @@ -42,13 +42,17 @@ extern "C" { nir_imm_int(b, 0), \ .base = anv_drv_const_offset(field), \ .range = components * anv_drv_const_size(field)) +/* Use load_uniform for indexed values since load_push_constant requires that + * the offset source is dynamically uniform in the subgroup which we cannot + * guarantee. + */ #define anv_load_driver_uniform_indexed(b, components, field, idx) \ - nir_load_push_constant(b, components, \ - anv_drv_const_size(field[0]) * 8, \ - nir_imul_imm(b, idx, \ - anv_drv_const_size(field[0])), \ - .base = anv_drv_const_offset(field), \ - .range = anv_drv_const_size(field)) + nir_load_uniform(b, components, \ + anv_drv_const_size(field[0]) * 8, \ + nir_imul_imm(b, idx, \ + anv_drv_const_size(field[0])), \ + .base = anv_drv_const_offset(field), \ + .range = anv_drv_const_size(field)) diff --git a/src/intel/vulkan/anv_nir_compute_push_layout.c b/src/intel/vulkan/anv_nir_compute_push_layout.c index e2e3f0d36cc..14d858a2cd5 100644 --- a/src/intel/vulkan/anv_nir_compute_push_layout.c +++ b/src/intel/vulkan/anv_nir_compute_push_layout.c @@ -58,6 +58,7 @@ anv_nir_compute_push_layout(nir_shader *nir, has_const_ubo = true; break; + case nir_intrinsic_load_uniform: case nir_intrinsic_load_push_constant: { unsigned base = nir_intrinsic_base(intrin); unsigned range = nir_intrinsic_range(intrin); @@ -176,6 +177,7 @@ anv_nir_compute_push_layout(nir_shader *nir, nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); switch (intrin->intrinsic) { + case nir_intrinsic_load_uniform: case nir_intrinsic_load_push_constant: { /* With bindless shaders we load uniforms with SEND * messages. All the push constants are located after the @@ -185,7 +187,6 @@ anv_nir_compute_push_layout(nir_shader *nir, */ unsigned base_offset = brw_shader_stage_is_bindless(nir->info.stage) ? 0 : push_start; - intrin->intrinsic = nir_intrinsic_load_uniform; nir_intrinsic_set_base(intrin, nir_intrinsic_base(intrin) - base_offset);