From 46d42b63da8e0e68cb134b1ce9dc14e2d6a379d2 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 7 Apr 2026 09:24:31 +0300 Subject: [PATCH] brw: add support for < 32bit io values Signed-off-by: Lionel Landwerlin Acked-by: Alyssa Rosenzweig Part-of: --- src/intel/compiler/brw/brw_compile_mesh.cpp | 10 +-- src/intel/compiler/brw/brw_nir.c | 93 +++++++++++++++++++++ src/intel/compiler/brw/brw_nir.h | 2 + 3 files changed, 96 insertions(+), 9 deletions(-) diff --git a/src/intel/compiler/brw/brw_compile_mesh.cpp b/src/intel/compiler/brw/brw_compile_mesh.cpp index 715a8ef6849..3d2077bb710 100644 --- a/src/intel/compiler/brw/brw_compile_mesh.cpp +++ b/src/intel/compiler/brw/brw_compile_mesh.cpp @@ -624,14 +624,6 @@ brw_print_mue_map(FILE *fp, const struct brw_mue_map *map, struct nir_shader *ni brw_print_vue_map(fp, &map->vue_map, MESA_SHADER_MESH); } -static void -brw_nir_lower_mue_outputs(brw_pass_tracker *pt, const struct brw_mue_map *map) -{ - BRW_NIR_PASS(nir_lower_io, nir_var_shader_out, - type_size_vec4, - nir_lower_io_lower_64bit_to_32); -} - static bool brw_nir_initialize_mue(nir_shader *nir, const struct brw_mue_map *map) { @@ -1031,7 +1023,7 @@ brw_compile_mesh(const struct brw_compiler *compiler, prog_data->index_format, key->base.vue_layout, apply_wa_18019110168 ? wa_18019110168_mapping : NULL); - brw_nir_lower_mue_outputs(pt, &prog_data->map); + brw_nir_lower_mesh_outputs(nir, &prog_data->map); /* When Primitive Header is enabled, we may not generates writes to all * fields, so let's initialize everything. diff --git a/src/intel/compiler/brw/brw_nir.c b/src/intel/compiler/brw/brw_nir.c index 545f83e139e..e38b513a4cd 100644 --- a/src/intel/compiler/brw/brw_nir.c +++ b/src/intel/compiler/brw/brw_nir.c @@ -1019,6 +1019,73 @@ brw_nir_opt_vectorize_urb(brw_pass_tracker *pt) BRW_NIR_PASS(nir_opt_load_store_vectorize, &options); } +static bool +lower_16bit_io_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *data) +{ + const nir_variable_mode *mode = data; + + switch (intrin->intrinsic) { + case nir_intrinsic_load_input: + case nir_intrinsic_load_interpolated_input: + case nir_intrinsic_load_per_primitive_input: + case nir_intrinsic_load_per_primitive_output: + case nir_intrinsic_load_per_vertex_input: + case nir_intrinsic_load_per_vertex_output: { + if ((*mode & nir_var_shader_in) == 0) + return false; + + nir_alu_type type = nir_intrinsic_dest_type(intrin); + unsigned bit_size = intrin->def.bit_size; + if (bit_size != 16) + return false; + + b->cursor = nir_after_instr(&intrin->instr); + intrin->def.bit_size = 32; + nir_intrinsic_set_dest_type(intrin, + nir_alu_type_get_base_type(type) | 32); + + nir_def *new_val = + (type & nir_type_float) ? nir_f2fN(b, &intrin->def, bit_size) : + (type & nir_type_int) ? nir_i2iN(b, &intrin->def, bit_size) : + nir_u2uN(b, &intrin->def, bit_size); + nir_def_rewrite_uses_after(&intrin->def, new_val); + return true; + } + case nir_intrinsic_store_output: + case nir_intrinsic_store_per_primitive_output: + case nir_intrinsic_store_per_vertex_output: { + if ((*mode & nir_var_shader_out) == 0) + return false; + + nir_src *src = nir_get_io_data_src(intrin); + if (src->ssa->bit_size != 16) + return false; + + b->cursor = nir_before_instr(&intrin->instr); + nir_alu_type type = nir_intrinsic_src_type(intrin); + nir_intrinsic_set_src_type( + intrin, + nir_alu_type_get_base_type(type) | 32); + nir_def *new_val = + (type & nir_type_float) ? nir_f2f32(b, src->ssa) : + (type & nir_type_int) ? nir_i2i32(b, src->ssa) : + nir_u2u32(b, src->ssa); + nir_src_rewrite(src, new_val); + return true; + } + default: + return false; + } +} + +static bool +brw_nir_lower_16bit_io(nir_shader *nir, nir_variable_mode mode) +{ + return nir_shader_intrinsics_pass(nir, lower_16bit_io_instr, + nir_metadata_control_flow, + &mode); +} + void brw_nir_lower_vs_inputs(nir_shader *nir) { @@ -1033,6 +1100,8 @@ brw_nir_lower_vs_inputs(nir_shader *nir) NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in, type_size_vec4, nir_lower_io_lower_64bit_to_32_new); + NIR_PASS(_, nir, brw_nir_lower_16bit_io, nir_var_shader_in); + /* Fold constant offset srcs for IO. */ NIR_PASS(_, nir, nir_opt_constant_folding); @@ -1171,6 +1240,8 @@ brw_nir_lower_gs_inputs(nir_shader *nir, NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in, type_size_vec4, nir_lower_io_lower_64bit_to_32); + NIR_PASS(_, nir, brw_nir_lower_16bit_io, nir_var_shader_in); + /* Fold constant offset srcs for IO. */ NIR_PASS(_, nir, nir_opt_constant_folding); @@ -1216,6 +1287,8 @@ brw_nir_lower_tes_inputs(nir_shader *nir, NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in, type_size_vec4, nir_lower_io_lower_64bit_to_32); + NIR_PASS(_, nir, brw_nir_lower_16bit_io, nir_var_shader_in); + /* Run nir_opt_constant_folding to allow update base/io_semantic::location * for the remapping pass to look into the VUE mapping. */ @@ -1443,6 +1516,9 @@ brw_nir_lower_fs_inputs(nir_shader *nir, nir_var_shader_in, type_size_vec4, nir_lower_io_lower_64bit_to_32 | nir_lower_io_use_interpolated_input_intrinsics); + + NIR_PASS(_, nir, brw_nir_lower_16bit_io, nir_var_shader_in); + if (devinfo->ver >= 11) NIR_PASS(_, nir, nir_lower_interpolation, ~0); @@ -1484,6 +1560,8 @@ brw_nir_lower_vue_outputs(nir_shader *nir) { NIR_PASS(_, nir, nir_lower_io, nir_var_shader_out, type_size_vec4, nir_lower_io_lower_64bit_to_32); + + NIR_PASS(_, nir, brw_nir_lower_16bit_io, nir_var_shader_out); } void @@ -1495,6 +1573,8 @@ brw_nir_lower_tcs_inputs(nir_shader *nir, NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in, type_size_vec4, nir_lower_io_lower_64bit_to_32); + NIR_PASS(_, nir, brw_nir_lower_16bit_io, nir_var_shader_in); + /* Fold constant offset srcs for IO. */ NIR_PASS(_, nir, nir_opt_constant_folding); @@ -1518,6 +1598,8 @@ brw_nir_lower_tcs_outputs(nir_shader *nir, NIR_PASS(_, nir, nir_lower_io, nir_var_shader_out, type_size_vec4, nir_lower_io_lower_64bit_to_32); + NIR_PASS(_, nir, brw_nir_lower_16bit_io, nir_var_shader_out); + /* Run nir_opt_constant_folding to allow update base/io_semantic::location * for the remapping pass to look into the VUE mapping. */ @@ -1534,6 +1616,16 @@ brw_nir_lower_tcs_outputs(nir_shader *nir, NIR_PASS(_, nir, brw_nir_lower_outputs_to_urb_intrinsics, &cb_data); } +void +brw_nir_lower_mesh_outputs(nir_shader *nir, + const struct brw_mue_map *map) +{ + NIR_PASS(_, nir, nir_lower_io, nir_var_shader_out, + type_size_vec4, nir_lower_io_lower_64bit_to_32); + + NIR_PASS(_, nir, brw_nir_lower_16bit_io, nir_var_shader_out); +} + void brw_nir_lower_fs_outputs(nir_shader *nir) { @@ -1544,6 +1636,7 @@ brw_nir_lower_fs_outputs(nir_shader *nir) } NIR_PASS(_, nir, nir_lower_io, nir_var_shader_out, type_size_vec4, 0); + NIR_PASS(_, nir, brw_nir_lower_16bit_io, nir_var_shader_out); nir->info.disable_output_offset_src_constant_folding = true; } diff --git a/src/intel/compiler/brw/brw_nir.h b/src/intel/compiler/brw/brw_nir.h index 456a8a0a857..e57a875d308 100644 --- a/src/intel/compiler/brw/brw_nir.h +++ b/src/intel/compiler/brw/brw_nir.h @@ -232,6 +232,8 @@ void brw_nir_lower_tcs_outputs(nir_shader *nir, const struct intel_device_info *devinfo, const struct intel_vue_map *vue, enum tess_primitive_mode tes_primitive_mode); +void brw_nir_lower_mesh_outputs(nir_shader *nir, + const struct brw_mue_map *map); void brw_nir_lower_fs_outputs(nir_shader *nir); bool brw_nir_lower_fs_load_output(nir_shader *shader, const struct brw_fs_prog_key *key);