From da80122257e19e4d3d26c16911b2da25a36bc249 Mon Sep 17 00:00:00 2001 From: Caio Oliveira Date: Thu, 8 Jan 2026 09:40:43 -0800 Subject: [PATCH] brw: Include backend NIR passes in mda files Add a pass tracker struct that can live the whole lifetime of brw_compile() functions, it will keep track of the debug_archiver and also store some metadata that allow us to name the passes. With that, we can also embed the loop tracking in the same struct, so that is free for any loop to use the "early break" optimization. There are other brw_nir_* passes that are called in the pre-processing phase. These are not currently included in the mda yet. Will be handled when we hook debug_archiver or similar to the runtime/driver. Reviewed-by: Lionel Landwerlin Reviewed-by: Kenneth Graunke Part-of: --- src/intel/compiler/brw/brw_compile_bs.cpp | 14 +- src/intel/compiler/brw/brw_compile_cs.cpp | 31 ++-- src/intel/compiler/brw/brw_compile_fs.cpp | 57 ++++--- src/intel/compiler/brw/brw_compile_gs.cpp | 20 ++- src/intel/compiler/brw/brw_compile_mesh.cpp | 130 +++++++++------- src/intel/compiler/brw/brw_compile_tcs.cpp | 21 ++- src/intel/compiler/brw/brw_compile_tes.cpp | 22 ++- src/intel/compiler/brw/brw_compile_vs.cpp | 15 +- src/intel/compiler/brw/brw_nir.c | 160 +++++++++----------- src/intel/compiler/brw/brw_nir.h | 27 ++-- src/intel/compiler/brw/brw_nir_rt.c | 8 +- src/intel/compiler/brw/brw_private.h | 77 +++++++++- src/intel/compiler/brw/brw_shader.cpp | 19 +-- 13 files changed, 361 insertions(+), 240 deletions(-) diff --git a/src/intel/compiler/brw/brw_compile_bs.cpp b/src/intel/compiler/brw/brw_compile_bs.cpp index f279d9c2a77..2e92357019f 100644 --- a/src/intel/compiler/brw/brw_compile_bs.cpp +++ b/src/intel/compiler/brw/brw_compile_bs.cpp @@ -83,13 +83,17 @@ compile_single_bs(const struct brw_compiler *compiler, */ const unsigned required_width = compiler->devinfo->ver >= 20 ? 16u : 8u; - brw_nir_apply_key(shader, compiler, &key->base, required_width); + brw_pass_tracker pt_ = { + .nir = shader, + .dispatch_width = required_width, + .compiler = compiler, + .archiver = params->base.archiver, + }, *pt = &pt_; - brw_debug_archive_nir(params->base.archiver, shader, required_width, "first"); + BRW_NIR_SNAPSHOT("first"); + brw_nir_apply_key(pt, &key->base, required_width); - brw_postprocess_nir(shader, compiler, required_width, - params->base.archiver, debug_enabled, - key->base.robust_flags); + brw_postprocess_nir(pt, debug_enabled, key->base.robust_flags); const brw_shader_params shader_params = { .compiler = compiler, diff --git a/src/intel/compiler/brw/brw_compile_cs.cpp b/src/intel/compiler/brw/brw_compile_cs.cpp index 4325d4335b4..5ea2f1ce115 100644 --- a/src/intel/compiler/brw/brw_compile_cs.cpp +++ b/src/intel/compiler/brw/brw_compile_cs.cpp @@ -141,7 +141,15 @@ brw_compile_cs(const struct brw_compiler *compiler, prog_data->local_size[2] = nir->info.workgroup_size[2]; } - brw_postprocess_nir_opts(nir, compiler, key->base.robust_flags); + brw_pass_tracker pt_ = { + .nir = nir, + .dispatch_width = 0, + .compiler = compiler, + .archiver = params->base.archiver, + }, *pt = &pt_; + + BRW_NIR_SNAPSHOT("first"); + brw_postprocess_nir_opts(pt, key->base.robust_flags); brw_simd_selection_state simd_state{ .devinfo = compiler->devinfo, @@ -170,18 +178,23 @@ brw_compile_cs(const struct brw_compiler *compiler, const unsigned dispatch_width = 8u << simd; nir_shader *shader = nir_shader_clone(params->base.mem_ctx, nir); - brw_debug_archive_nir(params->base.archiver, shader, dispatch_width, "first"); - brw_nir_apply_key(shader, compiler, &key->base, - dispatch_width); + pt_ = { + .nir = shader, + .dispatch_width = dispatch_width, + .compiler = compiler, + .archiver = params->base.archiver, + }; - NIR_PASS(_, shader, brw_nir_lower_simd, dispatch_width); + BRW_NIR_SNAPSHOT("first"); + brw_nir_apply_key(pt, &key->base, dispatch_width); - brw_nir_optimize(shader, devinfo); + BRW_NIR_PASS(brw_nir_lower_simd, dispatch_width); + + brw_nir_optimize(pt); /* brw_nir_optimize undoes late lowerings. */ - NIR_PASS(_, shader, nir_opt_algebraic_late); - brw_postprocess_nir_out_of_ssa(shader, dispatch_width, - params->base.archiver, debug_enabled); + BRW_NIR_PASS(nir_opt_algebraic_late); + brw_postprocess_nir_out_of_ssa(pt, debug_enabled); const brw_shader_params shader_params = { .compiler = compiler, diff --git a/src/intel/compiler/brw/brw_compile_fs.cpp b/src/intel/compiler/brw/brw_compile_fs.cpp index eb2f3c27935..61c0320db21 100644 --- a/src/intel/compiler/brw/brw_compile_fs.cpp +++ b/src/intel/compiler/brw/brw_compile_fs.cpp @@ -1423,17 +1423,16 @@ brw_print_fs_urb_setup(FILE *fp, const struct brw_wm_prog_data *prog_data, } static void -brw_nir_cleanup_pre_wm_prog_data(nir_shader *nir) +brw_nir_cleanup_pre_wm_prog_data(brw_pass_tracker *pt) { - bool progress; do { - progress = false; - NIR_PASS(progress, nir, nir_opt_algebraic); - NIR_PASS(progress, nir, nir_opt_copy_prop); - NIR_PASS(progress, nir, nir_opt_constant_folding); - NIR_PASS(progress, nir, nir_opt_dce); - NIR_PASS(progress, nir, nir_opt_cse); - } while (progress); + pt->progress = false; + BRW_NIR_PASS(nir_opt_algebraic); + BRW_NIR_PASS(nir_opt_copy_prop); + BRW_NIR_PASS(nir_opt_constant_folding); + BRW_NIR_PASS(nir_opt_dce); + BRW_NIR_PASS(nir_opt_cse); + } while (pt->progress); } const unsigned * @@ -1455,17 +1454,24 @@ brw_compile_fs(const struct brw_compiler *compiler, const unsigned max_subgroup_size = 32; unsigned max_polygons = MAX2(1, params->max_polygons); - brw_debug_archive_nir(params->base.archiver, nir, 0, "first"); + brw_pass_tracker pt_ = { + .nir = nir, + .dispatch_width = 0, + .compiler = compiler, + .archiver = params->base.archiver, + }, *pt = &pt_; - brw_nir_apply_key(nir, compiler, &key->base, max_subgroup_size); + BRW_NIR_SNAPSHOT("first"); + + brw_nir_apply_key(pt, &key->base, max_subgroup_size); if (brw_nir_fragment_shader_needs_wa_18019110168(devinfo, key->mesh_input, nir)) { if (params->mue_map && params->mue_map->wa_18019110168_active) { brw_nir_frag_convert_attrs_prim_to_vert( nir, params->mue_map->per_primitive_offsets); } else { - NIR_PASS(_, nir, brw_nir_frag_convert_attrs_prim_to_vert_indirect, - devinfo, params); + BRW_NIR_PASS(brw_nir_frag_convert_attrs_prim_to_vert_indirect, + devinfo, params); } /* Remapping per-primitive inputs into unused per-vertex inputs cannot * work with multipolygon. @@ -1476,16 +1482,18 @@ brw_compile_fs(const struct brw_compiler *compiler, brw_nir_lower_fs_inputs(nir, devinfo, key); brw_nir_lower_fs_outputs(nir); + BRW_NIR_SNAPSHOT("after_lower_io"); + if (!brw_can_coherent_fb_fetch(devinfo)) - NIR_PASS(_, nir, brw_nir_lower_fs_load_output, key); + BRW_NIR_PASS(brw_nir_lower_fs_load_output, key); /* Do this lowering before brw_nir_populate_wm_prog_data(). */ - NIR_PASS(_, nir, nir_opt_frag_coord_to_pixel_coord); - NIR_PASS(_, nir, nir_lower_frag_coord_to_pixel_coord); + BRW_NIR_PASS(nir_opt_frag_coord_to_pixel_coord); + BRW_NIR_PASS(nir_lower_frag_coord_to_pixel_coord); - NIR_PASS(_, nir, brw_nir_move_interpolation_to_top); + BRW_NIR_PASS(brw_nir_move_interpolation_to_top); - brw_nir_cleanup_pre_wm_prog_data(nir); + brw_nir_cleanup_pre_wm_prog_data(pt); int per_primitive_offsets[VARYING_SLOT_MAX]; memset(per_primitive_offsets, -1, sizeof(per_primitive_offsets)); @@ -1503,12 +1511,12 @@ brw_compile_fs(const struct brw_compiler *compiler, * offset to determine render target 0 store instruction in * emit_alpha_to_coverage pass. */ - NIR_PASS(_, nir, nir_opt_constant_folding); - NIR_PASS(_, nir, brw_nir_lower_alpha_to_coverage); + BRW_NIR_PASS(nir_opt_constant_folding); + BRW_NIR_PASS(brw_nir_lower_alpha_to_coverage); } if (prog_data->coarse_pixel_dispatch != INTEL_NEVER) - NIR_PASS(_, nir, brw_nir_lower_frag_coord_z, devinfo); + BRW_NIR_PASS(brw_nir_lower_frag_coord_z, devinfo); if (!brw_wm_prog_key_is_dynamic(key)) { uint32_t f = 0; @@ -1530,10 +1538,10 @@ brw_compile_fs(const struct brw_compiler *compiler, if (prog_data->coarse_pixel_dispatch == INTEL_ALWAYS) f |= INTEL_MSAA_FLAG_COARSE_RT_WRITES; - NIR_PASS(_, nir, nir_inline_sysval, nir_intrinsic_load_fs_msaa_intel, f); + BRW_NIR_PASS(nir_inline_sysval, nir_intrinsic_load_fs_msaa_intel, f); } - brw_postprocess_nir_opts(nir, compiler, key->base.robust_flags); + brw_postprocess_nir_opts(pt, key->base.robust_flags); unsigned pressure[SIMD_COUNT]; brw_nir_quick_pressure_estimate(nir, devinfo, pressure); @@ -1544,8 +1552,7 @@ brw_compile_fs(const struct brw_compiler *compiler, pressure[i] > compiler->register_pressure_threshold; } - brw_postprocess_nir_out_of_ssa(nir, 0, params->base.archiver, - debug_enabled); + brw_postprocess_nir_out_of_ssa(pt, debug_enabled); if (unlikely(debug_enabled)) brw_print_fs_urb_setup(stderr, prog_data, per_primitive_offsets); diff --git a/src/intel/compiler/brw/brw_compile_gs.cpp b/src/intel/compiler/brw/brw_compile_gs.cpp index 3feb982dd66..b9c79742fa6 100644 --- a/src/intel/compiler/brw/brw_compile_gs.cpp +++ b/src/intel/compiler/brw/brw_compile_gs.cpp @@ -148,7 +148,14 @@ brw_compile_gs(const struct brw_compiler *compiler, const bool debug_enabled = brw_should_print_shader(nir, DEBUG_GS, params->base.source_hash); - brw_debug_archive_nir(params->base.archiver, nir, dispatch_width, "first"); + brw_pass_tracker pt_ = { + .nir = nir, + .dispatch_width = dispatch_width, + .compiler = compiler, + .archiver = params->base.archiver, + }, *pt = &pt_; + + BRW_NIR_SNAPSHOT("first"); brw_prog_data_init(&prog_data->base.base, ¶ms->base); @@ -175,14 +182,15 @@ brw_compile_gs(const struct brw_compiler *compiler, key->base.vue_layout, pos_slots); - brw_nir_apply_key(nir, compiler, &key->base, dispatch_width); + brw_nir_apply_key(pt, &key->base, dispatch_width); brw_nir_lower_gs_inputs(nir, compiler->devinfo, &input_vue_map, &prog_data->base.urb_read_length); brw_nir_lower_vue_outputs(nir); - brw_nir_opt_vectorize_urb(nir, compiler->devinfo); - brw_postprocess_nir(nir, compiler, dispatch_width, - params->base.archiver, debug_enabled, - key->base.robust_flags); + + BRW_NIR_SNAPSHOT("after_lower_io"); + + brw_nir_opt_vectorize_urb(pt); + brw_postprocess_nir(pt, debug_enabled, key->base.robust_flags); prog_data->include_primitive_id = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID); diff --git a/src/intel/compiler/brw/brw_compile_mesh.cpp b/src/intel/compiler/brw/brw_compile_mesh.cpp index 5379316c96a..38510232c40 100644 --- a/src/intel/compiler/brw/brw_compile_mesh.cpp +++ b/src/intel/compiler/brw/brw_compile_mesh.cpp @@ -90,21 +90,23 @@ brw_nir_lower_launch_mesh_workgroups(nir_shader *nir) #define BRW_PER_TASK_DATA_START_DW 8 static void -brw_nir_lower_tue_outputs(nir_shader *nir, brw_tue_map *map) +brw_nir_lower_tue_outputs(brw_pass_tracker *pt, brw_tue_map *map) { + nir_shader *nir = pt->nir; + memset(map, 0, sizeof(*map)); - NIR_PASS(_, nir, nir_lower_io, nir_var_shader_out, - type_size_scalar_dwords, nir_lower_io_lower_64bit_to_32); + BRW_NIR_PASS(nir_lower_io, nir_var_shader_out, + type_size_scalar_dwords, nir_lower_io_lower_64bit_to_32); /* Lowering to explicit types will start offsets from task_payload_size, so * set it to start after the header. */ nir->info.task_payload_size = BRW_PER_TASK_DATA_START_DW * 4; - NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, - nir_var_mem_task_payload, shared_type_info); - NIR_PASS(_, nir, nir_lower_explicit_io, - nir_var_mem_task_payload, nir_address_format_32bit_offset); + BRW_NIR_PASS(nir_lower_vars_to_explicit_types, + nir_var_mem_task_payload, shared_type_info); + BRW_NIR_PASS(nir_lower_explicit_io, + nir_var_mem_task_payload, nir_address_format_32bit_offset); map->size_dw = align(DIV_ROUND_UP(nir->info.task_payload_size, 4), 8); } @@ -279,11 +281,18 @@ brw_compile_task(const struct brw_compiler *compiler, struct brw_task_prog_data *prog_data = params->prog_data; const bool debug_enabled = brw_should_print_shader(nir, DEBUG_TASK, params->base.source_hash); - brw_debug_archive_nir(params->base.archiver, nir, 0, "first"); + brw_pass_tracker pt_ = { + .nir = nir, + .dispatch_width = 0, + .compiler = compiler, + .archiver = params->base.archiver, + }, *pt = &pt_; - brw_nir_lower_tue_outputs(nir, &prog_data->map); + BRW_NIR_SNAPSHOT("first"); - NIR_PASS(_, nir, brw_nir_align_launch_mesh_workgroups); + brw_nir_lower_tue_outputs(pt, &prog_data->map); + + BRW_NIR_PASS(brw_nir_align_launch_mesh_workgroups); nir_lower_task_shader_options lower_ts_opt = { .payload_to_shared_for_atomics = true, @@ -293,9 +302,9 @@ brw_compile_task(const struct brw_compiler *compiler, */ .payload_offset_in_bytes = BRW_PER_TASK_DATA_START_DW * 4, }; - NIR_PASS(_, nir, nir_lower_task_shader, lower_ts_opt); + BRW_NIR_PASS(nir_lower_task_shader, lower_ts_opt); - NIR_PASS(_, nir, brw_nir_lower_launch_mesh_workgroups); + BRW_NIR_PASS(brw_nir_lower_launch_mesh_workgroups); NIR_PASS(_, nir, brw_nir_lower_cs_intrinsics, compiler->devinfo, NULL); @@ -312,7 +321,7 @@ brw_compile_task(const struct brw_compiler *compiler, prog_data->base.uses_inline_data = brw_nir_uses_inline_data(nir) || key->base.uses_inline_push_addr; - brw_postprocess_nir_opts(nir, compiler, key->base.robust_flags); + brw_postprocess_nir_opts(pt, key->base.robust_flags); brw_simd_selection_state simd_state{ .devinfo = compiler->devinfo, @@ -320,7 +329,7 @@ brw_compile_task(const struct brw_compiler *compiler, .required_width = brw_required_dispatch_width(&nir->info), }; - brw_debug_archive_nir(params->base.archiver, nir, 0, "before-simd"); + BRW_NIR_SNAPSHOT("before_simd"); unsigned pressure[SIMD_COUNT]; brw_nir_quick_pressure_estimate(nir, devinfo, pressure); @@ -341,17 +350,23 @@ brw_compile_task(const struct brw_compiler *compiler, const unsigned dispatch_width = 8 << simd; nir_shader *shader = nir_shader_clone(params->base.mem_ctx, nir); - brw_nir_apply_key(shader, compiler, &key->base, dispatch_width); - brw_debug_archive_nir(params->base.archiver, shader, dispatch_width, "first"); + pt_ = { + .nir = shader, + .dispatch_width = dispatch_width, + .compiler = compiler, + .archiver = params->base.archiver, + }; - NIR_PASS(_, shader, brw_nir_lower_simd, dispatch_width); + BRW_NIR_SNAPSHOT("first"); + brw_nir_apply_key(pt, &key->base, dispatch_width); - brw_nir_optimize(shader, devinfo); + BRW_NIR_PASS(brw_nir_lower_simd, dispatch_width); + + brw_nir_optimize(pt); /* brw_nir_optimize undoes late lowerings. */ - NIR_PASS(_, shader, nir_opt_algebraic_late); - brw_postprocess_nir_out_of_ssa(shader, dispatch_width, - params->base.archiver, debug_enabled); + BRW_NIR_PASS(nir_opt_algebraic_late); + brw_postprocess_nir_out_of_ssa(pt, debug_enabled); const brw_shader_params shader_params = { .compiler = compiler, @@ -416,7 +431,7 @@ brw_compile_task(const struct brw_compiler *compiler, } static void -brw_nir_lower_tue_inputs(nir_shader *nir, const brw_tue_map *map) +brw_nir_lower_tue_inputs(brw_pass_tracker *pt, const brw_tue_map *map) { /* See brw_nir_lower_tue_outputs. If a task payload is read by this shader, * task_payload_size will be used to start offsets, and that's always @@ -424,12 +439,11 @@ brw_nir_lower_tue_inputs(nir_shader *nir, const brw_tue_map *map) * We can't always use map, as it may not be present if task and mesh * shaders are not compiled together. This is possible with shader objects. */ + nir_shader *nir = pt->nir; nir->info.task_payload_size = BRW_PER_TASK_DATA_START_DW * 4; - bool progress = false; - - NIR_PASS(progress, nir, nir_lower_vars_to_explicit_types, - nir_var_mem_task_payload, shared_type_info); + bool progress = BRW_NIR_PASS(nir_lower_vars_to_explicit_types, + nir_var_mem_task_payload, shared_type_info); if (progress) { /* The types for Task Output and Mesh Input should match, so their sizes @@ -444,8 +458,8 @@ brw_nir_lower_tue_inputs(nir_shader *nir, const brw_tue_map *map) nir->info.task_payload_size = 0; } - NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_task_payload, - nir_address_format_32bit_offset); + BRW_NIR_PASS(nir_lower_explicit_io, nir_var_mem_task_payload, + nir_address_format_32bit_offset); } /* Attribute types. Flat attributes have to be a separate class because @@ -612,11 +626,11 @@ brw_print_mue_map(FILE *fp, const struct brw_mue_map *map, struct nir_shader *ni } static void -brw_nir_lower_mue_outputs(nir_shader *nir, const struct brw_mue_map *map) +brw_nir_lower_mue_outputs(brw_pass_tracker *pt, const struct brw_mue_map *map) { - NIR_PASS(_, nir, nir_lower_io, nir_var_shader_out, - type_size_vec4, - nir_lower_io_lower_64bit_to_32); + BRW_NIR_PASS(nir_lower_io, nir_var_shader_out, + type_size_vec4, + nir_lower_io_lower_64bit_to_32); } static bool @@ -965,7 +979,14 @@ brw_compile_mesh(const struct brw_compiler *compiler, struct brw_mesh_prog_data *prog_data = params->prog_data; const bool debug_enabled = brw_should_print_shader(nir, DEBUG_MESH, params->base.source_hash); - brw_debug_archive_nir(params->base.archiver, nir, 0, "first"); + brw_pass_tracker pt_ = { + .nir = nir, + .dispatch_width = 0, + .compiler = compiler, + .archiver = params->base.archiver, + }, *pt = &pt_; + + BRW_NIR_SNAPSHOT("first"); brw_prog_data_init(&prog_data->base.base, ¶ms->base); @@ -1000,23 +1021,23 @@ brw_compile_mesh(const struct brw_compiler *compiler, prog_data->uses_drawid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID); - brw_nir_lower_tue_inputs(nir, params->tue_map); + brw_nir_lower_tue_inputs(pt, params->tue_map); - NIR_PASS(_, nir, brw_nir_lower_mesh_primitive_count); - NIR_PASS(_, nir, nir_opt_dce); - NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_shader_out, NULL); + BRW_NIR_PASS(brw_nir_lower_mesh_primitive_count); + BRW_NIR_PASS(nir_opt_dce); + BRW_NIR_PASS(nir_remove_dead_variables, nir_var_shader_out, NULL); brw_compute_mue_map(compiler, nir, &prog_data->map, prog_data->index_format, key->base.vue_layout, apply_wa_18019110168 ? wa_18019110168_mapping : NULL); - brw_nir_lower_mue_outputs(nir, &prog_data->map); + brw_nir_lower_mue_outputs(pt, &prog_data->map); /* When Primitive Header is enabled, we may not generates writes to all * fields, so let's initialize everything. */ if (prog_data->map.has_per_primitive_header) - NIR_PASS(_, nir, brw_nir_initialize_mue, &prog_data->map); + BRW_NIR_PASS(brw_nir_initialize_mue, &prog_data->map); NIR_PASS(_, nir, brw_nir_lower_cs_intrinsics, compiler->devinfo, NULL); @@ -1026,7 +1047,7 @@ brw_compile_mesh(const struct brw_compiler *compiler, prog_data->base.uses_inline_data = brw_nir_uses_inline_data(nir) || key->base.uses_inline_push_addr; - brw_postprocess_nir_opts(nir, compiler, key->base.robust_flags); + brw_postprocess_nir_opts(pt, key->base.robust_flags); const struct brw_lower_urb_cb_data cb_data = { .devinfo = devinfo, @@ -1039,10 +1060,10 @@ brw_compile_mesh(const struct brw_compiler *compiler, prog_data->map.per_primitive_indices_stride, .per_primitive_byte_offsets = prog_data->map.per_primitive_offsets, }; - NIR_PASS(_, nir, brw_nir_lower_outputs_to_urb_intrinsics, &cb_data); - brw_nir_opt_vectorize_urb(nir, devinfo); + BRW_NIR_PASS(brw_nir_lower_outputs_to_urb_intrinsics, &cb_data); + brw_nir_opt_vectorize_urb(pt); struct nir_opt_offsets_options offset_options = {}; - NIR_PASS(_, nir, nir_opt_offsets, &offset_options); + BRW_NIR_PASS(nir_opt_offsets, &offset_options); brw_simd_selection_state simd_state{ .devinfo = compiler->devinfo, @@ -1052,7 +1073,7 @@ brw_compile_mesh(const struct brw_compiler *compiler, std::unique_ptr v[3]; - brw_debug_archive_nir(params->base.archiver, nir, 0, "before-simd"); + BRW_NIR_SNAPSHOT("before_simd"); for (unsigned i = 0; i < 3; i++) { const unsigned simd = devinfo->ver >= 30 ? 2 - i : i; @@ -1064,20 +1085,25 @@ brw_compile_mesh(const struct brw_compiler *compiler, nir_shader *shader = nir_shader_clone(params->base.mem_ctx, nir); - brw_debug_archive_nir(params->base.archiver, shader, dispatch_width, "first"); + pt_ = { + .nir = shader, + .dispatch_width = dispatch_width, + .compiler = compiler, + .archiver = params->base.archiver, + }; - brw_nir_apply_key(shader, compiler, &key->base, dispatch_width); + BRW_NIR_SNAPSHOT("first"); + brw_nir_apply_key(pt, &key->base, dispatch_width); /* Load uniforms can do a better job for constants, so fold before it. */ - NIR_PASS(_, shader, nir_opt_constant_folding); + BRW_NIR_PASS(nir_opt_constant_folding); - NIR_PASS(_, shader, brw_nir_lower_simd, dispatch_width); + BRW_NIR_PASS(brw_nir_lower_simd, dispatch_width); - brw_nir_optimize(shader, devinfo); + brw_nir_optimize(pt); /* brw_nir_optimize undoes late lowerings. */ - NIR_PASS(_, shader, nir_opt_algebraic_late); - brw_postprocess_nir_out_of_ssa(shader, dispatch_width, - params->base.archiver, debug_enabled); + BRW_NIR_PASS(nir_opt_algebraic_late); + brw_postprocess_nir_out_of_ssa(pt, debug_enabled); const brw_shader_params shader_params = { .compiler = compiler, diff --git a/src/intel/compiler/brw/brw_compile_tcs.cpp b/src/intel/compiler/brw/brw_compile_tcs.cpp index fb450065f16..dd62ee40c4d 100644 --- a/src/intel/compiler/brw/brw_compile_tcs.cpp +++ b/src/intel/compiler/brw/brw_compile_tcs.cpp @@ -211,7 +211,14 @@ brw_compile_tcs(const struct brw_compiler *compiler, const bool debug_enabled = brw_should_print_shader(nir, DEBUG_TCS, params->base.source_hash); - brw_debug_archive_nir(params->base.archiver, nir, dispatch_width, "first"); + brw_pass_tracker pt_ = { + .nir = nir, + .dispatch_width = dispatch_width, + .compiler = compiler, + .archiver = params->base.archiver, + }, *pt = &pt_; + + BRW_NIR_SNAPSHOT("first"); brw_prog_data_init(&prog_data->base.base, ¶ms->base); @@ -229,16 +236,16 @@ brw_compile_tcs(const struct brw_compiler *compiler, nir->info.patch_outputs_written, key->separate_tess_vue_layout); - brw_nir_apply_key(nir, compiler, &key->base, dispatch_width); + brw_nir_apply_key(pt, &key->base, dispatch_width); brw_nir_lower_tcs_inputs(nir, devinfo, &input_vue_map); brw_nir_lower_tcs_outputs(nir, devinfo, &vue_prog_data->vue_map, key->_tes_primitive_mode); - brw_nir_opt_vectorize_urb(nir, devinfo); - intel_nir_lower_patch_vertices_in(nir, key->input_vertices); + BRW_NIR_SNAPSHOT("after_lower_io"); - brw_postprocess_nir(nir, compiler, dispatch_width, - params->base.archiver, debug_enabled, - key->base.robust_flags); + brw_nir_opt_vectorize_urb(pt); + BRW_NIR_PASS(intel_nir_lower_patch_vertices_in, key->input_vertices); + + brw_postprocess_nir(pt, debug_enabled, key->base.robust_flags); bool has_primitive_id = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID); diff --git a/src/intel/compiler/brw/brw_compile_tes.cpp b/src/intel/compiler/brw/brw_compile_tes.cpp index 21c52a98f3b..2ed998fb29f 100644 --- a/src/intel/compiler/brw/brw_compile_tes.cpp +++ b/src/intel/compiler/brw/brw_compile_tes.cpp @@ -88,7 +88,14 @@ brw_compile_tes(const struct brw_compiler *compiler, const bool debug_enabled = brw_should_print_shader(nir, DEBUG_TES, params->base.source_hash); - brw_debug_archive_nir(params->base.archiver, nir, dispatch_width, "first"); + brw_pass_tracker pt_ = { + .nir = nir, + .dispatch_width = dispatch_width, + .compiler = compiler, + .archiver = params->base.archiver, + }, *pt = &pt_; + + BRW_NIR_SNAPSHOT("first"); brw_prog_data_init(&prog_data->base.base, ¶ms->base); @@ -104,14 +111,15 @@ brw_compile_tes(const struct brw_compiler *compiler, nir->info.patch_inputs_read, key->separate_tess_vue_layout); } - - brw_nir_apply_key(nir, compiler, &key->base, dispatch_width); + brw_nir_apply_key(pt, &key->base, dispatch_width); brw_nir_lower_tes_inputs(nir, devinfo, &input_vue_map); brw_nir_lower_vue_outputs(nir); - brw_nir_opt_vectorize_urb(nir, devinfo); - NIR_PASS(_, nir, intel_nir_lower_patch_vertices_tes); - brw_postprocess_nir(nir, compiler, dispatch_width, params->base.archiver, - debug_enabled, key->base.robust_flags); + BRW_NIR_SNAPSHOT("after_lower_io"); + + brw_nir_opt_vectorize_urb(pt); + BRW_NIR_PASS(intel_nir_lower_patch_vertices_tes); + + brw_postprocess_nir(pt, debug_enabled, key->base.robust_flags); const uint32_t pos_slots = (nir->info.per_view_outputs & VARYING_BIT_POS) ? diff --git a/src/intel/compiler/brw/brw_compile_vs.cpp b/src/intel/compiler/brw/brw_compile_vs.cpp index e9bb184a4b5..526d0091ae7 100644 --- a/src/intel/compiler/brw/brw_compile_vs.cpp +++ b/src/intel/compiler/brw/brw_compile_vs.cpp @@ -253,7 +253,14 @@ brw_compile_vs(const struct brw_compiler *compiler, */ assert(!key->no_vf_slot_compaction || key->vf_component_packing); - brw_debug_archive_nir(params->base.archiver, nir, dispatch_width, "first"); + brw_pass_tracker pt_ = { + .nir = nir, + .dispatch_width = dispatch_width, + .compiler = compiler, + .archiver = params->base.archiver, + }, *pt = &pt_; + + BRW_NIR_SNAPSHOT("first"); brw_prog_data_init(&prog_data->base.base, ¶ms->base); @@ -271,7 +278,7 @@ brw_compile_vs(const struct brw_compiler *compiler, &prog_data->base.vue_map, nir->info.outputs_written, key->base.vue_layout, pos_slots); - brw_nir_apply_key(nir, compiler, &key->base, dispatch_width); + brw_nir_apply_key(pt, &key->base, dispatch_width); prog_data->inputs_read = nir->info.inputs_read; prog_data->double_inputs_read = nir->info.vs.double_inputs; @@ -279,6 +286,7 @@ brw_compile_vs(const struct brw_compiler *compiler, brw_nir_lower_vs_inputs(nir); brw_nir_lower_vue_outputs(nir); + BRW_NIR_SNAPSHOT("after_lower_io"); memset(prog_data->vf_component_packing, 0, sizeof(prog_data->vf_component_packing)); @@ -286,8 +294,7 @@ brw_compile_vs(const struct brw_compiler *compiler, if (key->vf_component_packing) nr_packed_regs = brw_nir_pack_vs_input(nir, prog_data); - brw_postprocess_nir(nir, compiler, dispatch_width, - params->base.archiver, debug_enabled, + brw_postprocess_nir(pt, debug_enabled, key->base.robust_flags); unsigned nr_attribute_slots = util_bitcount64(prog_data->inputs_read); diff --git a/src/intel/compiler/brw/brw_nir.c b/src/intel/compiler/brw/brw_nir.c index 772b50d6f2c..9c1c02bc4ce 100644 --- a/src/intel/compiler/brw/brw_nir.c +++ b/src/intel/compiler/brw/brw_nir.c @@ -816,10 +816,11 @@ lsc_urb_round_up_components(unsigned n) } void -brw_nir_opt_vectorize_urb(nir_shader *nir, - const struct intel_device_info *devinfo) +brw_nir_opt_vectorize_urb(brw_pass_tracker *pt) { - NIR_PASS(_, nir, nir_opt_cse); + const struct intel_device_info *devinfo = pt->compiler->devinfo; + + BRW_NIR_PASS(nir_opt_cse); nir_load_store_vectorize_options options = { .modes = nir_var_shader_in | nir_var_shader_out, @@ -829,7 +830,7 @@ brw_nir_opt_vectorize_urb(nir_shader *nir, devinfo->ver >= 20 ? lsc_urb_round_up_components : vec4_urb_round_up_components, }; - NIR_PASS(_, nir, nir_opt_load_store_vectorize, &options); + BRW_NIR_PASS(nir_opt_load_store_vectorize, &options); } void @@ -1423,45 +1424,19 @@ brw_nir_tag_speculative_access(nir_shader *nir) nir_metadata_all, NULL); } -#define OPT(pass, ...) ({ \ - bool this_progress = false; \ - NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \ - if (this_progress) \ - progress = true; \ - this_progress; \ -}) - -#define LOOP_OPT(pass, ...) ({ \ - const unsigned long this_line = __LINE__; \ - bool this_progress = false; \ - if (opt_line == this_line) \ - break; \ - NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \ - if (this_progress) { \ - progress = true; \ - opt_line = this_line; \ - } \ - this_progress; \ -}) - -#define LOOP_OPT_NOT_IDEMPOTENT(pass, ...) ({ \ - bool this_progress = false; \ - NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \ - if (this_progress) { \ - progress = true; \ - opt_line = 0; \ - } \ - this_progress; \ -}) +#define OPT BRW_NIR_PASS +#define LOOP_OPT BRW_NIR_LOOP_PASS +#define LOOP_OPT_NOT_IDEMPOTENT BRW_NIR_LOOP_PASS_NOT_IDEMPOTENT void -brw_nir_optimize(nir_shader *nir, - const struct intel_device_info *devinfo) +brw_nir_optimize(brw_pass_tracker *pt) { - bool progress; - unsigned long opt_line = 0; + nir_shader *nir = pt->nir; + + pass_tracker_new_loop(pt); do { - progress = false; + pass_tracker_new_iteration(pt); + /* This pass is causing problems with types used by OpenCL : * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13955 * @@ -1529,7 +1504,7 @@ brw_nir_optimize(nir_shader *nir, LOOP_OPT(nir_opt_gcm, false); LOOP_OPT(nir_opt_undef); LOOP_OPT(nir_lower_pack); - } while (progress); + } while (pt->progress); /* Workaround Gfxbench unused local sampler variable which will trigger an * assert in the opt_large_constants pass. @@ -1718,7 +1693,15 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir, const struct brw_nir_compiler_opts *opts) { const struct intel_device_info *devinfo = compiler->devinfo; - UNUSED bool progress; /* Written by OPT */ + + /* TODO: This is part of the "pre-processing" before the shader is fed to + * brw_compile_* functions, so there's no debug archiver available yet. + * In the future runtime/driver will create one for us to use here. + */ + brw_pass_tracker pt_ = { + .nir = nir, + .compiler = compiler, + }, *pt = &pt_; nir_validate_ssa_dominance(nir, "before brw_preprocess_nir"); @@ -1753,7 +1736,7 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir, if (OPT(nir_opt_memcpy)) OPT(nir_split_var_copies); - brw_nir_optimize(nir, devinfo); + brw_nir_optimize(pt); if (nir->info.ray_queries) { OPT(nir_opt_ray_queries); @@ -1853,7 +1836,7 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir, OPT(intel_nir_clamp_per_vertex_loads); /* Get rid of split copies */ - brw_nir_optimize(nir, devinfo); + brw_nir_optimize(pt); } static bool @@ -1969,8 +1952,6 @@ void brw_nir_link_shaders(const struct brw_compiler *compiler, nir_shader *producer, nir_shader *consumer) { - const struct intel_device_info *devinfo = compiler->devinfo; - if (producer->info.stage == MESA_SHADER_MESH && consumer->info.stage == MESA_SHADER_FRAGMENT) { uint64_t fs_inputs = 0, ms_outputs = 0; @@ -2009,11 +1990,19 @@ brw_nir_link_shaders(const struct brw_compiler *compiler, NIR_PASS(_, producer, nir_lower_io_vars_to_scalar, nir_var_shader_out); NIR_PASS(_, consumer, nir_lower_io_vars_to_scalar, nir_var_shader_in); - brw_nir_optimize(producer, devinfo); - brw_nir_optimize(consumer, devinfo); + + /* TODO: This is part of the "pre-processing" before the shader is fed to + * brw_compile_* functions, so there's no debug archiver available yet. + * In the future runtime/driver will create one for us to use here. + */ + brw_pass_tracker pt_producer = { .nir = producer, .compiler = compiler }; + brw_pass_tracker pt_consumer = { .nir = consumer, .compiler = compiler }; + + brw_nir_optimize(&pt_producer); + brw_nir_optimize(&pt_consumer); if (nir_link_opt_varyings(producer, consumer)) - brw_nir_optimize(consumer, devinfo); + brw_nir_optimize(&pt_consumer); NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, NULL); NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, NULL); @@ -2031,8 +2020,8 @@ brw_nir_link_shaders(const struct brw_compiler *compiler, NIR_PASS(_, producer, nir_lower_global_vars_to_local); NIR_PASS(_, consumer, nir_lower_global_vars_to_local); - brw_nir_optimize(producer, devinfo); - brw_nir_optimize(consumer, devinfo); + brw_nir_optimize(&pt_producer); + brw_nir_optimize(&pt_consumer); if (producer->info.stage == MESA_SHADER_MESH && consumer->info.stage == MESA_SHADER_FRAGMENT) { @@ -2329,11 +2318,10 @@ brw_nir_ssbo_intel(nir_shader *shader) } static void -brw_vectorize_lower_mem_access(nir_shader *nir, - const struct brw_compiler *compiler, +brw_vectorize_lower_mem_access(brw_pass_tracker *pt, enum brw_robustness_flags robust_flags) { - UNUSED bool progress = false; + const struct brw_compiler *compiler = pt->compiler; nir_load_store_vectorize_options options = { .modes = nir_var_mem_ubo | nir_var_mem_ssbo | @@ -2569,10 +2557,8 @@ flag_fused_eu_disable_instr(nir_builder *b, nir_instr *instr, void *data) } static void -brw_nir_lower_int64(nir_shader *nir, const struct intel_device_info *devinfo) +brw_nir_lower_int64(brw_pass_tracker *pt) { - UNUSED bool progress; /* Written by OPT */ - /* Potentially perform this optimization pass twice because it can create * additional opportunities for itself. */ @@ -2580,7 +2566,7 @@ brw_nir_lower_int64(nir_shader *nir, const struct intel_device_info *devinfo) OPT(nir_opt_algebraic_before_lower_int64); if (OPT(nir_lower_int64)) - brw_nir_optimize(nir, devinfo); + brw_nir_optimize(pt); } /* Prepare the given shader for codegen @@ -2589,12 +2575,12 @@ brw_nir_lower_int64(nir_shader *nir, const struct intel_device_info *devinfo) * backend and is highly backend-specific. */ void -brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler, +brw_postprocess_nir_opts(brw_pass_tracker *pt, enum brw_robustness_flags robust_flags) { + const struct brw_compiler *compiler = pt->compiler; const struct intel_device_info *devinfo = compiler->devinfo; - - UNUSED bool progress; /* Written by OPT */ + nir_shader *nir = pt->nir; const nir_lower_tex_options tex_options = { .lower_txp = ~0, @@ -2637,10 +2623,7 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler, OPT(nir_opt_combine_barriers, combine_all_memory_barriers, NULL); - do { - progress = false; - OPT(nir_opt_algebraic_before_ffma); - } while (progress); + while (OPT(nir_opt_algebraic_before_ffma)) {} OPT(nir_opt_idiv_const, 32); @@ -2665,23 +2648,23 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler, OPT(brw_nir_tag_speculative_access); - brw_nir_optimize(nir, devinfo); + brw_nir_optimize(pt); if (nir_shader_has_local_variables(nir)) { OPT(nir_lower_vars_to_explicit_types, nir_var_function_temp, glsl_get_natural_size_align_bytes); OPT(nir_lower_explicit_io, nir_var_function_temp, nir_address_format_32bit_offset); - brw_nir_optimize(nir, devinfo); + brw_nir_optimize(pt); } - brw_vectorize_lower_mem_access(nir, compiler, robust_flags); + brw_vectorize_lower_mem_access(pt, robust_flags); /* Do this after lowering memory access bit-sizes */ if (nir->info.stage == MESA_SHADER_MESH || nir->info.stage == MESA_SHADER_TASK) { OPT(lower_task_payload_to_urb_intrinsics, devinfo); - brw_nir_opt_vectorize_urb(nir, devinfo); + brw_nir_opt_vectorize_urb(pt); } /* Needs to be prior int64 lower because it generates 64bit address @@ -2689,7 +2672,7 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler, */ OPT(intel_nir_lower_printf); - brw_nir_lower_int64(nir, devinfo); + brw_nir_lower_int64(pt); /* This pass specifically looks for sequences of fmul and fadd that * intel_nir_opt_peephole_ffma will try to eliminate. Call this @@ -2736,18 +2719,17 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler, OPT(brw_nir_opt_fsat); do { - progress = false; + pt->progress = false; OPT(nir_opt_algebraic_late); - if (progress) { + if (pt->progress) { OPT(nir_opt_constant_folding); OPT(nir_opt_copy_prop); OPT(nir_opt_dce); OPT(nir_opt_cse); } - } while (progress); - + } while (pt->progress); OPT(nir_lower_fp16_casts, nir_lower_fp16_split_fp64); @@ -2791,7 +2773,7 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler, * allows the elimination of some loops over, say, a TXF instruction * with a non-uniform texture handle. */ - brw_nir_optimize(nir, devinfo); + brw_nir_optimize(pt); OPT(nir_lower_subgroups, &subgroups_options); } @@ -2801,7 +2783,7 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler, * and peephole_select may generate a 64-bit select. So do another * round at the tail end. */ - brw_nir_lower_int64(nir, devinfo); + brw_nir_lower_int64(pt); /* Deal with EU fusion */ if (devinfo->ver == 12) { @@ -2828,12 +2810,10 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler, } void -brw_postprocess_nir_out_of_ssa(nir_shader *nir, - unsigned dispatch_width, - debug_archiver *archiver, +brw_postprocess_nir_out_of_ssa(brw_pass_tracker *pt, bool debug_enabled) { - UNUSED bool progress; /* Written by OPT */ + nir_shader *nir = pt->nir; /* Run fsign lowering again after the last time brw_nir_optimize is called. * As is the case with conversion lowering (below), brw_nir_optimize can @@ -2867,10 +2847,10 @@ brw_postprocess_nir_out_of_ssa(nir_shader *nir, /* Rerun the divergence analysis before convert_from_ssa as this pass has * some assert on consistent divergence flags. */ - NIR_PASS(_, nir, nir_convert_to_lcssa, true, true); + OPT(nir_convert_to_lcssa, true, true); nir_divergence_analysis(nir); - if (unlikely(debug_enabled || archiver)) { + if (unlikely(debug_enabled || pt->archiver)) { /* Re-index SSA defs so we print more sensible numbers. */ nir_foreach_function_impl(impl, nir) { nir_index_ssa_defs(impl); @@ -2882,8 +2862,7 @@ brw_postprocess_nir_out_of_ssa(nir_shader *nir, nir_print_shader(nir, stderr); } - if (unlikely(archiver)) - brw_debug_archive_nir(archiver, nir, dispatch_width, "ssa"); + BRW_NIR_SNAPSHOT("ssa"); } OPT(nir_convert_from_ssa, true, true); @@ -2900,8 +2879,7 @@ brw_postprocess_nir_out_of_ssa(nir_shader *nir, nir_print_shader(nir, stderr); } - if (unlikely(archiver)) - brw_debug_archive_nir(archiver, nir, dispatch_width, "out"); + BRW_NIR_SNAPSHOT("out"); } static unsigned @@ -2944,12 +2922,13 @@ brw_nir_api_subgroup_size(const nir_shader *nir, } void -brw_nir_apply_key(nir_shader *nir, - const struct brw_compiler *compiler, +brw_nir_apply_key(brw_pass_tracker *pt, const struct brw_base_prog_key *key, unsigned max_subgroup_size) { - bool progress = false; + nir_shader *nir = pt->nir; + + pt->progress = false; const nir_lower_subgroups_options subgroups_options = { .subgroup_size = get_subgroup_size(&nir->info, max_subgroup_size), @@ -2962,9 +2941,8 @@ brw_nir_apply_key(nir_shader *nir, if (key->limit_trig_input_range) OPT(brw_nir_limit_trig_input_range_workaround); - if (progress) { - brw_nir_optimize(nir, compiler->devinfo); - } + if (pt->progress) + brw_nir_optimize(pt); } enum brw_conditional_mod diff --git a/src/intel/compiler/brw/brw_nir.h b/src/intel/compiler/brw/brw_nir.h index 52a140750a0..978ad981e87 100644 --- a/src/intel/compiler/brw/brw_nir.h +++ b/src/intel/compiler/brw/brw_nir.h @@ -17,6 +17,7 @@ extern "C" { #define BRW_TEX_INSTR_FUSED_EU_DISABLE (1u << 30) extern const struct nir_shader_compiler_options brw_scalar_nir_options; +struct brw_pass_tracker; void brw_fill_tess_info_from_shader_info(struct brw_tess_info *brw_info, @@ -219,8 +220,7 @@ bool brw_nir_lower_inputs_to_urb_intrinsics(nir_shader *, const struct brw_lower bool brw_nir_lower_outputs_to_urb_intrinsics(nir_shader *, const struct brw_lower_urb_cb_data *); -void brw_nir_opt_vectorize_urb(nir_shader *nir, - const struct intel_device_info *devinfo); +void brw_nir_opt_vectorize_urb(struct brw_pass_tracker *pt); void brw_nir_lower_vs_inputs(nir_shader *nir); void brw_nir_lower_gs_inputs(nir_shader *nir, @@ -285,26 +285,19 @@ bool brw_nir_lower_mem_access_bit_sizes(nir_shader *shader, bool brw_nir_lower_simd(nir_shader *nir, unsigned dispatch_width); -void brw_postprocess_nir_opts(nir_shader *nir, - const struct brw_compiler *compiler, +void brw_postprocess_nir_opts(struct brw_pass_tracker *pt, enum brw_robustness_flags robust_flags); -void brw_postprocess_nir_out_of_ssa(nir_shader *nir, - unsigned dispatch_width, - debug_archiver *archiver, +void brw_postprocess_nir_out_of_ssa(struct brw_pass_tracker *pt, bool debug_enabled); static inline void -brw_postprocess_nir(nir_shader *nir, - const struct brw_compiler *compiler, - unsigned dispatch_width, - debug_archiver *archiver, +brw_postprocess_nir(struct brw_pass_tracker *pt, bool debug_enabled, enum brw_robustness_flags robust_flags) { - brw_postprocess_nir_opts(nir, compiler, robust_flags); - brw_postprocess_nir_out_of_ssa(nir, dispatch_width, archiver, - debug_enabled); + brw_postprocess_nir_opts(pt, robust_flags); + brw_postprocess_nir_out_of_ssa(pt, debug_enabled); } bool brw_nir_apply_attribute_workarounds(nir_shader *nir, @@ -320,8 +313,7 @@ bool brw_nir_lower_fsign(nir_shader *nir); bool brw_nir_opt_fsat(nir_shader *); -void brw_nir_apply_key(nir_shader *nir, - const struct brw_compiler *compiler, +void brw_nir_apply_key(struct brw_pass_tracker *pt, const struct brw_base_prog_key *key, unsigned max_subgroup_size); @@ -349,8 +341,7 @@ void brw_nir_analyze_ubo_ranges(const struct brw_compiler *compiler, bool brw_nir_lower_ubo_ranges(nir_shader *nir, struct brw_ubo_range out_ranges[4]); -void brw_nir_optimize(nir_shader *nir, - const struct intel_device_info *devinfo); +void brw_nir_optimize(struct brw_pass_tracker *pt); nir_shader *brw_nir_create_passthrough_tcs(void *mem_ctx, const struct brw_compiler *compiler, diff --git a/src/intel/compiler/brw/brw_nir_rt.c b/src/intel/compiler/brw/brw_nir_rt.c index 4435d57a1e7..184657d5434 100644 --- a/src/intel/compiler/brw/brw_nir_rt.c +++ b/src/intel/compiler/brw/brw_nir_rt.c @@ -7,6 +7,7 @@ #include "brw_nir_rt.h" #include "brw_nir_rt_builder.h" #include "intel_nir.h" +#include "brw_private.h" static bool resize_deref(nir_builder *b, nir_deref_instr *deref, @@ -499,7 +500,12 @@ brw_nir_create_raygen_trampoline(const struct brw_compiler *compiler, NIR_PASS(_, nir, brw_nir_lower_cs_intrinsics, devinfo, NULL); - brw_nir_optimize(nir, devinfo); + brw_pass_tracker pt = { + .nir = nir, + .compiler = compiler, + }; + + brw_nir_optimize(&pt); return nir; } diff --git a/src/intel/compiler/brw/brw_private.h b/src/intel/compiler/brw/brw_private.h index 62f628ef856..398a5233130 100644 --- a/src/intel/compiler/brw/brw_private.h +++ b/src/intel/compiler/brw/brw_private.h @@ -19,15 +19,84 @@ void brw_alloc_reg_sets(struct brw_compiler *compiler); extern const char *const conditional_modifier[16]; extern const char *const pred_ctrl_align16[16]; +typedef struct brw_pass_tracker { + nir_shader *nir; + unsigned dispatch_width; + + const struct brw_compiler *compiler; + + bool progress; + + /* Filled with the last line that made progress. + * Used to perform early break in loops. + * See BRW_NIR_LOOP_PASS macros below. + */ + unsigned long opt_line; + + /* Tracking information for the debug archiver. */ + unsigned pass_num; + debug_archiver *archiver; +} brw_pass_tracker; + #ifndef NDEBUG -void brw_debug_archive_nir(debug_archiver *archiver, nir_shader *nir, - unsigned dispatch_width, const char *step); +void +brw_pass_tracker_archive(brw_pass_tracker *pt, const char *pass_name); #else static inline void -brw_debug_archive_nir(debug_archiver *archiver, nir_shader *nir, - unsigned dispatch_width, const char *step) {} +brw_pass_tracker_archive(brw_pass_tracker *pt, const char *pass_name) +{} #endif +/* To be used in conjunction to BRW_NIR_LOOP_* macros. */ +static inline void +pass_tracker_new_loop(brw_pass_tracker *pt) +{ + pt->opt_line = 0; +} + +/* To be used in conjunction to BRW_NIR_LOOP_* macros. */ +static inline void +pass_tracker_new_iteration(brw_pass_tracker *pt) +{ + pt->progress = false; +} + +#define BRW_NIR_SNAPSHOT(name) do { \ + pt->pass_num++; \ + brw_pass_tracker_archive(pt, name); \ +} while (false); + +#define BRW_NIR_PASS(pass, ...) ({ \ + pt->pass_num++; \ + bool this_progress = false; \ + NIR_PASS(this_progress, pt->nir, pass, ##__VA_ARGS__); \ + if (this_progress) { \ + pt->progress = true; \ + if (unlikely(pt->archiver)) \ + brw_pass_tracker_archive(pt, #pass); \ + } \ + this_progress; \ +}) + +#define BRW_NIR_LOOP_PASS(pass, ...) ({ \ + const unsigned long this_line = __LINE__; \ + if (pt->opt_line == this_line) { \ + pt->pass_num++; \ + break; \ + } \ + bool this_progress = BRW_NIR_PASS(pass, ##__VA_ARGS__); \ + if (this_progress) \ + pt->opt_line = this_line; \ + this_progress; \ +}) + +#define BRW_NIR_LOOP_PASS_NOT_IDEMPOTENT(pass, ...) ({ \ + bool this_progress = BRW_NIR_PASS(pass, ##__VA_ARGS__); \ + if (this_progress) \ + pt->opt_line = 0; \ + this_progress; \ +}) + #ifdef __cplusplus } #endif diff --git a/src/intel/compiler/brw/brw_shader.cpp b/src/intel/compiler/brw/brw_shader.cpp index 265357ab81d..e541a1c0585 100644 --- a/src/intel/compiler/brw/brw_shader.cpp +++ b/src/intel/compiler/brw/brw_shader.cpp @@ -1317,21 +1317,18 @@ brw_allocate_registers(brw_shader &s, bool allow_spilling) #ifndef NDEBUG void -brw_debug_archive_nir(debug_archiver *archiver, nir_shader *nir, - unsigned dispatch_width, const char *step) +brw_pass_tracker_archive(brw_pass_tracker *pt, const char *pass_name) { - if (!archiver) + if (!pt->archiver) return; - const bool prefix_dispatch_width = - dispatch_width > 0 && mesa_shader_stage_uses_workgroup(nir->info.stage); - const char *filename = prefix_dispatch_width ? - ralloc_asprintf(archiver, "NIR%d/%s", dispatch_width, step) : - ralloc_asprintf(archiver, "NIR/%s", step); + const char *filename = + ralloc_asprintf(pt->archiver, "NIR%d/%03d-%s", + pt->dispatch_width, pt->pass_num, pass_name); - FILE *f = debug_archiver_start_file(archiver, filename); - nir_print_shader(nir, f); - debug_archiver_finish_file(archiver); + FILE *f = debug_archiver_start_file(pt->archiver, filename); + nir_print_shader(pt->nir, f); + debug_archiver_finish_file(pt->archiver); } #endif