diff --git a/src/intel/compiler/brw/brw_compile_bs.cpp b/src/intel/compiler/brw/brw_compile_bs.cpp index f279d9c2a77..2e92357019f 100644 --- a/src/intel/compiler/brw/brw_compile_bs.cpp +++ b/src/intel/compiler/brw/brw_compile_bs.cpp @@ -83,13 +83,17 @@ compile_single_bs(const struct brw_compiler *compiler, */ const unsigned required_width = compiler->devinfo->ver >= 20 ? 16u : 8u; - brw_nir_apply_key(shader, compiler, &key->base, required_width); + brw_pass_tracker pt_ = { + .nir = shader, + .dispatch_width = required_width, + .compiler = compiler, + .archiver = params->base.archiver, + }, *pt = &pt_; - brw_debug_archive_nir(params->base.archiver, shader, required_width, "first"); + BRW_NIR_SNAPSHOT("first"); + brw_nir_apply_key(pt, &key->base, required_width); - brw_postprocess_nir(shader, compiler, required_width, - params->base.archiver, debug_enabled, - key->base.robust_flags); + brw_postprocess_nir(pt, debug_enabled, key->base.robust_flags); const brw_shader_params shader_params = { .compiler = compiler, diff --git a/src/intel/compiler/brw/brw_compile_cs.cpp b/src/intel/compiler/brw/brw_compile_cs.cpp index 4325d4335b4..5ea2f1ce115 100644 --- a/src/intel/compiler/brw/brw_compile_cs.cpp +++ b/src/intel/compiler/brw/brw_compile_cs.cpp @@ -141,7 +141,15 @@ brw_compile_cs(const struct brw_compiler *compiler, prog_data->local_size[2] = nir->info.workgroup_size[2]; } - brw_postprocess_nir_opts(nir, compiler, key->base.robust_flags); + brw_pass_tracker pt_ = { + .nir = nir, + .dispatch_width = 0, + .compiler = compiler, + .archiver = params->base.archiver, + }, *pt = &pt_; + + BRW_NIR_SNAPSHOT("first"); + brw_postprocess_nir_opts(pt, key->base.robust_flags); brw_simd_selection_state simd_state{ .devinfo = compiler->devinfo, @@ -170,18 +178,23 @@ brw_compile_cs(const struct brw_compiler *compiler, const unsigned dispatch_width = 8u << simd; nir_shader *shader = nir_shader_clone(params->base.mem_ctx, nir); - brw_debug_archive_nir(params->base.archiver, shader, dispatch_width, "first"); - brw_nir_apply_key(shader, compiler, &key->base, - dispatch_width); + pt_ = { + .nir = shader, + .dispatch_width = dispatch_width, + .compiler = compiler, + .archiver = params->base.archiver, + }; - NIR_PASS(_, shader, brw_nir_lower_simd, dispatch_width); + BRW_NIR_SNAPSHOT("first"); + brw_nir_apply_key(pt, &key->base, dispatch_width); - brw_nir_optimize(shader, devinfo); + BRW_NIR_PASS(brw_nir_lower_simd, dispatch_width); + + brw_nir_optimize(pt); /* brw_nir_optimize undoes late lowerings. */ - NIR_PASS(_, shader, nir_opt_algebraic_late); - brw_postprocess_nir_out_of_ssa(shader, dispatch_width, - params->base.archiver, debug_enabled); + BRW_NIR_PASS(nir_opt_algebraic_late); + brw_postprocess_nir_out_of_ssa(pt, debug_enabled); const brw_shader_params shader_params = { .compiler = compiler, diff --git a/src/intel/compiler/brw/brw_compile_fs.cpp b/src/intel/compiler/brw/brw_compile_fs.cpp index eb2f3c27935..61c0320db21 100644 --- a/src/intel/compiler/brw/brw_compile_fs.cpp +++ b/src/intel/compiler/brw/brw_compile_fs.cpp @@ -1423,17 +1423,16 @@ brw_print_fs_urb_setup(FILE *fp, const struct brw_wm_prog_data *prog_data, } static void -brw_nir_cleanup_pre_wm_prog_data(nir_shader *nir) +brw_nir_cleanup_pre_wm_prog_data(brw_pass_tracker *pt) { - bool progress; do { - progress = false; - NIR_PASS(progress, nir, nir_opt_algebraic); - NIR_PASS(progress, nir, nir_opt_copy_prop); - NIR_PASS(progress, nir, nir_opt_constant_folding); - NIR_PASS(progress, nir, nir_opt_dce); - NIR_PASS(progress, nir, nir_opt_cse); - } while (progress); + pt->progress = false; + BRW_NIR_PASS(nir_opt_algebraic); + BRW_NIR_PASS(nir_opt_copy_prop); + BRW_NIR_PASS(nir_opt_constant_folding); + BRW_NIR_PASS(nir_opt_dce); + BRW_NIR_PASS(nir_opt_cse); + } while (pt->progress); } const unsigned * @@ -1455,17 +1454,24 @@ brw_compile_fs(const struct brw_compiler *compiler, const unsigned max_subgroup_size = 32; unsigned max_polygons = MAX2(1, params->max_polygons); - brw_debug_archive_nir(params->base.archiver, nir, 0, "first"); + brw_pass_tracker pt_ = { + .nir = nir, + .dispatch_width = 0, + .compiler = compiler, + .archiver = params->base.archiver, + }, *pt = &pt_; - brw_nir_apply_key(nir, compiler, &key->base, max_subgroup_size); + BRW_NIR_SNAPSHOT("first"); + + brw_nir_apply_key(pt, &key->base, max_subgroup_size); if (brw_nir_fragment_shader_needs_wa_18019110168(devinfo, key->mesh_input, nir)) { if (params->mue_map && params->mue_map->wa_18019110168_active) { brw_nir_frag_convert_attrs_prim_to_vert( nir, params->mue_map->per_primitive_offsets); } else { - NIR_PASS(_, nir, brw_nir_frag_convert_attrs_prim_to_vert_indirect, - devinfo, params); + BRW_NIR_PASS(brw_nir_frag_convert_attrs_prim_to_vert_indirect, + devinfo, params); } /* Remapping per-primitive inputs into unused per-vertex inputs cannot * work with multipolygon. @@ -1476,16 +1482,18 @@ brw_compile_fs(const struct brw_compiler *compiler, brw_nir_lower_fs_inputs(nir, devinfo, key); brw_nir_lower_fs_outputs(nir); + BRW_NIR_SNAPSHOT("after_lower_io"); + if (!brw_can_coherent_fb_fetch(devinfo)) - NIR_PASS(_, nir, brw_nir_lower_fs_load_output, key); + BRW_NIR_PASS(brw_nir_lower_fs_load_output, key); /* Do this lowering before brw_nir_populate_wm_prog_data(). */ - NIR_PASS(_, nir, nir_opt_frag_coord_to_pixel_coord); - NIR_PASS(_, nir, nir_lower_frag_coord_to_pixel_coord); + BRW_NIR_PASS(nir_opt_frag_coord_to_pixel_coord); + BRW_NIR_PASS(nir_lower_frag_coord_to_pixel_coord); - NIR_PASS(_, nir, brw_nir_move_interpolation_to_top); + BRW_NIR_PASS(brw_nir_move_interpolation_to_top); - brw_nir_cleanup_pre_wm_prog_data(nir); + brw_nir_cleanup_pre_wm_prog_data(pt); int per_primitive_offsets[VARYING_SLOT_MAX]; memset(per_primitive_offsets, -1, sizeof(per_primitive_offsets)); @@ -1503,12 +1511,12 @@ brw_compile_fs(const struct brw_compiler *compiler, * offset to determine render target 0 store instruction in * emit_alpha_to_coverage pass. */ - NIR_PASS(_, nir, nir_opt_constant_folding); - NIR_PASS(_, nir, brw_nir_lower_alpha_to_coverage); + BRW_NIR_PASS(nir_opt_constant_folding); + BRW_NIR_PASS(brw_nir_lower_alpha_to_coverage); } if (prog_data->coarse_pixel_dispatch != INTEL_NEVER) - NIR_PASS(_, nir, brw_nir_lower_frag_coord_z, devinfo); + BRW_NIR_PASS(brw_nir_lower_frag_coord_z, devinfo); if (!brw_wm_prog_key_is_dynamic(key)) { uint32_t f = 0; @@ -1530,10 +1538,10 @@ brw_compile_fs(const struct brw_compiler *compiler, if (prog_data->coarse_pixel_dispatch == INTEL_ALWAYS) f |= INTEL_MSAA_FLAG_COARSE_RT_WRITES; - NIR_PASS(_, nir, nir_inline_sysval, nir_intrinsic_load_fs_msaa_intel, f); + BRW_NIR_PASS(nir_inline_sysval, nir_intrinsic_load_fs_msaa_intel, f); } - brw_postprocess_nir_opts(nir, compiler, key->base.robust_flags); + brw_postprocess_nir_opts(pt, key->base.robust_flags); unsigned pressure[SIMD_COUNT]; brw_nir_quick_pressure_estimate(nir, devinfo, pressure); @@ -1544,8 +1552,7 @@ brw_compile_fs(const struct brw_compiler *compiler, pressure[i] > compiler->register_pressure_threshold; } - brw_postprocess_nir_out_of_ssa(nir, 0, params->base.archiver, - debug_enabled); + brw_postprocess_nir_out_of_ssa(pt, debug_enabled); if (unlikely(debug_enabled)) brw_print_fs_urb_setup(stderr, prog_data, per_primitive_offsets); diff --git a/src/intel/compiler/brw/brw_compile_gs.cpp b/src/intel/compiler/brw/brw_compile_gs.cpp index 3feb982dd66..b9c79742fa6 100644 --- a/src/intel/compiler/brw/brw_compile_gs.cpp +++ b/src/intel/compiler/brw/brw_compile_gs.cpp @@ -148,7 +148,14 @@ brw_compile_gs(const struct brw_compiler *compiler, const bool debug_enabled = brw_should_print_shader(nir, DEBUG_GS, params->base.source_hash); - brw_debug_archive_nir(params->base.archiver, nir, dispatch_width, "first"); + brw_pass_tracker pt_ = { + .nir = nir, + .dispatch_width = dispatch_width, + .compiler = compiler, + .archiver = params->base.archiver, + }, *pt = &pt_; + + BRW_NIR_SNAPSHOT("first"); brw_prog_data_init(&prog_data->base.base, ¶ms->base); @@ -175,14 +182,15 @@ brw_compile_gs(const struct brw_compiler *compiler, key->base.vue_layout, pos_slots); - brw_nir_apply_key(nir, compiler, &key->base, dispatch_width); + brw_nir_apply_key(pt, &key->base, dispatch_width); brw_nir_lower_gs_inputs(nir, compiler->devinfo, &input_vue_map, &prog_data->base.urb_read_length); brw_nir_lower_vue_outputs(nir); - brw_nir_opt_vectorize_urb(nir, compiler->devinfo); - brw_postprocess_nir(nir, compiler, dispatch_width, - params->base.archiver, debug_enabled, - key->base.robust_flags); + + BRW_NIR_SNAPSHOT("after_lower_io"); + + brw_nir_opt_vectorize_urb(pt); + brw_postprocess_nir(pt, debug_enabled, key->base.robust_flags); prog_data->include_primitive_id = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID); diff --git a/src/intel/compiler/brw/brw_compile_mesh.cpp b/src/intel/compiler/brw/brw_compile_mesh.cpp index 5379316c96a..38510232c40 100644 --- a/src/intel/compiler/brw/brw_compile_mesh.cpp +++ b/src/intel/compiler/brw/brw_compile_mesh.cpp @@ -90,21 +90,23 @@ brw_nir_lower_launch_mesh_workgroups(nir_shader *nir) #define BRW_PER_TASK_DATA_START_DW 8 static void -brw_nir_lower_tue_outputs(nir_shader *nir, brw_tue_map *map) +brw_nir_lower_tue_outputs(brw_pass_tracker *pt, brw_tue_map *map) { + nir_shader *nir = pt->nir; + memset(map, 0, sizeof(*map)); - NIR_PASS(_, nir, nir_lower_io, nir_var_shader_out, - type_size_scalar_dwords, nir_lower_io_lower_64bit_to_32); + BRW_NIR_PASS(nir_lower_io, nir_var_shader_out, + type_size_scalar_dwords, nir_lower_io_lower_64bit_to_32); /* Lowering to explicit types will start offsets from task_payload_size, so * set it to start after the header. */ nir->info.task_payload_size = BRW_PER_TASK_DATA_START_DW * 4; - NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, - nir_var_mem_task_payload, shared_type_info); - NIR_PASS(_, nir, nir_lower_explicit_io, - nir_var_mem_task_payload, nir_address_format_32bit_offset); + BRW_NIR_PASS(nir_lower_vars_to_explicit_types, + nir_var_mem_task_payload, shared_type_info); + BRW_NIR_PASS(nir_lower_explicit_io, + nir_var_mem_task_payload, nir_address_format_32bit_offset); map->size_dw = align(DIV_ROUND_UP(nir->info.task_payload_size, 4), 8); } @@ -279,11 +281,18 @@ brw_compile_task(const struct brw_compiler *compiler, struct brw_task_prog_data *prog_data = params->prog_data; const bool debug_enabled = brw_should_print_shader(nir, DEBUG_TASK, params->base.source_hash); - brw_debug_archive_nir(params->base.archiver, nir, 0, "first"); + brw_pass_tracker pt_ = { + .nir = nir, + .dispatch_width = 0, + .compiler = compiler, + .archiver = params->base.archiver, + }, *pt = &pt_; - brw_nir_lower_tue_outputs(nir, &prog_data->map); + BRW_NIR_SNAPSHOT("first"); - NIR_PASS(_, nir, brw_nir_align_launch_mesh_workgroups); + brw_nir_lower_tue_outputs(pt, &prog_data->map); + + BRW_NIR_PASS(brw_nir_align_launch_mesh_workgroups); nir_lower_task_shader_options lower_ts_opt = { .payload_to_shared_for_atomics = true, @@ -293,9 +302,9 @@ brw_compile_task(const struct brw_compiler *compiler, */ .payload_offset_in_bytes = BRW_PER_TASK_DATA_START_DW * 4, }; - NIR_PASS(_, nir, nir_lower_task_shader, lower_ts_opt); + BRW_NIR_PASS(nir_lower_task_shader, lower_ts_opt); - NIR_PASS(_, nir, brw_nir_lower_launch_mesh_workgroups); + BRW_NIR_PASS(brw_nir_lower_launch_mesh_workgroups); NIR_PASS(_, nir, brw_nir_lower_cs_intrinsics, compiler->devinfo, NULL); @@ -312,7 +321,7 @@ brw_compile_task(const struct brw_compiler *compiler, prog_data->base.uses_inline_data = brw_nir_uses_inline_data(nir) || key->base.uses_inline_push_addr; - brw_postprocess_nir_opts(nir, compiler, key->base.robust_flags); + brw_postprocess_nir_opts(pt, key->base.robust_flags); brw_simd_selection_state simd_state{ .devinfo = compiler->devinfo, @@ -320,7 +329,7 @@ brw_compile_task(const struct brw_compiler *compiler, .required_width = brw_required_dispatch_width(&nir->info), }; - brw_debug_archive_nir(params->base.archiver, nir, 0, "before-simd"); + BRW_NIR_SNAPSHOT("before_simd"); unsigned pressure[SIMD_COUNT]; brw_nir_quick_pressure_estimate(nir, devinfo, pressure); @@ -341,17 +350,23 @@ brw_compile_task(const struct brw_compiler *compiler, const unsigned dispatch_width = 8 << simd; nir_shader *shader = nir_shader_clone(params->base.mem_ctx, nir); - brw_nir_apply_key(shader, compiler, &key->base, dispatch_width); - brw_debug_archive_nir(params->base.archiver, shader, dispatch_width, "first"); + pt_ = { + .nir = shader, + .dispatch_width = dispatch_width, + .compiler = compiler, + .archiver = params->base.archiver, + }; - NIR_PASS(_, shader, brw_nir_lower_simd, dispatch_width); + BRW_NIR_SNAPSHOT("first"); + brw_nir_apply_key(pt, &key->base, dispatch_width); - brw_nir_optimize(shader, devinfo); + BRW_NIR_PASS(brw_nir_lower_simd, dispatch_width); + + brw_nir_optimize(pt); /* brw_nir_optimize undoes late lowerings. */ - NIR_PASS(_, shader, nir_opt_algebraic_late); - brw_postprocess_nir_out_of_ssa(shader, dispatch_width, - params->base.archiver, debug_enabled); + BRW_NIR_PASS(nir_opt_algebraic_late); + brw_postprocess_nir_out_of_ssa(pt, debug_enabled); const brw_shader_params shader_params = { .compiler = compiler, @@ -416,7 +431,7 @@ brw_compile_task(const struct brw_compiler *compiler, } static void -brw_nir_lower_tue_inputs(nir_shader *nir, const brw_tue_map *map) +brw_nir_lower_tue_inputs(brw_pass_tracker *pt, const brw_tue_map *map) { /* See brw_nir_lower_tue_outputs. If a task payload is read by this shader, * task_payload_size will be used to start offsets, and that's always @@ -424,12 +439,11 @@ brw_nir_lower_tue_inputs(nir_shader *nir, const brw_tue_map *map) * We can't always use map, as it may not be present if task and mesh * shaders are not compiled together. This is possible with shader objects. */ + nir_shader *nir = pt->nir; nir->info.task_payload_size = BRW_PER_TASK_DATA_START_DW * 4; - bool progress = false; - - NIR_PASS(progress, nir, nir_lower_vars_to_explicit_types, - nir_var_mem_task_payload, shared_type_info); + bool progress = BRW_NIR_PASS(nir_lower_vars_to_explicit_types, + nir_var_mem_task_payload, shared_type_info); if (progress) { /* The types for Task Output and Mesh Input should match, so their sizes @@ -444,8 +458,8 @@ brw_nir_lower_tue_inputs(nir_shader *nir, const brw_tue_map *map) nir->info.task_payload_size = 0; } - NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_task_payload, - nir_address_format_32bit_offset); + BRW_NIR_PASS(nir_lower_explicit_io, nir_var_mem_task_payload, + nir_address_format_32bit_offset); } /* Attribute types. Flat attributes have to be a separate class because @@ -612,11 +626,11 @@ brw_print_mue_map(FILE *fp, const struct brw_mue_map *map, struct nir_shader *ni } static void -brw_nir_lower_mue_outputs(nir_shader *nir, const struct brw_mue_map *map) +brw_nir_lower_mue_outputs(brw_pass_tracker *pt, const struct brw_mue_map *map) { - NIR_PASS(_, nir, nir_lower_io, nir_var_shader_out, - type_size_vec4, - nir_lower_io_lower_64bit_to_32); + BRW_NIR_PASS(nir_lower_io, nir_var_shader_out, + type_size_vec4, + nir_lower_io_lower_64bit_to_32); } static bool @@ -965,7 +979,14 @@ brw_compile_mesh(const struct brw_compiler *compiler, struct brw_mesh_prog_data *prog_data = params->prog_data; const bool debug_enabled = brw_should_print_shader(nir, DEBUG_MESH, params->base.source_hash); - brw_debug_archive_nir(params->base.archiver, nir, 0, "first"); + brw_pass_tracker pt_ = { + .nir = nir, + .dispatch_width = 0, + .compiler = compiler, + .archiver = params->base.archiver, + }, *pt = &pt_; + + BRW_NIR_SNAPSHOT("first"); brw_prog_data_init(&prog_data->base.base, ¶ms->base); @@ -1000,23 +1021,23 @@ brw_compile_mesh(const struct brw_compiler *compiler, prog_data->uses_drawid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID); - brw_nir_lower_tue_inputs(nir, params->tue_map); + brw_nir_lower_tue_inputs(pt, params->tue_map); - NIR_PASS(_, nir, brw_nir_lower_mesh_primitive_count); - NIR_PASS(_, nir, nir_opt_dce); - NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_shader_out, NULL); + BRW_NIR_PASS(brw_nir_lower_mesh_primitive_count); + BRW_NIR_PASS(nir_opt_dce); + BRW_NIR_PASS(nir_remove_dead_variables, nir_var_shader_out, NULL); brw_compute_mue_map(compiler, nir, &prog_data->map, prog_data->index_format, key->base.vue_layout, apply_wa_18019110168 ? wa_18019110168_mapping : NULL); - brw_nir_lower_mue_outputs(nir, &prog_data->map); + brw_nir_lower_mue_outputs(pt, &prog_data->map); /* When Primitive Header is enabled, we may not generates writes to all * fields, so let's initialize everything. */ if (prog_data->map.has_per_primitive_header) - NIR_PASS(_, nir, brw_nir_initialize_mue, &prog_data->map); + BRW_NIR_PASS(brw_nir_initialize_mue, &prog_data->map); NIR_PASS(_, nir, brw_nir_lower_cs_intrinsics, compiler->devinfo, NULL); @@ -1026,7 +1047,7 @@ brw_compile_mesh(const struct brw_compiler *compiler, prog_data->base.uses_inline_data = brw_nir_uses_inline_data(nir) || key->base.uses_inline_push_addr; - brw_postprocess_nir_opts(nir, compiler, key->base.robust_flags); + brw_postprocess_nir_opts(pt, key->base.robust_flags); const struct brw_lower_urb_cb_data cb_data = { .devinfo = devinfo, @@ -1039,10 +1060,10 @@ brw_compile_mesh(const struct brw_compiler *compiler, prog_data->map.per_primitive_indices_stride, .per_primitive_byte_offsets = prog_data->map.per_primitive_offsets, }; - NIR_PASS(_, nir, brw_nir_lower_outputs_to_urb_intrinsics, &cb_data); - brw_nir_opt_vectorize_urb(nir, devinfo); + BRW_NIR_PASS(brw_nir_lower_outputs_to_urb_intrinsics, &cb_data); + brw_nir_opt_vectorize_urb(pt); struct nir_opt_offsets_options offset_options = {}; - NIR_PASS(_, nir, nir_opt_offsets, &offset_options); + BRW_NIR_PASS(nir_opt_offsets, &offset_options); brw_simd_selection_state simd_state{ .devinfo = compiler->devinfo, @@ -1052,7 +1073,7 @@ brw_compile_mesh(const struct brw_compiler *compiler, std::unique_ptr v[3]; - brw_debug_archive_nir(params->base.archiver, nir, 0, "before-simd"); + BRW_NIR_SNAPSHOT("before_simd"); for (unsigned i = 0; i < 3; i++) { const unsigned simd = devinfo->ver >= 30 ? 2 - i : i; @@ -1064,20 +1085,25 @@ brw_compile_mesh(const struct brw_compiler *compiler, nir_shader *shader = nir_shader_clone(params->base.mem_ctx, nir); - brw_debug_archive_nir(params->base.archiver, shader, dispatch_width, "first"); + pt_ = { + .nir = shader, + .dispatch_width = dispatch_width, + .compiler = compiler, + .archiver = params->base.archiver, + }; - brw_nir_apply_key(shader, compiler, &key->base, dispatch_width); + BRW_NIR_SNAPSHOT("first"); + brw_nir_apply_key(pt, &key->base, dispatch_width); /* Load uniforms can do a better job for constants, so fold before it. */ - NIR_PASS(_, shader, nir_opt_constant_folding); + BRW_NIR_PASS(nir_opt_constant_folding); - NIR_PASS(_, shader, brw_nir_lower_simd, dispatch_width); + BRW_NIR_PASS(brw_nir_lower_simd, dispatch_width); - brw_nir_optimize(shader, devinfo); + brw_nir_optimize(pt); /* brw_nir_optimize undoes late lowerings. */ - NIR_PASS(_, shader, nir_opt_algebraic_late); - brw_postprocess_nir_out_of_ssa(shader, dispatch_width, - params->base.archiver, debug_enabled); + BRW_NIR_PASS(nir_opt_algebraic_late); + brw_postprocess_nir_out_of_ssa(pt, debug_enabled); const brw_shader_params shader_params = { .compiler = compiler, diff --git a/src/intel/compiler/brw/brw_compile_tcs.cpp b/src/intel/compiler/brw/brw_compile_tcs.cpp index fb450065f16..dd62ee40c4d 100644 --- a/src/intel/compiler/brw/brw_compile_tcs.cpp +++ b/src/intel/compiler/brw/brw_compile_tcs.cpp @@ -211,7 +211,14 @@ brw_compile_tcs(const struct brw_compiler *compiler, const bool debug_enabled = brw_should_print_shader(nir, DEBUG_TCS, params->base.source_hash); - brw_debug_archive_nir(params->base.archiver, nir, dispatch_width, "first"); + brw_pass_tracker pt_ = { + .nir = nir, + .dispatch_width = dispatch_width, + .compiler = compiler, + .archiver = params->base.archiver, + }, *pt = &pt_; + + BRW_NIR_SNAPSHOT("first"); brw_prog_data_init(&prog_data->base.base, ¶ms->base); @@ -229,16 +236,16 @@ brw_compile_tcs(const struct brw_compiler *compiler, nir->info.patch_outputs_written, key->separate_tess_vue_layout); - brw_nir_apply_key(nir, compiler, &key->base, dispatch_width); + brw_nir_apply_key(pt, &key->base, dispatch_width); brw_nir_lower_tcs_inputs(nir, devinfo, &input_vue_map); brw_nir_lower_tcs_outputs(nir, devinfo, &vue_prog_data->vue_map, key->_tes_primitive_mode); - brw_nir_opt_vectorize_urb(nir, devinfo); - intel_nir_lower_patch_vertices_in(nir, key->input_vertices); + BRW_NIR_SNAPSHOT("after_lower_io"); - brw_postprocess_nir(nir, compiler, dispatch_width, - params->base.archiver, debug_enabled, - key->base.robust_flags); + brw_nir_opt_vectorize_urb(pt); + BRW_NIR_PASS(intel_nir_lower_patch_vertices_in, key->input_vertices); + + brw_postprocess_nir(pt, debug_enabled, key->base.robust_flags); bool has_primitive_id = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID); diff --git a/src/intel/compiler/brw/brw_compile_tes.cpp b/src/intel/compiler/brw/brw_compile_tes.cpp index 21c52a98f3b..2ed998fb29f 100644 --- a/src/intel/compiler/brw/brw_compile_tes.cpp +++ b/src/intel/compiler/brw/brw_compile_tes.cpp @@ -88,7 +88,14 @@ brw_compile_tes(const struct brw_compiler *compiler, const bool debug_enabled = brw_should_print_shader(nir, DEBUG_TES, params->base.source_hash); - brw_debug_archive_nir(params->base.archiver, nir, dispatch_width, "first"); + brw_pass_tracker pt_ = { + .nir = nir, + .dispatch_width = dispatch_width, + .compiler = compiler, + .archiver = params->base.archiver, + }, *pt = &pt_; + + BRW_NIR_SNAPSHOT("first"); brw_prog_data_init(&prog_data->base.base, ¶ms->base); @@ -104,14 +111,15 @@ brw_compile_tes(const struct brw_compiler *compiler, nir->info.patch_inputs_read, key->separate_tess_vue_layout); } - - brw_nir_apply_key(nir, compiler, &key->base, dispatch_width); + brw_nir_apply_key(pt, &key->base, dispatch_width); brw_nir_lower_tes_inputs(nir, devinfo, &input_vue_map); brw_nir_lower_vue_outputs(nir); - brw_nir_opt_vectorize_urb(nir, devinfo); - NIR_PASS(_, nir, intel_nir_lower_patch_vertices_tes); - brw_postprocess_nir(nir, compiler, dispatch_width, params->base.archiver, - debug_enabled, key->base.robust_flags); + BRW_NIR_SNAPSHOT("after_lower_io"); + + brw_nir_opt_vectorize_urb(pt); + BRW_NIR_PASS(intel_nir_lower_patch_vertices_tes); + + brw_postprocess_nir(pt, debug_enabled, key->base.robust_flags); const uint32_t pos_slots = (nir->info.per_view_outputs & VARYING_BIT_POS) ? diff --git a/src/intel/compiler/brw/brw_compile_vs.cpp b/src/intel/compiler/brw/brw_compile_vs.cpp index e9bb184a4b5..526d0091ae7 100644 --- a/src/intel/compiler/brw/brw_compile_vs.cpp +++ b/src/intel/compiler/brw/brw_compile_vs.cpp @@ -253,7 +253,14 @@ brw_compile_vs(const struct brw_compiler *compiler, */ assert(!key->no_vf_slot_compaction || key->vf_component_packing); - brw_debug_archive_nir(params->base.archiver, nir, dispatch_width, "first"); + brw_pass_tracker pt_ = { + .nir = nir, + .dispatch_width = dispatch_width, + .compiler = compiler, + .archiver = params->base.archiver, + }, *pt = &pt_; + + BRW_NIR_SNAPSHOT("first"); brw_prog_data_init(&prog_data->base.base, ¶ms->base); @@ -271,7 +278,7 @@ brw_compile_vs(const struct brw_compiler *compiler, &prog_data->base.vue_map, nir->info.outputs_written, key->base.vue_layout, pos_slots); - brw_nir_apply_key(nir, compiler, &key->base, dispatch_width); + brw_nir_apply_key(pt, &key->base, dispatch_width); prog_data->inputs_read = nir->info.inputs_read; prog_data->double_inputs_read = nir->info.vs.double_inputs; @@ -279,6 +286,7 @@ brw_compile_vs(const struct brw_compiler *compiler, brw_nir_lower_vs_inputs(nir); brw_nir_lower_vue_outputs(nir); + BRW_NIR_SNAPSHOT("after_lower_io"); memset(prog_data->vf_component_packing, 0, sizeof(prog_data->vf_component_packing)); @@ -286,8 +294,7 @@ brw_compile_vs(const struct brw_compiler *compiler, if (key->vf_component_packing) nr_packed_regs = brw_nir_pack_vs_input(nir, prog_data); - brw_postprocess_nir(nir, compiler, dispatch_width, - params->base.archiver, debug_enabled, + brw_postprocess_nir(pt, debug_enabled, key->base.robust_flags); unsigned nr_attribute_slots = util_bitcount64(prog_data->inputs_read); diff --git a/src/intel/compiler/brw/brw_nir.c b/src/intel/compiler/brw/brw_nir.c index 772b50d6f2c..9c1c02bc4ce 100644 --- a/src/intel/compiler/brw/brw_nir.c +++ b/src/intel/compiler/brw/brw_nir.c @@ -816,10 +816,11 @@ lsc_urb_round_up_components(unsigned n) } void -brw_nir_opt_vectorize_urb(nir_shader *nir, - const struct intel_device_info *devinfo) +brw_nir_opt_vectorize_urb(brw_pass_tracker *pt) { - NIR_PASS(_, nir, nir_opt_cse); + const struct intel_device_info *devinfo = pt->compiler->devinfo; + + BRW_NIR_PASS(nir_opt_cse); nir_load_store_vectorize_options options = { .modes = nir_var_shader_in | nir_var_shader_out, @@ -829,7 +830,7 @@ brw_nir_opt_vectorize_urb(nir_shader *nir, devinfo->ver >= 20 ? lsc_urb_round_up_components : vec4_urb_round_up_components, }; - NIR_PASS(_, nir, nir_opt_load_store_vectorize, &options); + BRW_NIR_PASS(nir_opt_load_store_vectorize, &options); } void @@ -1423,45 +1424,19 @@ brw_nir_tag_speculative_access(nir_shader *nir) nir_metadata_all, NULL); } -#define OPT(pass, ...) ({ \ - bool this_progress = false; \ - NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \ - if (this_progress) \ - progress = true; \ - this_progress; \ -}) - -#define LOOP_OPT(pass, ...) ({ \ - const unsigned long this_line = __LINE__; \ - bool this_progress = false; \ - if (opt_line == this_line) \ - break; \ - NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \ - if (this_progress) { \ - progress = true; \ - opt_line = this_line; \ - } \ - this_progress; \ -}) - -#define LOOP_OPT_NOT_IDEMPOTENT(pass, ...) ({ \ - bool this_progress = false; \ - NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \ - if (this_progress) { \ - progress = true; \ - opt_line = 0; \ - } \ - this_progress; \ -}) +#define OPT BRW_NIR_PASS +#define LOOP_OPT BRW_NIR_LOOP_PASS +#define LOOP_OPT_NOT_IDEMPOTENT BRW_NIR_LOOP_PASS_NOT_IDEMPOTENT void -brw_nir_optimize(nir_shader *nir, - const struct intel_device_info *devinfo) +brw_nir_optimize(brw_pass_tracker *pt) { - bool progress; - unsigned long opt_line = 0; + nir_shader *nir = pt->nir; + + pass_tracker_new_loop(pt); do { - progress = false; + pass_tracker_new_iteration(pt); + /* This pass is causing problems with types used by OpenCL : * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13955 * @@ -1529,7 +1504,7 @@ brw_nir_optimize(nir_shader *nir, LOOP_OPT(nir_opt_gcm, false); LOOP_OPT(nir_opt_undef); LOOP_OPT(nir_lower_pack); - } while (progress); + } while (pt->progress); /* Workaround Gfxbench unused local sampler variable which will trigger an * assert in the opt_large_constants pass. @@ -1718,7 +1693,15 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir, const struct brw_nir_compiler_opts *opts) { const struct intel_device_info *devinfo = compiler->devinfo; - UNUSED bool progress; /* Written by OPT */ + + /* TODO: This is part of the "pre-processing" before the shader is fed to + * brw_compile_* functions, so there's no debug archiver available yet. + * In the future runtime/driver will create one for us to use here. + */ + brw_pass_tracker pt_ = { + .nir = nir, + .compiler = compiler, + }, *pt = &pt_; nir_validate_ssa_dominance(nir, "before brw_preprocess_nir"); @@ -1753,7 +1736,7 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir, if (OPT(nir_opt_memcpy)) OPT(nir_split_var_copies); - brw_nir_optimize(nir, devinfo); + brw_nir_optimize(pt); if (nir->info.ray_queries) { OPT(nir_opt_ray_queries); @@ -1853,7 +1836,7 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir, OPT(intel_nir_clamp_per_vertex_loads); /* Get rid of split copies */ - brw_nir_optimize(nir, devinfo); + brw_nir_optimize(pt); } static bool @@ -1969,8 +1952,6 @@ void brw_nir_link_shaders(const struct brw_compiler *compiler, nir_shader *producer, nir_shader *consumer) { - const struct intel_device_info *devinfo = compiler->devinfo; - if (producer->info.stage == MESA_SHADER_MESH && consumer->info.stage == MESA_SHADER_FRAGMENT) { uint64_t fs_inputs = 0, ms_outputs = 0; @@ -2009,11 +1990,19 @@ brw_nir_link_shaders(const struct brw_compiler *compiler, NIR_PASS(_, producer, nir_lower_io_vars_to_scalar, nir_var_shader_out); NIR_PASS(_, consumer, nir_lower_io_vars_to_scalar, nir_var_shader_in); - brw_nir_optimize(producer, devinfo); - brw_nir_optimize(consumer, devinfo); + + /* TODO: This is part of the "pre-processing" before the shader is fed to + * brw_compile_* functions, so there's no debug archiver available yet. + * In the future runtime/driver will create one for us to use here. + */ + brw_pass_tracker pt_producer = { .nir = producer, .compiler = compiler }; + brw_pass_tracker pt_consumer = { .nir = consumer, .compiler = compiler }; + + brw_nir_optimize(&pt_producer); + brw_nir_optimize(&pt_consumer); if (nir_link_opt_varyings(producer, consumer)) - brw_nir_optimize(consumer, devinfo); + brw_nir_optimize(&pt_consumer); NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, NULL); NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, NULL); @@ -2031,8 +2020,8 @@ brw_nir_link_shaders(const struct brw_compiler *compiler, NIR_PASS(_, producer, nir_lower_global_vars_to_local); NIR_PASS(_, consumer, nir_lower_global_vars_to_local); - brw_nir_optimize(producer, devinfo); - brw_nir_optimize(consumer, devinfo); + brw_nir_optimize(&pt_producer); + brw_nir_optimize(&pt_consumer); if (producer->info.stage == MESA_SHADER_MESH && consumer->info.stage == MESA_SHADER_FRAGMENT) { @@ -2329,11 +2318,10 @@ brw_nir_ssbo_intel(nir_shader *shader) } static void -brw_vectorize_lower_mem_access(nir_shader *nir, - const struct brw_compiler *compiler, +brw_vectorize_lower_mem_access(brw_pass_tracker *pt, enum brw_robustness_flags robust_flags) { - UNUSED bool progress = false; + const struct brw_compiler *compiler = pt->compiler; nir_load_store_vectorize_options options = { .modes = nir_var_mem_ubo | nir_var_mem_ssbo | @@ -2569,10 +2557,8 @@ flag_fused_eu_disable_instr(nir_builder *b, nir_instr *instr, void *data) } static void -brw_nir_lower_int64(nir_shader *nir, const struct intel_device_info *devinfo) +brw_nir_lower_int64(brw_pass_tracker *pt) { - UNUSED bool progress; /* Written by OPT */ - /* Potentially perform this optimization pass twice because it can create * additional opportunities for itself. */ @@ -2580,7 +2566,7 @@ brw_nir_lower_int64(nir_shader *nir, const struct intel_device_info *devinfo) OPT(nir_opt_algebraic_before_lower_int64); if (OPT(nir_lower_int64)) - brw_nir_optimize(nir, devinfo); + brw_nir_optimize(pt); } /* Prepare the given shader for codegen @@ -2589,12 +2575,12 @@ brw_nir_lower_int64(nir_shader *nir, const struct intel_device_info *devinfo) * backend and is highly backend-specific. */ void -brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler, +brw_postprocess_nir_opts(brw_pass_tracker *pt, enum brw_robustness_flags robust_flags) { + const struct brw_compiler *compiler = pt->compiler; const struct intel_device_info *devinfo = compiler->devinfo; - - UNUSED bool progress; /* Written by OPT */ + nir_shader *nir = pt->nir; const nir_lower_tex_options tex_options = { .lower_txp = ~0, @@ -2637,10 +2623,7 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler, OPT(nir_opt_combine_barriers, combine_all_memory_barriers, NULL); - do { - progress = false; - OPT(nir_opt_algebraic_before_ffma); - } while (progress); + while (OPT(nir_opt_algebraic_before_ffma)) {} OPT(nir_opt_idiv_const, 32); @@ -2665,23 +2648,23 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler, OPT(brw_nir_tag_speculative_access); - brw_nir_optimize(nir, devinfo); + brw_nir_optimize(pt); if (nir_shader_has_local_variables(nir)) { OPT(nir_lower_vars_to_explicit_types, nir_var_function_temp, glsl_get_natural_size_align_bytes); OPT(nir_lower_explicit_io, nir_var_function_temp, nir_address_format_32bit_offset); - brw_nir_optimize(nir, devinfo); + brw_nir_optimize(pt); } - brw_vectorize_lower_mem_access(nir, compiler, robust_flags); + brw_vectorize_lower_mem_access(pt, robust_flags); /* Do this after lowering memory access bit-sizes */ if (nir->info.stage == MESA_SHADER_MESH || nir->info.stage == MESA_SHADER_TASK) { OPT(lower_task_payload_to_urb_intrinsics, devinfo); - brw_nir_opt_vectorize_urb(nir, devinfo); + brw_nir_opt_vectorize_urb(pt); } /* Needs to be prior int64 lower because it generates 64bit address @@ -2689,7 +2672,7 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler, */ OPT(intel_nir_lower_printf); - brw_nir_lower_int64(nir, devinfo); + brw_nir_lower_int64(pt); /* This pass specifically looks for sequences of fmul and fadd that * intel_nir_opt_peephole_ffma will try to eliminate. Call this @@ -2736,18 +2719,17 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler, OPT(brw_nir_opt_fsat); do { - progress = false; + pt->progress = false; OPT(nir_opt_algebraic_late); - if (progress) { + if (pt->progress) { OPT(nir_opt_constant_folding); OPT(nir_opt_copy_prop); OPT(nir_opt_dce); OPT(nir_opt_cse); } - } while (progress); - + } while (pt->progress); OPT(nir_lower_fp16_casts, nir_lower_fp16_split_fp64); @@ -2791,7 +2773,7 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler, * allows the elimination of some loops over, say, a TXF instruction * with a non-uniform texture handle. */ - brw_nir_optimize(nir, devinfo); + brw_nir_optimize(pt); OPT(nir_lower_subgroups, &subgroups_options); } @@ -2801,7 +2783,7 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler, * and peephole_select may generate a 64-bit select. So do another * round at the tail end. */ - brw_nir_lower_int64(nir, devinfo); + brw_nir_lower_int64(pt); /* Deal with EU fusion */ if (devinfo->ver == 12) { @@ -2828,12 +2810,10 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler, } void -brw_postprocess_nir_out_of_ssa(nir_shader *nir, - unsigned dispatch_width, - debug_archiver *archiver, +brw_postprocess_nir_out_of_ssa(brw_pass_tracker *pt, bool debug_enabled) { - UNUSED bool progress; /* Written by OPT */ + nir_shader *nir = pt->nir; /* Run fsign lowering again after the last time brw_nir_optimize is called. * As is the case with conversion lowering (below), brw_nir_optimize can @@ -2867,10 +2847,10 @@ brw_postprocess_nir_out_of_ssa(nir_shader *nir, /* Rerun the divergence analysis before convert_from_ssa as this pass has * some assert on consistent divergence flags. */ - NIR_PASS(_, nir, nir_convert_to_lcssa, true, true); + OPT(nir_convert_to_lcssa, true, true); nir_divergence_analysis(nir); - if (unlikely(debug_enabled || archiver)) { + if (unlikely(debug_enabled || pt->archiver)) { /* Re-index SSA defs so we print more sensible numbers. */ nir_foreach_function_impl(impl, nir) { nir_index_ssa_defs(impl); @@ -2882,8 +2862,7 @@ brw_postprocess_nir_out_of_ssa(nir_shader *nir, nir_print_shader(nir, stderr); } - if (unlikely(archiver)) - brw_debug_archive_nir(archiver, nir, dispatch_width, "ssa"); + BRW_NIR_SNAPSHOT("ssa"); } OPT(nir_convert_from_ssa, true, true); @@ -2900,8 +2879,7 @@ brw_postprocess_nir_out_of_ssa(nir_shader *nir, nir_print_shader(nir, stderr); } - if (unlikely(archiver)) - brw_debug_archive_nir(archiver, nir, dispatch_width, "out"); + BRW_NIR_SNAPSHOT("out"); } static unsigned @@ -2944,12 +2922,13 @@ brw_nir_api_subgroup_size(const nir_shader *nir, } void -brw_nir_apply_key(nir_shader *nir, - const struct brw_compiler *compiler, +brw_nir_apply_key(brw_pass_tracker *pt, const struct brw_base_prog_key *key, unsigned max_subgroup_size) { - bool progress = false; + nir_shader *nir = pt->nir; + + pt->progress = false; const nir_lower_subgroups_options subgroups_options = { .subgroup_size = get_subgroup_size(&nir->info, max_subgroup_size), @@ -2962,9 +2941,8 @@ brw_nir_apply_key(nir_shader *nir, if (key->limit_trig_input_range) OPT(brw_nir_limit_trig_input_range_workaround); - if (progress) { - brw_nir_optimize(nir, compiler->devinfo); - } + if (pt->progress) + brw_nir_optimize(pt); } enum brw_conditional_mod diff --git a/src/intel/compiler/brw/brw_nir.h b/src/intel/compiler/brw/brw_nir.h index 52a140750a0..978ad981e87 100644 --- a/src/intel/compiler/brw/brw_nir.h +++ b/src/intel/compiler/brw/brw_nir.h @@ -17,6 +17,7 @@ extern "C" { #define BRW_TEX_INSTR_FUSED_EU_DISABLE (1u << 30) extern const struct nir_shader_compiler_options brw_scalar_nir_options; +struct brw_pass_tracker; void brw_fill_tess_info_from_shader_info(struct brw_tess_info *brw_info, @@ -219,8 +220,7 @@ bool brw_nir_lower_inputs_to_urb_intrinsics(nir_shader *, const struct brw_lower bool brw_nir_lower_outputs_to_urb_intrinsics(nir_shader *, const struct brw_lower_urb_cb_data *); -void brw_nir_opt_vectorize_urb(nir_shader *nir, - const struct intel_device_info *devinfo); +void brw_nir_opt_vectorize_urb(struct brw_pass_tracker *pt); void brw_nir_lower_vs_inputs(nir_shader *nir); void brw_nir_lower_gs_inputs(nir_shader *nir, @@ -285,26 +285,19 @@ bool brw_nir_lower_mem_access_bit_sizes(nir_shader *shader, bool brw_nir_lower_simd(nir_shader *nir, unsigned dispatch_width); -void brw_postprocess_nir_opts(nir_shader *nir, - const struct brw_compiler *compiler, +void brw_postprocess_nir_opts(struct brw_pass_tracker *pt, enum brw_robustness_flags robust_flags); -void brw_postprocess_nir_out_of_ssa(nir_shader *nir, - unsigned dispatch_width, - debug_archiver *archiver, +void brw_postprocess_nir_out_of_ssa(struct brw_pass_tracker *pt, bool debug_enabled); static inline void -brw_postprocess_nir(nir_shader *nir, - const struct brw_compiler *compiler, - unsigned dispatch_width, - debug_archiver *archiver, +brw_postprocess_nir(struct brw_pass_tracker *pt, bool debug_enabled, enum brw_robustness_flags robust_flags) { - brw_postprocess_nir_opts(nir, compiler, robust_flags); - brw_postprocess_nir_out_of_ssa(nir, dispatch_width, archiver, - debug_enabled); + brw_postprocess_nir_opts(pt, robust_flags); + brw_postprocess_nir_out_of_ssa(pt, debug_enabled); } bool brw_nir_apply_attribute_workarounds(nir_shader *nir, @@ -320,8 +313,7 @@ bool brw_nir_lower_fsign(nir_shader *nir); bool brw_nir_opt_fsat(nir_shader *); -void brw_nir_apply_key(nir_shader *nir, - const struct brw_compiler *compiler, +void brw_nir_apply_key(struct brw_pass_tracker *pt, const struct brw_base_prog_key *key, unsigned max_subgroup_size); @@ -349,8 +341,7 @@ void brw_nir_analyze_ubo_ranges(const struct brw_compiler *compiler, bool brw_nir_lower_ubo_ranges(nir_shader *nir, struct brw_ubo_range out_ranges[4]); -void brw_nir_optimize(nir_shader *nir, - const struct intel_device_info *devinfo); +void brw_nir_optimize(struct brw_pass_tracker *pt); nir_shader *brw_nir_create_passthrough_tcs(void *mem_ctx, const struct brw_compiler *compiler, diff --git a/src/intel/compiler/brw/brw_nir_rt.c b/src/intel/compiler/brw/brw_nir_rt.c index 4435d57a1e7..184657d5434 100644 --- a/src/intel/compiler/brw/brw_nir_rt.c +++ b/src/intel/compiler/brw/brw_nir_rt.c @@ -7,6 +7,7 @@ #include "brw_nir_rt.h" #include "brw_nir_rt_builder.h" #include "intel_nir.h" +#include "brw_private.h" static bool resize_deref(nir_builder *b, nir_deref_instr *deref, @@ -499,7 +500,12 @@ brw_nir_create_raygen_trampoline(const struct brw_compiler *compiler, NIR_PASS(_, nir, brw_nir_lower_cs_intrinsics, devinfo, NULL); - brw_nir_optimize(nir, devinfo); + brw_pass_tracker pt = { + .nir = nir, + .compiler = compiler, + }; + + brw_nir_optimize(&pt); return nir; } diff --git a/src/intel/compiler/brw/brw_private.h b/src/intel/compiler/brw/brw_private.h index 62f628ef856..398a5233130 100644 --- a/src/intel/compiler/brw/brw_private.h +++ b/src/intel/compiler/brw/brw_private.h @@ -19,15 +19,84 @@ void brw_alloc_reg_sets(struct brw_compiler *compiler); extern const char *const conditional_modifier[16]; extern const char *const pred_ctrl_align16[16]; +typedef struct brw_pass_tracker { + nir_shader *nir; + unsigned dispatch_width; + + const struct brw_compiler *compiler; + + bool progress; + + /* Filled with the last line that made progress. + * Used to perform early break in loops. + * See BRW_NIR_LOOP_PASS macros below. + */ + unsigned long opt_line; + + /* Tracking information for the debug archiver. */ + unsigned pass_num; + debug_archiver *archiver; +} brw_pass_tracker; + #ifndef NDEBUG -void brw_debug_archive_nir(debug_archiver *archiver, nir_shader *nir, - unsigned dispatch_width, const char *step); +void +brw_pass_tracker_archive(brw_pass_tracker *pt, const char *pass_name); #else static inline void -brw_debug_archive_nir(debug_archiver *archiver, nir_shader *nir, - unsigned dispatch_width, const char *step) {} +brw_pass_tracker_archive(brw_pass_tracker *pt, const char *pass_name) +{} #endif +/* To be used in conjunction to BRW_NIR_LOOP_* macros. */ +static inline void +pass_tracker_new_loop(brw_pass_tracker *pt) +{ + pt->opt_line = 0; +} + +/* To be used in conjunction to BRW_NIR_LOOP_* macros. */ +static inline void +pass_tracker_new_iteration(brw_pass_tracker *pt) +{ + pt->progress = false; +} + +#define BRW_NIR_SNAPSHOT(name) do { \ + pt->pass_num++; \ + brw_pass_tracker_archive(pt, name); \ +} while (false); + +#define BRW_NIR_PASS(pass, ...) ({ \ + pt->pass_num++; \ + bool this_progress = false; \ + NIR_PASS(this_progress, pt->nir, pass, ##__VA_ARGS__); \ + if (this_progress) { \ + pt->progress = true; \ + if (unlikely(pt->archiver)) \ + brw_pass_tracker_archive(pt, #pass); \ + } \ + this_progress; \ +}) + +#define BRW_NIR_LOOP_PASS(pass, ...) ({ \ + const unsigned long this_line = __LINE__; \ + if (pt->opt_line == this_line) { \ + pt->pass_num++; \ + break; \ + } \ + bool this_progress = BRW_NIR_PASS(pass, ##__VA_ARGS__); \ + if (this_progress) \ + pt->opt_line = this_line; \ + this_progress; \ +}) + +#define BRW_NIR_LOOP_PASS_NOT_IDEMPOTENT(pass, ...) ({ \ + bool this_progress = BRW_NIR_PASS(pass, ##__VA_ARGS__); \ + if (this_progress) \ + pt->opt_line = 0; \ + this_progress; \ +}) + #ifdef __cplusplus } #endif diff --git a/src/intel/compiler/brw/brw_shader.cpp b/src/intel/compiler/brw/brw_shader.cpp index 265357ab81d..e541a1c0585 100644 --- a/src/intel/compiler/brw/brw_shader.cpp +++ b/src/intel/compiler/brw/brw_shader.cpp @@ -1317,21 +1317,18 @@ brw_allocate_registers(brw_shader &s, bool allow_spilling) #ifndef NDEBUG void -brw_debug_archive_nir(debug_archiver *archiver, nir_shader *nir, - unsigned dispatch_width, const char *step) +brw_pass_tracker_archive(brw_pass_tracker *pt, const char *pass_name) { - if (!archiver) + if (!pt->archiver) return; - const bool prefix_dispatch_width = - dispatch_width > 0 && mesa_shader_stage_uses_workgroup(nir->info.stage); - const char *filename = prefix_dispatch_width ? - ralloc_asprintf(archiver, "NIR%d/%s", dispatch_width, step) : - ralloc_asprintf(archiver, "NIR/%s", step); + const char *filename = + ralloc_asprintf(pt->archiver, "NIR%d/%03d-%s", + pt->dispatch_width, pt->pass_num, pass_name); - FILE *f = debug_archiver_start_file(archiver, filename); - nir_print_shader(nir, f); - debug_archiver_finish_file(archiver); + FILE *f = debug_archiver_start_file(pt->archiver, filename); + nir_print_shader(pt->nir, f); + debug_archiver_finish_file(pt->archiver); } #endif