diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index a714aeab010..f456acc35ae 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -1696,6 +1696,7 @@ DEFINE_PROG_DATA_DOWNCAST(sf, true) struct brw_compile_stats { uint32_t dispatch_width; /**< 0 for vec4 */ + uint32_t max_dispatch_width; uint32_t instructions; uint32_t sends; uint32_t loops; diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 2335e80eabe..e481791ad5a 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -7631,12 +7631,14 @@ brw_compile_fs(const struct brw_compiler *compiler, } struct brw_compile_stats *stats = params->stats; + uint32_t max_dispatch_width = 0; if (simd8_cfg) { prog_data->dispatch_8 = true; g.generate_code(simd8_cfg, 8, v8->shader_stats, v8->performance_analysis.require(), stats); stats = stats ? stats + 1 : NULL; + max_dispatch_width = 8; } if (simd16_cfg) { @@ -7645,6 +7647,7 @@ brw_compile_fs(const struct brw_compiler *compiler, simd16_cfg, 16, v16->shader_stats, v16->performance_analysis.require(), stats); stats = stats ? stats + 1 : NULL; + max_dispatch_width = 16; } if (simd32_cfg) { @@ -7653,8 +7656,12 @@ brw_compile_fs(const struct brw_compiler *compiler, simd32_cfg, 32, v32->shader_stats, v32->performance_analysis.require(), stats); stats = stats ? stats + 1 : NULL; + max_dispatch_width = 32; } + for (struct brw_compile_stats *s = params->stats; s != NULL && s != stats; s++) + s->max_dispatch_width = max_dispatch_width; + g.add_const_data(nir->constant_data, nir->constant_data_size); return g.get_assembly(); } @@ -7890,6 +7897,8 @@ brw_compile_cs(const struct brw_compiler *compiler, g.enable_debug(name); } + uint32_t max_dispatch_width = 8u << (util_last_bit(prog_data->prog_mask) - 1); + struct brw_compile_stats *stats = params->stats; for (unsigned simd = 0; simd < 3; simd++) { if (prog_data->prog_mask & (1u << simd)) { @@ -7897,7 +7906,10 @@ brw_compile_cs(const struct brw_compiler *compiler, prog_data->prog_offset[simd] = g.generate_code(v[simd]->cfg, 8u << simd, v[simd]->shader_stats, v[simd]->performance_analysis.require(), stats); + if (stats) + stats->max_dispatch_width = max_dispatch_width; stats = stats ? stats + 1 : NULL; + max_dispatch_width = 8u << simd; } } diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index 95227e9655d..56f7815fa5d 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -2485,6 +2485,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, before_size, after_size); if (stats) { stats->dispatch_width = dispatch_width; + stats->max_dispatch_width = dispatch_width; stats->instructions = before_size / 16 - nop_count; stats->sends = send_count; stats->loops = loop_count; diff --git a/src/intel/compiler/brw_vec4_generator.cpp b/src/intel/compiler/brw_vec4_generator.cpp index c6bee0141ff..f47fee23a73 100644 --- a/src/intel/compiler/brw_vec4_generator.cpp +++ b/src/intel/compiler/brw_vec4_generator.cpp @@ -2269,6 +2269,7 @@ generate_code(struct brw_codegen *p, fill_count, send_count, before_size, after_size); if (stats) { stats->dispatch_width = 0; + stats->max_dispatch_width = 0; stats->instructions = before_size / 16; stats->sends = send_count; stats->loops = loop_count;