2024-07-12 13:52:46 -07:00
|
|
|
/*
|
|
|
|
|
* Copyright © 2010 Intel Corporation
|
|
|
|
|
* SPDX-License-Identifier: MIT
|
|
|
|
|
*/
|
|
|
|
|
|
2025-02-05 14:25:15 -08:00
|
|
|
#include "brw_shader.h"
|
2024-12-06 19:48:54 -08:00
|
|
|
#include "brw_analysis.h"
|
2025-01-15 08:20:46 -08:00
|
|
|
#include "brw_builder.h"
|
2024-12-06 16:17:46 -08:00
|
|
|
#include "brw_generator.h"
|
2024-07-12 13:52:46 -07:00
|
|
|
#include "brw_nir.h"
|
|
|
|
|
#include "brw_cfg.h"
|
|
|
|
|
#include "brw_private.h"
|
|
|
|
|
#include "intel_nir.h"
|
|
|
|
|
#include "shader_enums.h"
|
|
|
|
|
#include "dev/intel_debug.h"
|
|
|
|
|
#include "dev/intel_wa.h"
|
|
|
|
|
|
|
|
|
|
#include <memory>
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
fill_push_const_block_info(struct brw_push_const_block *block, unsigned dwords)
|
|
|
|
|
{
|
|
|
|
|
block->dwords = dwords;
|
|
|
|
|
block->regs = DIV_ROUND_UP(dwords, 8);
|
|
|
|
|
block->size = block->regs * 32;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
cs_fill_push_const_info(const struct intel_device_info *devinfo,
|
|
|
|
|
struct brw_cs_prog_data *cs_prog_data)
|
|
|
|
|
{
|
|
|
|
|
const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
|
|
|
|
|
int subgroup_id_index = brw_get_subgroup_id_param_index(devinfo, prog_data);
|
|
|
|
|
|
|
|
|
|
/* The thread ID should be stored in the last param dword */
|
|
|
|
|
assert(subgroup_id_index == -1 ||
|
|
|
|
|
subgroup_id_index == (int)prog_data->nr_params - 1);
|
|
|
|
|
|
|
|
|
|
unsigned cross_thread_dwords, per_thread_dwords;
|
|
|
|
|
if (subgroup_id_index >= 0) {
|
|
|
|
|
/* Fill all but the last register with cross-thread payload */
|
|
|
|
|
cross_thread_dwords = 8 * (subgroup_id_index / 8);
|
|
|
|
|
per_thread_dwords = prog_data->nr_params - cross_thread_dwords;
|
|
|
|
|
assert(per_thread_dwords > 0 && per_thread_dwords <= 8);
|
|
|
|
|
} else {
|
|
|
|
|
/* Fill all data using cross-thread payload */
|
|
|
|
|
cross_thread_dwords = prog_data->nr_params;
|
|
|
|
|
per_thread_dwords = 0u;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fill_push_const_block_info(&cs_prog_data->push.cross_thread, cross_thread_dwords);
|
|
|
|
|
fill_push_const_block_info(&cs_prog_data->push.per_thread, per_thread_dwords);
|
|
|
|
|
|
|
|
|
|
assert(cs_prog_data->push.cross_thread.dwords % 8 == 0 ||
|
|
|
|
|
cs_prog_data->push.per_thread.size == 0);
|
|
|
|
|
assert(cs_prog_data->push.cross_thread.dwords +
|
|
|
|
|
cs_prog_data->push.per_thread.dwords ==
|
|
|
|
|
prog_data->nr_params);
|
|
|
|
|
}
|
|
|
|
|
|
2024-07-12 14:20:57 -07:00
|
|
|
static bool
|
2024-12-07 10:25:45 -08:00
|
|
|
run_cs(brw_shader &s, bool allow_spilling)
|
2024-07-12 14:20:57 -07:00
|
|
|
{
|
2025-08-05 16:43:06 +08:00
|
|
|
assert(mesa_shader_stage_is_compute(s.stage));
|
2024-07-12 14:20:57 -07:00
|
|
|
|
2024-12-06 22:13:36 -08:00
|
|
|
s.payload_ = new brw_cs_thread_payload(s);
|
2024-07-12 14:20:57 -07:00
|
|
|
|
2024-12-07 09:36:03 -08:00
|
|
|
brw_from_nir(&s);
|
2024-07-12 14:20:57 -07:00
|
|
|
|
|
|
|
|
if (s.failed)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
s.emit_cs_terminate();
|
|
|
|
|
|
2024-07-12 17:08:46 -07:00
|
|
|
brw_calculate_cfg(s);
|
2024-07-12 14:20:57 -07:00
|
|
|
|
2024-12-06 11:37:57 -08:00
|
|
|
brw_optimize(s);
|
2024-07-12 14:20:57 -07:00
|
|
|
|
|
|
|
|
s.assign_curb_setup();
|
|
|
|
|
|
2024-12-06 11:37:57 -08:00
|
|
|
brw_lower_3src_null_dest(s);
|
|
|
|
|
brw_workaround_emit_dummy_mov_instruction(s);
|
2024-07-12 14:20:57 -07:00
|
|
|
|
2024-07-12 16:55:33 -07:00
|
|
|
brw_allocate_registers(s, allow_spilling);
|
2024-07-12 14:20:57 -07:00
|
|
|
|
2024-12-06 11:37:57 -08:00
|
|
|
brw_workaround_source_arf_before_eot(s);
|
2024-10-19 12:53:21 +03:00
|
|
|
|
2024-07-12 14:20:57 -07:00
|
|
|
return !s.failed;
|
|
|
|
|
}
|
|
|
|
|
|
2023-08-09 17:15:11 -07:00
|
|
|
static bool
|
|
|
|
|
instr_uses_sampler(nir_builder *b, nir_instr *instr, void *cb_data)
|
|
|
|
|
{
|
|
|
|
|
if (instr->type != nir_instr_type_tex)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
switch (nir_instr_as_tex(instr)->op) {
|
|
|
|
|
case nir_texop_tex:
|
|
|
|
|
case nir_texop_txd:
|
|
|
|
|
case nir_texop_txf:
|
|
|
|
|
case nir_texop_txl:
|
|
|
|
|
case nir_texop_txb:
|
|
|
|
|
case nir_texop_txf_ms:
|
|
|
|
|
case nir_texop_txf_ms_mcs_intel:
|
|
|
|
|
case nir_texop_lod:
|
|
|
|
|
case nir_texop_tg4:
|
|
|
|
|
case nir_texop_texture_samples:
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
brw_nir_uses_sampler(nir_shader *shader)
|
|
|
|
|
{
|
|
|
|
|
return nir_shader_instructions_pass(shader, instr_uses_sampler,
|
|
|
|
|
nir_metadata_all,
|
|
|
|
|
NULL);
|
|
|
|
|
}
|
|
|
|
|
|
2025-02-14 09:08:01 -08:00
|
|
|
static inline uint32_t *
|
|
|
|
|
brw_stage_prog_data_add_params(struct brw_stage_prog_data *prog_data,
|
|
|
|
|
unsigned nr_new_params)
|
|
|
|
|
{
|
|
|
|
|
unsigned old_nr_params = prog_data->nr_params;
|
|
|
|
|
prog_data->nr_params += nr_new_params;
|
|
|
|
|
prog_data->param = reralloc(ralloc_parent(prog_data->param),
|
|
|
|
|
prog_data->param, uint32_t,
|
|
|
|
|
prog_data->nr_params);
|
|
|
|
|
return prog_data->param + old_nr_params;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
brw_adjust_uniforms(brw_shader &s)
|
|
|
|
|
{
|
|
|
|
|
if (s.devinfo->verx10 >= 125)
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
assert(mesa_shader_stage_is_compute(s.stage));
|
|
|
|
|
|
|
|
|
|
if (brw_get_subgroup_id_param_index(s.devinfo, s.prog_data) == -1) {
|
|
|
|
|
/* Add uniforms for builtins after regular NIR uniforms. */
|
|
|
|
|
assert(s.uniforms == s.prog_data->nr_params);
|
|
|
|
|
|
|
|
|
|
/* Subgroup ID must be the last uniform on the list. This will make
|
|
|
|
|
* easier later to split between cross thread and per thread
|
|
|
|
|
* uniforms.
|
|
|
|
|
*/
|
|
|
|
|
uint32_t *param = brw_stage_prog_data_add_params(s.prog_data, 1);
|
|
|
|
|
*param = BRW_PARAM_BUILTIN_SUBGROUP_ID;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
s.uniforms = s.prog_data->nr_params;
|
|
|
|
|
}
|
|
|
|
|
|
2024-07-12 13:52:46 -07:00
|
|
|
const unsigned *
|
|
|
|
|
brw_compile_cs(const struct brw_compiler *compiler,
|
|
|
|
|
struct brw_compile_cs_params *params)
|
|
|
|
|
{
|
2024-09-24 16:53:25 -07:00
|
|
|
const struct intel_device_info *devinfo = compiler->devinfo;
|
2024-09-30 08:45:21 +03:00
|
|
|
struct nir_shader *nir = params->base.nir;
|
2024-07-12 13:52:46 -07:00
|
|
|
const struct brw_cs_prog_key *key = params->key;
|
|
|
|
|
struct brw_cs_prog_data *prog_data = params->prog_data;
|
|
|
|
|
|
|
|
|
|
const bool debug_enabled =
|
|
|
|
|
brw_should_print_shader(nir, params->base.debug_flag ?
|
2025-05-16 23:28:04 +00:00
|
|
|
params->base.debug_flag : DEBUG_CS,
|
|
|
|
|
params->base.source_hash);
|
2024-07-12 13:52:46 -07:00
|
|
|
|
2025-02-12 12:42:08 +02:00
|
|
|
brw_prog_data_init(&prog_data->base, ¶ms->base);
|
2024-04-24 16:14:16 +03:00
|
|
|
prog_data->uses_inline_data = brw_nir_uses_inline_data(nir) ||
|
|
|
|
|
key->base.uses_inline_push_addr;
|
2024-09-30 08:45:21 +03:00
|
|
|
assert(compiler->devinfo->verx10 >= 125 || !prog_data->uses_inline_data);
|
2024-07-12 13:52:46 -07:00
|
|
|
|
|
|
|
|
if (!nir->info.workgroup_size_variable) {
|
|
|
|
|
prog_data->local_size[0] = nir->info.workgroup_size[0];
|
|
|
|
|
prog_data->local_size[1] = nir->info.workgroup_size[1];
|
|
|
|
|
prog_data->local_size[2] = nir->info.workgroup_size[2];
|
|
|
|
|
}
|
|
|
|
|
|
brw: Do most of NIR postprocessing before cloning for SIMD variants
We were doing a lot of NIR work repeatedly for each SIMD variant of
compute and mesh shaders. Instead, do it once before cloning, and
just do one final optimization loop and out-of-SSA for each.
fossil-db results on Arc B580:
Totals:
Instrs: 233771096 -> 233794024 (+0.01%); split: -0.01%, +0.02%
Subgroup size: 15922768 -> 15922736 (-0.00%); split: +0.00%, -0.00%
Send messages: 12095619 -> 12098234 (+0.02%); split: -0.00%, +0.02%
Loop count: 137562 -> 137523 (-0.03%)
Cycle count: 32600323744 -> 32667411252 (+0.21%); split: -0.06%, +0.27%
Spill count: 540908 -> 542027 (+0.21%); split: -0.07%, +0.28%
Fill count: 700938 -> 698983 (-0.28%); split: -0.73%, +0.45%
Scratch Memory Size: 37266432 -> 37304320 (+0.10%); split: -0.10%, +0.20%
Max live registers: 72691728 -> 72692987 (+0.00%); split: -0.00%, +0.00%
Non SSA regs after NIR: 67690309 -> 67688352 (-0.00%); split: -0.01%, +0.00%
Totals from 3576 (0.45% of 789301) affected shaders:
Instrs: 6932956 -> 6955884 (+0.33%); split: -0.41%, +0.74%
Subgroup size: 88816 -> 88784 (-0.04%); split: +0.09%, -0.13%
Send messages: 329168 -> 331783 (+0.79%); split: -0.02%, +0.81%
Loop count: 8753 -> 8714 (-0.45%)
Cycle count: 15153678820 -> 15220766328 (+0.44%); split: -0.14%, +0.58%
Spill count: 213751 -> 214870 (+0.52%); split: -0.18%, +0.71%
Fill count: 282616 -> 280661 (-0.69%); split: -1.82%, +1.13%
Scratch Memory Size: 13056000 -> 13093888 (+0.29%); split: -0.27%, +0.56%
Max live registers: 834757 -> 836016 (+0.15%); split: -0.11%, +0.26%
Non SSA regs after NIR: 995033 -> 993076 (-0.20%); split: -0.48%, +0.28%
Looking at a few of the shaders with substantial instruction count
increases, it appears that it is largely due to more loops being
unrolled, which is probably actually a good thing.
The compile time impact of this patch appears to be negligable.
However, doing postprocessing before SIMD cloning allows us to
examine the postprocessed SSA-form NIR for improvements in an
upcoming patch.
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36750>
2025-08-11 14:54:09 -07:00
|
|
|
brw_postprocess_nir_opts(nir, compiler, key->base.robust_flags);
|
|
|
|
|
|
2024-07-12 13:52:46 -07:00
|
|
|
brw_simd_selection_state simd_state{
|
|
|
|
|
.devinfo = compiler->devinfo,
|
|
|
|
|
.prog_data = prog_data,
|
|
|
|
|
.required_width = brw_required_dispatch_width(&nir->info),
|
|
|
|
|
};
|
|
|
|
|
|
brw: Skip compilation of larger SIMDs when pressure is too high
This allows us to skip the entire backend compilation process for
large SIMD widths when register pressure is high enough that we'd
likely decide to prefer a smaller one in the end anyway. The hope
is to make the same decisions as before, but with less CPU overhead.
We are making mostly the same decisions as before:
| API / Platform | Total Shaders | Changed | % Identical
--------------------------------------------------
| VK / Arc A770 | 905,525 | 1,157 | 99.872% |
| VK / Arc B580 | 788,127 | 53 | 99.993% |
| VK / Panther | 786,333 | 13 | 99.998% |
| GL / Arc A770 | 308,618 | 269 | 99.913% |
| GL / Arc B580 | 264,066 | 13 | 99.995% |
| GL / Panther | 273,212 | 0 | 100.000% |
Improves compile times on my i7-12700K:
| Game | Arc B580 | Arc A770 |
---------------------------------------------------
| Assassins Creed: Odyssey | -13.47% | -10.98% |
| Borderlands 3 (DX12) | -10.05% | -11.31% |
| Dark Souls 3 | -21.06% | -21.08% |
| Oblivion Remastered | -11.10% | -9.82% |
| Phasmophobia | -32.73% | -31.00% |
| Red Dead Redemption 2 | -20.10% | -14.38% |
| Total War: Warhammer III | -10.11% | -14.44% |
| Wolfenstein Youngblood | -15.91% | -13.47% |
| Shadow of the Tomb Raider | -30.23% | -25.86% |
It seems to have nearly no effect on compile times on Xe3 unfortunately,
as only 1,014 shaders in fossil-db even fail SIMD32 compilation in the
first place, and we want to let most of the "might succeed" cases
through to the backend for throughput analysis.
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36750>
2025-08-11 02:39:49 -07:00
|
|
|
unsigned pressure[SIMD_COUNT];
|
|
|
|
|
brw_nir_quick_pressure_estimate(nir, devinfo, pressure);
|
|
|
|
|
|
|
|
|
|
for (unsigned i = 0; i < SIMD_COUNT; i++) {
|
|
|
|
|
simd_state.beyond_threshold[i] =
|
|
|
|
|
pressure[i] > compiler->register_pressure_threshold;
|
|
|
|
|
}
|
|
|
|
|
|
2023-08-09 17:15:11 -07:00
|
|
|
prog_data->uses_sampler = brw_nir_uses_sampler(params->base.nir);
|
|
|
|
|
|
2024-12-07 10:25:45 -08:00
|
|
|
std::unique_ptr<brw_shader> v[3];
|
2024-07-12 13:52:46 -07:00
|
|
|
|
2024-09-24 16:53:25 -07:00
|
|
|
for (unsigned i = 0; i < 3; i++) {
|
|
|
|
|
const unsigned simd = devinfo->ver >= 30 ? 2 - i : i;
|
|
|
|
|
|
2024-07-12 13:52:46 -07:00
|
|
|
if (!brw_simd_should_compile(simd_state, simd))
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
const unsigned dispatch_width = 8u << simd;
|
|
|
|
|
|
|
|
|
|
nir_shader *shader = nir_shader_clone(params->base.mem_ctx, nir);
|
2024-05-10 13:44:44 -07:00
|
|
|
brw_debug_archive_nir(params->base.archiver, shader, dispatch_width, "first");
|
|
|
|
|
|
2024-07-12 13:52:46 -07:00
|
|
|
brw_nir_apply_key(shader, compiler, &key->base,
|
|
|
|
|
dispatch_width);
|
|
|
|
|
|
|
|
|
|
NIR_PASS(_, shader, brw_nir_lower_simd, dispatch_width);
|
|
|
|
|
|
brw: Do most of NIR postprocessing before cloning for SIMD variants
We were doing a lot of NIR work repeatedly for each SIMD variant of
compute and mesh shaders. Instead, do it once before cloning, and
just do one final optimization loop and out-of-SSA for each.
fossil-db results on Arc B580:
Totals:
Instrs: 233771096 -> 233794024 (+0.01%); split: -0.01%, +0.02%
Subgroup size: 15922768 -> 15922736 (-0.00%); split: +0.00%, -0.00%
Send messages: 12095619 -> 12098234 (+0.02%); split: -0.00%, +0.02%
Loop count: 137562 -> 137523 (-0.03%)
Cycle count: 32600323744 -> 32667411252 (+0.21%); split: -0.06%, +0.27%
Spill count: 540908 -> 542027 (+0.21%); split: -0.07%, +0.28%
Fill count: 700938 -> 698983 (-0.28%); split: -0.73%, +0.45%
Scratch Memory Size: 37266432 -> 37304320 (+0.10%); split: -0.10%, +0.20%
Max live registers: 72691728 -> 72692987 (+0.00%); split: -0.00%, +0.00%
Non SSA regs after NIR: 67690309 -> 67688352 (-0.00%); split: -0.01%, +0.00%
Totals from 3576 (0.45% of 789301) affected shaders:
Instrs: 6932956 -> 6955884 (+0.33%); split: -0.41%, +0.74%
Subgroup size: 88816 -> 88784 (-0.04%); split: +0.09%, -0.13%
Send messages: 329168 -> 331783 (+0.79%); split: -0.02%, +0.81%
Loop count: 8753 -> 8714 (-0.45%)
Cycle count: 15153678820 -> 15220766328 (+0.44%); split: -0.14%, +0.58%
Spill count: 213751 -> 214870 (+0.52%); split: -0.18%, +0.71%
Fill count: 282616 -> 280661 (-0.69%); split: -1.82%, +1.13%
Scratch Memory Size: 13056000 -> 13093888 (+0.29%); split: -0.27%, +0.56%
Max live registers: 834757 -> 836016 (+0.15%); split: -0.11%, +0.26%
Non SSA regs after NIR: 995033 -> 993076 (-0.20%); split: -0.48%, +0.28%
Looking at a few of the shaders with substantial instruction count
increases, it appears that it is largely due to more loops being
unrolled, which is probably actually a good thing.
The compile time impact of this patch appears to be negligable.
However, doing postprocessing before SIMD cloning allows us to
examine the postprocessed SSA-form NIR for improvements in an
upcoming patch.
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36750>
2025-08-11 14:54:09 -07:00
|
|
|
brw_nir_optimize(shader, devinfo);
|
|
|
|
|
brw_postprocess_nir_out_of_ssa(shader, dispatch_width,
|
|
|
|
|
params->base.archiver, debug_enabled);
|
2024-07-12 13:52:46 -07:00
|
|
|
|
2025-08-27 13:34:40 -07:00
|
|
|
const brw_shader_params shader_params = {
|
|
|
|
|
.compiler = compiler,
|
|
|
|
|
.mem_ctx = params->base.mem_ctx,
|
|
|
|
|
.nir = shader,
|
|
|
|
|
.key = &key->base,
|
|
|
|
|
.prog_data = &prog_data->base,
|
|
|
|
|
.dispatch_width = dispatch_width,
|
|
|
|
|
.needs_register_pressure = params->base.stats != NULL,
|
|
|
|
|
.log_data = params->base.log_data,
|
|
|
|
|
.debug_enabled = debug_enabled,
|
2024-05-10 13:44:44 -07:00
|
|
|
.archiver = params->base.archiver,
|
2025-08-27 13:34:40 -07:00
|
|
|
};
|
|
|
|
|
v[simd] = std::make_unique<brw_shader>(&shader_params);
|
2025-02-14 09:08:01 -08:00
|
|
|
brw_adjust_uniforms(*v[simd]);
|
2024-07-12 13:52:46 -07:00
|
|
|
|
2024-09-24 16:53:25 -07:00
|
|
|
const bool allow_spilling = simd == 0 ||
|
|
|
|
|
(!simd_state.compiled[simd - 1] && !brw_simd_should_compile(simd_state, simd - 1)) ||
|
|
|
|
|
nir->info.workgroup_size_variable;
|
2024-07-12 13:52:46 -07:00
|
|
|
|
2024-09-24 16:53:25 -07:00
|
|
|
if (devinfo->ver < 30 || nir->info.workgroup_size_variable) {
|
2025-02-14 09:08:01 -08:00
|
|
|
ASSERTED const int first = brw_simd_first_compiled(simd_state);
|
2024-09-24 16:53:25 -07:00
|
|
|
assert(allow_spilling == (first < 0 || nir->info.workgroup_size_variable));
|
|
|
|
|
}
|
2024-07-12 13:52:46 -07:00
|
|
|
|
2024-07-12 14:20:57 -07:00
|
|
|
if (run_cs(*v[simd], allow_spilling)) {
|
2024-07-12 13:52:46 -07:00
|
|
|
cs_fill_push_const_info(compiler->devinfo, prog_data);
|
|
|
|
|
|
|
|
|
|
brw_simd_mark_compiled(simd_state, simd, v[simd]->spilled_any_registers);
|
2024-09-24 16:53:25 -07:00
|
|
|
|
|
|
|
|
if (devinfo->ver >= 30 && !v[simd]->spilled_any_registers &&
|
|
|
|
|
!nir->info.workgroup_size_variable)
|
|
|
|
|
break;
|
2024-07-12 13:52:46 -07:00
|
|
|
} else {
|
|
|
|
|
simd_state.error[simd] = ralloc_strdup(params->base.mem_ctx, v[simd]->fail_msg);
|
|
|
|
|
if (simd > 0) {
|
|
|
|
|
brw_shader_perf_log(compiler, params->base.log_data,
|
|
|
|
|
"SIMD%u shader failed to compile: %s\n",
|
|
|
|
|
dispatch_width, v[simd]->fail_msg);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const int selected_simd = brw_simd_select(simd_state);
|
|
|
|
|
if (selected_simd < 0) {
|
|
|
|
|
params->base.error_str =
|
|
|
|
|
ralloc_asprintf(params->base.mem_ctx,
|
|
|
|
|
"Can't compile shader: "
|
|
|
|
|
"SIMD8 '%s', SIMD16 '%s' and SIMD32 '%s'.\n",
|
|
|
|
|
simd_state.error[0], simd_state.error[1],
|
|
|
|
|
simd_state.error[2]);
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
assert(selected_simd < 3);
|
|
|
|
|
|
|
|
|
|
if (!nir->info.workgroup_size_variable)
|
|
|
|
|
prog_data->prog_mask = 1 << selected_simd;
|
|
|
|
|
|
2024-12-06 16:33:35 -08:00
|
|
|
brw_generator g(compiler, ¶ms->base, &prog_data->base,
|
2024-07-12 13:52:46 -07:00
|
|
|
MESA_SHADER_COMPUTE);
|
|
|
|
|
if (unlikely(debug_enabled)) {
|
|
|
|
|
char *name = ralloc_asprintf(params->base.mem_ctx,
|
|
|
|
|
"%s compute shader %s",
|
|
|
|
|
nir->info.label ?
|
|
|
|
|
nir->info.label : "unnamed",
|
|
|
|
|
nir->info.name);
|
|
|
|
|
g.enable_debug(name);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint32_t max_dispatch_width = 8u << (util_last_bit(prog_data->prog_mask) - 1);
|
|
|
|
|
|
|
|
|
|
struct brw_compile_stats *stats = params->base.stats;
|
|
|
|
|
for (unsigned simd = 0; simd < 3; simd++) {
|
|
|
|
|
if (prog_data->prog_mask & (1u << simd)) {
|
|
|
|
|
assert(v[simd]);
|
2025-02-13 21:56:22 -08:00
|
|
|
prog_data->prog_offset[simd] = g.generate_code(*v[simd], stats);
|
2024-07-12 13:52:46 -07:00
|
|
|
if (stats)
|
|
|
|
|
stats->max_dispatch_width = max_dispatch_width;
|
|
|
|
|
stats = stats ? stats + 1 : NULL;
|
2024-09-18 14:32:58 -07:00
|
|
|
|
|
|
|
|
prog_data->base.grf_used = MAX2(prog_data->base.grf_used,
|
|
|
|
|
v[simd]->grf_used);
|
|
|
|
|
|
2024-07-12 13:52:46 -07:00
|
|
|
max_dispatch_width = 8u << simd;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
g.add_const_data(nir->constant_data, nir->constant_data_size);
|
|
|
|
|
|
|
|
|
|
return g.get_assembly();
|
|
|
|
|
}
|