mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 04:38:03 +02:00
intel/elk: Remove Gfx9+ from compile/run functions
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27629>
This commit is contained in:
parent
cb2d96af6a
commit
7b651ac6c3
6 changed files with 25 additions and 207 deletions
|
|
@ -5935,35 +5935,6 @@ elk_fs_visitor::fixup_3src_null_dest()
|
|||
DEPENDENCY_VARIABLES);
|
||||
}
|
||||
|
||||
/* Wa_14015360517
|
||||
*
|
||||
* The first instruction of any kernel should have non-zero emask.
|
||||
* Make sure this happens by introducing a dummy mov instruction.
|
||||
*/
|
||||
void
|
||||
elk_fs_visitor::emit_dummy_mov_instruction()
|
||||
{
|
||||
if (!intel_needs_workaround(devinfo, 14015360517))
|
||||
return;
|
||||
|
||||
struct elk_backend_instruction *first_inst =
|
||||
cfg->first_block()->start();
|
||||
|
||||
/* We can skip the WA if first instruction is marked with
|
||||
* force_writemask_all or exec_size equals dispatch_width.
|
||||
*/
|
||||
if (first_inst->force_writemask_all ||
|
||||
first_inst->exec_size == dispatch_width)
|
||||
return;
|
||||
|
||||
/* Insert dummy mov as first instruction. */
|
||||
const fs_builder ubld =
|
||||
fs_builder(this, cfg->first_block(), (elk_fs_inst *)first_inst).exec_all().group(8, 0);
|
||||
ubld.MOV(ubld.null_reg_ud(), elk_imm_ud(0u));
|
||||
|
||||
invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the first instruction in the program that might start a region of
|
||||
* divergent control flow due to a HALT jump. There is no
|
||||
|
|
@ -6350,9 +6321,6 @@ elk_fs_visitor::run_vs()
|
|||
|
||||
fixup_3src_null_dest();
|
||||
|
||||
/* Wa_14015360517 */
|
||||
emit_dummy_mov_instruction();
|
||||
|
||||
allocate_registers(true /* allow_spilling */);
|
||||
|
||||
return !failed;
|
||||
|
|
@ -6479,9 +6447,6 @@ elk_fs_visitor::run_tcs()
|
|||
|
||||
fixup_3src_null_dest();
|
||||
|
||||
/* Wa_14015360517 */
|
||||
emit_dummy_mov_instruction();
|
||||
|
||||
allocate_registers(true /* allow_spilling */);
|
||||
|
||||
return !failed;
|
||||
|
|
@ -6510,9 +6475,6 @@ elk_fs_visitor::run_tes()
|
|||
|
||||
fixup_3src_null_dest();
|
||||
|
||||
/* Wa_14015360517 */
|
||||
emit_dummy_mov_instruction();
|
||||
|
||||
allocate_registers(true /* allow_spilling */);
|
||||
|
||||
return !failed;
|
||||
|
|
@ -6558,41 +6520,11 @@ elk_fs_visitor::run_gs()
|
|||
|
||||
fixup_3src_null_dest();
|
||||
|
||||
/* Wa_14015360517 */
|
||||
emit_dummy_mov_instruction();
|
||||
|
||||
allocate_registers(true /* allow_spilling */);
|
||||
|
||||
return !failed;
|
||||
}
|
||||
|
||||
/* From the SKL PRM, Volume 16, Workarounds:
|
||||
*
|
||||
* 0877 3D Pixel Shader Hang possible when pixel shader dispatched with
|
||||
* only header phases (R0-R2)
|
||||
*
|
||||
* WA: Enable a non-header phase (e.g. push constant) when dispatch would
|
||||
* have been header only.
|
||||
*
|
||||
* Instead of enabling push constants one can alternatively enable one of the
|
||||
* inputs. Here one simply chooses "layer" which shouldn't impose much
|
||||
* overhead.
|
||||
*/
|
||||
static void
|
||||
gfx9_ps_header_only_workaround(struct elk_wm_prog_data *wm_prog_data)
|
||||
{
|
||||
if (wm_prog_data->num_varying_inputs)
|
||||
return;
|
||||
|
||||
if (wm_prog_data->base.curb_read_length)
|
||||
return;
|
||||
|
||||
wm_prog_data->urb_setup[VARYING_SLOT_LAYER] = 0;
|
||||
wm_prog_data->num_varying_inputs = 1;
|
||||
|
||||
elk_compute_urb_setup_index(wm_prog_data);
|
||||
}
|
||||
|
||||
bool
|
||||
elk_fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
|
||||
{
|
||||
|
|
@ -6626,11 +6558,9 @@ elk_fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
|
|||
for (unsigned i = 0; i < dispatch_width / lower_width; i++) {
|
||||
/* According to the "PS Thread Payload for Normal
|
||||
* Dispatch" pages on the BSpec, the dispatch mask is
|
||||
* stored in R0.15/R1.15 on gfx20+ and in R1.7/R2.7 on
|
||||
* gfx6+.
|
||||
* stored in R1.7/R2.7 on gfx6+.
|
||||
*/
|
||||
const elk_fs_reg dispatch_mask =
|
||||
devinfo->ver >= 20 ? xe2_vec1_grf(i, 15) :
|
||||
devinfo->ver >= 6 ? elk_vec1_grf(i + 1, 7) :
|
||||
elk_vec1_grf(0, 0);
|
||||
bld.exec_all().group(1, 0)
|
||||
|
|
@ -6658,16 +6588,10 @@ elk_fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
|
|||
|
||||
assign_curb_setup();
|
||||
|
||||
if (devinfo->ver == 9)
|
||||
gfx9_ps_header_only_workaround(wm_prog_data);
|
||||
|
||||
assign_urb_setup();
|
||||
|
||||
fixup_3src_null_dest();
|
||||
|
||||
/* Wa_14015360517 */
|
||||
emit_dummy_mov_instruction();
|
||||
|
||||
allocate_registers(allow_spilling);
|
||||
}
|
||||
|
||||
|
|
@ -6705,9 +6629,6 @@ elk_fs_visitor::run_cs(bool allow_spilling)
|
|||
|
||||
fixup_3src_null_dest();
|
||||
|
||||
/* Wa_14015360517 */
|
||||
emit_dummy_mov_instruction();
|
||||
|
||||
allocate_registers(allow_spilling);
|
||||
|
||||
return !failed;
|
||||
|
|
@ -7120,32 +7041,29 @@ elk_compile_fs(const struct elk_compiler *compiler,
|
|||
elk_nir_populate_wm_prog_data(nir, compiler->devinfo, key, prog_data);
|
||||
|
||||
std::unique_ptr<elk_fs_visitor> v8, v16, v32, vmulti;
|
||||
elk_cfg_t *simd8_cfg = NULL, *simd16_cfg = NULL, *simd32_cfg = NULL,
|
||||
*multi_cfg = NULL;
|
||||
elk_cfg_t *simd8_cfg = NULL, *simd16_cfg = NULL, *simd32_cfg = NULL;
|
||||
float throughput = 0;
|
||||
bool has_spilled = false;
|
||||
|
||||
if (devinfo->ver < 20) {
|
||||
v8 = std::make_unique<elk_fs_visitor>(compiler, ¶ms->base, key,
|
||||
prog_data, nir, 8, 1,
|
||||
params->base.stats != NULL,
|
||||
debug_enabled);
|
||||
if (!v8->run_fs(allow_spilling, false /* do_rep_send */)) {
|
||||
params->base.error_str = ralloc_strdup(params->base.mem_ctx,
|
||||
v8->fail_msg);
|
||||
return NULL;
|
||||
} else if (INTEL_SIMD(FS, 8)) {
|
||||
simd8_cfg = v8->cfg;
|
||||
v8 = std::make_unique<elk_fs_visitor>(compiler, ¶ms->base, key,
|
||||
prog_data, nir, 8, 1,
|
||||
params->base.stats != NULL,
|
||||
debug_enabled);
|
||||
if (!v8->run_fs(allow_spilling, false /* do_rep_send */)) {
|
||||
params->base.error_str = ralloc_strdup(params->base.mem_ctx,
|
||||
v8->fail_msg);
|
||||
return NULL;
|
||||
} else if (INTEL_SIMD(FS, 8)) {
|
||||
simd8_cfg = v8->cfg;
|
||||
|
||||
assert(v8->payload().num_regs % reg_unit(devinfo) == 0);
|
||||
prog_data->base.dispatch_grf_start_reg = v8->payload().num_regs / reg_unit(devinfo);
|
||||
assert(v8->payload().num_regs % reg_unit(devinfo) == 0);
|
||||
prog_data->base.dispatch_grf_start_reg = v8->payload().num_regs / reg_unit(devinfo);
|
||||
|
||||
prog_data->reg_blocks_8 = elk_register_blocks(v8->grf_used);
|
||||
const performance &perf = v8->performance_analysis.require();
|
||||
throughput = MAX2(throughput, perf.throughput);
|
||||
has_spilled = v8->spilled_any_registers;
|
||||
allow_spilling = false;
|
||||
}
|
||||
prog_data->reg_blocks_8 = elk_register_blocks(v8->grf_used);
|
||||
const performance &perf = v8->performance_analysis.require();
|
||||
throughput = MAX2(throughput, perf.throughput);
|
||||
has_spilled = v8->spilled_any_registers;
|
||||
allow_spilling = false;
|
||||
}
|
||||
|
||||
/* Limit dispatch width to simd8 with dual source blending on gfx8.
|
||||
|
|
@ -7158,18 +7076,6 @@ elk_compile_fs(const struct elk_compiler *compiler,
|
|||
"using SIMD8 when dual src blending.\n");
|
||||
}
|
||||
|
||||
if (key->coarse_pixel && devinfo->ver < 20) {
|
||||
if (prog_data->dual_src_blend) {
|
||||
v8->limit_dispatch_width(8, "SIMD16 coarse pixel shading cannot"
|
||||
" use SIMD8 messages.\n");
|
||||
}
|
||||
v8->limit_dispatch_width(16, "SIMD32 not supported with coarse"
|
||||
" pixel shading.\n");
|
||||
}
|
||||
|
||||
if (nir->info.ray_queries > 0 && v8)
|
||||
v8->limit_dispatch_width(16, "SIMD32 with ray queries.\n");
|
||||
|
||||
if (!has_spilled &&
|
||||
(!v8 || v8->max_dispatch_width >= 16) &&
|
||||
(INTEL_SIMD(FS, 16) || params->use_rep_send)) {
|
||||
|
|
@ -7238,78 +7144,6 @@ elk_compile_fs(const struct elk_compiler *compiler,
|
|||
}
|
||||
}
|
||||
|
||||
if (devinfo->ver >= 12 && !has_spilled &&
|
||||
params->max_polygons >= 2 && !key->coarse_pixel) {
|
||||
elk_fs_visitor *vbase = v8 ? v8.get() : v16 ? v16.get() : v32.get();
|
||||
assert(vbase);
|
||||
|
||||
if (devinfo->ver >= 20 &&
|
||||
params->max_polygons >= 4 &&
|
||||
vbase->max_dispatch_width >= 32 &&
|
||||
4 * prog_data->num_varying_inputs <= MAX_VARYING &&
|
||||
INTEL_SIMD(FS, 4X8)) {
|
||||
/* Try a quad-SIMD8 compile */
|
||||
vmulti = std::make_unique<elk_fs_visitor>(compiler, ¶ms->base, key,
|
||||
prog_data, nir, 32, 4,
|
||||
params->base.stats != NULL,
|
||||
debug_enabled);
|
||||
vmulti->import_uniforms(vbase);
|
||||
if (!vmulti->run_fs(false, params->use_rep_send)) {
|
||||
elk_shader_perf_log(compiler, params->base.log_data,
|
||||
"Quad-SIMD8 shader failed to compile: %s\n",
|
||||
vmulti->fail_msg);
|
||||
} else {
|
||||
multi_cfg = vmulti->cfg;
|
||||
assert(!vmulti->spilled_any_registers);
|
||||
}
|
||||
}
|
||||
|
||||
if (!multi_cfg && devinfo->ver >= 20 &&
|
||||
vbase->max_dispatch_width >= 32 &&
|
||||
2 * prog_data->num_varying_inputs <= MAX_VARYING &&
|
||||
INTEL_SIMD(FS, 2X16)) {
|
||||
/* Try a dual-SIMD16 compile */
|
||||
vmulti = std::make_unique<elk_fs_visitor>(compiler, ¶ms->base, key,
|
||||
prog_data, nir, 32, 2,
|
||||
params->base.stats != NULL,
|
||||
debug_enabled);
|
||||
vmulti->import_uniforms(vbase);
|
||||
if (!vmulti->run_fs(false, params->use_rep_send)) {
|
||||
elk_shader_perf_log(compiler, params->base.log_data,
|
||||
"Dual-SIMD16 shader failed to compile: %s\n",
|
||||
vmulti->fail_msg);
|
||||
} else {
|
||||
multi_cfg = vmulti->cfg;
|
||||
assert(!vmulti->spilled_any_registers);
|
||||
}
|
||||
}
|
||||
|
||||
if (!multi_cfg && vbase->max_dispatch_width >= 16 &&
|
||||
2 * prog_data->num_varying_inputs <= MAX_VARYING &&
|
||||
INTEL_SIMD(FS, 2X8)) {
|
||||
/* Try a dual-SIMD8 compile */
|
||||
vmulti = std::make_unique<elk_fs_visitor>(compiler, ¶ms->base, key,
|
||||
prog_data, nir, 16, 2,
|
||||
params->base.stats != NULL,
|
||||
debug_enabled);
|
||||
vmulti->import_uniforms(vbase);
|
||||
if (!vmulti->run_fs(allow_spilling, params->use_rep_send)) {
|
||||
elk_shader_perf_log(compiler, params->base.log_data,
|
||||
"Dual-SIMD8 shader failed to compile: %s\n",
|
||||
vmulti->fail_msg);
|
||||
} else {
|
||||
multi_cfg = vmulti->cfg;
|
||||
}
|
||||
}
|
||||
|
||||
if (multi_cfg) {
|
||||
assert(vmulti->payload().num_regs % reg_unit(devinfo) == 0);
|
||||
prog_data->base.dispatch_grf_start_reg = vmulti->payload().num_regs / reg_unit(devinfo);
|
||||
|
||||
prog_data->reg_blocks_8 = elk_register_blocks(vmulti->grf_used);
|
||||
}
|
||||
}
|
||||
|
||||
/* When the caller requests a repclear shader, they want SIMD16-only */
|
||||
if (params->use_rep_send)
|
||||
simd8_cfg = NULL;
|
||||
|
|
@ -7358,16 +7192,7 @@ elk_compile_fs(const struct elk_compiler *compiler,
|
|||
struct elk_compile_stats *stats = params->base.stats;
|
||||
uint32_t max_dispatch_width = 0;
|
||||
|
||||
if (multi_cfg) {
|
||||
prog_data->dispatch_multi = vmulti->dispatch_width;
|
||||
prog_data->max_polygons = vmulti->max_polygons;
|
||||
g.generate_code(multi_cfg, vmulti->dispatch_width, vmulti->shader_stats,
|
||||
vmulti->performance_analysis.require(),
|
||||
stats, vmulti->max_polygons);
|
||||
stats = stats ? stats + 1 : NULL;
|
||||
max_dispatch_width = vmulti->dispatch_width;
|
||||
|
||||
} else if (simd8_cfg) {
|
||||
if (simd8_cfg) {
|
||||
prog_data->dispatch_8 = true;
|
||||
g.generate_code(simd8_cfg, 8, v8->shader_stats,
|
||||
v8->performance_analysis.require(), stats, 1);
|
||||
|
|
|
|||
|
|
@ -213,7 +213,6 @@ public:
|
|||
void allocate_registers(bool allow_spilling);
|
||||
uint32_t compute_max_register_pressure();
|
||||
void fixup_3src_null_dest();
|
||||
void emit_dummy_mov_instruction();
|
||||
bool fixup_nomask_control_flow();
|
||||
void assign_curb_setup();
|
||||
void assign_urb_setup();
|
||||
|
|
|
|||
|
|
@ -1343,7 +1343,7 @@ elk_compile_tes(const struct elk_compiler *compiler,
|
|||
}
|
||||
|
||||
if (is_scalar) {
|
||||
const unsigned dispatch_width = devinfo->ver >= 20 ? 16 : 8;
|
||||
const unsigned dispatch_width = 8;
|
||||
elk_fs_visitor v(compiler, ¶ms->base, &key->base,
|
||||
&prog_data->base.base, nir, dispatch_width,
|
||||
params->base.stats != NULL, debug_enabled);
|
||||
|
|
|
|||
|
|
@ -102,8 +102,7 @@ elk_simd_should_compile(elk_simd_selection_state &state, unsigned simd)
|
|||
|
||||
unsigned max_threads = state.devinfo->max_cs_workgroup_threads;
|
||||
|
||||
const unsigned min_simd = state.devinfo->ver >= 20 ? 1 : 0;
|
||||
if (simd > min_simd && state.compiled[simd - 1] &&
|
||||
if (simd > 0 && state.compiled[simd - 1] &&
|
||||
workgroup_size <= (width / 2)) {
|
||||
state.error[simd] = "Workgroup size already fits in smaller SIMD";
|
||||
return false;
|
||||
|
|
@ -119,7 +118,7 @@ elk_simd_should_compile(elk_simd_selection_state &state, unsigned simd)
|
|||
*
|
||||
* TODO: Use performance_analysis and drop this rule.
|
||||
*/
|
||||
if (width == 32 && state.devinfo->ver < 20) {
|
||||
if (width == 32) {
|
||||
if (!INTEL_DEBUG(DEBUG_DO32) && (state.compiled[0] || state.compiled[1])) {
|
||||
state.error[simd] = "SIMD32 not required (use INTEL_DEBUG=do32 to force)";
|
||||
return false;
|
||||
|
|
@ -127,11 +126,6 @@ elk_simd_should_compile(elk_simd_selection_state &state, unsigned simd)
|
|||
}
|
||||
}
|
||||
|
||||
if (width == 8 && state.devinfo->ver >= 20) {
|
||||
state.error[simd] = "SIMD8 not supported on Xe2+";
|
||||
return false;
|
||||
}
|
||||
|
||||
uint64_t start;
|
||||
switch (prog_data->stage) {
|
||||
case MESA_SHADER_COMPUTE:
|
||||
|
|
|
|||
|
|
@ -2648,7 +2648,7 @@ elk_compile_vs(const struct elk_compiler *compiler,
|
|||
}
|
||||
|
||||
if (is_scalar) {
|
||||
const unsigned dispatch_width = compiler->devinfo->ver >= 20 ? 16 : 8;
|
||||
const unsigned dispatch_width = 8;
|
||||
prog_data->base.dispatch_mode = INTEL_DISPATCH_MODE_SIMD8;
|
||||
|
||||
elk_fs_visitor v(compiler, ¶ms->base, &key->base,
|
||||
|
|
|
|||
|
|
@ -447,7 +447,7 @@ elk_compile_tcs(const struct elk_compiler *compiler,
|
|||
}
|
||||
|
||||
if (is_scalar) {
|
||||
const unsigned dispatch_width = devinfo->ver >= 20 ? 16 : 8;
|
||||
const unsigned dispatch_width = 8;
|
||||
elk_fs_visitor v(compiler, ¶ms->base, &key->base,
|
||||
&prog_data->base.base, nir, dispatch_width,
|
||||
params->base.stats != NULL, debug_enabled);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue