mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-04 20:38:06 +02:00
intel: Use Morton compute walk order
According to HSD 14016252163, if a compute shader uses sample operations, using Morton walk order and setting the thread group batch size to 4 is expected to increase sampler cache hit rates by increasing sample address locality within a subslice. Rework: * Caio: "||" => "&&" for type checking in instr_uses_sampler() * Jordan: Use nir's foreach macros rather than nir_shader_lower_instructions() Signed-off-by: Sagar Ghuge <sagar.ghuge@intel.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Rohan Garg <rohan.garg@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32430>
This commit is contained in:
parent
4bd958243d
commit
d3f9139e49
6 changed files with 60 additions and 0 deletions
|
|
@ -379,6 +379,7 @@ struct iris_cs_data {
|
|||
enum intel_compute_walk_order walk_order;
|
||||
|
||||
bool uses_barrier;
|
||||
bool uses_sampler;
|
||||
bool first_param_is_builtin_subgroup_id;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -150,6 +150,7 @@ iris_apply_brw_cs_prog_data(struct iris_compiled_shader *shader,
|
|||
iris->generate_local_id = brw->generate_local_id;
|
||||
iris->walk_order = brw->walk_order;
|
||||
iris->uses_barrier = brw->uses_barrier;
|
||||
iris->uses_sampler = brw->uses_sampler;
|
||||
iris->prog_mask = brw->prog_mask;
|
||||
|
||||
iris->first_param_is_builtin_subgroup_id =
|
||||
|
|
|
|||
|
|
@ -8941,6 +8941,16 @@ static void iris_emit_execute_indirect_dispatch(struct iris_context *ice,
|
|||
body.ExecutionMask = dispatch.right_mask;
|
||||
body.PostSync.MOCS = iris_mocs(NULL, &screen->isl_dev, 0);
|
||||
body.InterfaceDescriptor = idd;
|
||||
/* HSD 14016252163: Use of Morton walk order (and batching using a batch
|
||||
* size of 4) is expected to increase sampler cache hit rates by
|
||||
* increasing sample address locality within a subslice.
|
||||
*/
|
||||
#if GFX_VER >= 30
|
||||
body.DispatchWalkOrder =
|
||||
cs_data->uses_sampler ? MortonWalk : LinearWalk;
|
||||
body.ThreadGroupBatchSize =
|
||||
cs_data->uses_sampler ? TG_BATCH_4 : TG_BATCH_1;
|
||||
#endif
|
||||
|
||||
struct iris_address indirect_bo = ro_bo(indirect, grid->indirect_offset);
|
||||
iris_emit_cmd(batch, GENX(EXECUTE_INDIRECT_DISPATCH), ind) {
|
||||
|
|
|
|||
|
|
@ -98,6 +98,38 @@ run_cs(fs_visitor &s, bool allow_spilling)
|
|||
return !s.failed;
|
||||
}
|
||||
|
||||
static bool
|
||||
instr_uses_sampler(nir_builder *b, nir_instr *instr, void *cb_data)
|
||||
{
|
||||
if (instr->type != nir_instr_type_tex)
|
||||
return false;
|
||||
|
||||
switch (nir_instr_as_tex(instr)->op) {
|
||||
case nir_texop_tex:
|
||||
case nir_texop_txd:
|
||||
case nir_texop_txf:
|
||||
case nir_texop_txl:
|
||||
case nir_texop_txb:
|
||||
case nir_texop_txf_ms:
|
||||
case nir_texop_txf_ms_mcs_intel:
|
||||
case nir_texop_lod:
|
||||
case nir_texop_tg4:
|
||||
case nir_texop_texture_samples:
|
||||
return true;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
brw_nir_uses_sampler(nir_shader *shader)
|
||||
{
|
||||
return nir_shader_instructions_pass(shader, instr_uses_sampler,
|
||||
nir_metadata_all,
|
||||
NULL);
|
||||
}
|
||||
|
||||
const unsigned *
|
||||
brw_compile_cs(const struct brw_compiler *compiler,
|
||||
struct brw_compile_cs_params *params)
|
||||
|
|
@ -129,6 +161,8 @@ brw_compile_cs(const struct brw_compiler *compiler,
|
|||
.required_width = brw_required_dispatch_width(&nir->info),
|
||||
};
|
||||
|
||||
prog_data->uses_sampler = brw_nir_uses_sampler(params->base.nir);
|
||||
|
||||
std::unique_ptr<fs_visitor> v[3];
|
||||
|
||||
for (unsigned simd = 0; simd < 3; simd++) {
|
||||
|
|
|
|||
|
|
@ -881,6 +881,9 @@ struct brw_cs_prog_data {
|
|||
uint8_t generate_local_id;
|
||||
enum intel_compute_walk_order walk_order;
|
||||
|
||||
/* True if shader has any sample operation */
|
||||
bool uses_sampler;
|
||||
|
||||
struct {
|
||||
struct brw_push_const_block cross_thread;
|
||||
struct brw_push_const_block per_thread;
|
||||
|
|
|
|||
|
|
@ -390,6 +390,17 @@ emit_indirect_compute_walker(struct anv_cmd_buffer *cmd_buffer,
|
|||
|
||||
struct GENX(COMPUTE_WALKER_BODY) body = {
|
||||
.SIMDSize = dispatch_size,
|
||||
/* HSD 14016252163: Use of Morton walk order (and batching using a batch
|
||||
* size of 4) is expected to increase sampler cache hit rates by
|
||||
* increasing sample address locality within a subslice.
|
||||
*/
|
||||
#if GFX_VER >= 30
|
||||
.DispatchWalkOrder = prog_data->uses_sampler ?
|
||||
MortonWalk :
|
||||
LinearWalk,
|
||||
.ThreadGroupBatchSize = prog_data->uses_sampler ? TG_BATCH_4 :
|
||||
TG_BATCH_1,
|
||||
#endif
|
||||
.MessageSIMD = dispatch_size,
|
||||
.IndirectDataStartAddress = comp_state->base.push_constants_state.offset,
|
||||
.IndirectDataLength = comp_state->base.push_constants_state.alloc_size,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue