anv/xe3+: Set RegistersPerThread during shader state setup based on prog_data.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32664>
This commit is contained in:
Francisco Jerez 2024-09-18 14:40:01 -07:00 committed by Marge Bot
parent 7537f8edee
commit b25d0f899b
3 changed files with 44 additions and 0 deletions

View file

@ -367,6 +367,9 @@ get_interface_descriptor_data(struct anv_cmd_buffer *cmd_buffer,
dispatch->group_size,
dispatch->simd_size),
.NumberOfBarriers = prog_data->uses_barrier,
#if GFX_VER >= 30
.RegistersPerThread = ptl_register_blocks(prog_data->base.grf_used),
#endif
};
}
@ -1369,6 +1372,9 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer,
.BTDMode = true,
#if INTEL_NEEDS_WA_14017794102 || INTEL_NEEDS_WA_14023061436
.ThreadPreemption = false,
#endif
#if GFX_VER >= 30
.RegistersPerThread = ptl_register_blocks(cs_prog_data->base.grf_used),
#endif
},
};

View file

@ -1183,6 +1183,10 @@ emit_3dstate_vs(struct anv_graphics_pipeline *pipeline)
vs.ScratchSpaceBasePointer =
get_scratch_address(&pipeline->base.base, MESA_SHADER_VERTEX, vs_bin);
#endif
#if GFX_VER >= 30
vs.RegistersPerThread = ptl_register_blocks(vs_prog_data->base.base.grf_used);
#endif
}
anv_pipeline_emit_merge(pipeline, final.vs, vs_dwords, GENX(3DSTATE_VS), vs) {
@ -1274,6 +1278,10 @@ emit_3dstate_hs_ds(struct anv_graphics_pipeline *pipeline,
hs.DispatchMode = tcs_prog_data->base.dispatch_mode;
#endif
hs.IncludePrimitiveID = tcs_prog_data->include_primitive_id;
#if GFX_VER >= 30
hs.RegistersPerThread = ptl_register_blocks(tcs_prog_data->base.base.grf_used);
#endif
};
uint32_t ds_dwords[GENX(3DSTATE_DS_length)];
@ -1317,6 +1325,10 @@ emit_3dstate_hs_ds(struct anv_graphics_pipeline *pipeline,
ds.ScratchSpaceBasePointer =
get_scratch_address(&pipeline->base.base, MESA_SHADER_TESS_EVAL, tes_bin);
#endif
#if GFX_VER >= 30
ds.RegistersPerThread = ptl_register_blocks(tes_prog_data->base.base.grf_used);
#endif
}
anv_pipeline_emit_merge(pipeline, final.hs, hs_dwords, GENX(3DSTATE_HS), hs) {
@ -1480,6 +1492,10 @@ emit_3dstate_gs(struct anv_graphics_pipeline *pipeline)
gs.ScratchSpaceBasePointer =
get_scratch_address(&pipeline->base.base, MESA_SHADER_GEOMETRY, gs_bin);
#endif
#if GFX_VER >= 30
gs.RegistersPerThread = ptl_register_blocks(gs_prog_data->base.base.grf_used);
#endif
}
anv_pipeline_emit_merge(pipeline, partial.gs, gs_dwords, GENX(3DSTATE_GS), gs) {
@ -1579,6 +1595,10 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline,
ps.ScratchSpaceBasePointer =
get_scratch_address(&pipeline->base.base, MESA_SHADER_FRAGMENT, fs_bin);
#endif
#if GFX_VER >= 30
ps.RegistersPerThread = ptl_register_blocks(wm_prog_data->base.grf_used);
#endif
}
anv_pipeline_emit_merge(pipeline, partial.ps, ps_dwords, GENX(3DSTATE_PS), ps) {
#if GFX_VERx10 >= 125
@ -1797,6 +1817,10 @@ emit_task_state(struct anv_graphics_pipeline *pipeline)
task.EmitInlineParameter = true;
task.XP0Required = task_prog_data->uses_drawid;
#if GFX_VER >= 30
task.RegistersPerThread = ptl_register_blocks(task_prog_data->base.base.grf_used);
#endif
}
/* Recommended values from "Task and Mesh Distribution Programming". */
@ -1896,6 +1920,10 @@ emit_mesh_state(struct anv_graphics_pipeline *pipeline)
mesh.EmitInlineParameter = true;
mesh.XP0Required = mesh_prog_data->uses_drawid;
#if GFX_VER >= 30
mesh.RegistersPerThread = ptl_register_blocks(mesh_prog_data->base.base.grf_used);
#endif
}
/* Recommended values from "Task and Mesh Distribution Programming". */

View file

@ -208,6 +208,10 @@ genX(emit_simpler_shader_init_fragment)(struct anv_simple_shader *state)
brw_wm_prog_data_prog_offset(prog_data, ps, 2);
#endif
#if GFX_VER >= 30
ps.RegistersPerThread = ptl_register_blocks(prog_data->base.grf_used);
#endif
ps.MaximumNumberofThreadsPerPSD = device->info->max_threads_per_psd - 1;
}
@ -603,6 +607,9 @@ genX(emit_simple_shader_dispatch)(struct anv_simple_shader *state,
.SharedLocalMemorySize = intel_compute_slm_encode_size(GFX_VER,
prog_data->base.total_shared),
.NumberOfBarriers = prog_data->uses_barrier,
#if GFX_VER >= 30
.RegistersPerThread = ptl_register_blocks(prog_data->base.grf_used),
#endif
},
};
@ -692,6 +699,9 @@ genX(emit_simple_shader_dispatch)(struct anv_simple_shader *state,
.ThreadPreemptionDisable = true,
#endif
.NumberofThreadsinGPGPUThreadGroup = dispatch.threads,
#if GFX_VER >= 30
.RegistersPerThread = ptl_register_blocks(prog_data->base.grf_used),
#endif
};
GENX(INTERFACE_DESCRIPTOR_DATA_pack)(batch, iface_desc_state.map, &iface_desc);
anv_batch_emit(batch, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD), mid) {