mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 02:28:10 +02:00
radv: Use I/O lowering for task and mesh shaders.
We set the number of task shader ring entries in radv_device based on the generous assumption that each CU can run task/mesh shaders with maximum occupancy.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14929>
This commit is contained in:
parent
6e8f3677c7
commit
c17c523ec0
4 changed files with 35 additions and 0 deletions
|
|
@@ -91,6 +91,11 @@
|
||||||
*/
|
*/
|
||||||
#define RADV_MAX_MEMORY_ALLOCATION_SIZE 0xFFFFFFFCull
|
#define RADV_MAX_MEMORY_ALLOCATION_SIZE 0xFFFFFFFCull
|
||||||
|
|
||||||
|
/* Size of each payload entry in the task payload ring.
|
||||||
|
* Spec requires minimum 16K bytes.
|
||||||
|
*/
|
||||||
|
#define RADV_TASK_PAYLOAD_ENTRY_BYTES 16384
|
||||||
|
|
||||||
/* Number of invocations in each subgroup. */
|
/* Number of invocations in each subgroup. */
|
||||||
#define RADV_SUBGROUP_SIZE 64
|
#define RADV_SUBGROUP_SIZE 64
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@@ -3331,6 +3331,24 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
|
||||||
ac_get_hs_info(&device->physical_device->rad_info,
|
ac_get_hs_info(&device->physical_device->rad_info,
|
||||||
&device->hs);
|
&device->hs);
|
||||||
|
|
||||||
|
/* Number of task shader ring entries. Needs to be a power of two.
|
||||||
|
* Use a low number on smaller chips so we don't waste space,
|
||||||
|
* but keep it high on bigger chips so it doesn't inhibit parallelism.
|
||||||
|
*/
|
||||||
|
switch (device->physical_device->rad_info.family) {
|
||||||
|
case CHIP_VANGOGH:
|
||||||
|
case CHIP_BEIGE_GOBY:
|
||||||
|
case CHIP_YELLOW_CARP:
|
||||||
|
device->task_num_entries = 256;
|
||||||
|
break;
|
||||||
|
case CHIP_SIENNA_CICHLID:
|
||||||
|
case CHIP_NAVY_FLOUNDER:
|
||||||
|
case CHIP_DIMGREY_CAVEFISH:
|
||||||
|
default:
|
||||||
|
device->task_num_entries = 1024;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
if (device->instance->debug_flags & RADV_DEBUG_HANG) {
|
if (device->instance->debug_flags & RADV_DEBUG_HANG) {
|
||||||
/* Enable GPU hangs detection and dump logs if a GPU hang is
|
/* Enable GPU hangs detection and dump logs if a GPU hang is
|
||||||
* detected.
|
* detected.
|
||||||
|
|
|
||||||
|
|
@@ -777,6 +777,9 @@ struct radv_device {
|
||||||
uint32_t scratch_waves;
|
uint32_t scratch_waves;
|
||||||
uint32_t dispatch_initiator;
|
uint32_t dispatch_initiator;
|
||||||
|
|
||||||
|
/* Number of entries in the task shader ring buffers. */
|
||||||
|
uint32_t task_num_entries;
|
||||||
|
|
||||||
uint32_t gs_table_depth;
|
uint32_t gs_table_depth;
|
||||||
struct ac_hs_info hs;
|
struct ac_hs_info hs;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@@ -1042,6 +1042,15 @@ radv_lower_io_to_mem(struct radv_device *device, struct radv_pipeline_stage *sta
|
||||||
ac_nir_lower_gs_inputs_to_mem(nir, device->physical_device->rad_info.chip_class,
|
ac_nir_lower_gs_inputs_to_mem(nir, device->physical_device->rad_info.chip_class,
|
||||||
info->gs.num_linked_inputs);
|
info->gs.num_linked_inputs);
|
||||||
return true;
|
return true;
|
||||||
|
} else if (nir->info.stage == MESA_SHADER_TASK) {
|
||||||
|
ac_nir_apply_first_task_to_task_shader(nir);
|
||||||
|
ac_nir_lower_task_outputs_to_mem(nir, RADV_TASK_PAYLOAD_ENTRY_BYTES,
|
||||||
|
device->task_num_entries);
|
||||||
|
return true;
|
||||||
|
} else if (nir->info.stage == MESA_SHADER_MESH) {
|
||||||
|
ac_nir_lower_mesh_inputs_to_mem(nir, RADV_TASK_PAYLOAD_ENTRY_BYTES,
|
||||||
|
device->task_num_entries);
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue