mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 04:20:08 +01:00
ac: Add task shader ring information.
Similarly to tessellation rings information, move the task rings info to ac_gpu_info. Signed-off-by: Timur Kristóf <timur.kristof@gmail.com> Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16737>
This commit is contained in:
parent
086e499b47
commit
ac5ab8d227
6 changed files with 89 additions and 30 deletions
|
|
@ -1858,3 +1858,42 @@ void ac_get_hs_info(struct radeon_info *info,
|
|||
hs->tess_offchip_ring_offset = align(hs->tess_factor_ring_size, 64 * 1024);
|
||||
hs->tess_offchip_ring_size = hs->max_offchip_buffers * hs->tess_offchip_block_dw_size * 4;
|
||||
}
|
||||
|
||||
static uint16_t get_task_num_entries(enum radeon_family fam)
|
||||
{
|
||||
/* Number of task shader ring entries. Needs to be a power of two.
|
||||
* Use a low number on smaller chips so we don't waste space,
|
||||
* but keep it high on bigger chips so it doesn't inhibit parallelism.
|
||||
*
|
||||
* This number is compiled into task/mesh shaders as a constant.
|
||||
* In order to ensure this works fine with the shader cache, we must
|
||||
* base this decision on the chip family, not the number of CUs in
|
||||
* the current GPU. (So, the cache remains consistent for all
|
||||
* chips in the same family.)
|
||||
*/
|
||||
switch (fam) {
|
||||
case CHIP_VANGOGH:
|
||||
case CHIP_NAVI24:
|
||||
case CHIP_REMBRANDT:
|
||||
return 256;
|
||||
case CHIP_NAVI21:
|
||||
case CHIP_NAVI22:
|
||||
case CHIP_NAVI23:
|
||||
default:
|
||||
return 1024;
|
||||
}
|
||||
}
|
||||
|
||||
void ac_get_task_info(struct radeon_info *info,
|
||||
struct ac_task_info *task_info)
|
||||
{
|
||||
const uint16_t num_entries = get_task_num_entries(info->family);
|
||||
const uint32_t draw_ring_bytes = num_entries * AC_TASK_DRAW_ENTRY_BYTES;
|
||||
const uint32_t payload_ring_bytes = num_entries * AC_TASK_PAYLOAD_ENTRY_BYTES;
|
||||
|
||||
/* Ensure that the addresses of each ring are 256 byte aligned. */
|
||||
task_info->num_entries = num_entries;
|
||||
task_info->draw_ring_offset = ALIGN(AC_TASK_CTRLBUF_BYTES, 256);
|
||||
task_info->payload_ring_offset = ALIGN(task_info->draw_ring_offset + draw_ring_bytes, 256);
|
||||
task_info->bo_size_bytes = task_info->payload_ring_offset + payload_ring_bytes;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -274,6 +274,50 @@ struct ac_hs_info {
|
|||
void ac_get_hs_info(struct radeon_info *info,
|
||||
struct ac_hs_info *hs);
|
||||
|
||||
/* Task rings BO layout information.
|
||||
* This BO is shared between GFX and ACE queues so that the ACE and GFX
|
||||
* firmware can cooperate on task->mesh dispatches and is also used to
|
||||
* store the task payload which is passed to mesh shaders.
|
||||
*
|
||||
* The driver only needs to create this BO once,
|
||||
* and it will always be able to accommodate the maximum needed
|
||||
* task payload size.
|
||||
*
|
||||
* The following memory layout is used:
|
||||
* 1. Control buffer: 9 DWORDs, 256 byte aligned
|
||||
* Used by the firmware to maintain the current state.
|
||||
* (padding)
|
||||
* 2. Draw ring: 4 DWORDs per entry, 256 byte aligned
|
||||
* Task shaders store the mesh dispatch size here.
|
||||
* (padding)
|
||||
* 3. Payload ring: 16K bytes per entry, 256 byte aligned.
|
||||
* This is where task payload is stored by task shaders and
|
||||
* read by mesh shaders.
|
||||
*
|
||||
*/
|
||||
struct ac_task_info {
   /* Byte offset of the draw ring inside the BO (256 byte aligned). */
   uint32_t draw_ring_offset;
   /* Byte offset of the payload ring inside the BO (256 byte aligned). */
   uint32_t payload_ring_offset;
   /* Total size of the BO in bytes: control buffer, both rings, padding. */
   uint32_t bo_size_bytes;
   /* Number of entries in each of the task rings. */
   uint16_t num_entries;
};
|
||||
|
||||
/* Size of each payload entry in the task payload ring.
|
||||
* Spec requires minimum 16K bytes.
|
||||
*/
|
||||
#define AC_TASK_PAYLOAD_ENTRY_BYTES 16384
|
||||
|
||||
/* Size of each draw entry in the task draw ring.
|
||||
* 4 DWORDs per entry.
|
||||
*/
|
||||
#define AC_TASK_DRAW_ENTRY_BYTES 16
|
||||
|
||||
/* Size of the task control buffer. 9 DWORDs. */
|
||||
#define AC_TASK_CTRLBUF_BYTES 36
|
||||
|
||||
void ac_get_task_info(struct radeon_info *info,
|
||||
struct ac_task_info *task_info);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -91,11 +91,6 @@
|
|||
*/
|
||||
#define RADV_MAX_MEMORY_ALLOCATION_SIZE 0xFFFFFFFCull
|
||||
|
||||
/* Size of each payload entry in the task payload ring.
|
||||
* Spec requires minimum 16K bytes.
|
||||
*/
|
||||
#define RADV_TASK_PAYLOAD_ENTRY_BYTES 16384
|
||||
|
||||
/* Number of invocations in each subgroup. */
|
||||
#define RADV_SUBGROUP_SIZE 64
|
||||
|
||||
|
|
|
|||
|
|
@ -834,24 +834,7 @@ radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm
|
|||
ac_get_gs_table_depth(device->rad_info.gfx_level, device->rad_info.family);
|
||||
|
||||
ac_get_hs_info(&device->rad_info, &device->hs);
|
||||
|
||||
/* Number of task shader ring entries. Needs to be a power of two.
|
||||
* Use a low number on smaller chips so we don't waste space,
|
||||
* but keep it high on bigger chips so it doesn't inhibit parallelism.
|
||||
*/
|
||||
switch (device->rad_info.family) {
|
||||
case CHIP_VANGOGH:
|
||||
case CHIP_NAVI24:
|
||||
case CHIP_REMBRANDT:
|
||||
device->task_num_entries = 256;
|
||||
break;
|
||||
case CHIP_NAVI21:
|
||||
case CHIP_NAVI22:
|
||||
case CHIP_NAVI23:
|
||||
default:
|
||||
device->task_num_entries = 1024;
|
||||
break;
|
||||
}
|
||||
ac_get_task_info(&device->rad_info, &device->task_info);
|
||||
|
||||
*device_out = device;
|
||||
|
||||
|
|
|
|||
|
|
@ -332,9 +332,7 @@ struct radv_physical_device {
|
|||
uint32_t gs_table_depth;
|
||||
|
||||
struct ac_hs_info hs;
|
||||
|
||||
/* Number of entries in the task shader ring buffers. */
|
||||
uint32_t task_num_entries;
|
||||
struct ac_task_info task_info;
|
||||
};
|
||||
|
||||
struct radv_instance {
|
||||
|
|
|
|||
|
|
@ -1094,12 +1094,12 @@ radv_lower_io_to_mem(struct radv_device *device, struct radv_pipeline_stage *sta
|
|||
return true;
|
||||
} else if (nir->info.stage == MESA_SHADER_TASK) {
|
||||
ac_nir_apply_first_task_to_task_shader(nir);
|
||||
ac_nir_lower_task_outputs_to_mem(nir, RADV_TASK_PAYLOAD_ENTRY_BYTES,
|
||||
device->physical_device->task_num_entries);
|
||||
ac_nir_lower_task_outputs_to_mem(nir, AC_TASK_PAYLOAD_ENTRY_BYTES,
|
||||
device->physical_device->task_info.num_entries);
|
||||
return true;
|
||||
} else if (nir->info.stage == MESA_SHADER_MESH) {
|
||||
ac_nir_lower_mesh_inputs_to_mem(nir, RADV_TASK_PAYLOAD_ENTRY_BYTES,
|
||||
device->physical_device->task_num_entries);
|
||||
ac_nir_lower_mesh_inputs_to_mem(nir, AC_TASK_PAYLOAD_ENTRY_BYTES,
|
||||
device->physical_device->task_info.num_entries);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue