v3dv: implement compute dispatch
For now this only implements regular dispatches, not indirect ones.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6766>
parent d0b1bb3032
commit 7e990683fd

4 changed files with 252 additions and 7 deletions
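For orientation before the diff (this is application-side Vulkan, not part of the commit): the call sequence that now reaches the new driver paths is an ordinary compute dispatch. All handles below are assumed to have been created elsewhere and the command buffer to be in the recording state.

#include <vulkan/vulkan.h>

/* Minimal application-side sketch: record a compute dispatch into an
 * already-begun command buffer. All handles are assumed to be valid. */
static void
record_compute_dispatch(VkCommandBuffer cmd,
                        VkPipeline compute_pipeline,
                        VkPipelineLayout layout,
                        VkDescriptorSet set)
{
   vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, compute_pipeline);
   vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, layout,
                           0, 1, &set, 0, NULL);

   /* A 4x4x1 grid of workgroups; vkCmdDispatch is what the diff below
    * wires up to a CSD (compute shader dispatch) job. */
   vkCmdDispatch(cmd, 4, 4, 1);
}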
@@ -195,6 +195,20 @@ job_destroy_cloned_gpu_cl_resources(struct v3dv_job *job)
    }
 }
 
+static void
+job_destroy_gpu_csd_resources(struct v3dv_job *job)
+{
+   assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
+   assert(job->cmd_buffer);
+
+   v3dv_cl_destroy(&job->indirect);
+
+   _mesa_set_destroy(job->bos, NULL);
+
+   if (job->csd.shared_memory)
+      v3dv_bo_free(job->device, job->csd.shared_memory);
+}
+
 static void
 job_destroy_cpu_wait_events_resources(struct v3dv_job *job)
 {
@@ -220,6 +234,9 @@ v3dv_job_destroy(struct v3dv_job *job)
    case V3DV_JOB_TYPE_GPU_CL_SECONDARY:
       job_destroy_gpu_cl_resources(job);
       break;
+   case V3DV_JOB_TYPE_GPU_CSD:
+      job_destroy_gpu_csd_resources(job);
+      break;
    case V3DV_JOB_TYPE_CPU_WAIT_EVENTS:
       job_destroy_cpu_wait_events_resources(job);
       break;
@@ -716,19 +733,24 @@ v3dv_job_init(struct v3dv_job *job,
    list_inithead(&job->list_link);
 
    if (type == V3DV_JOB_TYPE_GPU_CL ||
-       type == V3DV_JOB_TYPE_GPU_CL_SECONDARY) {
+       type == V3DV_JOB_TYPE_GPU_CL_SECONDARY ||
+       type == V3DV_JOB_TYPE_GPU_CSD) {
       job->bos =
          _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
       job->bo_count = 0;
 
-      v3dv_cl_init(job, &job->bcl);
-      v3dv_cl_init(job, &job->rcl);
       v3dv_cl_init(job, &job->indirect);
 
       if (V3D_DEBUG & V3D_DEBUG_ALWAYS_FLUSH)
         job->always_flush = true;
    }
 
+   if (type == V3DV_JOB_TYPE_GPU_CL ||
+       type == V3DV_JOB_TYPE_GPU_CL_SECONDARY) {
+      v3dv_cl_init(job, &job->bcl);
+      v3dv_cl_init(job, &job->rcl);
+   }
+
    if (cmd_buffer) {
       /* Flag all state as dirty. Generally, we need to re-emit state for each
        * new job.
@@ -784,6 +806,28 @@ v3dv_cmd_buffer_start_job(struct v3dv_cmd_buffer *cmd_buffer,
    return job;
 }
 
+static struct v3dv_job *
+cmd_buffer_start_compute_job(struct v3dv_cmd_buffer *cmd_buffer)
+{
+   /* Compute jobs can only happen outside a render pass */
+   assert(!cmd_buffer->state.job);
+   assert(!cmd_buffer->state.pass);
+
+   struct v3dv_job *job = vk_zalloc(&cmd_buffer->device->alloc,
+                                    sizeof(struct v3dv_job), 8,
+                                    VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+   cmd_buffer->state.job = job;
+
+   if (!job) {
+      v3dv_flag_oom(cmd_buffer, NULL);
+      return NULL;
+   }
+
+   v3dv_job_init(job, V3DV_JOB_TYPE_GPU_CSD, cmd_buffer->device, cmd_buffer, -1);
+
+   return job;
+}
+
 static VkResult
 cmd_buffer_reset(struct v3dv_cmd_buffer *cmd_buffer,
                  VkCommandBufferResetFlags flags)
@@ -2677,6 +2721,33 @@ update_vs_variant(struct v3dv_cmd_buffer *cmd_buffer)
    p_stage->current_variant = variant;
 }
 
+static void
+update_cs_variant(struct v3dv_cmd_buffer *cmd_buffer)
+{
+   struct v3dv_shader_variant *variant;
+   struct v3dv_pipeline_stage *p_stage = cmd_buffer->state.pipeline->cs;
+   struct v3d_key local_key;
+
+   /* We start with a copy of the original pipeline key */
+   memcpy(&local_key, &p_stage->key.base, sizeof(struct v3d_key));
+
+   cmd_buffer_populate_v3d_key(&local_key, cmd_buffer,
+                               VK_PIPELINE_BIND_POINT_COMPUTE);
+
+   VkResult result;
+   variant = v3dv_get_shader_variant(p_stage, &local_key,
+                                     sizeof(struct v3d_key),
+                                     &cmd_buffer->device->alloc,
+                                     &result);
+   /* At this point we are not creating a vulkan object to return to the
+    * API user, so we can't really return back a OOM error
+    */
+   assert(variant);
+   assert(result == VK_SUCCESS);
+
+   p_stage->current_variant = variant;
+}
+
 /*
  * Some updates on the cmd buffer requires also updates on the shader being
  * compiled at the pipeline. The poster boy here are textures, as the compiler
@@ -2690,8 +2761,13 @@ update_pipeline_variants(struct v3dv_cmd_buffer *cmd_buffer)
 {
    assert(cmd_buffer->state.pipeline);
 
-   update_fs_variant(cmd_buffer);
-   update_vs_variant(cmd_buffer);
+   if (v3dv_pipeline_get_binding_point(cmd_buffer->state.pipeline) ==
+       VK_PIPELINE_BIND_POINT_GRAPHICS) {
+      update_fs_variant(cmd_buffer);
+      update_vs_variant(cmd_buffer);
+   } else {
+      update_cs_variant(cmd_buffer);
+   }
 }
 
 static void
@@ -4471,13 +4547,120 @@ v3dv_CmdWriteTimestamp(VkCommandBuffer commandBuffer,
    unreachable("Timestamp queries are not supported.");
 }
 
+static void
+cmd_buffer_emit_pre_dispatch(struct v3dv_cmd_buffer *cmd_buffer)
+{
+   assert(cmd_buffer->state.pipeline);
+   assert(cmd_buffer->state.pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);
+
+   /* We may need to compile shader variants based on bound textures */
+   uint32_t *dirty = &cmd_buffer->state.dirty;
+   if (*dirty & (V3DV_CMD_DIRTY_PIPELINE |
+                 V3DV_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS)) {
+      update_pipeline_variants(cmd_buffer);
+   }
+
+   *dirty &= ~(V3DV_CMD_DIRTY_PIPELINE |
+               V3DV_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS);
+}
+
+#define V3D_CSD_CFG012_WG_COUNT_SHIFT 16
+#define V3D_CSD_CFG012_WG_OFFSET_SHIFT 0
+/* Allow this dispatch to start while the last one is still running. */
+#define V3D_CSD_CFG3_OVERLAP_WITH_PREV (1 << 26)
+/* Maximum supergroup ID. 6 bits. */
+#define V3D_CSD_CFG3_MAX_SG_ID_SHIFT 20
+/* Batches per supergroup minus 1. 8 bits. */
+#define V3D_CSD_CFG3_BATCHES_PER_SG_M1_SHIFT 12
+/* Workgroups per supergroup, 0 means 16 */
+#define V3D_CSD_CFG3_WGS_PER_SG_SHIFT 8
+#define V3D_CSD_CFG3_WG_SIZE_SHIFT 0
+
+#define V3D_CSD_CFG5_PROPAGATE_NANS (1 << 2)
+#define V3D_CSD_CFG5_SINGLE_SEG (1 << 1)
+#define V3D_CSD_CFG5_THREADING (1 << 0)
+
+static void
+cmd_buffer_dispatch(struct v3dv_cmd_buffer *cmd_buffer,
+                    uint32_t group_count_x,
+                    uint32_t group_count_y,
+                    uint32_t group_count_z)
+{
+   if (group_count_x == 0 || group_count_y == 0 || group_count_z == 0)
+      return;
+
+   struct v3dv_job *job = cmd_buffer_start_compute_job(cmd_buffer);
+   if (!job)
+      return;
+
+   struct drm_v3d_submit_csd *submit = &job->csd.submit;
+
+   job->csd.workgroup_count[0] = group_count_x;
+   job->csd.workgroup_count[1] = group_count_y;
+   job->csd.workgroup_count[2] = group_count_z;
+
+   submit->cfg[0] |= group_count_x << V3D_CSD_CFG012_WG_COUNT_SHIFT;
+   submit->cfg[1] |= group_count_y << V3D_CSD_CFG012_WG_COUNT_SHIFT;
+   submit->cfg[2] |= group_count_z << V3D_CSD_CFG012_WG_COUNT_SHIFT;
+
+   struct v3dv_pipeline *pipeline = cmd_buffer->state.pipeline;
+   assert(pipeline->cs && pipeline->cs->nir);
+
+   const struct nir_shader *cs = pipeline->cs->nir;
+
+   const uint32_t wgs_per_sg = 1; /* FIXME */
+   const uint32_t wg_size = cs->info.cs.local_size[0] *
+                            cs->info.cs.local_size[1] *
+                            cs->info.cs.local_size[2];
+   submit->cfg[3] |= wgs_per_sg << V3D_CSD_CFG3_WGS_PER_SG_SHIFT;
+   submit->cfg[3] |= ((DIV_ROUND_UP(wgs_per_sg * wg_size, 16) - 1) <<
+                      V3D_CSD_CFG3_BATCHES_PER_SG_M1_SHIFT);
+   submit->cfg[3] |= (wg_size & 0xff) << V3D_CSD_CFG3_WG_SIZE_SHIFT;
+
+   uint32_t batches_per_wg = DIV_ROUND_UP(wg_size, 16);
+   submit->cfg[4] = batches_per_wg *
+                    (group_count_x * group_count_y * group_count_z) - 1;
+   assert(submit->cfg[4] != ~0);
+
+   assert(pipeline->cs->current_variant &&
+          pipeline->cs->current_variant->assembly_bo);
+   const struct v3dv_shader_variant *variant = pipeline->cs->current_variant;
+   submit->cfg[5] = variant->assembly_bo->offset;
+   submit->cfg[5] |= V3D_CSD_CFG5_PROPAGATE_NANS;
+   if (variant->prog_data.base->single_seg)
+      submit->cfg[5] |= V3D_CSD_CFG5_SINGLE_SEG;
+   if (variant->prog_data.base->threads == 4)
+      submit->cfg[5] |= V3D_CSD_CFG5_THREADING;
+
+   if (variant->prog_data.cs->shared_size > 0) {
+      job->csd.shared_memory =
+         v3dv_bo_alloc(cmd_buffer->device,
+                       variant->prog_data.cs->shared_size * wgs_per_sg,
+                       "shared_vars", true);
+   }
+
+   v3dv_job_add_bo(job, variant->assembly_bo);
+
+   struct v3dv_cl_reloc uniforms =
+      v3dv_write_uniforms(cmd_buffer, pipeline->cs);
+   submit->cfg[6] = uniforms.bo->offset + uniforms.offset;
+
+   v3dv_job_add_bo(job, uniforms.bo);
+
+   list_addtail(&job->list_link, &cmd_buffer->jobs);
+   cmd_buffer->state.job = NULL;
+}
+
 void
 v3dv_CmdDispatch(VkCommandBuffer commandBuffer,
                  uint32_t groupCountX,
                  uint32_t groupCountY,
                  uint32_t groupCountZ)
 {
-   unreachable("vkCmdDispatch not implemented.");
+   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
+
+   cmd_buffer_emit_pre_dispatch(cmd_buffer);
+   cmd_buffer_dispatch(cmd_buffer, groupCountX, groupCountY, groupCountZ);
 }
 
 void
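A quick aside on the batch math in cmd_buffer_dispatch above: the CSD unit works in batches of 16 lanes, so cfg[3] and cfg[4] are derived from the workgroup size and group counts. The standalone sketch below is not part of the commit; the 8x8x1 local size and 4x4x1 group counts are made-up example values, and DIV_ROUND_UP is redefined locally so it compiles on its own.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Local stand-in for Mesa's DIV_ROUND_UP macro. */
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
   const uint32_t local_size[3] = { 8, 8, 1 };  /* example shader local size */
   const uint32_t groups[3] = { 4, 4, 1 };      /* example vkCmdDispatch counts */

   const uint32_t wgs_per_sg = 1;               /* mirrors the FIXME above */
   const uint32_t wg_size = local_size[0] * local_size[1] * local_size[2];

   /* cfg[3] field: batches per supergroup, minus one. */
   const uint32_t batches_per_sg_m1 = DIV_ROUND_UP(wgs_per_sg * wg_size, 16) - 1;

   /* cfg[4]: total batches in the whole dispatch, minus one. */
   const uint32_t batches_per_wg = DIV_ROUND_UP(wg_size, 16);
   const uint32_t cfg4 = batches_per_wg * (groups[0] * groups[1] * groups[2]) - 1;

   /* Prints: wg_size=64 batches_per_sg_m1=3 cfg4=63 */
   printf("wg_size=%" PRIu32 " batches_per_sg_m1=%" PRIu32 " cfg4=%" PRIu32 "\n",
          wg_size, batches_per_sg_m1, cfg4);
   return 0;
}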
@@ -798,8 +798,15 @@ struct v3dv_job {
       struct v3dv_copy_buffer_to_image_cpu_job_info copy_buffer_to_image;
    } cpu;
 
-   /* Job spects for TFU jobs */
+   /* Job specs for TFU jobs */
    struct drm_v3d_submit_tfu tfu;
+
+   /* Job specs for CSD jobs */
+   struct {
+      struct v3dv_bo *shared_memory;
+      uint32_t workgroup_count[3];
+      struct drm_v3d_submit_csd submit;
+   } csd;
 };
 
 void v3dv_job_init(struct v3dv_job *job,
@@ -578,6 +578,47 @@ handle_tfu_job(struct v3dv_queue *queue,
    return VK_SUCCESS;
 }
 
+static VkResult
+handle_csd_job(struct v3dv_queue *queue,
+               struct v3dv_job *job,
+               bool do_wait)
+{
+   struct v3dv_device *device = queue->device;
+
+   struct drm_v3d_submit_csd *submit = &job->csd.submit;
+
+   submit->bo_handle_count = job->bo_count;
+   uint32_t *bo_handles =
+      (uint32_t *) malloc(sizeof(uint32_t) * MAX2(4, submit->bo_handle_count * 2));
+   uint32_t bo_idx = 0;
+   set_foreach(job->bos, entry) {
+      struct v3dv_bo *bo = (struct v3dv_bo *)entry->key;
+      bo_handles[bo_idx++] = bo->handle;
+   }
+   assert(bo_idx == submit->bo_handle_count);
+   submit->bo_handles = (uintptr_t)(void *)bo_handles;
+
+   mtx_lock(&queue->device->mutex);
+   submit->in_sync = do_wait ? device->last_job_sync : 0;
+   submit->out_sync = device->last_job_sync;
+   int ret = v3dv_ioctl(device->render_fd, DRM_IOCTL_V3D_SUBMIT_CSD, submit);
+   mtx_unlock(&queue->device->mutex);
+
+   static bool warned = false;
+   if (ret && !warned) {
+      fprintf(stderr, "Compute dispatch returned %s. Expect corruption.\n",
+              strerror(errno));
+      warned = true;
+   }
+
+   free(bo_handles);
+
+   if (ret)
+      return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
+
+   return VK_SUCCESS;
+}
+
 static VkResult
 queue_submit_job(struct v3dv_queue *queue,
                  struct v3dv_job *job,
@@ -591,6 +632,8 @@ queue_submit_job(struct v3dv_queue *queue,
       return handle_cl_job(queue, job, do_wait);
    case V3DV_JOB_TYPE_GPU_TFU:
       return handle_tfu_job(queue, job, do_wait);
+   case V3DV_JOB_TYPE_GPU_CSD:
+      return handle_csd_job(queue, job, do_wait);
    case V3DV_JOB_TYPE_CPU_RESET_QUERIES:
       return handle_reset_query_cpu_job(job);
    case V3DV_JOB_TYPE_CPU_END_QUERY:
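For context on what handle_csd_job above hands to the kernel: the fields it fills (cfg[0..6], bo_handles/bo_handle_count, in_sync/out_sync) live in the v3d UAPI's drm_v3d_submit_csd. The sketch below is recalled from the drm/v3d_drm.h of that era and is only an approximation for orientation, not the authoritative header.

#include <stdint.h>

/* Approximate shape of the CSD submit ioctl argument (sketch only; see
 * include/uapi/drm/v3d_drm.h for the real definition). */
struct drm_v3d_submit_csd_sketch {
   uint32_t cfg[7];          /* dispatch config words written in cmd_buffer_dispatch */
   uint32_t coef[4];         /* coefficient words, unused by this commit */
   uint64_t bo_handles;      /* userspace pointer to an array of GEM handles */
   uint32_t bo_handle_count; /* number of entries in that array */
   uint32_t in_sync;         /* syncobj to wait on before the dispatch runs */
   uint32_t out_sync;        /* syncobj signaled when the dispatch completes */
};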
@@ -327,6 +327,18 @@ v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
                                         data));
          break;
 
+      case QUNIFORM_NUM_WORK_GROUPS:
+         assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
+         assert(job->csd.workgroup_count[data] > 0);
+         cl_aligned_u32(&uniforms, job->csd.workgroup_count[data]);
+         break;
+
+      case QUNIFORM_SHARED_OFFSET:
+         assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
+         assert(job->csd.shared_memory);
+         cl_aligned_reloc(&job->indirect, &uniforms, job->csd.shared_memory, 0);
+         break;
+
       default:
         unreachable("unsupported quniform_contents uniform type\n");
       }