radv: implement indirect compute pipeline binds with DGC

This also supports push constants.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27495>
This commit is contained in:
Samuel Pitoiset 2024-02-06 18:33:49 +01:00 committed by Marge Bot
parent 12b015940f
commit a2d67adff1
3 changed files with 361 additions and 68 deletions

View file

@ -10307,7 +10307,8 @@ radv_dgc_before_dispatch(struct radv_cmd_buffer *cmd_buffer)
if (compute_shader->info.cs.regalloc_hang_bug)
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH;
radv_emit_compute_pipeline(cmd_buffer, pipeline);
if (pipeline)
radv_emit_compute_pipeline(cmd_buffer, pipeline);
radv_emit_cache_flush(cmd_buffer);
radv_upload_compute_shader_descriptors(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE);

View file

@ -35,13 +35,37 @@ radv_get_sequence_size_compute(const struct radv_indirect_command_layout *layout
const struct radv_compute_pipeline *pipeline, uint32_t *cmd_size)
{
const struct radv_device *device = container_of(layout->base.device, struct radv_device, vk);
struct radv_shader *cs = radv_get_shader(pipeline->base.shaders, MESA_SHADER_COMPUTE);
/* dispatch */
*cmd_size += 5 * 4;
const struct radv_userdata_info *loc = radv_get_user_sgpr(cs, AC_UD_CS_GRID_SIZE);
if (loc->sgpr_idx != -1) {
if (pipeline) {
struct radv_shader *cs = radv_get_shader(pipeline->base.shaders, MESA_SHADER_COMPUTE);
const struct radv_userdata_info *loc = radv_get_user_sgpr(cs, AC_UD_CS_GRID_SIZE);
if (loc->sgpr_idx != -1) {
if (device->load_grid_size_from_user_sgpr) {
/* PKT3_SET_SH_REG for immediate values */
*cmd_size += 5 * 4;
} else {
/* PKT3_SET_SH_REG for pointer */
*cmd_size += 4 * 4;
}
}
} else {
/* COMPUTE_PGM_{LO,RSRC1,RSRC2} */
*cmd_size += 7 * 4;
if (device->physical_device->rad_info.gfx_level >= GFX10) {
/* COMPUTE_PGM_RSRC3 */
*cmd_size += 3 * 4;
}
/* COMPUTE_{RESOURCE_LIMITS,NUM_THREADS_X} */
*cmd_size += 8 * 4;
/* Assume the compute shader needs grid size because we can't know the information for
* indirect pipelines.
*/
if (device->load_grid_size_from_user_sgpr) {
/* PKT3_SET_SH_REG for immediate values */
*cmd_size += 5 * 4;
@ -116,20 +140,32 @@ radv_get_sequence_size(const struct radv_indirect_command_layout *layout, struct
if (layout->push_constant_mask) {
bool need_copy = false;
for (unsigned i = 0; i < ARRAY_SIZE(pipeline->shaders); ++i) {
if (!pipeline->shaders[i])
continue;
if (pipeline) {
for (unsigned i = 0; i < ARRAY_SIZE(pipeline->shaders); ++i) {
if (!pipeline->shaders[i])
continue;
struct radv_userdata_locations *locs = &pipeline->shaders[i]->info.user_sgprs_locs;
if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0) {
/* One PKT3_SET_SH_REG for emitting push constants pointer (32-bit) */
*cmd_size += 3 * 4;
need_copy = true;
struct radv_userdata_locations *locs = &pipeline->shaders[i]->info.user_sgprs_locs;
if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0) {
/* One PKT3_SET_SH_REG for emitting push constants pointer (32-bit) */
*cmd_size += 3 * 4;
need_copy = true;
}
if (locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx >= 0)
/* One PKT3_SET_SH_REG writing all inline push constants. */
*cmd_size += (3 * util_bitcount64(layout->push_constant_mask)) * 4;
}
if (locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx >= 0)
/* One PKT3_SET_SH_REG writing all inline push constants. */
*cmd_size += (3 * util_bitcount64(layout->push_constant_mask)) * 4;
} else {
/* Assume the compute shader needs both user SGPRs because we can't know the information
* for indirect pipelines.
*/
assert(layout->pipeline_bind_point == VK_PIPELINE_BIND_POINT_COMPUTE);
*cmd_size += 3 * 4;
need_copy = true;
*cmd_size += (3 * util_bitcount64(layout->push_constant_mask)) * 4;
}
if (need_copy) {
*upload_size += align(layout->push_constant_size + 16 * layout->dynamic_offset_count, 16);
}
@ -145,7 +181,7 @@ radv_get_sequence_size(const struct radv_indirect_command_layout *layout, struct
radv_get_sequence_size_graphics(layout, graphics_pipeline, cmd_size, upload_size);
} else {
assert(layout->pipeline_bind_point == VK_PIPELINE_BIND_POINT_COMPUTE);
struct radv_compute_pipeline *compute_pipeline = radv_pipeline_to_compute(pipeline);
struct radv_compute_pipeline *compute_pipeline = pipeline ? radv_pipeline_to_compute(pipeline) : NULL;
radv_get_sequence_size_compute(layout, compute_pipeline, cmd_size);
}
}
@ -236,6 +272,9 @@ struct radv_dgc_params {
uint8_t predicating;
uint8_t predication_type;
uint64_t predication_va;
uint8_t bind_pipeline;
uint16_t pipeline_params_offset;
};
enum {
@ -292,6 +331,15 @@ dgc_emit(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *value)
nir_pack_64_2x32((b), nir_load_push_constant((b), 2, 32, nir_imm_int((b), 0), \
.base = offsetof(struct radv_dgc_params, field), .range = 8))
/* Pipeline metadata */
#define load_metadata32(b, field) \
nir_load_global( \
b, nir_iadd(b, pipeline_va, nir_imm_int64(b, offsetof(struct radv_compute_pipeline_metadata, field))), 4, 1, 32)
#define load_metadata64(b, field) \
nir_load_global( \
b, nir_iadd(b, pipeline_va, nir_imm_int64(b, offsetof(struct radv_compute_pipeline_metadata, field))), 4, 1, 64)
static nir_def *
nir_pkt3_base(nir_builder *b, unsigned op, nir_def *len, bool predicate)
{
@ -849,12 +897,127 @@ dgc_emit_index_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf
/**
* Emit VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV.
*/
/* Return the number of shaders that consume push constants for this sequence.
 *
 * When the layout binds the compute pipeline indirectly (bind_pipeline == 1),
 * the count cannot be recorded at prepare time, so it is derived from the
 * pipeline metadata referenced by the VA fetched from the indirect command
 * stream; otherwise the CPU-recorded value is used.
 */
static nir_def *
dgc_get_push_constant_shader_cnt(nir_builder *b, nir_def *stream_buf, nir_def *stream_base,
                                 nir_def *pipeline_params_offset)
{
   nir_def *res1, *res2;

   nir_push_if(b, nir_ieq_imm(b, load_param8(b, bind_pipeline), 1));
   {
      nir_def *stream_offset = nir_iadd(b, pipeline_params_offset, stream_base);
      /* pipeline_va is consumed implicitly by the load_metadata32() macro. */
      nir_def *pipeline_va = nir_load_ssbo(b, 1, 64, stream_buf, stream_offset);

      /* Indirect bind implies a single compute shader; count it only if it
       * actually uses push constants (non-zero push_const_sgpr metadata).
       */
      res1 = nir_b2i32(b, nir_ine_imm(b, load_metadata32(b, push_const_sgpr), 0));
   }
   nir_push_else(b, NULL); /* NULL (not 0) for consistency with the other DGC helpers */
   {
      res2 = load_param16(b, push_constant_shader_cnt);
   }
   nir_pop_if(b, NULL);

   return nir_if_phi(b, res1, res2);
}
/* Return the SH register offset used to upload the push constants pointer.
 *
 * For an indirect pipeline bind (bind_pipeline == 1), the packed SGPR dword is
 * read from the pipeline metadata referenced by the VA in the command stream;
 * otherwise it comes from the per-shader table uploaded at prepare time
 * (12 bytes per shader). The upload SGPR lives in the low 16 bits.
 */
static nir_def *
dgc_get_upload_sgpr(nir_builder *b, nir_def *stream_buf, nir_def *stream_base, nir_def *param_buf,
                    nir_def *param_offset, nir_def *cur_shader_idx, nir_def *pipeline_params_offset)
{
   nir_def *res1, *res2;

   nir_push_if(b, nir_ieq_imm(b, load_param8(b, bind_pipeline), 1));
   {
      nir_def *stream_offset = nir_iadd(b, pipeline_params_offset, stream_base);
      /* pipeline_va is consumed implicitly by the load_metadata32() macro. */
      nir_def *pipeline_va = nir_load_ssbo(b, 1, 64, stream_buf, stream_offset);

      res1 = load_metadata32(b, push_const_sgpr);
   }
   nir_push_else(b, NULL); /* NULL (not 0) for consistency with the other DGC helpers */
   {
      res2 = nir_load_ssbo(b, 1, 32, param_buf, nir_iadd(b, param_offset, nir_imul_imm(b, cur_shader_idx, 12)));
   }
   nir_pop_if(b, NULL);

   nir_def *res = nir_if_phi(b, res1, res2);

   return nir_ubfe_imm(b, res, 0, 16);
}
/* Return the SH register offset used for inline push constants.
 *
 * Same packed dword as dgc_get_upload_sgpr() (pipeline metadata for indirect
 * binds, the uploaded per-shader table otherwise); the inline SGPR lives in
 * the high 16 bits.
 */
static nir_def *
dgc_get_inline_sgpr(nir_builder *b, nir_def *stream_buf, nir_def *stream_base, nir_def *param_buf,
                    nir_def *param_offset, nir_def *cur_shader_idx, nir_def *pipeline_params_offset)
{
   nir_def *res1, *res2;

   nir_push_if(b, nir_ieq_imm(b, load_param8(b, bind_pipeline), 1));
   {
      nir_def *stream_offset = nir_iadd(b, pipeline_params_offset, stream_base);
      /* pipeline_va is consumed implicitly by the load_metadata32() macro. */
      nir_def *pipeline_va = nir_load_ssbo(b, 1, 64, stream_buf, stream_offset);

      res1 = load_metadata32(b, push_const_sgpr);
   }
   nir_push_else(b, NULL); /* NULL (not 0) for consistency with the other DGC helpers */
   {
      res2 = nir_load_ssbo(b, 1, 32, param_buf, nir_iadd(b, param_offset, nir_imul_imm(b, cur_shader_idx, 12)));
   }
   nir_pop_if(b, NULL);

   nir_def *res = nir_if_phi(b, res1, res2);

   return nir_ubfe_imm(b, res, 16, 16);
}
/* Return the 64-bit inline push constant mask for the current shader.
 *
 * Indirect binds read it from the pipeline metadata; otherwise it is packed
 * in the 2nd/3rd dwords of the 12-byte per-shader record in the param buffer.
 */
static nir_def *
dgc_get_inline_mask(nir_builder *b, nir_def *stream_buf, nir_def *stream_base, nir_def *param_buf,
                    nir_def *param_offset, nir_def *cur_shader_idx, nir_def *pipeline_params_offset)
{
   nir_def *res1, *res2;

   nir_push_if(b, nir_ieq_imm(b, load_param8(b, bind_pipeline), 1));
   {
      nir_def *stream_offset = nir_iadd(b, pipeline_params_offset, stream_base);
      /* pipeline_va is consumed implicitly by the load_metadata64() macro. */
      nir_def *pipeline_va = nir_load_ssbo(b, 1, 64, stream_buf, stream_offset);

      res1 = load_metadata64(b, inline_push_const_mask);
   }
   nir_push_else(b, NULL); /* NULL (not 0) for consistency with the other DGC helpers */
   {
      /* Skip the first dword (packed SGPR info) of the 12-byte record. */
      nir_def *reg_info = nir_load_ssbo(
         b, 2, 32, param_buf, nir_iadd(b, param_offset, nir_iadd_imm(b, nir_imul_imm(b, cur_shader_idx, 12), 4)));
      res2 = nir_pack_64_2x32(b, nir_channels(b, reg_info, 0x3));
   }
   nir_pop_if(b, NULL);

   return nir_if_phi(b, res1, res2);
}
/* Return whether the push constants must be copied to the upload buffer.
 *
 * Indirect binds check the metadata's upload SGPR (low 16 bits of
 * push_const_sgpr); otherwise the CPU-recorded const_copy flag is used.
 */
static nir_def *
dgc_push_constant_needs_copy(nir_builder *b, nir_def *stream_buf, nir_def *stream_base, nir_def *pipeline_params_offset)
{
   nir_def *res1, *res2;

   nir_push_if(b, nir_ieq_imm(b, load_param8(b, bind_pipeline), 1));
   {
      nir_def *stream_offset = nir_iadd(b, pipeline_params_offset, stream_base);
      /* pipeline_va is consumed implicitly by the load_metadata32() macro. */
      nir_def *pipeline_va = nir_load_ssbo(b, 1, 64, stream_buf, stream_offset);

      res1 = nir_ine_imm(b, nir_ubfe_imm(b, load_metadata32(b, push_const_sgpr), 0, 16), 0);
   }
   nir_push_else(b, NULL); /* NULL (not 0) for consistency with the other DGC helpers */
   {
      res2 = nir_ine_imm(b, load_param8(b, const_copy), 0);
   }
   nir_pop_if(b, NULL);

   return nir_if_phi(b, res1, res2);
}
static void
dgc_emit_push_constant(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, nir_def *stream_base,
nir_def *push_const_mask, nir_variable *upload_offset)
nir_def *pipeline_params_offset, nir_def *push_const_mask, nir_variable *upload_offset)
{
nir_def *vbo_cnt = load_param8(b, vbo_cnt);
nir_def *const_copy = nir_ine_imm(b, load_param8(b, const_copy), 0);
nir_def *const_copy = dgc_push_constant_needs_copy(b, stream_buf, stream_base, pipeline_params_offset);
nir_def *const_copy_size = load_param16(b, const_copy_size);
nir_def *const_copy_words = nir_ushr_imm(b, const_copy_size, 2);
const_copy_words = nir_bcsel(b, const_copy, const_copy_words, nir_imm_int(b, 0));
@ -906,7 +1069,7 @@ dgc_emit_push_constant(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_bu
nir_variable *shader_idx = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "shader_idx");
nir_store_var(b, shader_idx, nir_imm_int(b, 0), 0x1);
nir_def *shader_cnt = load_param16(b, push_constant_shader_cnt);
nir_def *shader_cnt = dgc_get_push_constant_shader_cnt(b, stream_buf, stream_base, pipeline_params_offset);
nir_push_loop(b);
{
@ -917,11 +1080,12 @@ dgc_emit_push_constant(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_bu
}
nir_pop_if(b, NULL);
nir_def *reg_info =
nir_load_ssbo(b, 3, 32, param_buf, nir_iadd(b, param_offset, nir_imul_imm(b, cur_shader_idx, 12)));
nir_def *upload_sgpr = nir_ubfe_imm(b, nir_channel(b, reg_info, 0), 0, 16);
nir_def *inline_sgpr = nir_ubfe_imm(b, nir_channel(b, reg_info, 0), 16, 16);
nir_def *inline_mask = nir_pack_64_2x32(b, nir_channels(b, reg_info, 0x6));
nir_def *upload_sgpr = dgc_get_upload_sgpr(b, stream_buf, stream_base, param_buf, param_offset, cur_shader_idx,
pipeline_params_offset);
nir_def *inline_sgpr = dgc_get_inline_sgpr(b, stream_buf, stream_base, param_buf, param_offset, cur_shader_idx,
pipeline_params_offset);
nir_def *inline_mask = dgc_get_inline_mask(b, stream_buf, stream_base, param_buf, param_offset, cur_shader_idx,
pipeline_params_offset);
nir_push_if(b, nir_ine_imm(b, upload_sgpr, 0));
{
@ -974,6 +1138,26 @@ dgc_emit_push_constant(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_bu
dgc_emit(b, cs, nir_vec(b, pkt, 3));
}
nir_push_else(b, NULL);
{
nir_push_if(b, nir_ieq_imm(b, load_param8(b, bind_pipeline), 1));
{
/* For indirect pipeline binds, partial push constant updates can't be emitted
* when the DGC execute is called because there is no bound pipeline and they have
* to be emitted from the DGC prepare shader.
*/
nir_def *new_data =
nir_load_ssbo(b, 1, 32, param_buf, nir_iadd(b, param_const_offset, nir_ishl_imm(b, cur_idx, 2)));
nir_store_var(b, data, new_data, 0x1);
nir_def *pkt[3] = {nir_pkt3(b, PKT3_SET_SH_REG, nir_imm_int(b, 1)),
nir_iadd(b, inline_sgpr, nir_load_var(b, pc_idx)), nir_load_var(b, data)};
dgc_emit(b, cs, nir_vec(b, pkt, 3));
}
nir_pop_if(b, NULL);
}
nir_pop_if(b, NULL);
nir_store_var(b, idx, nir_iadd_imm(b, cur_idx, 1), 0x1);
@ -1129,9 +1313,53 @@ dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_bu
/**
* For emitting VK_INDIRECT_COMMANDS_TOKEN_TYPE_DISPATCH_NV.
*/
/* Return the base SGPR used to load the dispatch grid size.
 *
 * Indirect binds read it from the pipeline metadata referenced by the VA in
 * the command stream; otherwise the CPU-recorded grid_base_sgpr is used.
 */
static nir_def *
dgc_get_grid_sgpr(nir_builder *b, nir_def *stream_buf, nir_def *stream_base, nir_def *pipeline_params_offset)
{
   nir_def *res1, *res2;

   nir_push_if(b, nir_ieq_imm(b, load_param8(b, bind_pipeline), 1));
   {
      nir_def *stream_offset = nir_iadd(b, pipeline_params_offset, stream_base);
      /* pipeline_va is consumed implicitly by the load_metadata32() macro. */
      nir_def *pipeline_va = nir_load_ssbo(b, 1, 64, stream_buf, stream_offset);

      res1 = load_metadata32(b, grid_base_sgpr);
   }
   nir_push_else(b, NULL); /* NULL (not 0) for consistency with the other DGC helpers */
   {
      res2 = load_param16(b, grid_base_sgpr);
   }
   nir_pop_if(b, NULL);

   return nir_if_phi(b, res1, res2);
}
/* Return the COMPUTE_DISPATCH_INITIATOR value for this sequence.
 *
 * For indirect pipeline binds the wave size is unknown at prepare time, so
 * CS_W32_EN is ORed in at execution time from the pipeline metadata; for a
 * direct bind the CPU-recorded initiator already contains the right bits.
 */
static nir_def *
dgc_get_dispatch_initiator(nir_builder *b, nir_def *stream_buf, nir_def *stream_base, nir_def *pipeline_params_offset)
{
   nir_def *res1, *res2;

   nir_push_if(b, nir_ieq_imm(b, load_param8(b, bind_pipeline), 1));
   {
      nir_def *stream_offset = nir_iadd(b, pipeline_params_offset, stream_base);
      /* pipeline_va is consumed implicitly by the load_metadata32() macro. */
      nir_def *pipeline_va = nir_load_ssbo(b, 1, 64, stream_buf, stream_offset);
      nir_def *dispatch_initiator = load_param32(b, dispatch_initiator);

      nir_def *wave32 = nir_ieq_imm(b, load_metadata32(b, wave32), 1);
      res1 = nir_bcsel(b, wave32, nir_ior_imm(b, dispatch_initiator, S_00B800_CS_W32_EN(1)), dispatch_initiator);
   }
   nir_push_else(b, NULL); /* NULL (not 0) for consistency with the other DGC helpers */
   {
      res2 = load_param32(b, dispatch_initiator);
   }
   nir_pop_if(b, NULL);

   return nir_if_phi(b, res1, res2);
}
static void
dgc_emit_dispatch(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, nir_def *stream_base,
nir_def *dispatch_params_offset, nir_def *sequence_id, const struct radv_device *device)
nir_def *dispatch_params_offset, nir_def *pipeline_params_offset, nir_def *sequence_id,
const struct radv_device *device)
{
nir_def *stream_offset = nir_iadd(b, dispatch_params_offset, stream_base);
@ -1140,7 +1368,7 @@ dgc_emit_dispatch(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, ni
nir_def *wg_y = nir_channel(b, dispatch_data, 1);
nir_def *wg_z = nir_channel(b, dispatch_data, 2);
nir_def *grid_sgpr = load_param16(b, grid_base_sgpr);
nir_def *grid_sgpr = dgc_get_grid_sgpr(b, stream_buf, stream_base, pipeline_params_offset);
nir_push_if(b, nir_ine_imm(b, grid_sgpr, 0));
{
if (device->load_grid_size_from_user_sgpr) {
@ -1156,7 +1384,8 @@ dgc_emit_dispatch(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, ni
dgc_emit_sqtt_begin_api_marker(b, cs, ApiCmdDispatch);
dgc_emit_sqtt_marker_event_with_dims(b, cs, sequence_id, wg_x, wg_y, wg_z, EventCmdDispatch);
dgc_emit_dispatch_direct(b, cs, wg_x, wg_y, wg_z, load_param32(b, dispatch_initiator));
nir_def *dispatch_initiator = dgc_get_dispatch_initiator(b, stream_buf, stream_base, pipeline_params_offset);
dgc_emit_dispatch_direct(b, cs, wg_x, wg_y, wg_z, dispatch_initiator);
dgc_emit_sqtt_thread_trace_marker(b, cs);
dgc_emit_sqtt_end_api_marker(b, cs, ApiCmdDispatch);
@ -1200,6 +1429,48 @@ dgc_emit_draw_mesh_tasks(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_
nir_pop_if(b, NULL);
}
/**
* Emit VK_INDIRECT_COMMANDS_TOKEN_TYPE_PIPELINE_NV.
*/
/* Emit the two-dword header of a PKT3_SET_SH_REG packet covering "num"
 * consecutive SH registers starting at "reg". The register payload dwords
 * are emitted separately by the caller.
 */
static void
dgc_emit_set_sh_reg_seq(nir_builder *b, struct dgc_cmdbuf *cs, unsigned reg, unsigned num)
{
   nir_def *header = nir_imm_int(b, PKT3(PKT3_SET_SH_REG, num, false));
   nir_def *reg_offset = nir_imm_int(b, (reg - SI_SH_REG_OFFSET) >> 2);
   nir_def *packet[] = {header, reg_offset};

   dgc_emit(b, cs, nir_vec(b, packet, 2));
}
/* Emit the PKT3_SET_SH_REG packets that bind an indirectly-specified compute
 * pipeline: the pipeline VA is read from the indirect command stream and the
 * shader register values are loaded from the radv_compute_pipeline_metadata
 * it points to. The emission order below is the order the dwords land in the
 * generated command buffer.
 */
static void
dgc_emit_bind_pipeline(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, nir_def *stream_base,
                       nir_def *pipeline_params_offset, const struct radv_device *device)
{
   nir_def *stream_offset = nir_iadd(b, pipeline_params_offset, stream_base);
   /* pipeline_va is consumed implicitly by the load_metadata32() macro below. */
   nir_def *pipeline_va = nir_load_ssbo(b, 1, 64, stream_buf, stream_offset);

   /* Shader code address. */
   dgc_emit_set_sh_reg_seq(b, cs, R_00B830_COMPUTE_PGM_LO, 1);
   dgc_emit(b, cs, load_metadata32(b, shader_va));

   /* Shader resource registers. */
   dgc_emit_set_sh_reg_seq(b, cs, R_00B848_COMPUTE_PGM_RSRC1, 2);
   dgc_emit(b, cs, load_metadata32(b, rsrc1));
   dgc_emit(b, cs, load_metadata32(b, rsrc2));

   /* RSRC3 only exists on GFX10+. */
   if (device->physical_device->rad_info.gfx_level >= GFX10) {
      dgc_emit_set_sh_reg_seq(b, cs, R_00B8A0_COMPUTE_PGM_RSRC3, 1);
      dgc_emit(b, cs, load_metadata32(b, rsrc3));
   }

   dgc_emit_set_sh_reg_seq(b, cs, R_00B854_COMPUTE_RESOURCE_LIMITS, 1);
   dgc_emit(b, cs, load_metadata32(b, compute_resource_limits));

   /* Workgroup dimensions (NUM_THREAD_X/Y/Z are consecutive registers). */
   dgc_emit_set_sh_reg_seq(b, cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
   dgc_emit(b, cs, load_metadata32(b, block_size_x));
   dgc_emit(b, cs, load_metadata32(b, block_size_y));
   dgc_emit(b, cs, load_metadata32(b, block_size_z));
}
static nir_def *
dgc_is_cond_render_enabled(nir_builder *b)
{
@ -1300,7 +1571,14 @@ build_dgc_prepare_shader(struct radv_device *dev)
nir_def *push_const_mask = load_param64(&b, push_constant_mask);
nir_push_if(&b, nir_ine_imm(&b, push_const_mask, 0));
{
dgc_emit_push_constant(&b, &cmd_buf, stream_buf, stream_base, push_const_mask, upload_offset);
dgc_emit_push_constant(&b, &cmd_buf, stream_buf, stream_base, load_param16(&b, pipeline_params_offset),
push_const_mask, upload_offset);
}
nir_pop_if(&b, 0);
nir_push_if(&b, nir_ieq_imm(&b, load_param8(&b, bind_pipeline), 1));
{
dgc_emit_bind_pipeline(&b, &cmd_buf, stream_buf, stream_base, load_param16(&b, pipeline_params_offset), dev);
}
nir_pop_if(&b, 0);
@ -1353,8 +1631,8 @@ build_dgc_prepare_shader(struct radv_device *dev)
}
nir_push_else(&b, NULL);
{
dgc_emit_dispatch(&b, &cmd_buf, stream_buf, stream_base, load_param16(&b, dispatch_params_offset), sequence_id,
dev);
dgc_emit_dispatch(&b, &cmd_buf, stream_buf, stream_base, load_param16(&b, dispatch_params_offset),
load_param16(&b, pipeline_params_offset), sequence_id, dev);
}
nir_pop_if(&b, NULL);
@ -1551,6 +1829,10 @@ radv_CreateIndirectCommandsLayoutNV(VkDevice _device, const VkIndirectCommandsLa
layout->draw_mesh_tasks = true;
layout->draw_params_offset = pCreateInfo->pTokens[i].offset;
break;
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PIPELINE_NV:
layout->bind_pipeline = true;
layout->pipeline_params_offset = pCreateInfo->pTokens[i].offset;
break;
default:
unreachable("Unhandled token type");
}
@ -1761,8 +2043,6 @@ radv_prepare_dgc_compute(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCo
{
VK_FROM_HANDLE(radv_indirect_command_layout, layout, pGeneratedCommandsInfo->indirectCommandsLayout);
VK_FROM_HANDLE(radv_pipeline, pipeline, pGeneratedCommandsInfo->pipeline);
struct radv_compute_pipeline *compute_pipeline = radv_pipeline_to_compute(pipeline);
struct radv_shader *cs = radv_get_shader(compute_pipeline->base.shaders, MESA_SHADER_COMPUTE);
*upload_size = MAX2(*upload_size, 16);
@ -1771,15 +2051,8 @@ radv_prepare_dgc_compute(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCo
return;
}
uint32_t dispatch_initiator = cmd_buffer->device->dispatch_initiator;
dispatch_initiator |= S_00B800_FORCE_START_AT_000(1);
if (cs->info.wave_size == 32) {
assert(cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10);
dispatch_initiator |= S_00B800_CS_W32_EN(1);
}
params->dispatch_params_offset = layout->dispatch_params_offset;
params->dispatch_initiator = dispatch_initiator;
params->dispatch_initiator = cmd_buffer->device->dispatch_initiator | S_00B800_FORCE_START_AT_000(1);
params->is_dispatch = 1;
if (cond_render_enabled) {
@ -1788,9 +2061,22 @@ radv_prepare_dgc_compute(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCo
params->predication_type = cmd_buffer->state.predication_type;
}
const struct radv_userdata_info *loc = radv_get_user_sgpr(cs, AC_UD_CS_GRID_SIZE);
if (loc->sgpr_idx != -1) {
params->grid_base_sgpr = (cs->info.user_data_0 + 4 * loc->sgpr_idx - SI_SH_REG_OFFSET) >> 2;
if (pipeline) {
struct radv_compute_pipeline *compute_pipeline = radv_pipeline_to_compute(pipeline);
struct radv_shader *cs = radv_get_shader(compute_pipeline->base.shaders, MESA_SHADER_COMPUTE);
if (cs->info.wave_size == 32) {
assert(cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10);
params->dispatch_initiator |= S_00B800_CS_W32_EN(1);
}
const struct radv_userdata_info *loc = radv_get_user_sgpr(cs, AC_UD_CS_GRID_SIZE);
if (loc->sgpr_idx != -1) {
params->grid_base_sgpr = (cs->info.user_data_0 + 4 * loc->sgpr_idx - SI_SH_REG_OFFSET) >> 2;
}
} else {
params->bind_pipeline = 1;
params->pipeline_params_offset = layout->pipeline_params_offset;
}
}
@ -1849,35 +2135,38 @@ radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCommandsIn
upload_data = (char *)upload_data + ARRAY_SIZE(pipeline->shaders) * 12;
unsigned idx = 0;
for (unsigned i = 0; i < ARRAY_SIZE(pipeline->shaders); ++i) {
if (!pipeline->shaders[i])
continue;
const struct radv_shader *shader = pipeline->shaders[i];
const struct radv_userdata_locations *locs = &shader->info.user_sgprs_locs;
if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0)
params.const_copy = 1;
if (pipeline) {
for (unsigned i = 0; i < ARRAY_SIZE(pipeline->shaders); ++i) {
if (!pipeline->shaders[i])
continue;
if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0 ||
locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx >= 0) {
unsigned upload_sgpr = 0;
unsigned inline_sgpr = 0;
const struct radv_shader *shader = pipeline->shaders[i];
const struct radv_userdata_locations *locs = &shader->info.user_sgprs_locs;
if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0)
params.const_copy = 1;
if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0) {
upload_sgpr = (shader->info.user_data_0 + 4 * locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx -
SI_SH_REG_OFFSET) >>
2;
if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0 ||
locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx >= 0) {
unsigned upload_sgpr = 0;
unsigned inline_sgpr = 0;
if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0) {
upload_sgpr = (shader->info.user_data_0 + 4 * locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx -
SI_SH_REG_OFFSET) >>
2;
}
if (locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx >= 0) {
inline_sgpr = (shader->info.user_data_0 +
4 * locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx - SI_SH_REG_OFFSET) >>
2;
desc[idx * 3 + 1] = pipeline->shaders[i]->info.inline_push_constant_mask;
desc[idx * 3 + 2] = pipeline->shaders[i]->info.inline_push_constant_mask >> 32;
}
desc[idx * 3] = upload_sgpr | (inline_sgpr << 16);
++idx;
}
if (locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx >= 0) {
inline_sgpr = (shader->info.user_data_0 + 4 * locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx -
SI_SH_REG_OFFSET) >>
2;
desc[idx * 3 + 1] = pipeline->shaders[i]->info.inline_push_constant_mask;
desc[idx * 3 + 2] = pipeline->shaders[i]->info.inline_push_constant_mask >> 32;
}
desc[idx * 3] = upload_sgpr | (inline_sgpr << 16);
++idx;
}
}

View file

@ -3280,6 +3280,9 @@ struct radv_indirect_command_layout {
uint16_t dispatch_params_offset;
bool bind_pipeline;
uint16_t pipeline_params_offset;
uint32_t bind_vbo_mask;
uint32_t vbo_offsets[MAX_VBS];