mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-02 16:38:09 +02:00
radv: implement indirect compute pipeline binds with DGC
This also supports push constants. Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27495>
This commit is contained in:
parent
12b015940f
commit
a2d67adff1
3 changed files with 361 additions and 68 deletions
|
|
@ -10307,7 +10307,8 @@ radv_dgc_before_dispatch(struct radv_cmd_buffer *cmd_buffer)
|
|||
if (compute_shader->info.cs.regalloc_hang_bug)
|
||||
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH;
|
||||
|
||||
radv_emit_compute_pipeline(cmd_buffer, pipeline);
|
||||
if (pipeline)
|
||||
radv_emit_compute_pipeline(cmd_buffer, pipeline);
|
||||
radv_emit_cache_flush(cmd_buffer);
|
||||
|
||||
radv_upload_compute_shader_descriptors(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE);
|
||||
|
|
|
|||
|
|
@ -35,13 +35,37 @@ radv_get_sequence_size_compute(const struct radv_indirect_command_layout *layout
|
|||
const struct radv_compute_pipeline *pipeline, uint32_t *cmd_size)
|
||||
{
|
||||
const struct radv_device *device = container_of(layout->base.device, struct radv_device, vk);
|
||||
struct radv_shader *cs = radv_get_shader(pipeline->base.shaders, MESA_SHADER_COMPUTE);
|
||||
|
||||
/* dispatch */
|
||||
*cmd_size += 5 * 4;
|
||||
|
||||
const struct radv_userdata_info *loc = radv_get_user_sgpr(cs, AC_UD_CS_GRID_SIZE);
|
||||
if (loc->sgpr_idx != -1) {
|
||||
if (pipeline) {
|
||||
struct radv_shader *cs = radv_get_shader(pipeline->base.shaders, MESA_SHADER_COMPUTE);
|
||||
const struct radv_userdata_info *loc = radv_get_user_sgpr(cs, AC_UD_CS_GRID_SIZE);
|
||||
if (loc->sgpr_idx != -1) {
|
||||
if (device->load_grid_size_from_user_sgpr) {
|
||||
/* PKT3_SET_SH_REG for immediate values */
|
||||
*cmd_size += 5 * 4;
|
||||
} else {
|
||||
/* PKT3_SET_SH_REG for pointer */
|
||||
*cmd_size += 4 * 4;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* COMPUTE_PGM_{LO,RSRC1,RSRC2} */
|
||||
*cmd_size += 7 * 4;
|
||||
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
/* COMPUTE_PGM_RSRC3 */
|
||||
*cmd_size += 3 * 4;
|
||||
}
|
||||
|
||||
/* COMPUTE_{RESOURCE_LIMITS,NUM_THREADS_X} */
|
||||
*cmd_size += 8 * 4;
|
||||
|
||||
/* Assume the compute shader needs grid size because we can't know the information for
|
||||
* indirect pipelines.
|
||||
*/
|
||||
if (device->load_grid_size_from_user_sgpr) {
|
||||
/* PKT3_SET_SH_REG for immediate values */
|
||||
*cmd_size += 5 * 4;
|
||||
|
|
@ -116,20 +140,32 @@ radv_get_sequence_size(const struct radv_indirect_command_layout *layout, struct
|
|||
if (layout->push_constant_mask) {
|
||||
bool need_copy = false;
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(pipeline->shaders); ++i) {
|
||||
if (!pipeline->shaders[i])
|
||||
continue;
|
||||
if (pipeline) {
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(pipeline->shaders); ++i) {
|
||||
if (!pipeline->shaders[i])
|
||||
continue;
|
||||
|
||||
struct radv_userdata_locations *locs = &pipeline->shaders[i]->info.user_sgprs_locs;
|
||||
if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0) {
|
||||
/* One PKT3_SET_SH_REG for emitting push constants pointer (32-bit) */
|
||||
*cmd_size += 3 * 4;
|
||||
need_copy = true;
|
||||
struct radv_userdata_locations *locs = &pipeline->shaders[i]->info.user_sgprs_locs;
|
||||
if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0) {
|
||||
/* One PKT3_SET_SH_REG for emitting push constants pointer (32-bit) */
|
||||
*cmd_size += 3 * 4;
|
||||
need_copy = true;
|
||||
}
|
||||
if (locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx >= 0)
|
||||
/* One PKT3_SET_SH_REG writing all inline push constants. */
|
||||
*cmd_size += (3 * util_bitcount64(layout->push_constant_mask)) * 4;
|
||||
}
|
||||
if (locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx >= 0)
|
||||
/* One PKT3_SET_SH_REG writing all inline push constants. */
|
||||
*cmd_size += (3 * util_bitcount64(layout->push_constant_mask)) * 4;
|
||||
} else {
|
||||
/* Assume the compute shader needs both user SGPRs because we can't know the information
|
||||
* for indirect pipelines.
|
||||
*/
|
||||
assert(layout->pipeline_bind_point == VK_PIPELINE_BIND_POINT_COMPUTE);
|
||||
*cmd_size += 3 * 4;
|
||||
need_copy = true;
|
||||
|
||||
*cmd_size += (3 * util_bitcount64(layout->push_constant_mask)) * 4;
|
||||
}
|
||||
|
||||
if (need_copy) {
|
||||
*upload_size += align(layout->push_constant_size + 16 * layout->dynamic_offset_count, 16);
|
||||
}
|
||||
|
|
@ -145,7 +181,7 @@ radv_get_sequence_size(const struct radv_indirect_command_layout *layout, struct
|
|||
radv_get_sequence_size_graphics(layout, graphics_pipeline, cmd_size, upload_size);
|
||||
} else {
|
||||
assert(layout->pipeline_bind_point == VK_PIPELINE_BIND_POINT_COMPUTE);
|
||||
struct radv_compute_pipeline *compute_pipeline = radv_pipeline_to_compute(pipeline);
|
||||
struct radv_compute_pipeline *compute_pipeline = pipeline ? radv_pipeline_to_compute(pipeline) : NULL;
|
||||
radv_get_sequence_size_compute(layout, compute_pipeline, cmd_size);
|
||||
}
|
||||
}
|
||||
|
|
@ -236,6 +272,9 @@ struct radv_dgc_params {
|
|||
uint8_t predicating;
|
||||
uint8_t predication_type;
|
||||
uint64_t predication_va;
|
||||
|
||||
uint8_t bind_pipeline;
|
||||
uint16_t pipeline_params_offset;
|
||||
};
|
||||
|
||||
enum {
|
||||
|
|
@ -292,6 +331,15 @@ dgc_emit(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *value)
|
|||
nir_pack_64_2x32((b), nir_load_push_constant((b), 2, 32, nir_imm_int((b), 0), \
|
||||
.base = offsetof(struct radv_dgc_params, field), .range = 8))
|
||||
|
||||
/* Pipeline metadata */
|
||||
#define load_metadata32(b, field) \
|
||||
nir_load_global( \
|
||||
b, nir_iadd(b, pipeline_va, nir_imm_int64(b, offsetof(struct radv_compute_pipeline_metadata, field))), 4, 1, 32)
|
||||
|
||||
#define load_metadata64(b, field) \
|
||||
nir_load_global( \
|
||||
b, nir_iadd(b, pipeline_va, nir_imm_int64(b, offsetof(struct radv_compute_pipeline_metadata, field))), 4, 1, 64)
|
||||
|
||||
static nir_def *
|
||||
nir_pkt3_base(nir_builder *b, unsigned op, nir_def *len, bool predicate)
|
||||
{
|
||||
|
|
@ -849,12 +897,127 @@ dgc_emit_index_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf
|
|||
/**
|
||||
* Emit VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV.
|
||||
*/
|
||||
static nir_def *
|
||||
dgc_get_push_constant_shader_cnt(nir_builder *b, nir_def *stream_buf, nir_def *stream_base,
|
||||
nir_def *pipeline_params_offset)
|
||||
{
|
||||
nir_def *res1, *res2;
|
||||
|
||||
nir_push_if(b, nir_ieq_imm(b, load_param8(b, bind_pipeline), 1));
|
||||
{
|
||||
nir_def *stream_offset = nir_iadd(b, pipeline_params_offset, stream_base);
|
||||
nir_def *pipeline_va = nir_load_ssbo(b, 1, 64, stream_buf, stream_offset);
|
||||
|
||||
res1 = nir_b2i32(b, nir_ine_imm(b, load_metadata32(b, push_const_sgpr), 0));
|
||||
}
|
||||
nir_push_else(b, 0);
|
||||
{
|
||||
res2 = load_param16(b, push_constant_shader_cnt);
|
||||
}
|
||||
nir_pop_if(b, 0);
|
||||
|
||||
return nir_if_phi(b, res1, res2);
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
dgc_get_upload_sgpr(nir_builder *b, nir_def *stream_buf, nir_def *stream_base, nir_def *param_buf,
|
||||
nir_def *param_offset, nir_def *cur_shader_idx, nir_def *pipeline_params_offset)
|
||||
{
|
||||
nir_def *res1, *res2;
|
||||
|
||||
nir_push_if(b, nir_ieq_imm(b, load_param8(b, bind_pipeline), 1));
|
||||
{
|
||||
nir_def *stream_offset = nir_iadd(b, pipeline_params_offset, stream_base);
|
||||
nir_def *pipeline_va = nir_load_ssbo(b, 1, 64, stream_buf, stream_offset);
|
||||
|
||||
res1 = load_metadata32(b, push_const_sgpr);
|
||||
}
|
||||
nir_push_else(b, 0);
|
||||
{
|
||||
res2 = nir_load_ssbo(b, 1, 32, param_buf, nir_iadd(b, param_offset, nir_imul_imm(b, cur_shader_idx, 12)));
|
||||
}
|
||||
nir_pop_if(b, 0);
|
||||
|
||||
nir_def *res = nir_if_phi(b, res1, res2);
|
||||
|
||||
return nir_ubfe_imm(b, res, 0, 16);
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
dgc_get_inline_sgpr(nir_builder *b, nir_def *stream_buf, nir_def *stream_base, nir_def *param_buf,
|
||||
nir_def *param_offset, nir_def *cur_shader_idx, nir_def *pipeline_params_offset)
|
||||
{
|
||||
nir_def *res1, *res2;
|
||||
|
||||
nir_push_if(b, nir_ieq_imm(b, load_param8(b, bind_pipeline), 1));
|
||||
{
|
||||
nir_def *stream_offset = nir_iadd(b, pipeline_params_offset, stream_base);
|
||||
nir_def *pipeline_va = nir_load_ssbo(b, 1, 64, stream_buf, stream_offset);
|
||||
|
||||
res1 = load_metadata32(b, push_const_sgpr);
|
||||
}
|
||||
nir_push_else(b, 0);
|
||||
{
|
||||
res2 = nir_load_ssbo(b, 1, 32, param_buf, nir_iadd(b, param_offset, nir_imul_imm(b, cur_shader_idx, 12)));
|
||||
}
|
||||
nir_pop_if(b, 0);
|
||||
|
||||
nir_def *res = nir_if_phi(b, res1, res2);
|
||||
|
||||
return nir_ubfe_imm(b, res, 16, 16);
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
dgc_get_inline_mask(nir_builder *b, nir_def *stream_buf, nir_def *stream_base, nir_def *param_buf,
|
||||
nir_def *param_offset, nir_def *cur_shader_idx, nir_def *pipeline_params_offset)
|
||||
{
|
||||
nir_def *res1, *res2;
|
||||
|
||||
nir_push_if(b, nir_ieq_imm(b, load_param8(b, bind_pipeline), 1));
|
||||
{
|
||||
nir_def *stream_offset = nir_iadd(b, pipeline_params_offset, stream_base);
|
||||
nir_def *pipeline_va = nir_load_ssbo(b, 1, 64, stream_buf, stream_offset);
|
||||
|
||||
res1 = load_metadata64(b, inline_push_const_mask);
|
||||
}
|
||||
nir_push_else(b, 0);
|
||||
{
|
||||
nir_def *reg_info = nir_load_ssbo(
|
||||
b, 2, 32, param_buf, nir_iadd(b, param_offset, nir_iadd_imm(b, nir_imul_imm(b, cur_shader_idx, 12), 4)));
|
||||
res2 = nir_pack_64_2x32(b, nir_channels(b, reg_info, 0x3));
|
||||
}
|
||||
nir_pop_if(b, 0);
|
||||
|
||||
return nir_if_phi(b, res1, res2);
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
dgc_push_constant_needs_copy(nir_builder *b, nir_def *stream_buf, nir_def *stream_base, nir_def *pipeline_params_offset)
|
||||
{
|
||||
nir_def *res1, *res2;
|
||||
|
||||
nir_push_if(b, nir_ieq_imm(b, load_param8(b, bind_pipeline), 1));
|
||||
{
|
||||
nir_def *stream_offset = nir_iadd(b, pipeline_params_offset, stream_base);
|
||||
nir_def *pipeline_va = nir_load_ssbo(b, 1, 64, stream_buf, stream_offset);
|
||||
|
||||
res1 = nir_ine_imm(b, nir_ubfe_imm(b, load_metadata32(b, push_const_sgpr), 0, 16), 0);
|
||||
}
|
||||
nir_push_else(b, 0);
|
||||
{
|
||||
res2 = nir_ine_imm(b, load_param8(b, const_copy), 0);
|
||||
}
|
||||
nir_pop_if(b, 0);
|
||||
|
||||
return nir_if_phi(b, res1, res2);
|
||||
}
|
||||
|
||||
static void
|
||||
dgc_emit_push_constant(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, nir_def *stream_base,
|
||||
nir_def *push_const_mask, nir_variable *upload_offset)
|
||||
nir_def *pipeline_params_offset, nir_def *push_const_mask, nir_variable *upload_offset)
|
||||
{
|
||||
nir_def *vbo_cnt = load_param8(b, vbo_cnt);
|
||||
nir_def *const_copy = nir_ine_imm(b, load_param8(b, const_copy), 0);
|
||||
nir_def *const_copy = dgc_push_constant_needs_copy(b, stream_buf, stream_base, pipeline_params_offset);
|
||||
nir_def *const_copy_size = load_param16(b, const_copy_size);
|
||||
nir_def *const_copy_words = nir_ushr_imm(b, const_copy_size, 2);
|
||||
const_copy_words = nir_bcsel(b, const_copy, const_copy_words, nir_imm_int(b, 0));
|
||||
|
|
@ -906,7 +1069,7 @@ dgc_emit_push_constant(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_bu
|
|||
|
||||
nir_variable *shader_idx = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "shader_idx");
|
||||
nir_store_var(b, shader_idx, nir_imm_int(b, 0), 0x1);
|
||||
nir_def *shader_cnt = load_param16(b, push_constant_shader_cnt);
|
||||
nir_def *shader_cnt = dgc_get_push_constant_shader_cnt(b, stream_buf, stream_base, pipeline_params_offset);
|
||||
|
||||
nir_push_loop(b);
|
||||
{
|
||||
|
|
@ -917,11 +1080,12 @@ dgc_emit_push_constant(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_bu
|
|||
}
|
||||
nir_pop_if(b, NULL);
|
||||
|
||||
nir_def *reg_info =
|
||||
nir_load_ssbo(b, 3, 32, param_buf, nir_iadd(b, param_offset, nir_imul_imm(b, cur_shader_idx, 12)));
|
||||
nir_def *upload_sgpr = nir_ubfe_imm(b, nir_channel(b, reg_info, 0), 0, 16);
|
||||
nir_def *inline_sgpr = nir_ubfe_imm(b, nir_channel(b, reg_info, 0), 16, 16);
|
||||
nir_def *inline_mask = nir_pack_64_2x32(b, nir_channels(b, reg_info, 0x6));
|
||||
nir_def *upload_sgpr = dgc_get_upload_sgpr(b, stream_buf, stream_base, param_buf, param_offset, cur_shader_idx,
|
||||
pipeline_params_offset);
|
||||
nir_def *inline_sgpr = dgc_get_inline_sgpr(b, stream_buf, stream_base, param_buf, param_offset, cur_shader_idx,
|
||||
pipeline_params_offset);
|
||||
nir_def *inline_mask = dgc_get_inline_mask(b, stream_buf, stream_base, param_buf, param_offset, cur_shader_idx,
|
||||
pipeline_params_offset);
|
||||
|
||||
nir_push_if(b, nir_ine_imm(b, upload_sgpr, 0));
|
||||
{
|
||||
|
|
@ -974,6 +1138,26 @@ dgc_emit_push_constant(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_bu
|
|||
|
||||
dgc_emit(b, cs, nir_vec(b, pkt, 3));
|
||||
}
|
||||
nir_push_else(b, NULL);
|
||||
{
|
||||
nir_push_if(b, nir_ieq_imm(b, load_param8(b, bind_pipeline), 1));
|
||||
{
|
||||
/* For indirect pipeline binds, partial push constant updates can't be emitted
|
||||
* when the DGC execute is called because there is no bound pipeline and they have
|
||||
* to be emitted from the DGC prepare shader.
|
||||
*/
|
||||
nir_def *new_data =
|
||||
nir_load_ssbo(b, 1, 32, param_buf, nir_iadd(b, param_const_offset, nir_ishl_imm(b, cur_idx, 2)));
|
||||
nir_store_var(b, data, new_data, 0x1);
|
||||
|
||||
nir_def *pkt[3] = {nir_pkt3(b, PKT3_SET_SH_REG, nir_imm_int(b, 1)),
|
||||
nir_iadd(b, inline_sgpr, nir_load_var(b, pc_idx)), nir_load_var(b, data)};
|
||||
|
||||
dgc_emit(b, cs, nir_vec(b, pkt, 3));
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
}
|
||||
|
||||
nir_pop_if(b, NULL);
|
||||
|
||||
nir_store_var(b, idx, nir_iadd_imm(b, cur_idx, 1), 0x1);
|
||||
|
|
@ -1129,9 +1313,53 @@ dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_bu
|
|||
/**
|
||||
* For emitting VK_INDIRECT_COMMANDS_TOKEN_TYPE_DISPATCH_NV.
|
||||
*/
|
||||
static nir_def *
|
||||
dgc_get_grid_sgpr(nir_builder *b, nir_def *stream_buf, nir_def *stream_base, nir_def *pipeline_params_offset)
|
||||
{
|
||||
nir_def *res1, *res2;
|
||||
|
||||
nir_push_if(b, nir_ieq_imm(b, load_param8(b, bind_pipeline), 1));
|
||||
{
|
||||
nir_def *stream_offset = nir_iadd(b, pipeline_params_offset, stream_base);
|
||||
nir_def *pipeline_va = nir_load_ssbo(b, 1, 64, stream_buf, stream_offset);
|
||||
res1 = load_metadata32(b, grid_base_sgpr);
|
||||
}
|
||||
nir_push_else(b, 0);
|
||||
{
|
||||
res2 = load_param16(b, grid_base_sgpr);
|
||||
}
|
||||
nir_pop_if(b, 0);
|
||||
|
||||
return nir_if_phi(b, res1, res2);
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
dgc_get_dispatch_initiator(nir_builder *b, nir_def *stream_buf, nir_def *stream_base, nir_def *pipeline_params_offset)
|
||||
{
|
||||
nir_def *res1, *res2;
|
||||
|
||||
nir_push_if(b, nir_ieq_imm(b, load_param8(b, bind_pipeline), 1));
|
||||
{
|
||||
nir_def *stream_offset = nir_iadd(b, pipeline_params_offset, stream_base);
|
||||
nir_def *pipeline_va = nir_load_ssbo(b, 1, 64, stream_buf, stream_offset);
|
||||
|
||||
nir_def *dispatch_initiator = load_param32(b, dispatch_initiator);
|
||||
nir_def *wave32 = nir_ieq_imm(b, load_metadata32(b, wave32), 1);
|
||||
res1 = nir_bcsel(b, wave32, nir_ior_imm(b, dispatch_initiator, S_00B800_CS_W32_EN(1)), dispatch_initiator);
|
||||
}
|
||||
nir_push_else(b, 0);
|
||||
{
|
||||
res2 = load_param32(b, dispatch_initiator);
|
||||
}
|
||||
nir_pop_if(b, 0);
|
||||
|
||||
return nir_if_phi(b, res1, res2);
|
||||
}
|
||||
|
||||
static void
|
||||
dgc_emit_dispatch(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, nir_def *stream_base,
|
||||
nir_def *dispatch_params_offset, nir_def *sequence_id, const struct radv_device *device)
|
||||
nir_def *dispatch_params_offset, nir_def *pipeline_params_offset, nir_def *sequence_id,
|
||||
const struct radv_device *device)
|
||||
{
|
||||
nir_def *stream_offset = nir_iadd(b, dispatch_params_offset, stream_base);
|
||||
|
||||
|
|
@ -1140,7 +1368,7 @@ dgc_emit_dispatch(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, ni
|
|||
nir_def *wg_y = nir_channel(b, dispatch_data, 1);
|
||||
nir_def *wg_z = nir_channel(b, dispatch_data, 2);
|
||||
|
||||
nir_def *grid_sgpr = load_param16(b, grid_base_sgpr);
|
||||
nir_def *grid_sgpr = dgc_get_grid_sgpr(b, stream_buf, stream_base, pipeline_params_offset);
|
||||
nir_push_if(b, nir_ine_imm(b, grid_sgpr, 0));
|
||||
{
|
||||
if (device->load_grid_size_from_user_sgpr) {
|
||||
|
|
@ -1156,7 +1384,8 @@ dgc_emit_dispatch(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, ni
|
|||
dgc_emit_sqtt_begin_api_marker(b, cs, ApiCmdDispatch);
|
||||
dgc_emit_sqtt_marker_event_with_dims(b, cs, sequence_id, wg_x, wg_y, wg_z, EventCmdDispatch);
|
||||
|
||||
dgc_emit_dispatch_direct(b, cs, wg_x, wg_y, wg_z, load_param32(b, dispatch_initiator));
|
||||
nir_def *dispatch_initiator = dgc_get_dispatch_initiator(b, stream_buf, stream_base, pipeline_params_offset);
|
||||
dgc_emit_dispatch_direct(b, cs, wg_x, wg_y, wg_z, dispatch_initiator);
|
||||
|
||||
dgc_emit_sqtt_thread_trace_marker(b, cs);
|
||||
dgc_emit_sqtt_end_api_marker(b, cs, ApiCmdDispatch);
|
||||
|
|
@ -1200,6 +1429,48 @@ dgc_emit_draw_mesh_tasks(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_
|
|||
nir_pop_if(b, NULL);
|
||||
}
|
||||
|
||||
/**
|
||||
* Emit VK_INDIRECT_COMMANDS_TOKEN_TYPE_PIPELINE_NV.
|
||||
*/
|
||||
static void
|
||||
dgc_emit_set_sh_reg_seq(nir_builder *b, struct dgc_cmdbuf *cs, unsigned reg, unsigned num)
|
||||
{
|
||||
nir_def *values[2] = {
|
||||
nir_imm_int(b, PKT3(PKT3_SET_SH_REG, num, false)),
|
||||
nir_imm_int(b, (reg - SI_SH_REG_OFFSET) >> 2),
|
||||
};
|
||||
dgc_emit(b, cs, nir_vec(b, values, 2));
|
||||
}
|
||||
|
||||
static void
|
||||
dgc_emit_bind_pipeline(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, nir_def *stream_base,
|
||||
nir_def *pipeline_params_offset, const struct radv_device *device)
|
||||
{
|
||||
nir_def *stream_offset = nir_iadd(b, pipeline_params_offset, stream_base);
|
||||
|
||||
nir_def *pipeline_va = nir_load_ssbo(b, 1, 64, stream_buf, stream_offset);
|
||||
|
||||
dgc_emit_set_sh_reg_seq(b, cs, R_00B830_COMPUTE_PGM_LO, 1);
|
||||
dgc_emit(b, cs, load_metadata32(b, shader_va));
|
||||
|
||||
dgc_emit_set_sh_reg_seq(b, cs, R_00B848_COMPUTE_PGM_RSRC1, 2);
|
||||
dgc_emit(b, cs, load_metadata32(b, rsrc1));
|
||||
dgc_emit(b, cs, load_metadata32(b, rsrc2));
|
||||
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
dgc_emit_set_sh_reg_seq(b, cs, R_00B8A0_COMPUTE_PGM_RSRC3, 1);
|
||||
dgc_emit(b, cs, load_metadata32(b, rsrc3));
|
||||
}
|
||||
|
||||
dgc_emit_set_sh_reg_seq(b, cs, R_00B854_COMPUTE_RESOURCE_LIMITS, 1);
|
||||
dgc_emit(b, cs, load_metadata32(b, compute_resource_limits));
|
||||
|
||||
dgc_emit_set_sh_reg_seq(b, cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
|
||||
dgc_emit(b, cs, load_metadata32(b, block_size_x));
|
||||
dgc_emit(b, cs, load_metadata32(b, block_size_y));
|
||||
dgc_emit(b, cs, load_metadata32(b, block_size_z));
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
dgc_is_cond_render_enabled(nir_builder *b)
|
||||
{
|
||||
|
|
@ -1300,7 +1571,14 @@ build_dgc_prepare_shader(struct radv_device *dev)
|
|||
nir_def *push_const_mask = load_param64(&b, push_constant_mask);
|
||||
nir_push_if(&b, nir_ine_imm(&b, push_const_mask, 0));
|
||||
{
|
||||
dgc_emit_push_constant(&b, &cmd_buf, stream_buf, stream_base, push_const_mask, upload_offset);
|
||||
dgc_emit_push_constant(&b, &cmd_buf, stream_buf, stream_base, load_param16(&b, pipeline_params_offset),
|
||||
push_const_mask, upload_offset);
|
||||
}
|
||||
nir_pop_if(&b, 0);
|
||||
|
||||
nir_push_if(&b, nir_ieq_imm(&b, load_param8(&b, bind_pipeline), 1));
|
||||
{
|
||||
dgc_emit_bind_pipeline(&b, &cmd_buf, stream_buf, stream_base, load_param16(&b, pipeline_params_offset), dev);
|
||||
}
|
||||
nir_pop_if(&b, 0);
|
||||
|
||||
|
|
@ -1353,8 +1631,8 @@ build_dgc_prepare_shader(struct radv_device *dev)
|
|||
}
|
||||
nir_push_else(&b, NULL);
|
||||
{
|
||||
dgc_emit_dispatch(&b, &cmd_buf, stream_buf, stream_base, load_param16(&b, dispatch_params_offset), sequence_id,
|
||||
dev);
|
||||
dgc_emit_dispatch(&b, &cmd_buf, stream_buf, stream_base, load_param16(&b, dispatch_params_offset),
|
||||
load_param16(&b, pipeline_params_offset), sequence_id, dev);
|
||||
}
|
||||
nir_pop_if(&b, NULL);
|
||||
|
||||
|
|
@ -1551,6 +1829,10 @@ radv_CreateIndirectCommandsLayoutNV(VkDevice _device, const VkIndirectCommandsLa
|
|||
layout->draw_mesh_tasks = true;
|
||||
layout->draw_params_offset = pCreateInfo->pTokens[i].offset;
|
||||
break;
|
||||
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PIPELINE_NV:
|
||||
layout->bind_pipeline = true;
|
||||
layout->pipeline_params_offset = pCreateInfo->pTokens[i].offset;
|
||||
break;
|
||||
default:
|
||||
unreachable("Unhandled token type");
|
||||
}
|
||||
|
|
@ -1761,8 +2043,6 @@ radv_prepare_dgc_compute(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCo
|
|||
{
|
||||
VK_FROM_HANDLE(radv_indirect_command_layout, layout, pGeneratedCommandsInfo->indirectCommandsLayout);
|
||||
VK_FROM_HANDLE(radv_pipeline, pipeline, pGeneratedCommandsInfo->pipeline);
|
||||
struct radv_compute_pipeline *compute_pipeline = radv_pipeline_to_compute(pipeline);
|
||||
struct radv_shader *cs = radv_get_shader(compute_pipeline->base.shaders, MESA_SHADER_COMPUTE);
|
||||
|
||||
*upload_size = MAX2(*upload_size, 16);
|
||||
|
||||
|
|
@ -1771,15 +2051,8 @@ radv_prepare_dgc_compute(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCo
|
|||
return;
|
||||
}
|
||||
|
||||
uint32_t dispatch_initiator = cmd_buffer->device->dispatch_initiator;
|
||||
dispatch_initiator |= S_00B800_FORCE_START_AT_000(1);
|
||||
if (cs->info.wave_size == 32) {
|
||||
assert(cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10);
|
||||
dispatch_initiator |= S_00B800_CS_W32_EN(1);
|
||||
}
|
||||
|
||||
params->dispatch_params_offset = layout->dispatch_params_offset;
|
||||
params->dispatch_initiator = dispatch_initiator;
|
||||
params->dispatch_initiator = cmd_buffer->device->dispatch_initiator | S_00B800_FORCE_START_AT_000(1);
|
||||
params->is_dispatch = 1;
|
||||
|
||||
if (cond_render_enabled) {
|
||||
|
|
@ -1788,9 +2061,22 @@ radv_prepare_dgc_compute(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCo
|
|||
params->predication_type = cmd_buffer->state.predication_type;
|
||||
}
|
||||
|
||||
const struct radv_userdata_info *loc = radv_get_user_sgpr(cs, AC_UD_CS_GRID_SIZE);
|
||||
if (loc->sgpr_idx != -1) {
|
||||
params->grid_base_sgpr = (cs->info.user_data_0 + 4 * loc->sgpr_idx - SI_SH_REG_OFFSET) >> 2;
|
||||
if (pipeline) {
|
||||
struct radv_compute_pipeline *compute_pipeline = radv_pipeline_to_compute(pipeline);
|
||||
struct radv_shader *cs = radv_get_shader(compute_pipeline->base.shaders, MESA_SHADER_COMPUTE);
|
||||
|
||||
if (cs->info.wave_size == 32) {
|
||||
assert(cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10);
|
||||
params->dispatch_initiator |= S_00B800_CS_W32_EN(1);
|
||||
}
|
||||
|
||||
const struct radv_userdata_info *loc = radv_get_user_sgpr(cs, AC_UD_CS_GRID_SIZE);
|
||||
if (loc->sgpr_idx != -1) {
|
||||
params->grid_base_sgpr = (cs->info.user_data_0 + 4 * loc->sgpr_idx - SI_SH_REG_OFFSET) >> 2;
|
||||
}
|
||||
} else {
|
||||
params->bind_pipeline = 1;
|
||||
params->pipeline_params_offset = layout->pipeline_params_offset;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1849,35 +2135,38 @@ radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCommandsIn
|
|||
upload_data = (char *)upload_data + ARRAY_SIZE(pipeline->shaders) * 12;
|
||||
|
||||
unsigned idx = 0;
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(pipeline->shaders); ++i) {
|
||||
if (!pipeline->shaders[i])
|
||||
continue;
|
||||
|
||||
const struct radv_shader *shader = pipeline->shaders[i];
|
||||
const struct radv_userdata_locations *locs = &shader->info.user_sgprs_locs;
|
||||
if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0)
|
||||
params.const_copy = 1;
|
||||
if (pipeline) {
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(pipeline->shaders); ++i) {
|
||||
if (!pipeline->shaders[i])
|
||||
continue;
|
||||
|
||||
if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0 ||
|
||||
locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx >= 0) {
|
||||
unsigned upload_sgpr = 0;
|
||||
unsigned inline_sgpr = 0;
|
||||
const struct radv_shader *shader = pipeline->shaders[i];
|
||||
const struct radv_userdata_locations *locs = &shader->info.user_sgprs_locs;
|
||||
if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0)
|
||||
params.const_copy = 1;
|
||||
|
||||
if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0) {
|
||||
upload_sgpr = (shader->info.user_data_0 + 4 * locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx -
|
||||
SI_SH_REG_OFFSET) >>
|
||||
2;
|
||||
if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0 ||
|
||||
locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx >= 0) {
|
||||
unsigned upload_sgpr = 0;
|
||||
unsigned inline_sgpr = 0;
|
||||
|
||||
if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0) {
|
||||
upload_sgpr = (shader->info.user_data_0 + 4 * locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx -
|
||||
SI_SH_REG_OFFSET) >>
|
||||
2;
|
||||
}
|
||||
|
||||
if (locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx >= 0) {
|
||||
inline_sgpr = (shader->info.user_data_0 +
|
||||
4 * locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx - SI_SH_REG_OFFSET) >>
|
||||
2;
|
||||
desc[idx * 3 + 1] = pipeline->shaders[i]->info.inline_push_constant_mask;
|
||||
desc[idx * 3 + 2] = pipeline->shaders[i]->info.inline_push_constant_mask >> 32;
|
||||
}
|
||||
desc[idx * 3] = upload_sgpr | (inline_sgpr << 16);
|
||||
++idx;
|
||||
}
|
||||
|
||||
if (locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx >= 0) {
|
||||
inline_sgpr = (shader->info.user_data_0 + 4 * locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx -
|
||||
SI_SH_REG_OFFSET) >>
|
||||
2;
|
||||
desc[idx * 3 + 1] = pipeline->shaders[i]->info.inline_push_constant_mask;
|
||||
desc[idx * 3 + 2] = pipeline->shaders[i]->info.inline_push_constant_mask >> 32;
|
||||
}
|
||||
desc[idx * 3] = upload_sgpr | (inline_sgpr << 16);
|
||||
++idx;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -3280,6 +3280,9 @@ struct radv_indirect_command_layout {
|
|||
|
||||
uint16_t dispatch_params_offset;
|
||||
|
||||
bool bind_pipeline;
|
||||
uint16_t pipeline_params_offset;
|
||||
|
||||
uint32_t bind_vbo_mask;
|
||||
uint32_t vbo_offsets[MAX_VBS];
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue