radv: optimise compute shader grid size emission.

Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Signed-off-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:
Dave Airlie 2017-04-18 05:23:47 +10:00
parent 31174069d2
commit ec15e0d301
4 changed files with 29 additions and 13 deletions

View file

@@ -604,7 +604,8 @@ static void create_function(struct nir_to_llvm_context *ctx)
switch (ctx->stage) { switch (ctx->stage) {
case MESA_SHADER_COMPUTE: case MESA_SHADER_COMPUTE:
arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3); /* grid size */ if (ctx->shader_info->info.cs.grid_components_used)
arg_types[arg_idx++] = LLVMVectorType(ctx->i32, ctx->shader_info->info.cs.grid_components_used); /* grid size */
user_sgpr_count = arg_idx; user_sgpr_count = arg_idx;
arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3); arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3);
arg_types[arg_idx++] = ctx->i32; arg_types[arg_idx++] = ctx->i32;
@@ -762,10 +763,12 @@ static void create_function(struct nir_to_llvm_context *ctx)
switch (ctx->stage) { switch (ctx->stage) {
case MESA_SHADER_COMPUTE: case MESA_SHADER_COMPUTE:
set_userdata_location_shader(ctx, AC_UD_CS_GRID_SIZE, user_sgpr_idx, 3); if (ctx->shader_info->info.cs.grid_components_used) {
user_sgpr_idx += 3; set_userdata_location_shader(ctx, AC_UD_CS_GRID_SIZE, user_sgpr_idx, ctx->shader_info->info.cs.grid_components_used);
ctx->num_work_groups = user_sgpr_idx += ctx->shader_info->info.cs.grid_components_used;
LLVMGetParam(ctx->main_function, arg_idx++); ctx->num_work_groups =
LLVMGetParam(ctx->main_function, arg_idx++);
}
ctx->workgroup_ids = ctx->workgroup_ids =
LLVMGetParam(ctx->main_function, arg_idx++); LLVMGetParam(ctx->main_function, arg_idx++);
ctx->tg_size = ctx->tg_size =

View file

@@ -33,6 +33,9 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, struct ac_shader_info *info)
case nir_intrinsic_load_draw_id: case nir_intrinsic_load_draw_id:
info->vs.needs_draw_id = true; info->vs.needs_draw_id = true;
break; break;
case nir_intrinsic_load_num_work_groups:
info->cs.grid_components_used = instr->num_components;
break;
default: default:
break; break;
} }

View file

@@ -35,6 +35,9 @@ struct ac_shader_info {
struct { struct {
bool needs_sample_positions; bool needs_sample_positions;
} ps; } ps;
struct {
uint8_t grid_components_used;
} cs;
}; };
void void

View file

@@ -2781,11 +2781,14 @@ void radv_CmdDispatch(
MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE); MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE);
if (loc->sgpr_idx != -1) { if (loc->sgpr_idx != -1) {
assert(!loc->indirect); assert(!loc->indirect);
assert(loc->num_sgprs == 3); uint8_t grid_used = cmd_buffer->state.pipeline->shaders[MESA_SHADER_COMPUTE]->info.info.cs.grid_components_used;
radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4, 3); assert(loc->num_sgprs == grid_used);
radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4, grid_used);
radeon_emit(cmd_buffer->cs, x); radeon_emit(cmd_buffer->cs, x);
radeon_emit(cmd_buffer->cs, y); if (grid_used > 1)
radeon_emit(cmd_buffer->cs, z); radeon_emit(cmd_buffer->cs, y);
if (grid_used > 2)
radeon_emit(cmd_buffer->cs, z);
} }
radeon_emit(cmd_buffer->cs, PKT3(PKT3_DISPATCH_DIRECT, 3, 0) | radeon_emit(cmd_buffer->cs, PKT3(PKT3_DISPATCH_DIRECT, 3, 0) |
@@ -2817,7 +2820,8 @@ void radv_CmdDispatchIndirect(
struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.compute_pipeline, struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.compute_pipeline,
MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE); MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE);
if (loc->sgpr_idx != -1) { if (loc->sgpr_idx != -1) {
for (unsigned i = 0; i < 3; ++i) { uint8_t grid_used = cmd_buffer->state.pipeline->shaders[MESA_SHADER_COMPUTE]->info.info.cs.grid_components_used;
for (unsigned i = 0; i < grid_used; ++i) {
radeon_emit(cmd_buffer->cs, PKT3(PKT3_COPY_DATA, 4, 0)); radeon_emit(cmd_buffer->cs, PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(cmd_buffer->cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) | radeon_emit(cmd_buffer->cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
COPY_DATA_DST_SEL(COPY_DATA_REG)); COPY_DATA_DST_SEL(COPY_DATA_REG));
@@ -2888,10 +2892,13 @@ void radv_unaligned_dispatch(
struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.compute_pipeline, struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.compute_pipeline,
MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE); MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE);
if (loc->sgpr_idx != -1) { if (loc->sgpr_idx != -1) {
radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4, 3); uint8_t grid_used = cmd_buffer->state.pipeline->shaders[MESA_SHADER_COMPUTE]->info.info.cs.grid_components_used;
radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4, grid_used);
radeon_emit(cmd_buffer->cs, blocks[0]); radeon_emit(cmd_buffer->cs, blocks[0]);
radeon_emit(cmd_buffer->cs, blocks[1]); if (grid_used > 1)
radeon_emit(cmd_buffer->cs, blocks[2]); radeon_emit(cmd_buffer->cs, blocks[1]);
if (grid_used > 2)
radeon_emit(cmd_buffer->cs, blocks[2]);
} }
radeon_emit(cmd_buffer->cs, PKT3(PKT3_DISPATCH_DIRECT, 3, 0) | radeon_emit(cmd_buffer->cs, PKT3(PKT3_DISPATCH_DIRECT, 3, 0) |
PKT3_SHADER_TYPE_S(1)); PKT3_SHADER_TYPE_S(1));