tu, ir3: Support runtime gl_SubgroupSize in FS

We already support it in compute shaders (CS), where it is used to compute
the subgroup ID, but we will soon need it in fragment shaders (FS) too.
Vertex stages always have it lowered, so they never load it at runtime.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13960>
This commit is contained in:
Connor Abbott 2021-11-25 16:55:01 +01:00 committed by Marge Bot
parent e6e34883a9
commit 1a1e25dcce
5 changed files with 46 additions and 9 deletions

View file

@ -2103,9 +2103,14 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
dst[i] = create_driver_param(ctx, IR3_DP_LOCAL_GROUP_SIZE_X + i);
}
break;
case nir_intrinsic_load_subgroup_size:
dst[0] = create_driver_param(ctx, IR3_DP_SUBGROUP_SIZE);
case nir_intrinsic_load_subgroup_size: {
assert(ctx->so->type == MESA_SHADER_COMPUTE ||
ctx->so->type == MESA_SHADER_FRAGMENT);
enum ir3_driver_param size = ctx->so->type == MESA_SHADER_COMPUTE ?
IR3_DP_CS_SUBGROUP_SIZE : IR3_DP_FS_SUBGROUP_SIZE;
dst[0] = create_driver_param(ctx, size);
break;
}
case nir_intrinsic_load_subgroup_id_shift_ir3:
dst[0] = create_driver_param(ctx, IR3_DP_SUBGROUP_ID_SHIFT);
break;

View file

@ -870,10 +870,15 @@ ir3_nir_scan_driver_consts(struct ir3_compiler *compiler, nir_shader *shader, st
layout->num_driver_params =
MAX2(layout->num_driver_params, IR3_DP_BASE_GROUP_Z + 1);
break;
case nir_intrinsic_load_subgroup_size:
case nir_intrinsic_load_subgroup_size: {
assert(shader->info.stage == MESA_SHADER_COMPUTE ||
shader->info.stage == MESA_SHADER_FRAGMENT);
enum ir3_driver_param size = shader->info.stage == MESA_SHADER_COMPUTE ?
IR3_DP_CS_SUBGROUP_SIZE : IR3_DP_FS_SUBGROUP_SIZE;
layout->num_driver_params =
MAX2(layout->num_driver_params, IR3_DP_SUBGROUP_SIZE + 1);
MAX2(layout->num_driver_params, size + 1);
break;
}
case nir_intrinsic_load_subgroup_id_shift_ir3:
layout->num_driver_params =
MAX2(layout->num_driver_params, IR3_DP_SUBGROUP_ID_SHIFT + 1);

View file

@ -49,7 +49,7 @@ enum ir3_driver_param {
IR3_DP_BASE_GROUP_X = 4,
IR3_DP_BASE_GROUP_Y = 5,
IR3_DP_BASE_GROUP_Z = 6,
IR3_DP_SUBGROUP_SIZE = 7,
IR3_DP_CS_SUBGROUP_SIZE = 7,
IR3_DP_LOCAL_GROUP_SIZE_X = 8,
IR3_DP_LOCAL_GROUP_SIZE_Y = 9,
IR3_DP_LOCAL_GROUP_SIZE_Z = 10,
@ -70,7 +70,10 @@ enum ir3_driver_param {
IR3_DP_UCP0_X = 4,
/* .... */
IR3_DP_UCP7_W = 35,
IR3_DP_VS_COUNT = 36 /* must be aligned to vec4 */
IR3_DP_VS_COUNT = 36, /* must be aligned to vec4 */
/* fragment shader driver params: */
IR3_DP_FS_SUBGROUP_SIZE = 0,
};
#define IR3_MAX_SHADER_BUFFERS 32

View file

@ -4347,7 +4347,7 @@ tu_emit_compute_driver_params(struct tu_cmd_buffer *cmd,
[IR3_DP_BASE_GROUP_X] = info->offsets[0],
[IR3_DP_BASE_GROUP_Y] = info->offsets[1],
[IR3_DP_BASE_GROUP_Z] = info->offsets[2],
[IR3_DP_SUBGROUP_SIZE] = subgroup_size,
[IR3_DP_CS_SUBGROUP_SIZE] = subgroup_size,
[IR3_DP_SUBGROUP_ID_SHIFT] = subgroup_shift,
};
@ -4399,8 +4399,8 @@ tu_emit_compute_driver_params(struct tu_cmd_buffer *cmd,
tu_cs_emit_qw(cs, global_iova(cmd, cs_indirect_xyz[0]));
}
/* Fill out IR3_DP_SUBGROUP_SIZE and IR3_DP_SUBGROUP_ID_SHIFT for indirect
* dispatch.
/* Fill out IR3_DP_CS_SUBGROUP_SIZE and IR3_DP_SUBGROUP_ID_SHIFT for
* indirect dispatch.
*/
if (info->indirect && num_consts > IR3_DP_BASE_GROUP_X) {
tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 7);

View file

@ -617,6 +617,30 @@ tu6_emit_xs(struct tu_cs *cs,
tu_cs_emit_qw(cs, iova + start);
}
}
/* emit FS driver params: a single vec4 whose first component is the subgroup size */
if (stage == MESA_SHADER_FRAGMENT && const_state->num_driver_params > 0) {
uint32_t base = const_state->offsets.driver_param;
int32_t size = DIV_ROUND_UP(const_state->num_driver_params, 4);
size = MAX2(MIN2(size + base, xs->constlen) - base, 0);
if (size > 0) {
tu_cs_emit_pkt7(cs, tu6_stage2opcode(stage), 3 + size * 4);
tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(base) |
CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(stage)) |
CP_LOAD_STATE6_0_NUM_UNIT(size));
tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
assert(size == 1);
tu_cs_emit(cs, xs->info.double_threadsize ? 128 : 64);
tu_cs_emit(cs, 0);
tu_cs_emit(cs, 0);
tu_cs_emit(cs, 0);
}
}
}
static void