mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 16:08:04 +02:00
tu, ir3: Support runtime gl_SubgroupSize in FS
We already supported it in the compute shader (CS) for computing the subgroup ID, but soon we'll need it in the fragment shader (FS) too. Vertex stages will always have it lowered.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13960>
This commit is contained in:
parent
e6e34883a9
commit
1a1e25dcce
5 changed files with 46 additions and 9 deletions
|
|
@ -2103,9 +2103,14 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
|||
dst[i] = create_driver_param(ctx, IR3_DP_LOCAL_GROUP_SIZE_X + i);
|
||||
}
|
||||
break;
|
||||
case nir_intrinsic_load_subgroup_size:
|
||||
dst[0] = create_driver_param(ctx, IR3_DP_SUBGROUP_SIZE);
|
||||
case nir_intrinsic_load_subgroup_size: {
|
||||
assert(ctx->so->type == MESA_SHADER_COMPUTE ||
|
||||
ctx->so->type == MESA_SHADER_FRAGMENT);
|
||||
enum ir3_driver_param size = ctx->so->type == MESA_SHADER_COMPUTE ?
|
||||
IR3_DP_CS_SUBGROUP_SIZE : IR3_DP_FS_SUBGROUP_SIZE;
|
||||
dst[0] = create_driver_param(ctx, size);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_subgroup_id_shift_ir3:
|
||||
dst[0] = create_driver_param(ctx, IR3_DP_SUBGROUP_ID_SHIFT);
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -870,10 +870,15 @@ ir3_nir_scan_driver_consts(struct ir3_compiler *compiler, nir_shader *shader, st
|
|||
layout->num_driver_params =
|
||||
MAX2(layout->num_driver_params, IR3_DP_BASE_GROUP_Z + 1);
|
||||
break;
|
||||
case nir_intrinsic_load_subgroup_size:
|
||||
case nir_intrinsic_load_subgroup_size: {
|
||||
assert(shader->info.stage == MESA_SHADER_COMPUTE ||
|
||||
shader->info.stage == MESA_SHADER_FRAGMENT);
|
||||
enum ir3_driver_param size = shader->info.stage == MESA_SHADER_COMPUTE ?
|
||||
IR3_DP_CS_SUBGROUP_SIZE : IR3_DP_FS_SUBGROUP_SIZE;
|
||||
layout->num_driver_params =
|
||||
MAX2(layout->num_driver_params, IR3_DP_SUBGROUP_SIZE + 1);
|
||||
MAX2(layout->num_driver_params, size + 1);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_subgroup_id_shift_ir3:
|
||||
layout->num_driver_params =
|
||||
MAX2(layout->num_driver_params, IR3_DP_SUBGROUP_ID_SHIFT + 1);
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ enum ir3_driver_param {
|
|||
IR3_DP_BASE_GROUP_X = 4,
|
||||
IR3_DP_BASE_GROUP_Y = 5,
|
||||
IR3_DP_BASE_GROUP_Z = 6,
|
||||
IR3_DP_SUBGROUP_SIZE = 7,
|
||||
IR3_DP_CS_SUBGROUP_SIZE = 7,
|
||||
IR3_DP_LOCAL_GROUP_SIZE_X = 8,
|
||||
IR3_DP_LOCAL_GROUP_SIZE_Y = 9,
|
||||
IR3_DP_LOCAL_GROUP_SIZE_Z = 10,
|
||||
|
|
@ -70,7 +70,10 @@ enum ir3_driver_param {
|
|||
IR3_DP_UCP0_X = 4,
|
||||
/* .... */
|
||||
IR3_DP_UCP7_W = 35,
|
||||
IR3_DP_VS_COUNT = 36 /* must be aligned to vec4 */
|
||||
IR3_DP_VS_COUNT = 36, /* must be aligned to vec4 */
|
||||
|
||||
/* fragment shader driver params: */
|
||||
IR3_DP_FS_SUBGROUP_SIZE = 0,
|
||||
};
|
||||
|
||||
#define IR3_MAX_SHADER_BUFFERS 32
|
||||
|
|
|
|||
|
|
@ -4347,7 +4347,7 @@ tu_emit_compute_driver_params(struct tu_cmd_buffer *cmd,
|
|||
[IR3_DP_BASE_GROUP_X] = info->offsets[0],
|
||||
[IR3_DP_BASE_GROUP_Y] = info->offsets[1],
|
||||
[IR3_DP_BASE_GROUP_Z] = info->offsets[2],
|
||||
[IR3_DP_SUBGROUP_SIZE] = subgroup_size,
|
||||
[IR3_DP_CS_SUBGROUP_SIZE] = subgroup_size,
|
||||
[IR3_DP_SUBGROUP_ID_SHIFT] = subgroup_shift,
|
||||
};
|
||||
|
||||
|
|
@ -4399,8 +4399,8 @@ tu_emit_compute_driver_params(struct tu_cmd_buffer *cmd,
|
|||
tu_cs_emit_qw(cs, global_iova(cmd, cs_indirect_xyz[0]));
|
||||
}
|
||||
|
||||
/* Fill out IR3_DP_SUBGROUP_SIZE and IR3_DP_SUBGROUP_ID_SHIFT for indirect
|
||||
* dispatch.
|
||||
/* Fill out IR3_DP_CS_SUBGROUP_SIZE and IR3_DP_SUBGROUP_ID_SHIFT for
|
||||
* indirect dispatch.
|
||||
*/
|
||||
if (info->indirect && num_consts > IR3_DP_BASE_GROUP_X) {
|
||||
tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 7);
|
||||
|
|
|
|||
|
|
@ -617,6 +617,30 @@ tu6_emit_xs(struct tu_cs *cs,
|
|||
tu_cs_emit_qw(cs, iova + start);
|
||||
}
|
||||
}
|
||||
|
||||
/* emit FS driver param */
|
||||
if (stage == MESA_SHADER_FRAGMENT && const_state->num_driver_params > 0) {
|
||||
uint32_t base = const_state->offsets.driver_param;
|
||||
int32_t size = DIV_ROUND_UP(const_state->num_driver_params, 4);
|
||||
size = MAX2(MIN2(size + base, xs->constlen) - base, 0);
|
||||
|
||||
if (size > 0) {
|
||||
tu_cs_emit_pkt7(cs, tu6_stage2opcode(stage), 3 + size * 4);
|
||||
tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(base) |
|
||||
CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
|
||||
CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
|
||||
CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(stage)) |
|
||||
CP_LOAD_STATE6_0_NUM_UNIT(size));
|
||||
tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
|
||||
tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
|
||||
|
||||
assert(size == 1);
|
||||
tu_cs_emit(cs, xs->info.double_threadsize ? 128 : 64);
|
||||
tu_cs_emit(cs, 0);
|
||||
tu_cs_emit(cs, 0);
|
||||
tu_cs_emit(cs, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue