diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index 6e7f88198f7..c7357114d5f 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -171,7 +171,7 @@ create_frag_input(struct ir3_context *ctx, struct ir3_instruction *coord, } static struct ir3_instruction * -create_driver_param(struct ir3_context *ctx, enum ir3_driver_param dp) +create_driver_param(struct ir3_context *ctx, uint32_t dp) { /* first four vec4 sysval's reserved for UBOs: */ /* NOTE: dp is in scalar, but there can be >4 dp components: */ @@ -182,7 +182,7 @@ create_driver_param(struct ir3_context *ctx, enum ir3_driver_param dp) } static struct ir3_instruction * -create_driver_param_indirect(struct ir3_context *ctx, enum ir3_driver_param dp, +create_driver_param_indirect(struct ir3_context *ctx, uint32_t dp, struct ir3_instruction *address) { /* first four vec4 sysval's reserved for UBOs: */ @@ -2798,25 +2798,25 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) case nir_intrinsic_load_base_vertex: case nir_intrinsic_load_first_vertex: if (!ctx->basevertex) { - ctx->basevertex = create_driver_param(ctx, IR3_DP_VTXID_BASE); + ctx->basevertex = create_driver_param(ctx, IR3_DP_VS(vtxid_base)); } dst[0] = ctx->basevertex; break; case nir_intrinsic_load_is_indexed_draw: if (!ctx->is_indexed_draw) { - ctx->is_indexed_draw = create_driver_param(ctx, IR3_DP_IS_INDEXED_DRAW); + ctx->is_indexed_draw = create_driver_param(ctx, IR3_DP_VS(is_indexed_draw)); } dst[0] = ctx->is_indexed_draw; break; case nir_intrinsic_load_draw_id: if (!ctx->draw_id) { - ctx->draw_id = create_driver_param(ctx, IR3_DP_DRAWID); + ctx->draw_id = create_driver_param(ctx, IR3_DP_VS(draw_id)); } dst[0] = ctx->draw_id; break; case nir_intrinsic_load_base_instance: if (!ctx->base_instance) { - ctx->base_instance = create_driver_param(ctx, IR3_DP_INSTID_BASE); + ctx->base_instance = create_driver_param(ctx, IR3_DP_VS(instid_base)); } dst[0] = ctx->base_instance; break; @@ -2863,7 +2863,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) idx = nir_intrinsic_ucp_id(intr); for (int i = 0; i < dest_components; i++) { unsigned n = idx * 4 + i; - dst[i] = create_driver_param(ctx, IR3_DP_UCP0_X + n); + dst[i] = create_driver_param(ctx, IR3_DP_VS(ucp[0].x) + n); } create_rpt = true; break; @@ -2899,41 +2899,41 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) } else { /* For a3xx/a4xx, this comes in via const injection by the hw */ for (int i = 0; i < dest_components; i++) { - dst[i] = create_driver_param(ctx, IR3_DP_WORKGROUP_ID_X + i); + dst[i] = create_driver_param(ctx, IR3_DP_CS(workgroup_id_x) + i); } } break; case nir_intrinsic_load_base_workgroup_id: for (int i = 0; i < dest_components; i++) { - dst[i] = create_driver_param(ctx, IR3_DP_BASE_GROUP_X + i); + dst[i] = create_driver_param(ctx, IR3_DP_CS(base_group_x) + i); } create_rpt = true; break; case nir_intrinsic_load_num_workgroups: for (int i = 0; i < dest_components; i++) { - dst[i] = create_driver_param(ctx, IR3_DP_NUM_WORK_GROUPS_X + i); + dst[i] = create_driver_param(ctx, IR3_DP_CS(num_work_groups_x) + i); } create_rpt = true; break; case nir_intrinsic_load_workgroup_size: for (int i = 0; i < dest_components; i++) { - dst[i] = create_driver_param(ctx, IR3_DP_LOCAL_GROUP_SIZE_X + i); + dst[i] = create_driver_param(ctx, IR3_DP_CS(local_group_size_x) + i); } create_rpt = true; break; case nir_intrinsic_load_subgroup_size: { assert(ctx->so->type == MESA_SHADER_COMPUTE || ctx->so->type == MESA_SHADER_FRAGMENT); - enum ir3_driver_param size = ctx->so->type == MESA_SHADER_COMPUTE ? - IR3_DP_CS_SUBGROUP_SIZE : IR3_DP_FS_SUBGROUP_SIZE; + unsigned size = ctx->so->type == MESA_SHADER_COMPUTE ? + IR3_DP_CS(subgroup_size) : IR3_DP_FS(subgroup_size); dst[0] = create_driver_param(ctx, size); break; } case nir_intrinsic_load_subgroup_id_shift_ir3: - dst[0] = create_driver_param(ctx, IR3_DP_SUBGROUP_ID_SHIFT); + dst[0] = create_driver_param(ctx, IR3_DP_CS(subgroup_id_shift)); break; case nir_intrinsic_load_work_dim: - dst[0] = create_driver_param(ctx, IR3_DP_WORK_DIM); + dst[0] = create_driver_param(ctx, IR3_DP_CS(work_dim)); break; case nir_intrinsic_load_subgroup_invocation: assert(ctx->compiler->has_getfiberid); @@ -2943,24 +2943,24 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) break; case nir_intrinsic_load_tess_level_outer_default: for (int i = 0; i < dest_components; i++) { - dst[i] = create_driver_param(ctx, IR3_DP_HS_DEFAULT_OUTER_LEVEL_X + i); + dst[i] = create_driver_param(ctx, IR3_DP_TCS(default_outer_level_x) + i); } create_rpt = true; break; case nir_intrinsic_load_tess_level_inner_default: for (int i = 0; i < dest_components; i++) { - dst[i] = create_driver_param(ctx, IR3_DP_HS_DEFAULT_INNER_LEVEL_X + i); + dst[i] = create_driver_param(ctx, IR3_DP_TCS(default_inner_level_x) + i); } create_rpt = true; break; case nir_intrinsic_load_frag_invocation_count: - dst[0] = create_driver_param(ctx, IR3_DP_FS_FRAG_INVOCATION_COUNT); + dst[0] = create_driver_param(ctx, IR3_DP_FS(frag_invocation_count)); break; case nir_intrinsic_load_frag_size_ir3: case nir_intrinsic_load_frag_offset_ir3: { - enum ir3_driver_param param = + unsigned param = intr->intrinsic == nir_intrinsic_load_frag_size_ir3 ? - IR3_DP_FS_FRAG_SIZE : IR3_DP_FS_FRAG_OFFSET; + IR3_DP_FS(frag_size) : IR3_DP_FS(frag_offset); if (nir_src_is_const(intr->src[0])) { uint32_t view = nir_src_as_uint(intr->src[0]); for (int i = 0; i < dest_components; i++) { @@ -4559,7 +4559,7 @@ emit_stream_out(struct ir3_context *ctx) * of the shader: */ vtxcnt = create_sysval_input(ctx, SYSTEM_VALUE_VERTEX_CNT, 0x1); - maxvtxcnt = create_driver_param(ctx, IR3_DP_VTXCNT_MAX); + maxvtxcnt = create_driver_param(ctx, IR3_DP_VS(vtxcnt_max)); /* at this point, we are at the original 'end' block, * re-purpose this block to stream-out condition, then diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index ad709b9a284..41a206e2cab 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -1188,61 +1188,61 @@ ir3_get_driver_param_info(const nir_shader *shader, nir_intrinsic_instr *intr, { switch (intr->intrinsic) { case nir_intrinsic_load_base_workgroup_id: - param_info->offset = IR3_DP_BASE_GROUP_X; + param_info->offset = IR3_DP_CS(base_group_x); break; case nir_intrinsic_load_num_workgroups: - param_info->offset = IR3_DP_NUM_WORK_GROUPS_X; + param_info->offset = IR3_DP_CS(num_work_groups_x); break; case nir_intrinsic_load_workgroup_size: - param_info->offset = IR3_DP_LOCAL_GROUP_SIZE_X; + param_info->offset = IR3_DP_CS(local_group_size_x); break; case nir_intrinsic_load_subgroup_size: assert(shader->info.stage == MESA_SHADER_COMPUTE || shader->info.stage == MESA_SHADER_FRAGMENT); if (shader->info.stage == MESA_SHADER_COMPUTE) { - param_info->offset = IR3_DP_CS_SUBGROUP_SIZE; + param_info->offset = IR3_DP_CS(subgroup_size); } else { - param_info->offset = IR3_DP_FS_SUBGROUP_SIZE; + param_info->offset = IR3_DP_FS(subgroup_size); } break; case nir_intrinsic_load_subgroup_id_shift_ir3: - param_info->offset = IR3_DP_SUBGROUP_ID_SHIFT; + param_info->offset = IR3_DP_CS(subgroup_id_shift); break; case nir_intrinsic_load_work_dim: - param_info->offset = IR3_DP_WORK_DIM; + param_info->offset = IR3_DP_CS(work_dim); break; case nir_intrinsic_load_base_vertex: case nir_intrinsic_load_first_vertex: - param_info->offset = IR3_DP_VTXID_BASE; + param_info->offset = IR3_DP_VS(vtxid_base); break; case nir_intrinsic_load_is_indexed_draw: - param_info->offset = IR3_DP_IS_INDEXED_DRAW; + param_info->offset = IR3_DP_VS(is_indexed_draw); break; case nir_intrinsic_load_draw_id: - param_info->offset = IR3_DP_DRAWID; + param_info->offset = IR3_DP_VS(draw_id); break; case nir_intrinsic_load_base_instance: - param_info->offset = IR3_DP_INSTID_BASE; + param_info->offset = IR3_DP_VS(instid_base); break; case nir_intrinsic_load_user_clip_plane: { uint32_t idx = nir_intrinsic_ucp_id(intr); - param_info->offset = IR3_DP_UCP0_X + 4 * idx; + param_info->offset = IR3_DP_VS(ucp[0].x) + 4 * idx; break; } case nir_intrinsic_load_tess_level_outer_default: - param_info->offset = IR3_DP_HS_DEFAULT_OUTER_LEVEL_X; + param_info->offset = IR3_DP_TCS(default_outer_level_x); break; case nir_intrinsic_load_tess_level_inner_default: - param_info->offset = IR3_DP_HS_DEFAULT_INNER_LEVEL_X; + param_info->offset = IR3_DP_TCS(default_inner_level_x); break; case nir_intrinsic_load_frag_size_ir3: - param_info->offset = IR3_DP_FS_FRAG_SIZE; + param_info->offset = IR3_DP_FS(frag_size); break; case nir_intrinsic_load_frag_offset_ir3: - param_info->offset = IR3_DP_FS_FRAG_OFFSET; + param_info->offset = IR3_DP_FS(frag_offset); break; case nir_intrinsic_load_frag_invocation_count: - param_info->offset = IR3_DP_FS_FRAG_INVOCATION_COUNT; + param_info->offset = IR3_DP_FS(frag_invocation_count); break; default: return false; @@ -1306,7 +1306,7 @@ ir3_nir_scan_driver_consts(struct ir3_compiler *compiler, nir_shader *shader, st if (!compiler->has_shared_regfile && shader->info.stage == MESA_SHADER_COMPUTE) { layout->num_driver_params = - MAX2(layout->num_driver_params, IR3_DP_WORKGROUP_ID_Z + 1); + MAX2(layout->num_driver_params, IR3_DP_CS(workgroup_id_z) + 1); } } @@ -1338,7 +1338,7 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v, if ((compiler->gen < 5) && (v->stream_output.num_outputs > 0)) { const_state->num_driver_params = - MAX2(const_state->num_driver_params, IR3_DP_VTXCNT_MAX + 1); + MAX2(const_state->num_driver_params, IR3_DP_VS(vtxcnt_max) + 1); } const_state->num_ubos = nir->info.num_ubos; @@ -1375,7 +1375,7 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v, const_state->num_driver_params = align(const_state->num_driver_params, 4); unsigned upload_unit = 1; if (v->type == MESA_SHADER_COMPUTE || - (const_state->num_driver_params >= IR3_DP_VTXID_BASE)) { + (const_state->num_driver_params >= IR3_DP_VS(vtxid_base))) { upload_unit = compiler->const_upload_unit; } diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index 5bb689341ec..99034b591f3 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -21,60 +21,95 @@ BEGINC; -/* driver param indices: */ -enum ir3_driver_param { - /* compute shader driver params: */ - IR3_DP_NUM_WORK_GROUPS_X = 0, - IR3_DP_NUM_WORK_GROUPS_Y = 1, - IR3_DP_NUM_WORK_GROUPS_Z = 2, - IR3_DP_WORK_DIM = 3, - IR3_DP_BASE_GROUP_X = 4, - IR3_DP_BASE_GROUP_Y = 5, - IR3_DP_BASE_GROUP_Z = 6, - IR3_DP_CS_SUBGROUP_SIZE = 7, - IR3_DP_LOCAL_GROUP_SIZE_X = 8, - IR3_DP_LOCAL_GROUP_SIZE_Y = 9, - IR3_DP_LOCAL_GROUP_SIZE_Z = 10, - IR3_DP_SUBGROUP_ID_SHIFT = 11, - IR3_DP_WORKGROUP_ID_X = 12, - IR3_DP_WORKGROUP_ID_Y = 13, - IR3_DP_WORKGROUP_ID_Z = 14, +#define dword_offsetof(type, name) DIV_ROUND_UP(offsetof(type, name), 4) +#define dword_sizeof(type) DIV_ROUND_UP(sizeof(type), 4) + +/** + * Driver params for compute shaders. + * + * Note, driver param structs should be size aligned to vec4 + */ +struct ir3_driver_params_cs { /* NOTE: gl_NumWorkGroups should be vec4 aligned because * glDispatchComputeIndirect() needs to load these from * the info->indirect buffer. Keep that in mind when/if * adding any addition CS driver params. */ - IR3_DP_CS_COUNT = 16, /* must be aligned to vec4 */ - - /* vertex shader driver params: */ - IR3_DP_DRAWID = 0, - IR3_DP_VTXID_BASE = 1, - IR3_DP_INSTID_BASE = 2, - IR3_DP_VTXCNT_MAX = 3, - IR3_DP_IS_INDEXED_DRAW = 4, /* Note: boolean, ie. 0 or ~0 */ - /* user-clip-plane components, up to 8x vec4's: */ - IR3_DP_UCP0_X = 5, - /* .... */ - IR3_DP_UCP7_W = 36, - IR3_DP_VS_COUNT = 40, /* must be aligned to vec4 */ - - /* TCS driver params: */ - IR3_DP_HS_DEFAULT_OUTER_LEVEL_X = 0, - IR3_DP_HS_DEFAULT_OUTER_LEVEL_Y = 1, - IR3_DP_HS_DEFAULT_OUTER_LEVEL_Z = 2, - IR3_DP_HS_DEFAULT_OUTER_LEVEL_W = 3, - IR3_DP_HS_DEFAULT_INNER_LEVEL_X = 4, - IR3_DP_HS_DEFAULT_INNER_LEVEL_Y = 5, - IR3_DP_HS_COUNT = 8, /* must be aligned to vec4 */ - - /* fragment shader driver params: */ - IR3_DP_FS_SUBGROUP_SIZE = 0, - /* Dynamic params (that aren't known when compiling the shader) */ - IR3_DP_FS_DYNAMIC = 4, - IR3_DP_FS_FRAG_INVOCATION_COUNT = IR3_DP_FS_DYNAMIC, - IR3_DP_FS_FRAG_SIZE = IR3_DP_FS_DYNAMIC + 4, - IR3_DP_FS_FRAG_OFFSET = IR3_DP_FS_DYNAMIC + 6, + uint32_t num_work_groups_x; + uint32_t num_work_groups_y; + uint32_t num_work_groups_z; + uint32_t work_dim; + uint32_t base_group_x; + uint32_t base_group_y; + uint32_t base_group_z; + uint32_t subgroup_size; + uint32_t local_group_size_x; + uint32_t local_group_size_y; + uint32_t local_group_size_z; + uint32_t subgroup_id_shift; + uint32_t workgroup_id_x; + uint32_t workgroup_id_y; + uint32_t workgroup_id_z; + uint32_t __pad; }; +#define IR3_DP_CS(name) dword_offsetof(struct ir3_driver_params_cs, name) + +/** + * Driver params for vertex shaders. + * + * Note, driver param structs should be size aligned to vec4 + */ +struct ir3_driver_params_vs { + uint32_t draw_id; + uint32_t vtxid_base; + uint32_t instid_base; + uint32_t vtxcnt_max; + uint32_t is_indexed_draw; /* Note: boolean, ie. 0 or ~0 */ + /* user-clip-plane components, up to 8x vec4's: */ + struct { + uint32_t x; + uint32_t y; + uint32_t z; + uint32_t w; + } ucp[8]; + uint32_t __pad_37_39[3]; +}; +#define IR3_DP_VS(name) dword_offsetof(struct ir3_driver_params_vs, name) + +/** + * Driver params for TCS shaders. + * + * Note, driver param structs should be size aligned to vec4 + */ +struct ir3_driver_params_tcs { + uint32_t default_outer_level_x; + uint32_t default_outer_level_y; + uint32_t default_outer_level_z; + uint32_t default_outer_level_w; + uint32_t default_inner_level_x; + uint32_t default_inner_level_y; + uint32_t __pad_06_07[2]; +}; +#define IR3_DP_TCS(name) dword_offsetof(struct ir3_driver_params_tcs, name) + +/** + * Driver params for fragment shaders. + * + * Note, driver param structs should be size aligned to vec4 + */ +struct ir3_driver_params_fs { + uint32_t subgroup_size; + uint32_t __pad_01_03[3]; + /* Dynamic params (that aren't known when compiling the shader) */ +#define IR3_DP_FS_DYNAMIC dword_offsetof(struct ir3_driver_params_fs, frag_invocation_count) + uint32_t frag_invocation_count; + uint32_t __pad_05_07[3]; + uint32_t frag_size; + uint32_t __pad_09; + uint32_t frag_offset; + uint32_t __pad_11_12[2]; +}; +#define IR3_DP_FS(name) dword_offsetof(struct ir3_driver_params_fs, name) #define IR3_MAX_SHADER_BUFFERS 32 #define IR3_MAX_SHADER_IMAGES 32 diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc index 46ee846a4cd..13929a39fb9 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.cc +++ b/src/freedreno/vulkan/tu_cmd_buffer.cc @@ -5423,15 +5423,15 @@ tu_emit_fdm_params(struct tu_cmd_buffer *cmd, struct tu_cs *cs, struct tu_shader *fs, unsigned num_units) { - STATIC_ASSERT(IR3_DP_FS_FRAG_INVOCATION_COUNT == IR3_DP_FS_DYNAMIC); + STATIC_ASSERT(IR3_DP_FS(frag_invocation_count) == IR3_DP_FS_DYNAMIC); tu_cs_emit(cs, fs->fs.per_samp ? cmd->vk.dynamic_graphics_state.ms.rasterization_samples : 1); tu_cs_emit(cs, 0); tu_cs_emit(cs, 0); tu_cs_emit(cs, 0); - STATIC_ASSERT(IR3_DP_FS_FRAG_SIZE == IR3_DP_FS_DYNAMIC + 4); - STATIC_ASSERT(IR3_DP_FS_FRAG_OFFSET == IR3_DP_FS_DYNAMIC + 6); + STATIC_ASSERT(IR3_DP_FS(frag_size) == IR3_DP_FS_DYNAMIC + 4); + STATIC_ASSERT(IR3_DP_FS(frag_offset) == IR3_DP_FS_DYNAMIC + 6); if (num_units > 1) { if (fs->fs.has_fdm) { struct apply_fs_params_state state = { @@ -5885,9 +5885,9 @@ vs_params_offset(struct tu_cmd_buffer *cmd) return 0; /* this layout is required by CP_DRAW_INDIRECT_MULTI */ - STATIC_ASSERT(IR3_DP_DRAWID == 0); - STATIC_ASSERT(IR3_DP_VTXID_BASE == 1); - STATIC_ASSERT(IR3_DP_INSTID_BASE == 2); + STATIC_ASSERT(IR3_DP_VS(draw_id) == 0); + STATIC_ASSERT(IR3_DP_VS(vtxid_base) == 1); + STATIC_ASSERT(IR3_DP_VS(instid_base) == 2); /* 0 means disabled for CP_DRAW_INDIRECT_MULTI */ assert(const_state->offsets.driver_param != 0); @@ -6333,6 +6333,29 @@ struct tu_dispatch_info uint64_t indirect_offset; }; +static inline struct ir3_driver_params_cs +build_driver_params_cs(const struct ir3_shader_variant *variant, + const struct tu_dispatch_info *info) +{ + unsigned subgroup_size = variant->info.subgroup_size; + unsigned subgroup_shift = util_logbase2(subgroup_size); + + return (struct ir3_driver_params_cs) { + .num_work_groups_x = info->blocks[0], + .num_work_groups_y = info->blocks[1], + .num_work_groups_z = info->blocks[2], + .work_dim = 0, + .base_group_x = info->offsets[0], + .base_group_y = info->offsets[1], + .base_group_z = info->offsets[2], + .subgroup_size = subgroup_size, + .local_group_size_x = 0, + .local_group_size_y = 0, + .local_group_size_z = 0, + .subgroup_id_shift = subgroup_shift, + }; +} + template static void tu_emit_compute_driver_params(struct tu_cmd_buffer *cmd, @@ -6353,27 +6376,15 @@ tu_emit_compute_driver_params(struct tu_cmd_buffer *cmd, bool direct_indirect_load = !(info->indirect_offset & 0xf) && - !(info->indirect && num_consts > IR3_DP_BASE_GROUP_X); + !(info->indirect && num_consts > IR3_DP_CS(base_group_x)); uint64_t iova = 0; if (!info->indirect) { - uint32_t driver_params[12] = { - [IR3_DP_NUM_WORK_GROUPS_X] = info->blocks[0], - [IR3_DP_NUM_WORK_GROUPS_Y] = info->blocks[1], - [IR3_DP_NUM_WORK_GROUPS_Z] = info->blocks[2], - [IR3_DP_WORK_DIM] = 0, - [IR3_DP_BASE_GROUP_X] = info->offsets[0], - [IR3_DP_BASE_GROUP_Y] = info->offsets[1], - [IR3_DP_BASE_GROUP_Z] = info->offsets[2], - [IR3_DP_CS_SUBGROUP_SIZE] = subgroup_size, - [IR3_DP_LOCAL_GROUP_SIZE_X] = 0, - [IR3_DP_LOCAL_GROUP_SIZE_Y] = 0, - [IR3_DP_LOCAL_GROUP_SIZE_Z] = 0, - [IR3_DP_SUBGROUP_ID_SHIFT] = subgroup_shift, - }; + struct ir3_driver_params_cs driver_params = + build_driver_params_cs(variant, info); - assert(num_consts <= ARRAY_SIZE(driver_params)); + assert(num_consts <= dword_sizeof(driver_params)); struct tu_cs_memory consts; uint32_t consts_vec4 = DIV_ROUND_UP(num_consts, 4); @@ -6382,7 +6393,7 @@ tu_emit_compute_driver_params(struct tu_cmd_buffer *cmd, vk_command_buffer_set_error(&cmd->vk, result); return; } - memcpy(consts.map, driver_params, num_consts * sizeof(uint32_t)); + memcpy(consts.map, &driver_params, num_consts * sizeof(uint32_t)); iova = consts.iova; } else if (direct_indirect_load) { iova = info->indirect->iova + info->indirect_offset; @@ -6406,12 +6417,12 @@ tu_emit_compute_driver_params(struct tu_cmd_buffer *cmd, /* Fill out IR3_DP_CS_SUBGROUP_SIZE and IR3_DP_SUBGROUP_ID_SHIFT for * indirect dispatch. */ - if (info->indirect && num_consts > IR3_DP_BASE_GROUP_X) { + if (info->indirect && num_consts > IR3_DP_CS(base_group_x)) { uint32_t indirect_driver_params[8] = { 0, 0, 0, subgroup_size, 0, 0, 0, subgroup_shift, }; - bool emit_local = num_consts > IR3_DP_LOCAL_GROUP_SIZE_X; + bool emit_local = num_consts > IR3_DP_CS(local_group_size_x); uint32_t emit_size = emit_local ? 8 : 4; tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 2 + emit_size); @@ -6448,22 +6459,10 @@ tu_emit_compute_driver_params(struct tu_cmd_buffer *cmd, (variant->constlen - offset) * 4); if (!info->indirect) { - uint32_t driver_params[12] = { - [IR3_DP_NUM_WORK_GROUPS_X] = info->blocks[0], - [IR3_DP_NUM_WORK_GROUPS_Y] = info->blocks[1], - [IR3_DP_NUM_WORK_GROUPS_Z] = info->blocks[2], - [IR3_DP_WORK_DIM] = 0, - [IR3_DP_BASE_GROUP_X] = info->offsets[0], - [IR3_DP_BASE_GROUP_Y] = info->offsets[1], - [IR3_DP_BASE_GROUP_Z] = info->offsets[2], - [IR3_DP_CS_SUBGROUP_SIZE] = subgroup_size, - [IR3_DP_LOCAL_GROUP_SIZE_X] = 0, - [IR3_DP_LOCAL_GROUP_SIZE_Y] = 0, - [IR3_DP_LOCAL_GROUP_SIZE_Z] = 0, - [IR3_DP_SUBGROUP_ID_SHIFT] = subgroup_shift, - }; + struct ir3_driver_params_cs driver_params = + build_driver_params_cs(variant, info); - assert(num_consts <= ARRAY_SIZE(driver_params)); + assert(num_consts <= dword_sizeof(driver_params)); /* push constants */ tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3 + num_consts); @@ -6474,9 +6473,7 @@ tu_emit_compute_driver_params(struct tu_cmd_buffer *cmd, CP_LOAD_STATE6_0_NUM_UNIT(num_consts / 4)); tu_cs_emit(cs, 0); tu_cs_emit(cs, 0); - uint32_t i; - for (i = 0; i < num_consts; i++) - tu_cs_emit(cs, driver_params[i]); + tu_cs_emit_array(cs, (uint32_t *)&driver_params, num_consts); } else if (!(info->indirect_offset & 0xf)) { tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3); tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(offset) | @@ -6518,21 +6515,21 @@ tu_emit_compute_driver_params(struct tu_cmd_buffer *cmd, /* Fill out IR3_DP_CS_SUBGROUP_SIZE and IR3_DP_SUBGROUP_ID_SHIFT for * indirect dispatch. */ - if (info->indirect && num_consts > IR3_DP_BASE_GROUP_X) { - bool emit_local = num_consts > IR3_DP_LOCAL_GROUP_SIZE_X; + if (info->indirect && num_consts > IR3_DP_CS(base_group_x)) { + bool emit_local = num_consts > IR3_DP_CS(local_group_size_x); tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 7 + (emit_local ? 4 : 0)); - tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(offset + (IR3_DP_BASE_GROUP_X / 4)) | + tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(offset + (IR3_DP_CS(base_group_x) / 4)) | CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) | - CP_LOAD_STATE6_0_NUM_UNIT((num_consts - IR3_DP_BASE_GROUP_X) / 4)); + CP_LOAD_STATE6_0_NUM_UNIT((num_consts - IR3_DP_CS(base_group_x)) / 4)); tu_cs_emit_qw(cs, 0); tu_cs_emit(cs, 0); /* BASE_GROUP_X */ tu_cs_emit(cs, 0); /* BASE_GROUP_Y */ tu_cs_emit(cs, 0); /* BASE_GROUP_Z */ tu_cs_emit(cs, subgroup_size); if (emit_local) { - assert(num_consts == align(IR3_DP_SUBGROUP_ID_SHIFT, 4)); + assert(num_consts == align(IR3_DP_CS(subgroup_id_shift), 4)); tu_cs_emit(cs, 0); /* LOCAL_GROUP_SIZE_X */ tu_cs_emit(cs, 0); /* LOCAL_GROUP_SIZE_Y */ tu_cs_emit(cs, 0); /* LOCAL_GROUP_SIZE_Z */ diff --git a/src/freedreno/vulkan/tu_shader.cc b/src/freedreno/vulkan/tu_shader.cc index 85bf441e9c0..1c7fb922707 100644 --- a/src/freedreno/vulkan/tu_shader.cc +++ b/src/freedreno/vulkan/tu_shader.cc @@ -511,9 +511,9 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr, if (!dev->compiler->load_shader_consts_via_preamble) return false; - enum ir3_driver_param param = + unsigned param = instr->intrinsic == nir_intrinsic_load_frag_size_ir3 ? - IR3_DP_FS_FRAG_SIZE : IR3_DP_FS_FRAG_OFFSET; + IR3_DP_FS(frag_size) : IR3_DP_FS(frag_offset); unsigned offset = param - IR3_DP_FS_DYNAMIC; @@ -531,7 +531,7 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr, nir_def *result = ir3_load_driver_ubo(b, 1, &shader->const_state.fdm_ubo, - IR3_DP_FS_FRAG_INVOCATION_COUNT - + IR3_DP_FS(frag_invocation_count) - IR3_DP_FS_DYNAMIC); nir_def_replace(&instr->def, result); diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_compute.c b/src/gallium/drivers/freedreno/a4xx/fd4_compute.c index cf7a0a64131..3999a7d183f 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_compute.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_compute.c @@ -59,12 +59,12 @@ cs_program_emit(struct fd_ringbuffer *ring, struct ir3_shader_variant *v) num_wg_id, work_dim_id, unused_id; local_invocation_id = ir3_find_sysval_regid(v, SYSTEM_VALUE_LOCAL_INVOCATION_ID); - work_group_id = driver_param_base + IR3_DP_WORKGROUP_ID_X; - num_wg_id = driver_param_base + IR3_DP_NUM_WORK_GROUPS_X; - local_group_size_id = driver_param_base + IR3_DP_LOCAL_GROUP_SIZE_X; - work_dim_id = driver_param_base + IR3_DP_WORK_DIM; + work_group_id = driver_param_base + IR3_DP_CS(workgroup_id_x); + num_wg_id = driver_param_base + IR3_DP_CS(num_work_groups_x); + local_group_size_id = driver_param_base + IR3_DP_CS(local_group_size_x); + work_dim_id = driver_param_base + IR3_DP_CS(work_dim); /* NOTE: At some point we'll want to use this, it's probably WGOFFSETCONSTID */ - unused_id = driver_param_base + IR3_DP_BASE_GROUP_X; + unused_id = driver_param_base + IR3_DP_CS(base_group_x); OUT_PKT0(ring, REG_A4XX_HLSQ_CL_CONTROL_0, 2); OUT_RING(ring, A4XX_HLSQ_CL_CONTROL_0_WGIDCONSTID(work_group_id) | diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_const.cc b/src/gallium/drivers/freedreno/a6xx/fd6_const.cc index a1f15d4cdbc..c033c3f98ac 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_const.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_const.cc @@ -281,19 +281,29 @@ fd6_build_driver_params(struct fd6_emit *emit) return NULL; } - unsigned size_dwords = num_dp * (4 + IR3_DP_VS_COUNT); /* 4dw PKT7 header */ + bool needs_ucp = !!emit->vs->key.ucp_enables; + + if (PIPELINE == HAS_TESS_GS) { + needs_ucp |= emit->gs && emit->gs->key.ucp_enables; + needs_ucp |= emit->hs && emit->hs->key.ucp_enables; + needs_ucp |= emit->ds && emit->ds->key.ucp_enables; + } + + struct ir3_driver_params_vs p = + ir3_build_driver_params_vs(ctx, emit->info, emit->draw, emit->draw_id, needs_ucp); + + unsigned size_dwords = + num_dp * (4 + dword_sizeof(p)); /* 4dw PKT7 header */ struct fd_ringbuffer *dpconstobj = fd_submit_new_ringbuffer( ctx->batch->submit, size_dwords * 4, FD_RINGBUFFER_STREAMING); if (emit->vs->need_driver_params) { - ir3_emit_driver_params(emit->vs, dpconstobj, ctx, emit->info, - emit->indirect, emit->draw, emit->draw_id); + ir3_emit_driver_params(emit->vs, dpconstobj, ctx, emit->info, emit->indirect, &p); } if (PIPELINE == HAS_TESS_GS) { if (emit->gs && emit->gs->need_driver_params) { - ir3_emit_driver_params(emit->gs, dpconstobj, ctx, emit->info, - emit->indirect, emit->draw, 0); + ir3_emit_driver_params(emit->gs, dpconstobj, ctx, emit->info, emit->indirect, &p); } if (emit->hs && emit->hs->need_driver_params) { @@ -301,8 +311,7 @@ fd6_build_driver_params(struct fd6_emit *emit) } if (emit->ds && emit->ds->need_driver_params) { - ir3_emit_driver_params(emit->ds, dpconstobj, ctx, emit->info, - emit->indirect, emit->draw, 0); + ir3_emit_driver_params(emit->ds, dpconstobj, ctx, emit->info, emit->indirect, &p); } } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_const.h b/src/gallium/drivers/freedreno/ir3/ir3_const.h index 1d5f7b68323..144c6fa9ddd 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_const.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_const.h @@ -430,35 +430,44 @@ emit_kernel_params(struct fd_context *ctx, const struct ir3_shader_variant *v, } } +static inline struct ir3_driver_params_vs +ir3_build_driver_params_vs(struct fd_context *ctx, + const struct pipe_draw_info *info, + const struct pipe_draw_start_count_bias *draw, + uint32_t draw_id, bool needs_ucp) + assert_dt +{ + struct ir3_driver_params_vs vertex_params = { + .draw_id = draw_id, /* filled by hw (CP_DRAW_INDIRECT_MULTI) */ + .vtxid_base = info->index_size ? draw->index_bias : draw->start, + .instid_base = info->start_instance, + .vtxcnt_max = ctx->streamout.max_tf_vtx, + .is_indexed_draw = info->index_size != 0 ? ~0 : 0, + }; + if (needs_ucp) { + struct pipe_clip_state *ucp = &ctx->ucp; + for (unsigned i = 0; i < ARRAY_SIZE(vertex_params.ucp); i++) { + vertex_params.ucp[i].x = fui(ucp->ucp[i][0]); + vertex_params.ucp[i].y = fui(ucp->ucp[i][1]); + vertex_params.ucp[i].z = fui(ucp->ucp[i][2]); + vertex_params.ucp[i].w = fui(ucp->ucp[i][3]); + } + } + return vertex_params; +} + static inline void ir3_emit_driver_params(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, struct fd_context *ctx, const struct pipe_draw_info *info, const struct pipe_draw_indirect_info *indirect, - const struct pipe_draw_start_count_bias *draw, - const uint32_t draw_id) assert_dt + const struct ir3_driver_params_vs *vertex_params) + assert_dt { assert(v->need_driver_params); const struct ir3_const_state *const_state = ir3_const_state(v); uint32_t offset = const_state->offsets.driver_param; - uint32_t vertex_params[IR3_DP_VS_COUNT] = { - [IR3_DP_DRAWID] = draw_id, /* filled by hw (CP_DRAW_INDIRECT_MULTI) */ - [IR3_DP_VTXID_BASE] = info->index_size ? draw->index_bias : draw->start, - [IR3_DP_INSTID_BASE] = info->start_instance, - [IR3_DP_VTXCNT_MAX] = ctx->streamout.max_tf_vtx, - [IR3_DP_IS_INDEXED_DRAW] = info->index_size != 0 ? ~0 : 0, - }; - if (v->key.ucp_enables) { - struct pipe_clip_state *ucp = &ctx->ucp; - unsigned pos = IR3_DP_UCP0_X; - for (unsigned i = 0; pos <= IR3_DP_UCP7_W; i++) { - for (unsigned j = 0; j < 4; j++) { - vertex_params[pos] = fui(ucp->ucp[i][j]); - pos++; - } - } - } /* Only emit as many params as needed, i.e. up to the highest enabled UCP * plane. However a binning pass may drop even some of these, so limit to @@ -466,7 +475,7 @@ ir3_emit_driver_params(const struct ir3_shader_variant *v, */ const uint32_t vertex_params_size = MIN2(const_state->num_driver_params, (v->constlen - offset) * 4); - assert(vertex_params_size <= IR3_DP_VS_COUNT); + assert(vertex_params_size <= dword_sizeof(*vertex_params)); /* for indirect draw, we need to copy VTXID_BASE from * indirect-draw parameters buffer.. which is annoying @@ -501,15 +510,28 @@ ir3_emit_driver_params(const struct ir3_shader_variant *v, pipe_resource_reference(&vertex_params_rsc, NULL); } else { - emit_const_user(ring, v, offset * 4, vertex_params_size, vertex_params); + emit_const_user(ring, v, offset * 4, vertex_params_size, (uint32_t *)vertex_params); } /* if needed, emit stream-out buffer addresses: */ - if (vertex_params[IR3_DP_VTXCNT_MAX] > 0) { + if (vertex_params->vtxcnt_max > 0) { emit_tfbos(ctx, v, ring); } } +static inline struct ir3_driver_params_tcs +ir3_build_driver_params_tcs(struct fd_context *ctx) + assert_dt +{ + return (struct ir3_driver_params_tcs) { + .default_outer_level_x = fui(ctx->default_outer_level[0]), + .default_outer_level_y = fui(ctx->default_outer_level[1]), + .default_outer_level_z = fui(ctx->default_outer_level[2]), + .default_outer_level_w = fui(ctx->default_outer_level[3]), + .default_inner_level_x = fui(ctx->default_inner_level[0]), + .default_inner_level_y = fui(ctx->default_inner_level[1]), + }; +} static inline void ir3_emit_hs_driver_params(const struct ir3_shader_variant *v, @@ -521,20 +543,13 @@ ir3_emit_hs_driver_params(const struct ir3_shader_variant *v, const struct ir3_const_state *const_state = ir3_const_state(v); uint32_t offset = const_state->offsets.driver_param; - uint32_t hs_params[IR3_DP_HS_COUNT] = { - [IR3_DP_HS_DEFAULT_OUTER_LEVEL_X] = fui(ctx->default_outer_level[0]), - [IR3_DP_HS_DEFAULT_OUTER_LEVEL_Y] = fui(ctx->default_outer_level[1]), - [IR3_DP_HS_DEFAULT_OUTER_LEVEL_Z] = fui(ctx->default_outer_level[2]), - [IR3_DP_HS_DEFAULT_OUTER_LEVEL_W] = fui(ctx->default_outer_level[3]), - [IR3_DP_HS_DEFAULT_INNER_LEVEL_X] = fui(ctx->default_inner_level[0]), - [IR3_DP_HS_DEFAULT_INNER_LEVEL_Y] = fui(ctx->default_inner_level[1]), - }; + struct ir3_driver_params_tcs hs_params = ir3_build_driver_params_tcs(ctx); const uint32_t hs_params_size = MIN2(const_state->num_driver_params, (v->constlen - offset) * 4); - assert(hs_params_size <= IR3_DP_HS_COUNT); + assert(hs_params_size <= dword_sizeof(hs_params)); - emit_const_user(ring, v, offset * 4, hs_params_size, hs_params); + emit_const_user(ring, v, offset * 4, hs_params_size, (uint32_t *)&hs_params); } @@ -552,7 +567,11 @@ ir3_emit_vs_consts(const struct ir3_shader_variant *v, /* emit driver params every time: */ if (info && v->need_driver_params) { ring_wfi(ctx->batch, ring); - ir3_emit_driver_params(v, ring, ctx, info, indirect, draw, 0); + + struct ir3_driver_params_vs p = + ir3_build_driver_params_vs(ctx, info, draw, 0, v->key.ucp_enables); + + ir3_emit_driver_params(v, ring, ctx, info, indirect, &p); } } @@ -565,6 +584,29 @@ ir3_emit_fs_consts(const struct ir3_shader_variant *v, emit_common_consts(v, ring, ctx, PIPE_SHADER_FRAGMENT); } +static inline struct ir3_driver_params_cs +ir3_build_driver_params_cs(const struct ir3_shader_variant *v, + const struct pipe_grid_info *info) +{ + return (struct ir3_driver_params_cs) { + .num_work_groups_x = info->grid[0], + .num_work_groups_y = info->grid[1], + .num_work_groups_z = info->grid[2], + .work_dim = info->work_dim, + .base_group_x = info->grid_base[0], + .base_group_y = info->grid_base[1], + .base_group_z = info->grid_base[2], + .subgroup_size = v->info.subgroup_size, + .local_group_size_x = info->block[0], + .local_group_size_y = info->block[1], + .local_group_size_z = info->block[2], + .subgroup_id_shift = util_logbase2(v->info.subgroup_size), + .workgroup_id_x = 0, // TODO + .workgroup_id_y = 0, // TODO + .workgroup_id_z = 0, // TODO + }; +} + static inline void ir3_emit_cs_driver_params(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, struct fd_context *ctx, @@ -614,27 +656,12 @@ ir3_emit_cs_driver_params(const struct ir3_shader_variant *v, } else { // TODO some of these are not part of the indirect state.. so we // need to emit some of this directly in both cases. - uint32_t compute_params[IR3_DP_CS_COUNT] = { - [IR3_DP_NUM_WORK_GROUPS_X] = info->grid[0], - [IR3_DP_NUM_WORK_GROUPS_Y] = info->grid[1], - [IR3_DP_NUM_WORK_GROUPS_Z] = info->grid[2], - [IR3_DP_WORK_DIM] = info->work_dim, - [IR3_DP_BASE_GROUP_X] = info->grid_base[0], - [IR3_DP_BASE_GROUP_Y] = info->grid_base[1], - [IR3_DP_BASE_GROUP_Z] = info->grid_base[2], - [IR3_DP_CS_SUBGROUP_SIZE] = v->info.subgroup_size, - [IR3_DP_LOCAL_GROUP_SIZE_X] = info->block[0], - [IR3_DP_LOCAL_GROUP_SIZE_Y] = info->block[1], - [IR3_DP_LOCAL_GROUP_SIZE_Z] = info->block[2], - [IR3_DP_SUBGROUP_ID_SHIFT] = util_logbase2(v->info.subgroup_size), - [IR3_DP_WORKGROUP_ID_X] = 0, // TODO - [IR3_DP_WORKGROUP_ID_Y] = 0, // TODO - [IR3_DP_WORKGROUP_ID_Z] = 0, // TODO - }; + struct ir3_driver_params_cs compute_params = + ir3_build_driver_params_cs(v, info); uint32_t size = MIN2(const_state->num_driver_params, v->constlen * 4 - offset * 4); - emit_const_user(ring, v, offset * 4, size, compute_params); + emit_const_user(ring, v, offset * 4, size, (uint32_t *)&compute_params); } } }