mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 11:18:08 +02:00
freedreno/ir3+tu: Convert driver-params to structs
This at least lets us de-dup the dp setup between the push-const path and preamble-loads-from-ubo path. Signed-off-by: Rob Clark <robdclark@chromium.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31534>
This commit is contained in:
parent
81d8387dbc
commit
7e9b948430
8 changed files with 271 additions and 203 deletions
|
|
@ -171,7 +171,7 @@ create_frag_input(struct ir3_context *ctx, struct ir3_instruction *coord,
|
|||
}
|
||||
|
||||
static struct ir3_instruction *
|
||||
create_driver_param(struct ir3_context *ctx, enum ir3_driver_param dp)
|
||||
create_driver_param(struct ir3_context *ctx, uint32_t dp)
|
||||
{
|
||||
/* first four vec4 sysval's reserved for UBOs: */
|
||||
/* NOTE: dp is in scalar, but there can be >4 dp components: */
|
||||
|
|
@ -182,7 +182,7 @@ create_driver_param(struct ir3_context *ctx, enum ir3_driver_param dp)
|
|||
}
|
||||
|
||||
static struct ir3_instruction *
|
||||
create_driver_param_indirect(struct ir3_context *ctx, enum ir3_driver_param dp,
|
||||
create_driver_param_indirect(struct ir3_context *ctx, uint32_t dp,
|
||||
struct ir3_instruction *address)
|
||||
{
|
||||
/* first four vec4 sysval's reserved for UBOs: */
|
||||
|
|
@ -2798,25 +2798,25 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
|||
case nir_intrinsic_load_base_vertex:
|
||||
case nir_intrinsic_load_first_vertex:
|
||||
if (!ctx->basevertex) {
|
||||
ctx->basevertex = create_driver_param(ctx, IR3_DP_VTXID_BASE);
|
||||
ctx->basevertex = create_driver_param(ctx, IR3_DP_VS(vtxid_base));
|
||||
}
|
||||
dst[0] = ctx->basevertex;
|
||||
break;
|
||||
case nir_intrinsic_load_is_indexed_draw:
|
||||
if (!ctx->is_indexed_draw) {
|
||||
ctx->is_indexed_draw = create_driver_param(ctx, IR3_DP_IS_INDEXED_DRAW);
|
||||
ctx->is_indexed_draw = create_driver_param(ctx, IR3_DP_VS(is_indexed_draw));
|
||||
}
|
||||
dst[0] = ctx->is_indexed_draw;
|
||||
break;
|
||||
case nir_intrinsic_load_draw_id:
|
||||
if (!ctx->draw_id) {
|
||||
ctx->draw_id = create_driver_param(ctx, IR3_DP_DRAWID);
|
||||
ctx->draw_id = create_driver_param(ctx, IR3_DP_VS(draw_id));
|
||||
}
|
||||
dst[0] = ctx->draw_id;
|
||||
break;
|
||||
case nir_intrinsic_load_base_instance:
|
||||
if (!ctx->base_instance) {
|
||||
ctx->base_instance = create_driver_param(ctx, IR3_DP_INSTID_BASE);
|
||||
ctx->base_instance = create_driver_param(ctx, IR3_DP_VS(instid_base));
|
||||
}
|
||||
dst[0] = ctx->base_instance;
|
||||
break;
|
||||
|
|
@ -2863,7 +2863,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
|||
idx = nir_intrinsic_ucp_id(intr);
|
||||
for (int i = 0; i < dest_components; i++) {
|
||||
unsigned n = idx * 4 + i;
|
||||
dst[i] = create_driver_param(ctx, IR3_DP_UCP0_X + n);
|
||||
dst[i] = create_driver_param(ctx, IR3_DP_VS(ucp[0].x) + n);
|
||||
}
|
||||
create_rpt = true;
|
||||
break;
|
||||
|
|
@ -2899,41 +2899,41 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
|||
} else {
|
||||
/* For a3xx/a4xx, this comes in via const injection by the hw */
|
||||
for (int i = 0; i < dest_components; i++) {
|
||||
dst[i] = create_driver_param(ctx, IR3_DP_WORKGROUP_ID_X + i);
|
||||
dst[i] = create_driver_param(ctx, IR3_DP_CS(workgroup_id_x) + i);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case nir_intrinsic_load_base_workgroup_id:
|
||||
for (int i = 0; i < dest_components; i++) {
|
||||
dst[i] = create_driver_param(ctx, IR3_DP_BASE_GROUP_X + i);
|
||||
dst[i] = create_driver_param(ctx, IR3_DP_CS(base_group_x) + i);
|
||||
}
|
||||
create_rpt = true;
|
||||
break;
|
||||
case nir_intrinsic_load_num_workgroups:
|
||||
for (int i = 0; i < dest_components; i++) {
|
||||
dst[i] = create_driver_param(ctx, IR3_DP_NUM_WORK_GROUPS_X + i);
|
||||
dst[i] = create_driver_param(ctx, IR3_DP_CS(num_work_groups_x) + i);
|
||||
}
|
||||
create_rpt = true;
|
||||
break;
|
||||
case nir_intrinsic_load_workgroup_size:
|
||||
for (int i = 0; i < dest_components; i++) {
|
||||
dst[i] = create_driver_param(ctx, IR3_DP_LOCAL_GROUP_SIZE_X + i);
|
||||
dst[i] = create_driver_param(ctx, IR3_DP_CS(local_group_size_x) + i);
|
||||
}
|
||||
create_rpt = true;
|
||||
break;
|
||||
case nir_intrinsic_load_subgroup_size: {
|
||||
assert(ctx->so->type == MESA_SHADER_COMPUTE ||
|
||||
ctx->so->type == MESA_SHADER_FRAGMENT);
|
||||
enum ir3_driver_param size = ctx->so->type == MESA_SHADER_COMPUTE ?
|
||||
IR3_DP_CS_SUBGROUP_SIZE : IR3_DP_FS_SUBGROUP_SIZE;
|
||||
unsigned size = ctx->so->type == MESA_SHADER_COMPUTE ?
|
||||
IR3_DP_CS(subgroup_size) : IR3_DP_FS(subgroup_size);
|
||||
dst[0] = create_driver_param(ctx, size);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_subgroup_id_shift_ir3:
|
||||
dst[0] = create_driver_param(ctx, IR3_DP_SUBGROUP_ID_SHIFT);
|
||||
dst[0] = create_driver_param(ctx, IR3_DP_CS(subgroup_id_shift));
|
||||
break;
|
||||
case nir_intrinsic_load_work_dim:
|
||||
dst[0] = create_driver_param(ctx, IR3_DP_WORK_DIM);
|
||||
dst[0] = create_driver_param(ctx, IR3_DP_CS(work_dim));
|
||||
break;
|
||||
case nir_intrinsic_load_subgroup_invocation:
|
||||
assert(ctx->compiler->has_getfiberid);
|
||||
|
|
@ -2943,24 +2943,24 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
|||
break;
|
||||
case nir_intrinsic_load_tess_level_outer_default:
|
||||
for (int i = 0; i < dest_components; i++) {
|
||||
dst[i] = create_driver_param(ctx, IR3_DP_HS_DEFAULT_OUTER_LEVEL_X + i);
|
||||
dst[i] = create_driver_param(ctx, IR3_DP_TCS(default_outer_level_x) + i);
|
||||
}
|
||||
create_rpt = true;
|
||||
break;
|
||||
case nir_intrinsic_load_tess_level_inner_default:
|
||||
for (int i = 0; i < dest_components; i++) {
|
||||
dst[i] = create_driver_param(ctx, IR3_DP_HS_DEFAULT_INNER_LEVEL_X + i);
|
||||
dst[i] = create_driver_param(ctx, IR3_DP_TCS(default_inner_level_x) + i);
|
||||
}
|
||||
create_rpt = true;
|
||||
break;
|
||||
case nir_intrinsic_load_frag_invocation_count:
|
||||
dst[0] = create_driver_param(ctx, IR3_DP_FS_FRAG_INVOCATION_COUNT);
|
||||
dst[0] = create_driver_param(ctx, IR3_DP_FS(frag_invocation_count));
|
||||
break;
|
||||
case nir_intrinsic_load_frag_size_ir3:
|
||||
case nir_intrinsic_load_frag_offset_ir3: {
|
||||
enum ir3_driver_param param =
|
||||
unsigned param =
|
||||
intr->intrinsic == nir_intrinsic_load_frag_size_ir3 ?
|
||||
IR3_DP_FS_FRAG_SIZE : IR3_DP_FS_FRAG_OFFSET;
|
||||
IR3_DP_FS(frag_size) : IR3_DP_FS(frag_offset);
|
||||
if (nir_src_is_const(intr->src[0])) {
|
||||
uint32_t view = nir_src_as_uint(intr->src[0]);
|
||||
for (int i = 0; i < dest_components; i++) {
|
||||
|
|
@ -4559,7 +4559,7 @@ emit_stream_out(struct ir3_context *ctx)
|
|||
* of the shader:
|
||||
*/
|
||||
vtxcnt = create_sysval_input(ctx, SYSTEM_VALUE_VERTEX_CNT, 0x1);
|
||||
maxvtxcnt = create_driver_param(ctx, IR3_DP_VTXCNT_MAX);
|
||||
maxvtxcnt = create_driver_param(ctx, IR3_DP_VS(vtxcnt_max));
|
||||
|
||||
/* at this point, we are at the original 'end' block,
|
||||
* re-purpose this block to stream-out condition, then
|
||||
|
|
|
|||
|
|
@ -1188,61 +1188,61 @@ ir3_get_driver_param_info(const nir_shader *shader, nir_intrinsic_instr *intr,
|
|||
{
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_load_base_workgroup_id:
|
||||
param_info->offset = IR3_DP_BASE_GROUP_X;
|
||||
param_info->offset = IR3_DP_CS(base_group_x);
|
||||
break;
|
||||
case nir_intrinsic_load_num_workgroups:
|
||||
param_info->offset = IR3_DP_NUM_WORK_GROUPS_X;
|
||||
param_info->offset = IR3_DP_CS(num_work_groups_x);
|
||||
break;
|
||||
case nir_intrinsic_load_workgroup_size:
|
||||
param_info->offset = IR3_DP_LOCAL_GROUP_SIZE_X;
|
||||
param_info->offset = IR3_DP_CS(local_group_size_x);
|
||||
break;
|
||||
case nir_intrinsic_load_subgroup_size:
|
||||
assert(shader->info.stage == MESA_SHADER_COMPUTE ||
|
||||
shader->info.stage == MESA_SHADER_FRAGMENT);
|
||||
if (shader->info.stage == MESA_SHADER_COMPUTE) {
|
||||
param_info->offset = IR3_DP_CS_SUBGROUP_SIZE;
|
||||
param_info->offset = IR3_DP_CS(subgroup_size);
|
||||
} else {
|
||||
param_info->offset = IR3_DP_FS_SUBGROUP_SIZE;
|
||||
param_info->offset = IR3_DP_FS(subgroup_size);
|
||||
}
|
||||
break;
|
||||
case nir_intrinsic_load_subgroup_id_shift_ir3:
|
||||
param_info->offset = IR3_DP_SUBGROUP_ID_SHIFT;
|
||||
param_info->offset = IR3_DP_CS(subgroup_id_shift);
|
||||
break;
|
||||
case nir_intrinsic_load_work_dim:
|
||||
param_info->offset = IR3_DP_WORK_DIM;
|
||||
param_info->offset = IR3_DP_CS(work_dim);
|
||||
break;
|
||||
case nir_intrinsic_load_base_vertex:
|
||||
case nir_intrinsic_load_first_vertex:
|
||||
param_info->offset = IR3_DP_VTXID_BASE;
|
||||
param_info->offset = IR3_DP_VS(vtxid_base);
|
||||
break;
|
||||
case nir_intrinsic_load_is_indexed_draw:
|
||||
param_info->offset = IR3_DP_IS_INDEXED_DRAW;
|
||||
param_info->offset = IR3_DP_VS(is_indexed_draw);
|
||||
break;
|
||||
case nir_intrinsic_load_draw_id:
|
||||
param_info->offset = IR3_DP_DRAWID;
|
||||
param_info->offset = IR3_DP_VS(draw_id);
|
||||
break;
|
||||
case nir_intrinsic_load_base_instance:
|
||||
param_info->offset = IR3_DP_INSTID_BASE;
|
||||
param_info->offset = IR3_DP_VS(instid_base);
|
||||
break;
|
||||
case nir_intrinsic_load_user_clip_plane: {
|
||||
uint32_t idx = nir_intrinsic_ucp_id(intr);
|
||||
param_info->offset = IR3_DP_UCP0_X + 4 * idx;
|
||||
param_info->offset = IR3_DP_VS(ucp[0].x) + 4 * idx;
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_tess_level_outer_default:
|
||||
param_info->offset = IR3_DP_HS_DEFAULT_OUTER_LEVEL_X;
|
||||
param_info->offset = IR3_DP_TCS(default_outer_level_x);
|
||||
break;
|
||||
case nir_intrinsic_load_tess_level_inner_default:
|
||||
param_info->offset = IR3_DP_HS_DEFAULT_INNER_LEVEL_X;
|
||||
param_info->offset = IR3_DP_TCS(default_inner_level_x);
|
||||
break;
|
||||
case nir_intrinsic_load_frag_size_ir3:
|
||||
param_info->offset = IR3_DP_FS_FRAG_SIZE;
|
||||
param_info->offset = IR3_DP_FS(frag_size);
|
||||
break;
|
||||
case nir_intrinsic_load_frag_offset_ir3:
|
||||
param_info->offset = IR3_DP_FS_FRAG_OFFSET;
|
||||
param_info->offset = IR3_DP_FS(frag_offset);
|
||||
break;
|
||||
case nir_intrinsic_load_frag_invocation_count:
|
||||
param_info->offset = IR3_DP_FS_FRAG_INVOCATION_COUNT;
|
||||
param_info->offset = IR3_DP_FS(frag_invocation_count);
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
|
|
@ -1306,7 +1306,7 @@ ir3_nir_scan_driver_consts(struct ir3_compiler *compiler, nir_shader *shader, st
|
|||
if (!compiler->has_shared_regfile &&
|
||||
shader->info.stage == MESA_SHADER_COMPUTE) {
|
||||
layout->num_driver_params =
|
||||
MAX2(layout->num_driver_params, IR3_DP_WORKGROUP_ID_Z + 1);
|
||||
MAX2(layout->num_driver_params, IR3_DP_CS(workgroup_id_z) + 1);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1338,7 +1338,7 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
|
|||
|
||||
if ((compiler->gen < 5) && (v->stream_output.num_outputs > 0)) {
|
||||
const_state->num_driver_params =
|
||||
MAX2(const_state->num_driver_params, IR3_DP_VTXCNT_MAX + 1);
|
||||
MAX2(const_state->num_driver_params, IR3_DP_VS(vtxcnt_max) + 1);
|
||||
}
|
||||
|
||||
const_state->num_ubos = nir->info.num_ubos;
|
||||
|
|
@ -1375,7 +1375,7 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
|
|||
const_state->num_driver_params = align(const_state->num_driver_params, 4);
|
||||
unsigned upload_unit = 1;
|
||||
if (v->type == MESA_SHADER_COMPUTE ||
|
||||
(const_state->num_driver_params >= IR3_DP_VTXID_BASE)) {
|
||||
(const_state->num_driver_params >= IR3_DP_VS(vtxid_base))) {
|
||||
upload_unit = compiler->const_upload_unit;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -21,60 +21,95 @@
|
|||
|
||||
BEGINC;
|
||||
|
||||
/* driver param indices: */
|
||||
enum ir3_driver_param {
|
||||
/* compute shader driver params: */
|
||||
IR3_DP_NUM_WORK_GROUPS_X = 0,
|
||||
IR3_DP_NUM_WORK_GROUPS_Y = 1,
|
||||
IR3_DP_NUM_WORK_GROUPS_Z = 2,
|
||||
IR3_DP_WORK_DIM = 3,
|
||||
IR3_DP_BASE_GROUP_X = 4,
|
||||
IR3_DP_BASE_GROUP_Y = 5,
|
||||
IR3_DP_BASE_GROUP_Z = 6,
|
||||
IR3_DP_CS_SUBGROUP_SIZE = 7,
|
||||
IR3_DP_LOCAL_GROUP_SIZE_X = 8,
|
||||
IR3_DP_LOCAL_GROUP_SIZE_Y = 9,
|
||||
IR3_DP_LOCAL_GROUP_SIZE_Z = 10,
|
||||
IR3_DP_SUBGROUP_ID_SHIFT = 11,
|
||||
IR3_DP_WORKGROUP_ID_X = 12,
|
||||
IR3_DP_WORKGROUP_ID_Y = 13,
|
||||
IR3_DP_WORKGROUP_ID_Z = 14,
|
||||
#define dword_offsetof(type, name) DIV_ROUND_UP(offsetof(type, name), 4)
|
||||
#define dword_sizeof(type) DIV_ROUND_UP(sizeof(type), 4)
|
||||
|
||||
/**
|
||||
* Driver params for compute shaders.
|
||||
*
|
||||
* Note, driver param structs should be size aligned to vec4
|
||||
*/
|
||||
struct ir3_driver_params_cs {
|
||||
/* NOTE: gl_NumWorkGroups should be vec4 aligned because
|
||||
* glDispatchComputeIndirect() needs to load these from
|
||||
* the info->indirect buffer. Keep that in mind when/if
|
||||
* adding any addition CS driver params.
|
||||
*/
|
||||
IR3_DP_CS_COUNT = 16, /* must be aligned to vec4 */
|
||||
|
||||
/* vertex shader driver params: */
|
||||
IR3_DP_DRAWID = 0,
|
||||
IR3_DP_VTXID_BASE = 1,
|
||||
IR3_DP_INSTID_BASE = 2,
|
||||
IR3_DP_VTXCNT_MAX = 3,
|
||||
IR3_DP_IS_INDEXED_DRAW = 4, /* Note: boolean, ie. 0 or ~0 */
|
||||
/* user-clip-plane components, up to 8x vec4's: */
|
||||
IR3_DP_UCP0_X = 5,
|
||||
/* .... */
|
||||
IR3_DP_UCP7_W = 36,
|
||||
IR3_DP_VS_COUNT = 40, /* must be aligned to vec4 */
|
||||
|
||||
/* TCS driver params: */
|
||||
IR3_DP_HS_DEFAULT_OUTER_LEVEL_X = 0,
|
||||
IR3_DP_HS_DEFAULT_OUTER_LEVEL_Y = 1,
|
||||
IR3_DP_HS_DEFAULT_OUTER_LEVEL_Z = 2,
|
||||
IR3_DP_HS_DEFAULT_OUTER_LEVEL_W = 3,
|
||||
IR3_DP_HS_DEFAULT_INNER_LEVEL_X = 4,
|
||||
IR3_DP_HS_DEFAULT_INNER_LEVEL_Y = 5,
|
||||
IR3_DP_HS_COUNT = 8, /* must be aligned to vec4 */
|
||||
|
||||
/* fragment shader driver params: */
|
||||
IR3_DP_FS_SUBGROUP_SIZE = 0,
|
||||
/* Dynamic params (that aren't known when compiling the shader) */
|
||||
IR3_DP_FS_DYNAMIC = 4,
|
||||
IR3_DP_FS_FRAG_INVOCATION_COUNT = IR3_DP_FS_DYNAMIC,
|
||||
IR3_DP_FS_FRAG_SIZE = IR3_DP_FS_DYNAMIC + 4,
|
||||
IR3_DP_FS_FRAG_OFFSET = IR3_DP_FS_DYNAMIC + 6,
|
||||
uint32_t num_work_groups_x;
|
||||
uint32_t num_work_groups_y;
|
||||
uint32_t num_work_groups_z;
|
||||
uint32_t work_dim;
|
||||
uint32_t base_group_x;
|
||||
uint32_t base_group_y;
|
||||
uint32_t base_group_z;
|
||||
uint32_t subgroup_size;
|
||||
uint32_t local_group_size_x;
|
||||
uint32_t local_group_size_y;
|
||||
uint32_t local_group_size_z;
|
||||
uint32_t subgroup_id_shift;
|
||||
uint32_t workgroup_id_x;
|
||||
uint32_t workgroup_id_y;
|
||||
uint32_t workgroup_id_z;
|
||||
uint32_t __pad;
|
||||
};
|
||||
#define IR3_DP_CS(name) dword_offsetof(struct ir3_driver_params_cs, name)
|
||||
|
||||
/**
|
||||
* Driver params for vertex shaders.
|
||||
*
|
||||
* Note, driver param structs should be size aligned to vec4
|
||||
*/
|
||||
struct ir3_driver_params_vs {
|
||||
uint32_t draw_id;
|
||||
uint32_t vtxid_base;
|
||||
uint32_t instid_base;
|
||||
uint32_t vtxcnt_max;
|
||||
uint32_t is_indexed_draw; /* Note: boolean, ie. 0 or ~0 */
|
||||
/* user-clip-plane components, up to 8x vec4's: */
|
||||
struct {
|
||||
uint32_t x;
|
||||
uint32_t y;
|
||||
uint32_t z;
|
||||
uint32_t w;
|
||||
} ucp[8];
|
||||
uint32_t __pad_37_39[3];
|
||||
};
|
||||
#define IR3_DP_VS(name) dword_offsetof(struct ir3_driver_params_vs, name)
|
||||
|
||||
/**
|
||||
* Driver params for TCS shaders.
|
||||
*
|
||||
* Note, driver param structs should be size aligned to vec4
|
||||
*/
|
||||
struct ir3_driver_params_tcs {
|
||||
uint32_t default_outer_level_x;
|
||||
uint32_t default_outer_level_y;
|
||||
uint32_t default_outer_level_z;
|
||||
uint32_t default_outer_level_w;
|
||||
uint32_t default_inner_level_x;
|
||||
uint32_t default_inner_level_y;
|
||||
uint32_t __pad_06_07[2];
|
||||
};
|
||||
#define IR3_DP_TCS(name) dword_offsetof(struct ir3_driver_params_tcs, name)
|
||||
|
||||
/**
|
||||
* Driver params for fragment shaders.
|
||||
*
|
||||
* Note, driver param structs should be size aligned to vec4
|
||||
*/
|
||||
struct ir3_driver_params_fs {
|
||||
uint32_t subgroup_size;
|
||||
uint32_t __pad_01_03[3];
|
||||
/* Dynamic params (that aren't known when compiling the shader) */
|
||||
#define IR3_DP_FS_DYNAMIC dword_offsetof(struct ir3_driver_params_fs, frag_invocation_count)
|
||||
uint32_t frag_invocation_count;
|
||||
uint32_t __pad_05_07[3];
|
||||
uint32_t frag_size;
|
||||
uint32_t __pad_09;
|
||||
uint32_t frag_offset;
|
||||
uint32_t __pad_11_12[2];
|
||||
};
|
||||
#define IR3_DP_FS(name) dword_offsetof(struct ir3_driver_params_fs, name)
|
||||
|
||||
#define IR3_MAX_SHADER_BUFFERS 32
|
||||
#define IR3_MAX_SHADER_IMAGES 32
|
||||
|
|
|
|||
|
|
@ -5423,15 +5423,15 @@ tu_emit_fdm_params(struct tu_cmd_buffer *cmd,
|
|||
struct tu_cs *cs, struct tu_shader *fs,
|
||||
unsigned num_units)
|
||||
{
|
||||
STATIC_ASSERT(IR3_DP_FS_FRAG_INVOCATION_COUNT == IR3_DP_FS_DYNAMIC);
|
||||
STATIC_ASSERT(IR3_DP_FS(frag_invocation_count) == IR3_DP_FS_DYNAMIC);
|
||||
tu_cs_emit(cs, fs->fs.per_samp ?
|
||||
cmd->vk.dynamic_graphics_state.ms.rasterization_samples : 1);
|
||||
tu_cs_emit(cs, 0);
|
||||
tu_cs_emit(cs, 0);
|
||||
tu_cs_emit(cs, 0);
|
||||
|
||||
STATIC_ASSERT(IR3_DP_FS_FRAG_SIZE == IR3_DP_FS_DYNAMIC + 4);
|
||||
STATIC_ASSERT(IR3_DP_FS_FRAG_OFFSET == IR3_DP_FS_DYNAMIC + 6);
|
||||
STATIC_ASSERT(IR3_DP_FS(frag_size) == IR3_DP_FS_DYNAMIC + 4);
|
||||
STATIC_ASSERT(IR3_DP_FS(frag_offset) == IR3_DP_FS_DYNAMIC + 6);
|
||||
if (num_units > 1) {
|
||||
if (fs->fs.has_fdm) {
|
||||
struct apply_fs_params_state state = {
|
||||
|
|
@ -5885,9 +5885,9 @@ vs_params_offset(struct tu_cmd_buffer *cmd)
|
|||
return 0;
|
||||
|
||||
/* this layout is required by CP_DRAW_INDIRECT_MULTI */
|
||||
STATIC_ASSERT(IR3_DP_DRAWID == 0);
|
||||
STATIC_ASSERT(IR3_DP_VTXID_BASE == 1);
|
||||
STATIC_ASSERT(IR3_DP_INSTID_BASE == 2);
|
||||
STATIC_ASSERT(IR3_DP_VS(draw_id) == 0);
|
||||
STATIC_ASSERT(IR3_DP_VS(vtxid_base) == 1);
|
||||
STATIC_ASSERT(IR3_DP_VS(instid_base) == 2);
|
||||
|
||||
/* 0 means disabled for CP_DRAW_INDIRECT_MULTI */
|
||||
assert(const_state->offsets.driver_param != 0);
|
||||
|
|
@ -6333,6 +6333,29 @@ struct tu_dispatch_info
|
|||
uint64_t indirect_offset;
|
||||
};
|
||||
|
||||
static inline struct ir3_driver_params_cs
|
||||
build_driver_params_cs(const struct ir3_shader_variant *variant,
|
||||
const struct tu_dispatch_info *info)
|
||||
{
|
||||
unsigned subgroup_size = variant->info.subgroup_size;
|
||||
unsigned subgroup_shift = util_logbase2(subgroup_size);
|
||||
|
||||
return (struct ir3_driver_params_cs) {
|
||||
.num_work_groups_x = info->blocks[0],
|
||||
.num_work_groups_y = info->blocks[1],
|
||||
.num_work_groups_z = info->blocks[2],
|
||||
.work_dim = 0,
|
||||
.base_group_x = info->offsets[0],
|
||||
.base_group_y = info->offsets[1],
|
||||
.base_group_z = info->offsets[2],
|
||||
.subgroup_size = subgroup_size,
|
||||
.local_group_size_x = 0,
|
||||
.local_group_size_y = 0,
|
||||
.local_group_size_z = 0,
|
||||
.subgroup_id_shift = subgroup_shift,
|
||||
};
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
static void
|
||||
tu_emit_compute_driver_params(struct tu_cmd_buffer *cmd,
|
||||
|
|
@ -6353,27 +6376,15 @@ tu_emit_compute_driver_params(struct tu_cmd_buffer *cmd,
|
|||
|
||||
bool direct_indirect_load =
|
||||
!(info->indirect_offset & 0xf) &&
|
||||
!(info->indirect && num_consts > IR3_DP_BASE_GROUP_X);
|
||||
!(info->indirect && num_consts > IR3_DP_CS(base_group_x));
|
||||
|
||||
uint64_t iova = 0;
|
||||
|
||||
if (!info->indirect) {
|
||||
uint32_t driver_params[12] = {
|
||||
[IR3_DP_NUM_WORK_GROUPS_X] = info->blocks[0],
|
||||
[IR3_DP_NUM_WORK_GROUPS_Y] = info->blocks[1],
|
||||
[IR3_DP_NUM_WORK_GROUPS_Z] = info->blocks[2],
|
||||
[IR3_DP_WORK_DIM] = 0,
|
||||
[IR3_DP_BASE_GROUP_X] = info->offsets[0],
|
||||
[IR3_DP_BASE_GROUP_Y] = info->offsets[1],
|
||||
[IR3_DP_BASE_GROUP_Z] = info->offsets[2],
|
||||
[IR3_DP_CS_SUBGROUP_SIZE] = subgroup_size,
|
||||
[IR3_DP_LOCAL_GROUP_SIZE_X] = 0,
|
||||
[IR3_DP_LOCAL_GROUP_SIZE_Y] = 0,
|
||||
[IR3_DP_LOCAL_GROUP_SIZE_Z] = 0,
|
||||
[IR3_DP_SUBGROUP_ID_SHIFT] = subgroup_shift,
|
||||
};
|
||||
struct ir3_driver_params_cs driver_params =
|
||||
build_driver_params_cs(variant, info);
|
||||
|
||||
assert(num_consts <= ARRAY_SIZE(driver_params));
|
||||
assert(num_consts <= dword_sizeof(driver_params));
|
||||
|
||||
struct tu_cs_memory consts;
|
||||
uint32_t consts_vec4 = DIV_ROUND_UP(num_consts, 4);
|
||||
|
|
@ -6382,7 +6393,7 @@ tu_emit_compute_driver_params(struct tu_cmd_buffer *cmd,
|
|||
vk_command_buffer_set_error(&cmd->vk, result);
|
||||
return;
|
||||
}
|
||||
memcpy(consts.map, driver_params, num_consts * sizeof(uint32_t));
|
||||
memcpy(consts.map, &driver_params, num_consts * sizeof(uint32_t));
|
||||
iova = consts.iova;
|
||||
} else if (direct_indirect_load) {
|
||||
iova = info->indirect->iova + info->indirect_offset;
|
||||
|
|
@ -6406,12 +6417,12 @@ tu_emit_compute_driver_params(struct tu_cmd_buffer *cmd,
|
|||
/* Fill out IR3_DP_CS_SUBGROUP_SIZE and IR3_DP_SUBGROUP_ID_SHIFT for
|
||||
* indirect dispatch.
|
||||
*/
|
||||
if (info->indirect && num_consts > IR3_DP_BASE_GROUP_X) {
|
||||
if (info->indirect && num_consts > IR3_DP_CS(base_group_x)) {
|
||||
uint32_t indirect_driver_params[8] = {
|
||||
0, 0, 0, subgroup_size,
|
||||
0, 0, 0, subgroup_shift,
|
||||
};
|
||||
bool emit_local = num_consts > IR3_DP_LOCAL_GROUP_SIZE_X;
|
||||
bool emit_local = num_consts > IR3_DP_CS(local_group_size_x);
|
||||
uint32_t emit_size = emit_local ? 8 : 4;
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 2 + emit_size);
|
||||
|
|
@ -6448,22 +6459,10 @@ tu_emit_compute_driver_params(struct tu_cmd_buffer *cmd,
|
|||
(variant->constlen - offset) * 4);
|
||||
|
||||
if (!info->indirect) {
|
||||
uint32_t driver_params[12] = {
|
||||
[IR3_DP_NUM_WORK_GROUPS_X] = info->blocks[0],
|
||||
[IR3_DP_NUM_WORK_GROUPS_Y] = info->blocks[1],
|
||||
[IR3_DP_NUM_WORK_GROUPS_Z] = info->blocks[2],
|
||||
[IR3_DP_WORK_DIM] = 0,
|
||||
[IR3_DP_BASE_GROUP_X] = info->offsets[0],
|
||||
[IR3_DP_BASE_GROUP_Y] = info->offsets[1],
|
||||
[IR3_DP_BASE_GROUP_Z] = info->offsets[2],
|
||||
[IR3_DP_CS_SUBGROUP_SIZE] = subgroup_size,
|
||||
[IR3_DP_LOCAL_GROUP_SIZE_X] = 0,
|
||||
[IR3_DP_LOCAL_GROUP_SIZE_Y] = 0,
|
||||
[IR3_DP_LOCAL_GROUP_SIZE_Z] = 0,
|
||||
[IR3_DP_SUBGROUP_ID_SHIFT] = subgroup_shift,
|
||||
};
|
||||
struct ir3_driver_params_cs driver_params =
|
||||
build_driver_params_cs(variant, info);
|
||||
|
||||
assert(num_consts <= ARRAY_SIZE(driver_params));
|
||||
assert(num_consts <= dword_sizeof(driver_params));
|
||||
|
||||
/* push constants */
|
||||
tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3 + num_consts);
|
||||
|
|
@ -6474,9 +6473,7 @@ tu_emit_compute_driver_params(struct tu_cmd_buffer *cmd,
|
|||
CP_LOAD_STATE6_0_NUM_UNIT(num_consts / 4));
|
||||
tu_cs_emit(cs, 0);
|
||||
tu_cs_emit(cs, 0);
|
||||
uint32_t i;
|
||||
for (i = 0; i < num_consts; i++)
|
||||
tu_cs_emit(cs, driver_params[i]);
|
||||
tu_cs_emit_array(cs, (uint32_t *)&driver_params, num_consts);
|
||||
} else if (!(info->indirect_offset & 0xf)) {
|
||||
tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3);
|
||||
tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(offset) |
|
||||
|
|
@ -6518,21 +6515,21 @@ tu_emit_compute_driver_params(struct tu_cmd_buffer *cmd,
|
|||
/* Fill out IR3_DP_CS_SUBGROUP_SIZE and IR3_DP_SUBGROUP_ID_SHIFT for
|
||||
* indirect dispatch.
|
||||
*/
|
||||
if (info->indirect && num_consts > IR3_DP_BASE_GROUP_X) {
|
||||
bool emit_local = num_consts > IR3_DP_LOCAL_GROUP_SIZE_X;
|
||||
if (info->indirect && num_consts > IR3_DP_CS(base_group_x)) {
|
||||
bool emit_local = num_consts > IR3_DP_CS(local_group_size_x);
|
||||
tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 7 + (emit_local ? 4 : 0));
|
||||
tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(offset + (IR3_DP_BASE_GROUP_X / 4)) |
|
||||
tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(offset + (IR3_DP_CS(base_group_x) / 4)) |
|
||||
CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
|
||||
CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
|
||||
CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) |
|
||||
CP_LOAD_STATE6_0_NUM_UNIT((num_consts - IR3_DP_BASE_GROUP_X) / 4));
|
||||
CP_LOAD_STATE6_0_NUM_UNIT((num_consts - IR3_DP_CS(base_group_x)) / 4));
|
||||
tu_cs_emit_qw(cs, 0);
|
||||
tu_cs_emit(cs, 0); /* BASE_GROUP_X */
|
||||
tu_cs_emit(cs, 0); /* BASE_GROUP_Y */
|
||||
tu_cs_emit(cs, 0); /* BASE_GROUP_Z */
|
||||
tu_cs_emit(cs, subgroup_size);
|
||||
if (emit_local) {
|
||||
assert(num_consts == align(IR3_DP_SUBGROUP_ID_SHIFT, 4));
|
||||
assert(num_consts == align(IR3_DP_CS(subgroup_id_shift), 4));
|
||||
tu_cs_emit(cs, 0); /* LOCAL_GROUP_SIZE_X */
|
||||
tu_cs_emit(cs, 0); /* LOCAL_GROUP_SIZE_Y */
|
||||
tu_cs_emit(cs, 0); /* LOCAL_GROUP_SIZE_Z */
|
||||
|
|
|
|||
|
|
@ -511,9 +511,9 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
|
|||
if (!dev->compiler->load_shader_consts_via_preamble)
|
||||
return false;
|
||||
|
||||
enum ir3_driver_param param =
|
||||
unsigned param =
|
||||
instr->intrinsic == nir_intrinsic_load_frag_size_ir3 ?
|
||||
IR3_DP_FS_FRAG_SIZE : IR3_DP_FS_FRAG_OFFSET;
|
||||
IR3_DP_FS(frag_size) : IR3_DP_FS(frag_offset);
|
||||
|
||||
unsigned offset = param - IR3_DP_FS_DYNAMIC;
|
||||
|
||||
|
|
@ -531,7 +531,7 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
|
|||
|
||||
nir_def *result =
|
||||
ir3_load_driver_ubo(b, 1, &shader->const_state.fdm_ubo,
|
||||
IR3_DP_FS_FRAG_INVOCATION_COUNT -
|
||||
IR3_DP_FS(frag_invocation_count) -
|
||||
IR3_DP_FS_DYNAMIC);
|
||||
|
||||
nir_def_replace(&instr->def, result);
|
||||
|
|
|
|||
|
|
@ -59,12 +59,12 @@ cs_program_emit(struct fd_ringbuffer *ring, struct ir3_shader_variant *v)
|
|||
num_wg_id, work_dim_id, unused_id;
|
||||
local_invocation_id =
|
||||
ir3_find_sysval_regid(v, SYSTEM_VALUE_LOCAL_INVOCATION_ID);
|
||||
work_group_id = driver_param_base + IR3_DP_WORKGROUP_ID_X;
|
||||
num_wg_id = driver_param_base + IR3_DP_NUM_WORK_GROUPS_X;
|
||||
local_group_size_id = driver_param_base + IR3_DP_LOCAL_GROUP_SIZE_X;
|
||||
work_dim_id = driver_param_base + IR3_DP_WORK_DIM;
|
||||
work_group_id = driver_param_base + IR3_DP_CS(workgroup_id_x);
|
||||
num_wg_id = driver_param_base + IR3_DP_CS(num_work_groups_x);
|
||||
local_group_size_id = driver_param_base + IR3_DP_CS(local_group_size_x);
|
||||
work_dim_id = driver_param_base + IR3_DP_CS(work_dim);
|
||||
/* NOTE: At some point we'll want to use this, it's probably WGOFFSETCONSTID */
|
||||
unused_id = driver_param_base + IR3_DP_BASE_GROUP_X;
|
||||
unused_id = driver_param_base + IR3_DP_CS(base_group_x);
|
||||
|
||||
OUT_PKT0(ring, REG_A4XX_HLSQ_CL_CONTROL_0, 2);
|
||||
OUT_RING(ring, A4XX_HLSQ_CL_CONTROL_0_WGIDCONSTID(work_group_id) |
|
||||
|
|
|
|||
|
|
@ -281,19 +281,29 @@ fd6_build_driver_params(struct fd6_emit *emit)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
unsigned size_dwords = num_dp * (4 + IR3_DP_VS_COUNT); /* 4dw PKT7 header */
|
||||
bool needs_ucp = !!emit->vs->key.ucp_enables;
|
||||
|
||||
if (PIPELINE == HAS_TESS_GS) {
|
||||
needs_ucp |= emit->gs && emit->gs->key.ucp_enables;
|
||||
needs_ucp |= emit->hs && emit->hs->key.ucp_enables;
|
||||
needs_ucp |= emit->ds && emit->ds->key.ucp_enables;
|
||||
}
|
||||
|
||||
struct ir3_driver_params_vs p =
|
||||
ir3_build_driver_params_vs(ctx, emit->info, emit->draw, emit->draw_id, needs_ucp);
|
||||
|
||||
unsigned size_dwords =
|
||||
num_dp * (4 + dword_sizeof(p)); /* 4dw PKT7 header */
|
||||
struct fd_ringbuffer *dpconstobj = fd_submit_new_ringbuffer(
|
||||
ctx->batch->submit, size_dwords * 4, FD_RINGBUFFER_STREAMING);
|
||||
|
||||
if (emit->vs->need_driver_params) {
|
||||
ir3_emit_driver_params(emit->vs, dpconstobj, ctx, emit->info,
|
||||
emit->indirect, emit->draw, emit->draw_id);
|
||||
ir3_emit_driver_params(emit->vs, dpconstobj, ctx, emit->info, emit->indirect, &p);
|
||||
}
|
||||
|
||||
if (PIPELINE == HAS_TESS_GS) {
|
||||
if (emit->gs && emit->gs->need_driver_params) {
|
||||
ir3_emit_driver_params(emit->gs, dpconstobj, ctx, emit->info,
|
||||
emit->indirect, emit->draw, 0);
|
||||
ir3_emit_driver_params(emit->gs, dpconstobj, ctx, emit->info, emit->indirect, &p);
|
||||
}
|
||||
|
||||
if (emit->hs && emit->hs->need_driver_params) {
|
||||
|
|
@ -301,8 +311,7 @@ fd6_build_driver_params(struct fd6_emit *emit)
|
|||
}
|
||||
|
||||
if (emit->ds && emit->ds->need_driver_params) {
|
||||
ir3_emit_driver_params(emit->ds, dpconstobj, ctx, emit->info,
|
||||
emit->indirect, emit->draw, 0);
|
||||
ir3_emit_driver_params(emit->ds, dpconstobj, ctx, emit->info, emit->indirect, &p);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -430,35 +430,44 @@ emit_kernel_params(struct fd_context *ctx, const struct ir3_shader_variant *v,
|
|||
}
|
||||
}
|
||||
|
||||
static inline struct ir3_driver_params_vs
|
||||
ir3_build_driver_params_vs(struct fd_context *ctx,
|
||||
const struct pipe_draw_info *info,
|
||||
const struct pipe_draw_start_count_bias *draw,
|
||||
uint32_t draw_id, bool needs_ucp)
|
||||
assert_dt
|
||||
{
|
||||
struct ir3_driver_params_vs vertex_params = {
|
||||
.draw_id = draw_id, /* filled by hw (CP_DRAW_INDIRECT_MULTI) */
|
||||
.vtxid_base = info->index_size ? draw->index_bias : draw->start,
|
||||
.instid_base = info->start_instance,
|
||||
.vtxcnt_max = ctx->streamout.max_tf_vtx,
|
||||
.is_indexed_draw = info->index_size != 0 ? ~0 : 0,
|
||||
};
|
||||
if (needs_ucp) {
|
||||
struct pipe_clip_state *ucp = &ctx->ucp;
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(vertex_params.ucp); i++) {
|
||||
vertex_params.ucp[i].x = fui(ucp->ucp[i][0]);
|
||||
vertex_params.ucp[i].y = fui(ucp->ucp[i][1]);
|
||||
vertex_params.ucp[i].z = fui(ucp->ucp[i][2]);
|
||||
vertex_params.ucp[i].w = fui(ucp->ucp[i][3]);
|
||||
}
|
||||
}
|
||||
return vertex_params;
|
||||
}
|
||||
|
||||
static inline void
|
||||
ir3_emit_driver_params(const struct ir3_shader_variant *v,
|
||||
struct fd_ringbuffer *ring, struct fd_context *ctx,
|
||||
const struct pipe_draw_info *info,
|
||||
const struct pipe_draw_indirect_info *indirect,
|
||||
const struct pipe_draw_start_count_bias *draw,
|
||||
const uint32_t draw_id) assert_dt
|
||||
const struct ir3_driver_params_vs *vertex_params)
|
||||
assert_dt
|
||||
{
|
||||
assert(v->need_driver_params);
|
||||
|
||||
const struct ir3_const_state *const_state = ir3_const_state(v);
|
||||
uint32_t offset = const_state->offsets.driver_param;
|
||||
uint32_t vertex_params[IR3_DP_VS_COUNT] = {
|
||||
[IR3_DP_DRAWID] = draw_id, /* filled by hw (CP_DRAW_INDIRECT_MULTI) */
|
||||
[IR3_DP_VTXID_BASE] = info->index_size ? draw->index_bias : draw->start,
|
||||
[IR3_DP_INSTID_BASE] = info->start_instance,
|
||||
[IR3_DP_VTXCNT_MAX] = ctx->streamout.max_tf_vtx,
|
||||
[IR3_DP_IS_INDEXED_DRAW] = info->index_size != 0 ? ~0 : 0,
|
||||
};
|
||||
if (v->key.ucp_enables) {
|
||||
struct pipe_clip_state *ucp = &ctx->ucp;
|
||||
unsigned pos = IR3_DP_UCP0_X;
|
||||
for (unsigned i = 0; pos <= IR3_DP_UCP7_W; i++) {
|
||||
for (unsigned j = 0; j < 4; j++) {
|
||||
vertex_params[pos] = fui(ucp->ucp[i][j]);
|
||||
pos++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Only emit as many params as needed, i.e. up to the highest enabled UCP
|
||||
* plane. However a binning pass may drop even some of these, so limit to
|
||||
|
|
@ -466,7 +475,7 @@ ir3_emit_driver_params(const struct ir3_shader_variant *v,
|
|||
*/
|
||||
const uint32_t vertex_params_size =
|
||||
MIN2(const_state->num_driver_params, (v->constlen - offset) * 4);
|
||||
assert(vertex_params_size <= IR3_DP_VS_COUNT);
|
||||
assert(vertex_params_size <= dword_sizeof(*vertex_params));
|
||||
|
||||
/* for indirect draw, we need to copy VTXID_BASE from
|
||||
* indirect-draw parameters buffer.. which is annoying
|
||||
|
|
@ -501,15 +510,28 @@ ir3_emit_driver_params(const struct ir3_shader_variant *v,
|
|||
|
||||
pipe_resource_reference(&vertex_params_rsc, NULL);
|
||||
} else {
|
||||
emit_const_user(ring, v, offset * 4, vertex_params_size, vertex_params);
|
||||
emit_const_user(ring, v, offset * 4, vertex_params_size, (uint32_t *)vertex_params);
|
||||
}
|
||||
|
||||
/* if needed, emit stream-out buffer addresses: */
|
||||
if (vertex_params[IR3_DP_VTXCNT_MAX] > 0) {
|
||||
if (vertex_params->vtxcnt_max > 0) {
|
||||
emit_tfbos(ctx, v, ring);
|
||||
}
|
||||
}
|
||||
|
||||
static inline struct ir3_driver_params_tcs
|
||||
ir3_build_driver_params_tcs(struct fd_context *ctx)
|
||||
assert_dt
|
||||
{
|
||||
return (struct ir3_driver_params_tcs) {
|
||||
.default_outer_level_x = fui(ctx->default_outer_level[0]),
|
||||
.default_outer_level_y = fui(ctx->default_outer_level[1]),
|
||||
.default_outer_level_z = fui(ctx->default_outer_level[2]),
|
||||
.default_outer_level_w = fui(ctx->default_outer_level[3]),
|
||||
.default_inner_level_x = fui(ctx->default_inner_level[0]),
|
||||
.default_inner_level_y = fui(ctx->default_inner_level[1]),
|
||||
};
|
||||
}
|
||||
|
||||
static inline void
|
||||
ir3_emit_hs_driver_params(const struct ir3_shader_variant *v,
|
||||
|
|
@ -521,20 +543,13 @@ ir3_emit_hs_driver_params(const struct ir3_shader_variant *v,
|
|||
|
||||
const struct ir3_const_state *const_state = ir3_const_state(v);
|
||||
uint32_t offset = const_state->offsets.driver_param;
|
||||
uint32_t hs_params[IR3_DP_HS_COUNT] = {
|
||||
[IR3_DP_HS_DEFAULT_OUTER_LEVEL_X] = fui(ctx->default_outer_level[0]),
|
||||
[IR3_DP_HS_DEFAULT_OUTER_LEVEL_Y] = fui(ctx->default_outer_level[1]),
|
||||
[IR3_DP_HS_DEFAULT_OUTER_LEVEL_Z] = fui(ctx->default_outer_level[2]),
|
||||
[IR3_DP_HS_DEFAULT_OUTER_LEVEL_W] = fui(ctx->default_outer_level[3]),
|
||||
[IR3_DP_HS_DEFAULT_INNER_LEVEL_X] = fui(ctx->default_inner_level[0]),
|
||||
[IR3_DP_HS_DEFAULT_INNER_LEVEL_Y] = fui(ctx->default_inner_level[1]),
|
||||
};
|
||||
struct ir3_driver_params_tcs hs_params = ir3_build_driver_params_tcs(ctx);
|
||||
|
||||
const uint32_t hs_params_size =
|
||||
MIN2(const_state->num_driver_params, (v->constlen - offset) * 4);
|
||||
assert(hs_params_size <= IR3_DP_HS_COUNT);
|
||||
assert(hs_params_size <= dword_sizeof(hs_params));
|
||||
|
||||
emit_const_user(ring, v, offset * 4, hs_params_size, hs_params);
|
||||
emit_const_user(ring, v, offset * 4, hs_params_size, (uint32_t *)&hs_params);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -552,7 +567,11 @@ ir3_emit_vs_consts(const struct ir3_shader_variant *v,
|
|||
/* emit driver params every time: */
|
||||
if (info && v->need_driver_params) {
|
||||
ring_wfi(ctx->batch, ring);
|
||||
ir3_emit_driver_params(v, ring, ctx, info, indirect, draw, 0);
|
||||
|
||||
struct ir3_driver_params_vs p =
|
||||
ir3_build_driver_params_vs(ctx, info, draw, 0, v->key.ucp_enables);
|
||||
|
||||
ir3_emit_driver_params(v, ring, ctx, info, indirect, &p);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -565,6 +584,29 @@ ir3_emit_fs_consts(const struct ir3_shader_variant *v,
|
|||
emit_common_consts(v, ring, ctx, PIPE_SHADER_FRAGMENT);
|
||||
}
|
||||
|
||||
static inline struct ir3_driver_params_cs
|
||||
ir3_build_driver_params_cs(const struct ir3_shader_variant *v,
|
||||
const struct pipe_grid_info *info)
|
||||
{
|
||||
return (struct ir3_driver_params_cs) {
|
||||
.num_work_groups_x = info->grid[0],
|
||||
.num_work_groups_y = info->grid[1],
|
||||
.num_work_groups_z = info->grid[2],
|
||||
.work_dim = info->work_dim,
|
||||
.base_group_x = info->grid_base[0],
|
||||
.base_group_y = info->grid_base[1],
|
||||
.base_group_z = info->grid_base[2],
|
||||
.subgroup_size = v->info.subgroup_size,
|
||||
.local_group_size_x = info->block[0],
|
||||
.local_group_size_y = info->block[1],
|
||||
.local_group_size_z = info->block[2],
|
||||
.subgroup_id_shift = util_logbase2(v->info.subgroup_size),
|
||||
.workgroup_id_x = 0, // TODO
|
||||
.workgroup_id_y = 0, // TODO
|
||||
.workgroup_id_z = 0, // TODO
|
||||
};
|
||||
}
|
||||
|
||||
static inline void
|
||||
ir3_emit_cs_driver_params(const struct ir3_shader_variant *v,
|
||||
struct fd_ringbuffer *ring, struct fd_context *ctx,
|
||||
|
|
@ -614,27 +656,12 @@ ir3_emit_cs_driver_params(const struct ir3_shader_variant *v,
|
|||
} else {
|
||||
// TODO some of these are not part of the indirect state.. so we
|
||||
// need to emit some of this directly in both cases.
|
||||
uint32_t compute_params[IR3_DP_CS_COUNT] = {
|
||||
[IR3_DP_NUM_WORK_GROUPS_X] = info->grid[0],
|
||||
[IR3_DP_NUM_WORK_GROUPS_Y] = info->grid[1],
|
||||
[IR3_DP_NUM_WORK_GROUPS_Z] = info->grid[2],
|
||||
[IR3_DP_WORK_DIM] = info->work_dim,
|
||||
[IR3_DP_BASE_GROUP_X] = info->grid_base[0],
|
||||
[IR3_DP_BASE_GROUP_Y] = info->grid_base[1],
|
||||
[IR3_DP_BASE_GROUP_Z] = info->grid_base[2],
|
||||
[IR3_DP_CS_SUBGROUP_SIZE] = v->info.subgroup_size,
|
||||
[IR3_DP_LOCAL_GROUP_SIZE_X] = info->block[0],
|
||||
[IR3_DP_LOCAL_GROUP_SIZE_Y] = info->block[1],
|
||||
[IR3_DP_LOCAL_GROUP_SIZE_Z] = info->block[2],
|
||||
[IR3_DP_SUBGROUP_ID_SHIFT] = util_logbase2(v->info.subgroup_size),
|
||||
[IR3_DP_WORKGROUP_ID_X] = 0, // TODO
|
||||
[IR3_DP_WORKGROUP_ID_Y] = 0, // TODO
|
||||
[IR3_DP_WORKGROUP_ID_Z] = 0, // TODO
|
||||
};
|
||||
struct ir3_driver_params_cs compute_params =
|
||||
ir3_build_driver_params_cs(v, info);
|
||||
uint32_t size =
|
||||
MIN2(const_state->num_driver_params, v->constlen * 4 - offset * 4);
|
||||
|
||||
emit_const_user(ring, v, offset * 4, size, compute_params);
|
||||
emit_const_user(ring, v, offset * 4, size, (uint32_t *)&compute_params);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue