turnip,ir3/a750: Implement consts loading via preamble
A750 expects driver params to be loaded through the preamble. The old path does work, but misbehaves when the same LOAD_STATE is reused across several draw calls (it seems that LOAD_STATE is executed only for the first draw call). To solve this we now lower driver params to UBOs and let NIR deal with them.

Notes:
- VS params are loaded via the old path, since the blob does the same and no issues have been observed there.
- FDM is not supported at the moment.
- For now, driver params data is emitted via CP_NOP, because it is tricky to allocate space for the data (it is emitted when we are already in sub_cs).

Co-Authored-By: Connor Abbott <cwabbott0@gmail.com>
Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26934>
This commit is contained in:
parent 7429ca3115
commit 76e417ca59
24 changed files with 765 additions and 287 deletions
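Most of the turnip hunks below repeat one packet sequence whenever they bind such a driver-filled UBO: put the payload somewhere GPU-visible, then emit a CP_LOAD_STATE6 with ST6_UBO pointing UBO slot `idx` at it. A condensed sketch of that recurring sequence, assembled from the hunks below (the helper and its name are ours, not part of the commit):

/* Bind `num_dwords` dwords at `iova` as UBO `idx` for one shader stage.
 * Mirrors the five-dword CP_LOAD_STATE6 sequence repeated in this commit. */
static void
emit_driver_ubo(struct tu_cs *cs, uint32_t opcode, enum a6xx_state_block sb,
                uint32_t idx, uint32_t num_dwords, uint64_t iova)
{
   int size_vec4s = DIV_ROUND_UP(num_dwords, 4);   /* UBO sizes are in vec4s */
   tu_cs_emit_pkt7(cs, opcode, 5);
   tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(idx) |
                  CP_LOAD_STATE6_0_STATE_TYPE(ST6_UBO) |
                  CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
                  CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
                  CP_LOAD_STATE6_0_NUM_UNIT(1));
   tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
   tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
   /* The UBO descriptor itself: 64-bit address plus size in the top bits. */
   tu_cs_emit_qw(cs, iova | ((uint64_t)A6XX_UBO_1_SIZE(size_vec4s) << 32));
}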
@@ -211,6 +211,9 @@ struct fd_dev_info {
        * command buffers. We copy this dispatch as is.
        */
       bool cmdbuf_start_a725_quirk;
+
+      bool load_inline_uniforms_via_preamble_ldgk;
+      bool load_shader_consts_via_preamble;
    } a7xx;
 };
@@ -791,6 +791,8 @@ a7xx_740 = A7XXProps(

 a7xx_750 = A7XXProps(
     has_event_write_sample_count = True,
+    load_inline_uniforms_via_preamble_ldgk = True,
+    load_shader_consts_via_preamble = True,
 )

 a730_magic_regs = dict(
@@ -210,6 +210,7 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id,

       compiler->has_fs_tex_prefetch = dev_info->a6xx.has_fs_tex_prefetch;
       compiler->stsc_duplication_quirk = dev_info->a7xx.stsc_duplication_quirk;
+      compiler->load_shader_consts_via_preamble = dev_info->a7xx.load_shader_consts_via_preamble;
    } else {
       compiler->max_const_pipeline = 512;
       compiler->max_const_geom = 512;
@@ -249,6 +249,8 @@ struct ir3_compiler {
    bool has_fs_tex_prefetch;

    bool stsc_duplication_quirk;
+
+   bool load_shader_consts_via_preamble;
 };

 void ir3_compiler_destroy(struct ir3_compiler *compiler);
@@ -160,6 +160,9 @@ static bool
 lower_immed(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr, unsigned n,
             struct ir3_register *reg, unsigned new_flags)
 {
+   if (ctx->shader->compiler->load_shader_consts_via_preamble)
+      return false;
+
    if (!(new_flags & IR3_REG_IMMED))
       return false;
@@ -77,6 +77,15 @@ apply_ss(struct ir3_instruction *instr,
    state->needs_ss_for_const = false;
 }

+static inline void
+apply_sy(struct ir3_instruction *instr,
+         struct ir3_legalize_state *state,
+         bool mergedregs)
+{
+   instr->flags |= IR3_INSTR_SY;
+   regmask_init(&state->needs_sy, mergedregs);
+}
+
 /* We want to evaluate each block from the position of any other
  * predecessor block, in order that the flags set are the union of
  * all possible program paths.
@@ -176,9 +185,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)

       if ((last_n && is_barrier(last_n)) || n->opc == OPC_SHPE) {
          apply_ss(n, state, mergedregs);
-
-         n->flags |= IR3_INSTR_SY;
-         regmask_init(&state->needs_sy, mergedregs);
+         apply_sy(n, state, mergedregs);
          last_input_needs_ss = false;
       }
@@ -211,14 +218,15 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
             }

             if (regmask_get(&state->needs_sy, reg)) {
-               n->flags |= IR3_INSTR_SY;
-               regmask_init(&state->needs_sy, mergedregs);
+               apply_sy(n, state, mergedregs);
             }
-         } else if ((reg->flags & IR3_REG_CONST) && state->needs_ss_for_const) {
-            apply_ss(n, state, mergedregs);
-            last_input_needs_ss = false;
+         } else if ((reg->flags & IR3_REG_CONST)) {
+            if (state->needs_ss_for_const) {
+               apply_ss(n, state, mergedregs);
+               last_input_needs_ss = false;
+            }
          }
       }

       foreach_dst (reg, n) {
          if (regmask_get(&state->needs_ss_war, reg)) {
@@ -31,6 +31,59 @@
 #include "ir3_nir.h"
 #include "ir3_shader.h"

+
+nir_def *
+ir3_get_driver_ubo(nir_builder *b, struct ir3_driver_ubo *ubo)
+{
+   /* Pick a UBO index to use as our constant data. Skip UBO 0 since that's
+    * reserved for gallium's cb0.
+    */
+   if (ubo->idx == -1) {
+      if (b->shader->info.num_ubos == 0)
+         b->shader->info.num_ubos++;
+      ubo->idx = b->shader->info.num_ubos++;
+   } else {
+      assert(ubo->idx != 0);
+      /* Binning shader shared ir3_driver_ubo definitions but not shader info */
+      b->shader->info.num_ubos = MAX2(b->shader->info.num_ubos, ubo->idx + 1);
+   }
+
+   return nir_imm_int(b, ubo->idx);
+}
+
+nir_def *
+ir3_load_driver_ubo(nir_builder *b, unsigned components,
+                    struct ir3_driver_ubo *ubo,
+                    unsigned offset)
+{
+   ubo->size = MAX2(ubo->size, offset + components);
+
+   return nir_load_ubo(b, components, 32, ir3_get_driver_ubo(b, ubo),
+                       nir_imm_int(b, offset * sizeof(uint32_t)),
+                       .align_mul = 16,
+                       .align_offset = (offset % 4) * sizeof(uint32_t),
+                       .range_base = offset * sizeof(uint32_t),
+                       .range = components * sizeof(uint32_t));
+}
+
+nir_def *
+ir3_load_driver_ubo_indirect(nir_builder *b, unsigned components,
+                             struct ir3_driver_ubo *ubo,
+                             unsigned base, nir_def *offset,
+                             unsigned range)
+{
+   ubo->size = MAX2(ubo->size, base + components + range * 4);
+
+   return nir_load_ubo(b, components, 32, ir3_get_driver_ubo(b, ubo),
+                       nir_iadd(b, nir_imul24(b, offset, nir_imm_int(b, 16)),
+                                nir_imm_int(b, base * sizeof(uint32_t))),
+                       .align_mul = 16,
+                       .align_offset = (base % 4) * sizeof(uint32_t),
+                       .range_base = base * sizeof(uint32_t),
+                       .range = components * sizeof(uint32_t) +
+                                (range - 1) * 16);
+}
+
 static bool
 ir3_nir_should_vectorize_mem(unsigned align_mul, unsigned align_offset,
                              unsigned bit_size, unsigned num_components,
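A hedged usage sketch of these helpers (hypothetical pass code, not taken from the commit): rewriting a three-component driver-param intrinsic into a read of the driver-params UBO boils down to:

/* Illustrative only: `intr` is e.g. a load_num_workgroups; the three dwords
 * land at IR3_DP_NUM_WORK_GROUPS_X in the driver-params UBO. */
b->cursor = nir_before_instr(&intr->instr);
nir_def *val = ir3_load_driver_ubo(b, 3, &const_state->driver_params_ubo,
                                   IR3_DP_NUM_WORK_GROUPS_X);
nir_def_rewrite_uses(&intr->def, val);
nir_instr_remove(&intr->instr);

Note that ir3_load_driver_ubo also bumps ubo->size as a side effect; that is how the driver later knows how many dwords it must upload (tu_emit_compute_driver_params below checks driver_params_ubo.size before emitting anything).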
@@ -742,6 +795,9 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)

    OPT(s, ir3_nir_opt_subgroups, so);

+   if (so->compiler->load_shader_consts_via_preamble)
+      progress |= OPT(s, ir3_nir_lower_driver_params_to_ubo, so);
+
    /* Do the preamble before analysing UBO ranges, because it's usually
     * higher-value and because it can result in eliminating some indirect UBO
     * accesses where otherwise we'd have to push the whole range. However we
@@ -844,6 +900,75 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)
    ir3_setup_const_state(s, so, ir3_const_state(so));
 }

+bool
+ir3_get_driver_param_info(const nir_shader *shader, nir_intrinsic_instr *intr,
+                          struct driver_param_info *param_info)
+{
+   switch (intr->intrinsic) {
+   case nir_intrinsic_load_base_workgroup_id:
+      param_info->offset = IR3_DP_BASE_GROUP_X;
+      break;
+   case nir_intrinsic_load_num_workgroups:
+      param_info->offset = IR3_DP_NUM_WORK_GROUPS_X;
+      break;
+   case nir_intrinsic_load_workgroup_size:
+      param_info->offset = IR3_DP_LOCAL_GROUP_SIZE_X;
+      break;
+   case nir_intrinsic_load_subgroup_size:
+      assert(shader->info.stage == MESA_SHADER_COMPUTE ||
+             shader->info.stage == MESA_SHADER_FRAGMENT);
+      if (shader->info.stage == MESA_SHADER_COMPUTE) {
+         param_info->offset = IR3_DP_CS_SUBGROUP_SIZE;
+      } else {
+         param_info->offset = IR3_DP_FS_SUBGROUP_SIZE;
+      }
+      break;
+   case nir_intrinsic_load_subgroup_id_shift_ir3:
+      param_info->offset = IR3_DP_SUBGROUP_ID_SHIFT;
+      break;
+   case nir_intrinsic_load_work_dim:
+      param_info->offset = IR3_DP_WORK_DIM;
+      break;
+   case nir_intrinsic_load_base_vertex:
+   case nir_intrinsic_load_first_vertex:
+      param_info->offset = IR3_DP_VTXID_BASE;
+      break;
+   case nir_intrinsic_load_is_indexed_draw:
+      param_info->offset = IR3_DP_IS_INDEXED_DRAW;
+      break;
+   case nir_intrinsic_load_draw_id:
+      param_info->offset = IR3_DP_DRAWID;
+      break;
+   case nir_intrinsic_load_base_instance:
+      param_info->offset = IR3_DP_INSTID_BASE;
+      break;
+   case nir_intrinsic_load_user_clip_plane: {
+      uint32_t idx = nir_intrinsic_ucp_id(intr);
+      param_info->offset = IR3_DP_UCP0_X + 4 * idx;
+      break;
+   }
+   case nir_intrinsic_load_tess_level_outer_default:
+      param_info->offset = IR3_DP_HS_DEFAULT_OUTER_LEVEL_X;
+      break;
+   case nir_intrinsic_load_tess_level_inner_default:
+      param_info->offset = IR3_DP_HS_DEFAULT_INNER_LEVEL_X;
+      break;
+   case nir_intrinsic_load_frag_size_ir3:
+      param_info->offset = IR3_DP_FS_FRAG_SIZE;
+      break;
+   case nir_intrinsic_load_frag_offset_ir3:
+      param_info->offset = IR3_DP_FS_FRAG_OFFSET;
+      break;
+   case nir_intrinsic_load_frag_invocation_count:
+      param_info->offset = IR3_DP_FS_FRAG_INVOCATION_COUNT;
+      break;
+   default:
+      return false;
+   }
+
+   return true;
+}
+
 static void
 ir3_nir_scan_driver_consts(struct ir3_compiler *compiler, nir_shader *shader, struct ir3_const_state *layout)
 {
@@ -877,84 +1002,16 @@ ir3_nir_scan_driver_consts(struct ir3_compiler *compiler, nir_shader *shader, st
                layout->image_dims.count += 3; /* three const per */
             }
             break;
-         case nir_intrinsic_load_base_vertex:
-         case nir_intrinsic_load_first_vertex:
-            layout->num_driver_params =
-               MAX2(layout->num_driver_params, IR3_DP_VTXID_BASE + 1);
-            break;
-         case nir_intrinsic_load_is_indexed_draw:
-            layout->num_driver_params =
-               MAX2(layout->num_driver_params, IR3_DP_IS_INDEXED_DRAW + 1);
-            break;
-         case nir_intrinsic_load_base_instance:
-            layout->num_driver_params =
-               MAX2(layout->num_driver_params, IR3_DP_INSTID_BASE + 1);
-            break;
-         case nir_intrinsic_load_user_clip_plane:
-            idx = nir_intrinsic_ucp_id(intr);
-            layout->num_driver_params = MAX2(layout->num_driver_params,
-                                             IR3_DP_UCP0_X + (idx + 1) * 4);
-            break;
-         case nir_intrinsic_load_num_workgroups:
-            layout->num_driver_params =
-               MAX2(layout->num_driver_params, IR3_DP_NUM_WORK_GROUPS_Z + 1);
-            break;
-         case nir_intrinsic_load_workgroup_id:
-            if (!compiler->has_shared_regfile) {
-               layout->num_driver_params =
-                  MAX2(layout->num_driver_params, IR3_DP_WORKGROUP_ID_Z + 1);
-            }
-            break;
-         case nir_intrinsic_load_workgroup_size:
-            layout->num_driver_params = MAX2(layout->num_driver_params,
-                                             IR3_DP_LOCAL_GROUP_SIZE_Z + 1);
-            break;
-         case nir_intrinsic_load_base_workgroup_id:
-            layout->num_driver_params =
-               MAX2(layout->num_driver_params, IR3_DP_BASE_GROUP_Z + 1);
-            break;
-         case nir_intrinsic_load_subgroup_size: {
-            assert(shader->info.stage == MESA_SHADER_COMPUTE ||
-                   shader->info.stage == MESA_SHADER_FRAGMENT);
-            enum ir3_driver_param size = shader->info.stage == MESA_SHADER_COMPUTE ?
-               IR3_DP_CS_SUBGROUP_SIZE : IR3_DP_FS_SUBGROUP_SIZE;
-            layout->num_driver_params =
-               MAX2(layout->num_driver_params, size + 1);
-            break;
-         }
-         case nir_intrinsic_load_subgroup_id_shift_ir3:
-            layout->num_driver_params =
-               MAX2(layout->num_driver_params, IR3_DP_SUBGROUP_ID_SHIFT + 1);
-            break;
-         case nir_intrinsic_load_draw_id:
-            layout->num_driver_params =
-               MAX2(layout->num_driver_params, IR3_DP_DRAWID + 1);
-            break;
-         case nir_intrinsic_load_tess_level_outer_default:
-            layout->num_driver_params = MAX2(layout->num_driver_params,
-                                             IR3_DP_HS_DEFAULT_OUTER_LEVEL_W + 1);
-            break;
-         case nir_intrinsic_load_tess_level_inner_default:
-            layout->num_driver_params = MAX2(layout->num_driver_params,
-                                             IR3_DP_HS_DEFAULT_INNER_LEVEL_Y + 1);
-            break;
-         case nir_intrinsic_load_frag_size_ir3:
-            layout->num_driver_params = MAX2(layout->num_driver_params,
-                                             IR3_DP_FS_FRAG_SIZE + 2 +
-                                             (nir_intrinsic_range(intr) - 1) * 4);
-            break;
-         case nir_intrinsic_load_frag_offset_ir3:
-            layout->num_driver_params = MAX2(layout->num_driver_params,
-                                             IR3_DP_FS_FRAG_OFFSET + 2 +
-                                             (nir_intrinsic_range(intr) - 1) * 4);
-            break;
-         case nir_intrinsic_load_frag_invocation_count:
-            layout->num_driver_params = MAX2(layout->num_driver_params,
-                                             IR3_DP_FS_FRAG_INVOCATION_COUNT + 1);
-            break;
          default:
            break;
         }
+
+         struct driver_param_info param_info;
+         if (ir3_get_driver_param_info(shader, intr, &param_info)) {
+            layout->num_driver_params =
+               MAX2(layout->num_driver_params,
+                    param_info.offset + nir_intrinsic_dest_components(intr));
+         }
      }
   }
 }
@@ -999,7 +1056,7 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
       const_state->preamble_size;
    unsigned ptrsz = ir3_pointer_size(compiler);

-   if (const_state->num_ubos > 0) {
+   if (const_state->num_ubos > 0 && compiler->gen < 6) {
       const_state->offsets.ubo = constoff;
       constoff += align(const_state->num_ubos * ptrsz, 4) / 4;
    }
@@ -1043,6 +1100,26 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
       constoff += align(IR3_MAX_SO_BUFFERS * ptrsz, 4) / 4;
    }

+   if (!compiler->load_shader_consts_via_preamble) {
+      switch (v->type) {
+      case MESA_SHADER_TESS_CTRL:
+      case MESA_SHADER_TESS_EVAL:
+         const_state->offsets.primitive_param = constoff;
+         constoff += 2;
+
+         const_state->offsets.primitive_map = constoff;
+         break;
+      case MESA_SHADER_GEOMETRY:
+         const_state->offsets.primitive_param = constoff;
+         constoff += 1;
+
+         const_state->offsets.primitive_map = constoff;
+         break;
+      default:
+         break;
+      }
+   }
+
    switch (v->type) {
    case MESA_SHADER_VERTEX:
       const_state->offsets.primitive_param = constoff;
@@ -1050,17 +1127,9 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
       break;
    case MESA_SHADER_TESS_CTRL:
    case MESA_SHADER_TESS_EVAL:
-      const_state->offsets.primitive_param = constoff;
-      constoff += 2;
-
-      const_state->offsets.primitive_map = constoff;
       constoff += DIV_ROUND_UP(v->input_size, 4);
       break;
    case MESA_SHADER_GEOMETRY:
-      const_state->offsets.primitive_param = constoff;
-      constoff += 1;
-
-      const_state->offsets.primitive_map = constoff;
       constoff += DIV_ROUND_UP(v->input_size, 4);
       break;
    default:
@@ -42,6 +42,8 @@ bool ir3_nir_lower_load_barycentric_at_sample(nir_shader *shader);
 bool ir3_nir_lower_load_barycentric_at_offset(nir_shader *shader);
 bool ir3_nir_lower_push_consts_to_preamble(nir_shader *nir,
                                            struct ir3_shader_variant *v);
+bool ir3_nir_lower_driver_params_to_ubo(nir_shader *nir,
+                                        struct ir3_shader_variant *v);
 bool ir3_nir_move_varying_inputs(nir_shader *shader);
 int ir3_nir_coord_offset(nir_def *ssa);
 bool ir3_nir_lower_tex_prefetch(nir_shader *shader);
@@ -88,6 +90,23 @@ nir_def *ir3_nir_try_propagate_bit_shift(nir_builder *b,

 bool ir3_nir_opt_subgroups(nir_shader *nir, struct ir3_shader_variant *v);

+nir_def *ir3_get_driver_ubo(nir_builder *b, struct ir3_driver_ubo *ubo);
+nir_def *ir3_load_driver_ubo(nir_builder *b, unsigned components,
+                             struct ir3_driver_ubo *ubo,
+                             unsigned offset);
+nir_def *ir3_load_driver_ubo_indirect(nir_builder *b, unsigned components,
+                                      struct ir3_driver_ubo *ubo,
+                                      unsigned base, nir_def *offset,
+                                      unsigned range);
+
+struct driver_param_info {
+   uint32_t offset;
+};
+
+bool ir3_get_driver_param_info(const nir_shader *shader,
+                               nir_intrinsic_instr *intr,
+                               struct driver_param_info *param_info);
+
 static inline nir_intrinsic_instr *
 ir3_bindless_resource(nir_src src)
 {
@@ -337,13 +337,18 @@ lower_ubo_load_to_uniform(nir_intrinsic_instr *instr, nir_builder *b,
 }

 static bool
-copy_ubo_to_uniform(nir_shader *nir, const struct ir3_const_state *const_state)
+copy_ubo_to_uniform(nir_shader *nir, const struct ir3_const_state *const_state,
+                    bool const_data_via_cp)
 {
    const struct ir3_ubo_analysis_state *state = &const_state->ubo_state;

-   if (state->num_enabled == 0 ||
-       (state->num_enabled == 1 && !state->range[0].ubo.bindless &&
-        state->range[0].ubo.block == const_state->constant_data_ubo))
+   if (state->num_enabled == 0)
+      return false;
+
+   if (state->num_enabled == 1 &&
+       !state->range[0].ubo.bindless &&
+       state->range[0].ubo.block == const_state->consts_ubo.idx &&
+       const_data_via_cp)
       return false;

    nir_function_impl *preamble = nir_shader_get_preamble(nir);
@@ -358,7 +363,8 @@ copy_ubo_to_uniform(nir_shader *nir, const struct ir3_const_state *const_state,
        * the CP do it for us.
        */
       if (!range->ubo.bindless &&
-          range->ubo.block == const_state->constant_data_ubo)
+          range->ubo.block == const_state->consts_ubo.idx &&
+          const_data_via_cp)
          continue;

       nir_def *ubo = nir_imm_int(b, range->ubo.block);
@@ -502,7 +508,8 @@ ir3_nir_lower_ubo_loads(nir_shader *nir, struct ir3_shader_variant *v)
    nir->info.num_ubos = num_ubos;

    if (compiler->has_preamble && push_ubos)
-      progress |= copy_ubo_to_uniform(nir, const_state);
+      progress |= copy_ubo_to_uniform(
+         nir, const_state, !compiler->load_shader_consts_via_preamble);

    return progress;
 }
@@ -584,15 +591,6 @@ ir3_nir_lower_load_const_instr(nir_builder *b, nir_instr *in_instr, void *data)
    struct ir3_const_state *const_state = data;
    nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in_instr);

-   /* Pick a UBO index to use as our constant data. Skip UBO 0 since that's
-    * reserved for gallium's cb0.
-    */
-   if (const_state->constant_data_ubo == -1) {
-      if (b->shader->info.num_ubos == 0)
-         b->shader->info.num_ubos++;
-      const_state->constant_data_ubo = b->shader->info.num_ubos++;
-   }
-
    unsigned num_components = instr->num_components;
    unsigned bit_size = instr->def.bit_size;
    if (instr->def.bit_size == 16) {
@@ -606,7 +604,7 @@ ir3_nir_lower_load_const_instr(nir_builder *b, nir_instr *in_instr, void *data)
       bit_size = 32;
    }
    unsigned base = nir_intrinsic_base(instr);
-   nir_def *index = nir_imm_int(b, const_state->constant_data_ubo);
+   nir_def *index = ir3_get_driver_ubo(b, &const_state->consts_ubo);
    nir_def *offset =
       nir_iadd_imm(b, instr->src[0].ssa, base);
@@ -640,8 +638,6 @@ ir3_nir_lower_load_constant(nir_shader *nir, struct ir3_shader_variant *v)
 {
    struct ir3_const_state *const_state = ir3_const_state(v);

-   const_state->constant_data_ubo = -1;
-
    bool progress = nir_shader_lower_instructions(
       nir, ir3_lower_load_const_filter, ir3_nir_lower_load_const_instr,
       const_state);
src/freedreno/ir3/ir3_nir_lower_driver_params_to_ubo.c (new file, 81 lines)
@@ -0,0 +1,81 @@
+/*
+ * Copyright © 2023 Igalia S.L.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "compiler/nir/nir.h"
+#include "compiler/nir/nir_builder.h"
+#include "util/u_math.h"
+#include "ir3_compiler.h"
+#include "ir3_nir.h"
+
+static bool
+lower_driver_param_to_ubo(nir_builder *b, nir_intrinsic_instr *intr, void *in)
+{
+   struct ir3_const_state *const_state = in;
+
+   if (b->shader->info.stage == MESA_SHADER_VERTEX)
+      return false;
+
+   unsigned components = nir_intrinsic_dest_components(intr);
+
+   b->cursor = nir_before_instr(&intr->instr);
+
+   nir_def *result;
+   switch (intr->intrinsic) {
+   case nir_intrinsic_load_primitive_location_ir3:
+      result = ir3_load_driver_ubo(b, components,
+                                   &const_state->primitive_map_ubo,
+                                   nir_intrinsic_driver_location(intr));
+      break;
+   case nir_intrinsic_load_vs_primitive_stride_ir3:
+      result = ir3_load_driver_ubo(b, components,
+                                   &const_state->primitive_param_ubo, 0);
+      break;
+   case nir_intrinsic_load_vs_vertex_stride_ir3:
+      result = ir3_load_driver_ubo(b, components,
+                                   &const_state->primitive_param_ubo, 1);
+      break;
+   case nir_intrinsic_load_hs_patch_stride_ir3:
+      result = ir3_load_driver_ubo(b, components,
+                                   &const_state->primitive_param_ubo, 2);
+      break;
+   case nir_intrinsic_load_patch_vertices_in:
+      result = ir3_load_driver_ubo(b, components,
+                                   &const_state->primitive_param_ubo, 3);
+      break;
+   case nir_intrinsic_load_tess_param_base_ir3:
+      result = ir3_load_driver_ubo(b, components,
+                                   &const_state->primitive_param_ubo, 4);
+      break;
+   case nir_intrinsic_load_tess_factor_base_ir3:
+      result = ir3_load_driver_ubo(b, components,
+                                   &const_state->primitive_param_ubo, 6);
+      break;
+   default: {
+      struct driver_param_info param_info;
+      if (!ir3_get_driver_param_info(b->shader, intr, &param_info))
+         return false;
+
+      result = ir3_load_driver_ubo(b, components,
+                                   &const_state->driver_params_ubo,
+                                   param_info.offset);
+   }
+   }
+
+   nir_instr_remove(&intr->instr);
+   nir_def_rewrite_uses(&intr->def, result);
+
+   return true;
+}
+
+bool
+ir3_nir_lower_driver_params_to_ubo(nir_shader *nir,
+                                   struct ir3_shader_variant *v)
+{
+   bool result = nir_shader_intrinsics_pass(
+      nir, lower_driver_param_to_ubo,
+      nir_metadata_block_index | nir_metadata_dominance, ir3_const_state(v));
+
+   return result;
+}
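For reference, the fixed offsets in the switch above imply the following primitive_param_ubo dword layout (inferred from the cases; it also matches the hs_params[]/ds_params[] ordering in the turnip hunks further down):

/* primitive_param_ubo dword layout implied by the switch above:
 *   dword 0     vs primitive stride   (load_vs_primitive_stride_ir3)
 *   dword 1     vs vertex stride      (load_vs_vertex_stride_ir3)
 *   dword 2     hs patch stride       (load_hs_patch_stride_ir3)
 *   dword 3     patch vertices in     (load_patch_vertices_in)
 *   dwords 4-5  tess param base       (load_tess_param_base_ir3)
 *   dwords 6-7  tess factor base      (load_tess_factor_base_ir3)
 */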
@@ -303,6 +303,10 @@ alloc_variant(struct ir3_shader *shader, const struct ir3_shader_key *key,
    if (!v->binning_pass) {
       v->const_state = rzalloc_size(v, sizeof(*v->const_state));
       v->const_state->push_consts_type = shader->options.push_consts_type;
+      v->const_state->consts_ubo.idx = -1;
+      v->const_state->driver_params_ubo.idx = -1;
+      v->const_state->primitive_map_ubo.idx = -1;
+      v->const_state->primitive_param_ubo.idx = -1;
    }

    return v;
@@ -154,6 +154,16 @@ enum ir3_push_consts_type {
    IR3_PUSH_CONSTS_SHARED_PREAMBLE,
 };

+/* This represents an internal UBO filled out by the driver. There are a few
+ * common UBOs that must be filled out identically by all drivers, for example
+ * for shader linkage, but drivers can also add their own that they manage
+ * themselves.
+ */
+struct ir3_driver_ubo {
+   int32_t idx;
+   uint32_t size;
+};
+
 /**
  * Describes the layout of shader consts in the const register file.
  *
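Reading the struct together with ir3_get_driver_ubo above: idx == -1 means the UBO is unused so far, the first load against it claims the next bindful slot (never 0, which gallium's cb0 owns), and size grows to cover the highest dword any load touches. A sketch of the lifecycle under that reading:

struct ir3_driver_ubo ubo = { .idx = -1, .size = 0 };  /* as in alloc_variant() */

/* Compile time: the first ir3_load_driver_ubo(b, 3, &ubo, 4) assigns ubo.idx
 * and raises ubo.size to 7. Emit time: the driver skips the upload entirely
 * when ubo.size == 0, otherwise it binds DIV_ROUND_UP(ubo.size, 4) vec4s at
 * slot ubo.idx via CP_LOAD_STATE6/ST6_UBO. */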
@@ -186,8 +196,11 @@ struct ir3_const_state {
    unsigned num_ubos;
    unsigned num_driver_params; /* scalar */

-   /* UBO that should be mapped to the NIR shader's constant_data (or -1). */
-   int32_t constant_data_ubo;
+   struct ir3_driver_ubo consts_ubo;
+   struct ir3_driver_ubo driver_params_ubo;
+   struct ir3_driver_ubo primitive_map_ubo, primitive_param_ubo;

    int32_t constant_data_dynamic_offsets;

    struct {
       /* user const start at zero */
@@ -93,6 +93,7 @@ libfreedreno_ir3_files = files(
   'ir3_nir.h',
   'ir3_nir_analyze_ubo_ranges.c',
   'ir3_nir_lower_64b.c',
+  'ir3_nir_lower_driver_params_to_ubo.c',
   'ir3_nir_lower_load_barycentric_at_sample.c',
   'ir3_nir_lower_load_barycentric_at_offset.c',
   'ir3_nir_lower_push_consts_to_preamble.c',
@@ -4326,6 +4326,8 @@ tu6_emit_inline_ubo(struct tu_cs *cs,
                     gl_shader_stage type,
                     struct tu_descriptor_state *descriptors)
 {
+   assert(const_state->num_inline_ubos == 0 || !cs->device->physical_device->info->a7xx.load_shader_consts_via_preamble);
+
    /* Emit loads of inline uniforms. These load directly from the uniform's
    * storage space inside the descriptor set.
    */
@@ -4578,7 +4580,7 @@ fs_params_offset(struct tu_cmd_buffer *cmd)
       &cmd->state.program.link[MESA_SHADER_FRAGMENT];
    const struct ir3_const_state *const_state = &link->const_state;

-   if (const_state->num_driver_params <= IR3_DP_FS_DYNAMIC)
+   if (const_state->num_driver_params < IR3_DP_FS_DYNAMIC)
       return 0;

    if (const_state->offsets.driver_param + IR3_DP_FS_DYNAMIC / 4 >= link->constlen)
@@ -5412,10 +5414,100 @@ tu_emit_compute_driver_params(struct tu_cmd_buffer *cmd,
    const struct tu_shader *shader = cmd->state.shaders[MESA_SHADER_COMPUTE];
    const struct ir3_shader_variant *variant = shader->variant;
    const struct ir3_const_state *const_state = variant->const_state;
-   uint32_t offset = const_state->offsets.driver_param;
    unsigned subgroup_size = variant->info.subgroup_size;
    unsigned subgroup_shift = util_logbase2(subgroup_size);

+   if (cmd->device->physical_device->info->a7xx.load_shader_consts_via_preamble) {
+      uint32_t num_consts = const_state->driver_params_ubo.size;
+      if (num_consts == 0)
+         return;
+
+      bool direct_indirect_load =
+         !(info->indirect_offset & 0xf) &&
+         !(info->indirect && num_consts > IR3_DP_BASE_GROUP_X);
+
+      uint64_t iova = 0;
+
+      if (!info->indirect) {
+         uint32_t driver_params[12] = {
+            [IR3_DP_NUM_WORK_GROUPS_X] = info->blocks[0],
+            [IR3_DP_NUM_WORK_GROUPS_Y] = info->blocks[1],
+            [IR3_DP_NUM_WORK_GROUPS_Z] = info->blocks[2],
+            [IR3_DP_WORK_DIM] = 0,
+            [IR3_DP_BASE_GROUP_X] = info->offsets[0],
+            [IR3_DP_BASE_GROUP_Y] = info->offsets[1],
+            [IR3_DP_BASE_GROUP_Z] = info->offsets[2],
+            [IR3_DP_CS_SUBGROUP_SIZE] = subgroup_size,
+            [IR3_DP_LOCAL_GROUP_SIZE_X] = 0,
+            [IR3_DP_LOCAL_GROUP_SIZE_Y] = 0,
+            [IR3_DP_LOCAL_GROUP_SIZE_Z] = 0,
+            [IR3_DP_SUBGROUP_ID_SHIFT] = subgroup_shift,
+         };
+
+         assert(num_consts <= ARRAY_SIZE(driver_params));
+
+         struct tu_cs_memory consts;
+         uint32_t consts_vec4 = DIV_ROUND_UP(num_consts, 4);
+         VkResult result = tu_cs_alloc(&cmd->sub_cs, consts_vec4, 4, &consts);
+         if (result != VK_SUCCESS) {
+            vk_command_buffer_set_error(&cmd->vk, result);
+            return;
+         }
+         memcpy(consts.map, driver_params, num_consts * sizeof(uint32_t));
+         iova = consts.iova;
+      } else if (direct_indirect_load) {
+         iova = info->indirect->iova + info->indirect_offset;
+      } else {
+         /* Vulkan guarantees only 4 byte alignment for indirect_offset.
+          * However, CP_LOAD_STATE.EXT_SRC_ADDR needs 16 byte alignment.
+          */
+
+         uint64_t indirect_iova = info->indirect->iova + info->indirect_offset;
+
+         for (uint32_t i = 0; i < 3; i++) {
+            tu_cs_emit_pkt7(cs, CP_MEM_TO_MEM, 5);
+            tu_cs_emit(cs, 0);
+            tu_cs_emit_qw(cs, global_iova_arr(cmd, cs_indirect_xyz, i));
+            tu_cs_emit_qw(cs, indirect_iova + i * sizeof(uint32_t));
+         }
+
+         /* Fill out IR3_DP_CS_SUBGROUP_SIZE and IR3_DP_SUBGROUP_ID_SHIFT for
+          * indirect dispatch.
+          */
+         if (info->indirect && num_consts > IR3_DP_BASE_GROUP_X) {
+            uint32_t indirect_driver_params[8] = {
+               0, 0, 0, subgroup_size,
+               0, 0, 0, subgroup_shift,
+            };
+            bool emit_local = num_consts > IR3_DP_LOCAL_GROUP_SIZE_X;
+            uint32_t emit_size = emit_local ? 8 : 4;
+
+            tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 2 + emit_size);
+            tu_cs_emit_qw(cs, global_iova_arr(cmd, cs_indirect_xyz, 0) + 4 * sizeof(uint32_t));
+            for (uint32_t i = 0; i < emit_size; i++) {
+               tu_cs_emit(cs, indirect_driver_params[i]);
+            }
+         }
+
+         tu_cs_emit_pkt7(cs, CP_WAIT_MEM_WRITES, 0);
+         tu_emit_event_write<CHIP>(cmd, cs, FD_CACHE_INVALIDATE);
+
+         iova = global_iova(cmd, cs_indirect_xyz[0]);
+      }
+
+      tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 5);
+      tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(const_state->driver_params_ubo.idx) |
+                 CP_LOAD_STATE6_0_STATE_TYPE(ST6_UBO) |
+                 CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
+                 CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) |
+                 CP_LOAD_STATE6_0_NUM_UNIT(1));
+      tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
+      tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
+      int size_vec4s = DIV_ROUND_UP(num_consts, 4);
+      tu_cs_emit_qw(cs, iova | ((uint64_t)A6XX_UBO_1_SIZE(size_vec4s) << 32));
+
+   } else {
+      uint32_t offset = const_state->offsets.driver_param;
+      if (variant->constlen <= offset)
+         return;
+
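The direct_indirect_load predicate above compresses two requirements; restated as a standalone sketch (the helper name is ours, the logic is copied from the hunk):

/* The app's indirect buffer can back the UBO directly only when (a) its
 * offset satisfies the 16-byte alignment that CP_LOAD_STATE's EXT_SRC_ADDR
 * needs, and (b) the shader reads no driver params at or past
 * IR3_DP_BASE_GROUP_X, since those extra dwords are not present in the
 * indirect buffer and must be staged next to the copied counts instead. */
static bool
can_point_ubo_at_indirect_buffer(uint32_t indirect_offset, bool is_indirect,
                                 uint32_t num_consts)
{
   return !(indirect_offset & 0xf) &&
          !(is_indirect && num_consts > IR3_DP_BASE_GROUP_X);
}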
@@ -5510,6 +5602,7 @@ tu_emit_compute_driver_params(struct tu_cmd_buffer *cmd,
          tu_cs_emit(cs, subgroup_shift);
       }
    }
+   }
 }

 template <chip CHIP>
@@ -507,6 +507,26 @@ tu_cs_reset(struct tu_cs *cs)
    cs->entry_count = 0;
 }

+uint64_t
+tu_cs_emit_data_nop(struct tu_cs *cs,
+                    const uint32_t *data,
+                    uint32_t size,
+                    uint32_t align_dwords)
+{
+   uint32_t total_size = size + (align_dwords - 1);
+   tu_cs_emit_pkt7(cs, CP_NOP, total_size);
+
+   uint64_t iova = tu_cs_get_cur_iova(cs);
+   uint64_t iova_aligned = align64(iova, align_dwords * sizeof(uint32_t));
+   size_t offset = (iova_aligned - iova) / sizeof(uint32_t);
+   cs->cur += offset;
+   memcpy(cs->cur, data, size * sizeof(uint32_t));
+
+   cs->cur += total_size - offset;
+
+   return iova + offset * sizeof(uint32_t);
+}
+
 void
 tu_cs_emit_debug_string(struct tu_cs *cs, const char *string, int len)
 {
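This is the CP_NOP trick from the commit message: size + (align_dwords - 1) dwords are reserved inside a NOP packet so that an aligned copy of the payload always fits no matter where cs->cur happens to be; the firmware skips the packet, but the data stays readable at the returned iova. The placement math, restated as a self-contained sketch:

#include <assert.h>
#include <stdint.h>

/* Given the iova of the CP_NOP payload, compute where the aligned copy of
 * the data starts (align_dwords must be a power of two, as it is here). */
static uint64_t
aligned_payload_iova(uint64_t payload_iova, uint32_t align_dwords)
{
   uint64_t align_bytes = (uint64_t)align_dwords * sizeof(uint32_t);
   uint64_t aligned = (payload_iova + align_bytes - 1) & ~(align_bytes - 1);
   /* Padding consumed is strictly less than align_dwords dwords, exactly the
    * slack tu_cs_emit_data_nop reserves on top of `size`. */
   assert(aligned - payload_iova < align_bytes);
   return aligned;
}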
@@ -480,6 +480,12 @@ tu_cond_exec_end(struct tu_cs *cs)
    }
 }

+uint64_t
+tu_cs_emit_data_nop(struct tu_cs *cs,
+                    const uint32_t *data,
+                    uint32_t size,
+                    uint32_t align);
+
 /* Temporary struct for tracking a register state to be written, used by
  * a6xx-pack.h and tu_cs_emit_regs()
  */
@@ -231,7 +231,7 @@ get_device_extensions(const struct tu_physical_device *device,
    .EXT_extended_dynamic_state3 = true,
    .EXT_external_memory_dma_buf = true,
    .EXT_filter_cubic = device->info->a6xx.has_tex_filter_cubic,
-   .EXT_fragment_density_map = true,
+   .EXT_fragment_density_map = !device->info->a7xx.load_shader_consts_via_preamble,
    .EXT_global_priority = true,
    .EXT_global_priority_query = true,
    .EXT_graphics_pipeline_library = true,
@@ -196,7 +196,7 @@ struct tu6_global
       uint32_t pad[7];
    } flush_base[4];

-   alignas(16) uint32_t cs_indirect_xyz[3];
+   alignas(16) uint32_t cs_indirect_xyz[12];

    volatile uint32_t vtx_stats_query_not_running;
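The slot grows from 3 to 12 dwords because, on the UBO path, an indirect dispatch has to stage the whole compute driver-param block here, not just the workgroup counts. Assuming the dword order matches the driver_params[] initializer in tu_emit_compute_driver_params above, the layout is:

/* cs_indirect_xyz[12] layout on the preamble/UBO path (an inference from the
 * driver_params[] initializer, not stated explicitly in the commit):
 *   dwords 0..2   IR3_DP_NUM_WORK_GROUPS_X/Y/Z  (CP_MEM_TO_MEM copy from the
 *                 app's indirect buffer)
 *   dword  3      IR3_DP_WORK_DIM
 *   dwords 4..6   IR3_DP_BASE_GROUP_X/Y/Z       (zero for indirect dispatch)
 *   dword  7      IR3_DP_CS_SUBGROUP_SIZE
 *   dwords 8..10  IR3_DP_LOCAL_GROUP_SIZE_X/Y/Z (zero for indirect dispatch)
 *   dword  11     IR3_DP_SUBGROUP_ID_SHIFT
 */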
@@ -408,7 +408,37 @@ tu6_emit_dynamic_offset(struct tu_cs *cs,
                         const struct tu_program_state *program)
 {
    const struct tu_physical_device *phys_dev = cs->device->physical_device;
-   if (!xs || shader->const_state.dynamic_offset_loc == UINT32_MAX)
+
+   if (!xs)
       return;

+   if (cs->device->physical_device->info->a7xx.load_shader_consts_via_preamble) {
+      if (shader->const_state.dynamic_offsets_ubo.size == 0)
+         return;
+
+      uint32_t offsets[MAX_SETS];
+      for (unsigned i = 0; i < phys_dev->usable_sets; i++) {
+         unsigned dynamic_offset_start =
+            program->dynamic_descriptor_offsets[i] / (A6XX_TEX_CONST_DWORDS * 4);
+         offsets[i] = dynamic_offset_start;
+      }
+
+      /* A7XX TODO: Emit data via sub_cs instead of NOP */
+      uint64_t iova = tu_cs_emit_data_nop(cs, offsets, phys_dev->usable_sets, 4);
+      uint32_t offset = shader->const_state.dynamic_offsets_ubo.idx;
+
+      tu_cs_emit_pkt7(cs, tu6_stage2opcode(xs->type), 5);
+      tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(offset) |
+                 CP_LOAD_STATE6_0_STATE_TYPE(ST6_UBO) |
+                 CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
+                 CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(xs->type)) |
+                 CP_LOAD_STATE6_0_NUM_UNIT(1));
+      tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
+      tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
+      int size_vec4s = DIV_ROUND_UP(phys_dev->usable_sets, 4);
+      tu_cs_emit_qw(cs, iova | ((uint64_t)A6XX_UBO_1_SIZE(size_vec4s) << 32));
+   } else {
+      if (shader->const_state.dynamic_offset_loc == UINT32_MAX)
+         return;
+
       tu_cs_emit_pkt7(cs, tu6_stage2opcode(xs->type), 3 + phys_dev->usable_sets);
@@ -425,6 +455,7 @@ tu6_emit_dynamic_offset(struct tu_cs *cs,
            program->dynamic_descriptor_offsets[i] / (A6XX_TEX_CONST_DWORDS * 4);
         tu_cs_emit(cs, dynamic_offset_start);
      }
+   }
 }

 template <chip CHIP>
@@ -569,24 +600,77 @@ tu6_setup_streamout(struct tu_cs *cs,
    }
 }

-static void
-tu6_emit_const(struct tu_cs *cs, uint32_t opcode, uint32_t base,
-               enum a6xx_state_block block, uint32_t offset,
-               uint32_t size, const uint32_t *dwords) {
-   assert(size % 4 == 0);
+enum tu_geom_consts_type
+{
+   TU_CONSTS_PRIMITIVE_MAP,
+   TU_CONSTS_PRIMITIVE_PARAM,
+};

-   tu_cs_emit_pkt7(cs, opcode, 3 + size);
+static void
+tu6_emit_const(struct tu_cs *cs, uint32_t opcode, enum tu_geom_consts_type type,
+               const struct ir3_const_state *const_state,
+               unsigned constlen, enum a6xx_state_block block,
+               uint32_t offset, uint32_t size, const uint32_t *dwords) {
+   assert(size % 4 == 0);
+   dwords = (uint32_t *)&((uint8_t *)dwords)[offset];
+
+   if (block == SB6_VS_SHADER || !cs->device->physical_device->info->a7xx.load_shader_consts_via_preamble) {
+      uint32_t base;
+      switch (type) {
+      case TU_CONSTS_PRIMITIVE_MAP:
+         base = const_state->offsets.primitive_map;
+         break;
+      case TU_CONSTS_PRIMITIVE_PARAM:
+         base = const_state->offsets.primitive_param;
+         break;
+      default:
+         unreachable("bad consts type");
+      }
+
+      int32_t adjusted_size = MIN2(base * 4 + size, constlen * 4) - base * 4;
+      if (adjusted_size <= 0)
+         return;
+
+      tu_cs_emit_pkt7(cs, opcode, 3 + adjusted_size);
       tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(base) |
                  CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
                  CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
                  CP_LOAD_STATE6_0_STATE_BLOCK(block) |
-                 CP_LOAD_STATE6_0_NUM_UNIT(size / 4));
+                 CP_LOAD_STATE6_0_NUM_UNIT(adjusted_size / 4));

       tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
       tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
-   dwords = (uint32_t *)&((uint8_t *)dwords)[offset];

-   tu_cs_emit_array(cs, dwords, size);
+      tu_cs_emit_array(cs, dwords, adjusted_size);
+   } else {
+      uint32_t base;
+      switch (type) {
+      case TU_CONSTS_PRIMITIVE_MAP:
+         base = const_state->primitive_map_ubo.idx;
+         break;
+      case TU_CONSTS_PRIMITIVE_PARAM:
+         base = const_state->primitive_param_ubo.idx;
+         break;
+      default:
+         unreachable("bad consts type");
+      }
+      if (base == -1)
+         return;
+
+      /* A7XX TODO: Emit data via sub_cs instead of NOP */
+      uint64_t iova = tu_cs_emit_data_nop(cs, dwords, size, 4);
+
+      tu_cs_emit_pkt7(cs, opcode, 5);
+      tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(base) |
+                 CP_LOAD_STATE6_0_STATE_TYPE(ST6_UBO) |
+                 CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
+                 CP_LOAD_STATE6_0_STATE_BLOCK(block) |
+                 CP_LOAD_STATE6_0_NUM_UNIT(1));
+      tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
+      tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
+      int size_vec4s = DIV_ROUND_UP(size, 4);
+      tu_cs_emit_qw(cs, iova | ((uint64_t)A6XX_UBO_1_SIZE(size_vec4s) << 32));
+   }
 }

 static void
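On the old const-file path the payload must still be clamped against the variant's constlen; a worked example of the adjusted_size computation above, with made-up numbers (MIN2 as in Mesa's macros):

#include <stdint.h>

static int32_t
adjusted_const_size(uint32_t base_vec4, uint32_t size_dwords,
                    uint32_t constlen_vec4)
{
   uint32_t want = base_vec4 * 4 + size_dwords;
   uint32_t have = constlen_vec4 * 4;
   /* e.g. base = 10 vec4s, size = 8 dwords, constlen = 11 vec4s:
    * MIN2(48, 44) - 40 = 4 dwords, so only one vec4 still fits; a
    * non-positive result means the packet is skipped entirely. */
   return (int32_t)(want < have ? want : have) - (int32_t)(base_vec4 * 4);
}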
@@ -596,15 +680,13 @@ tu6_emit_link_map(struct tu_cs *cs,
                   enum a6xx_state_block sb)
 {
    const struct ir3_const_state *const_state = ir3_const_state(consumer);
-   uint32_t base = const_state->offsets.primitive_map;
-   int size = DIV_ROUND_UP(consumer->input_size, 4);
+   uint32_t size = ALIGN(consumer->input_size, 4);

-   size = (MIN2(size + base, consumer->constlen) - base) * 4;
-   if (size <= 0)
+   if (size == 0)
       return;

-   tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, base, sb, 0, size,
-                  producer->output_loc);
+   tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, TU_CONSTS_PRIMITIVE_MAP,
+                  const_state, consumer->constlen, sb, 0, size, producer->output_loc);
 }

 static int
@@ -992,8 +1074,8 @@ tu6_emit_vs_params(struct tu_cs *cs,
       0,
       0,
    };
-   uint32_t vs_base = const_state->offsets.primitive_param;
-   tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, vs_base, SB6_VS_SHADER, 0,
+   tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, TU_CONSTS_PRIMITIVE_PARAM,
+                  const_state, constlen, SB6_VS_SHADER, 0,
                   ARRAY_SIZE(vs_params), vs_params);
 }
@@ -1018,6 +1100,8 @@ static const enum mesa_vk_dynamic_graphics_state tu_patch_control_points_state[]
    MESA_VK_DYNAMIC_TS_PATCH_CONTROL_POINTS,
 };

+#define HS_PARAMS_SIZE 8
+
 template <chip CHIP>
 static unsigned
 tu6_patch_control_points_size(struct tu_device *dev,
@@ -1027,10 +1111,17 @@ tu6_patch_control_points_size(struct tu_device *dev,
                               const struct tu_program_state *program,
                               uint32_t patch_control_points)
 {
+   if (dev->physical_device->info->a7xx.load_shader_consts_via_preamble) {
+#define EMIT_CONST_DWORDS(const_dwords) (5 + const_dwords + 4)
+      return EMIT_CONST_DWORDS(4) +
+         EMIT_CONST_DWORDS(HS_PARAMS_SIZE) + 2 + 2 + 2;
+#undef EMIT_CONST_DWORDS
+   } else {
 #define EMIT_CONST_DWORDS(const_dwords) (4 + const_dwords)
-   return EMIT_CONST_DWORDS(4) +
-      EMIT_CONST_DWORDS(program->hs_param_dwords) + 2 + 2 + 2;
+      return EMIT_CONST_DWORDS(4) +
+         EMIT_CONST_DWORDS(HS_PARAMS_SIZE) + 2 + 2 + 2;
 #undef EMIT_CONST_DWORDS
+   }
 }

 template <chip CHIP>
@@ -1056,7 +1147,7 @@ tu6_emit_patch_control_points(struct tu_cs *cs,
    uint64_t tess_factor_iova, tess_param_iova;
    tu_get_tess_iova(dev, &tess_factor_iova, &tess_param_iova);

-   uint32_t hs_params[8] = {
+   uint32_t hs_params[HS_PARAMS_SIZE] = {
       vs->variant->output_size * patch_control_points * 4, /* hs primitive stride */
       vs->variant->output_size * 4, /* hs vertex stride */
       tcs->variant->output_size,
@@ -1069,9 +1160,10 @@ tu6_emit_patch_control_points(struct tu_cs *cs,

    const struct ir3_const_state *hs_const =
       &program->link[MESA_SHADER_TESS_CTRL].const_state;
-   uint32_t hs_base = hs_const->offsets.primitive_param;
-   tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, hs_base, SB6_HS_SHADER, 0,
-                  program->hs_param_dwords, hs_params);
+   unsigned hs_constlen = program->link[MESA_SHADER_TESS_CTRL].constlen;
+   tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, TU_CONSTS_PRIMITIVE_PARAM,
+                  hs_const, hs_constlen, SB6_HS_SHADER, 0,
+                  ARRAY_SIZE(hs_params), hs_params);

    uint32_t patch_local_mem_size_16b =
       patch_control_points * vs->variant->output_size / 4;
@@ -1146,10 +1238,9 @@ tu6_emit_geom_tess_consts(struct tu_cs *cs,
       tess_factor_iova >> 32,
    };

-   uint32_t ds_base = ds->const_state->offsets.primitive_param;
-   uint32_t ds_param_dwords = MIN2((ds->constlen - ds_base) * 4, ARRAY_SIZE(ds_params));
-   tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, ds_base, SB6_DS_SHADER, 0,
-                  ds_param_dwords, ds_params);
+   tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, TU_CONSTS_PRIMITIVE_PARAM,
+                  ds->const_state, ds->constlen, SB6_DS_SHADER, 0,
+                  ARRAY_SIZE(ds_params), ds_params);
    }

    if (gs) {
@@ -1160,8 +1251,8 @@ tu6_emit_geom_tess_consts(struct tu_cs *cs,
       0,
       0,
    };
-   uint32_t gs_base = gs->const_state->offsets.primitive_param;
-   tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, gs_base, SB6_GS_SHADER, 0,
+   tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, TU_CONSTS_PRIMITIVE_PARAM,
+                  gs->const_state, gs->constlen, SB6_GS_SHADER, 0,
                   ARRAY_SIZE(gs_params), gs_params);
    }
 }
@@ -2145,15 +2236,6 @@ tu_emit_program_state(struct tu_cs *sub_cs,
    tu6_emit_vpc<CHIP>(&prog_cs, vs, hs, ds, gs, fs);
    prog->vpc_state = tu_cs_end_draw_state(sub_cs, &prog_cs);

-   if (hs) {
-      const struct ir3_const_state *hs_const =
-         &prog->link[MESA_SHADER_TESS_CTRL].const_state;
-      unsigned hs_constlen =
-         prog->link[MESA_SHADER_TESS_CTRL].constlen;
-      uint32_t hs_base = hs_const->offsets.primitive_param;
-      prog->hs_param_dwords = MIN2((hs_constlen - hs_base) * 4, 8);
-   }
-
    const struct ir3_shader_variant *last_shader;
    if (gs)
       last_shader = gs;
@@ -93,8 +93,6 @@ struct tu_program_state
    struct tu_draw_state vpc_state;
    struct tu_draw_state fs_state;

-   uint32_t hs_param_dwords;
-
    struct tu_push_constant_range shared_consts;

    struct tu_program_descriptor_linkage link[MESA_SHADER_STAGES];
@@ -97,6 +97,13 @@ tu_spirv_to_nir(struct tu_device *dev,
    if (result != VK_SUCCESS)
       return NULL;

+   /* ir3 uses num_ubos and num_ssbos to track the number of *bindful*
+    * UBOs/SSBOs, but spirv_to_nir sets them to the total number of objects
+    * which is useless for us, so reset them here.
+    */
+   nir->info.num_ubos = 0;
+   nir->info.num_ssbos = 0;
+
    if (TU_DEBUG(NIR)) {
       fprintf(stderr, "translated nir:\n");
       nir_print_shader(nir, stderr);
@@ -175,6 +182,7 @@ lower_vulkan_resource_index(struct tu_device *dev, nir_builder *b,
                             struct tu_shader *shader,
                             const struct tu_pipeline_layout *layout)
 {
+   struct ir3_compiler *compiler = dev->compiler;
    nir_def *vulkan_idx = instr->src[0].ssa;

    unsigned set = nir_intrinsic_desc_set(instr);
@@ -209,9 +217,15 @@ lower_vulkan_resource_index(struct tu_device *dev, nir_builder *b,
        * get it from the const file instead.
        */
       base = nir_imm_int(b, binding_layout->dynamic_offset_offset / (4 * A6XX_TEX_CONST_DWORDS));
-      nir_def *dynamic_offset_start =
-         nir_load_uniform(b, 1, 32, nir_imm_int(b, 0),
-                          .base = shader->const_state.dynamic_offset_loc + set);
+      nir_def *dynamic_offset_start;
+      if (compiler->load_shader_consts_via_preamble) {
+         dynamic_offset_start =
+            ir3_load_driver_ubo(b, 1, &shader->const_state.dynamic_offsets_ubo, set);
+      } else {
+         dynamic_offset_start =
+            nir_load_uniform(b, 1, 32, nir_imm_int(b, 0),
+                             .base = shader->const_state.dynamic_offset_loc + set);
+      }
       base = nir_iadd(b, base, dynamic_offset_start);
    } else {
       base = nir_imm_int(b, (offset +
@@ -271,7 +285,7 @@ lower_load_vulkan_descriptor(nir_builder *b, nir_intrinsic_instr *intrin)
    nir_instr_remove(&intrin->instr);
 }

-static void
+static bool
 lower_ssbo_ubo_intrinsic(struct tu_device *dev,
                          nir_builder *b, nir_intrinsic_instr *intrin)
 {
@@ -291,6 +305,10 @@ lower_ssbo_ubo_intrinsic(struct tu_device *dev,
       buffer_src = 0;
    }

+   /* Don't lower non-bindless UBO loads of driver params */
+   if (intrin->src[buffer_src].ssa->num_components == 1)
+      return false;
+
    nir_scalar scalar_idx = nir_scalar_resolved(intrin->src[buffer_src].ssa, 0);
    nir_def *descriptor_idx = nir_channel(b, intrin->src[buffer_src].ssa, 1);
@@ -310,7 +328,7 @@ lower_ssbo_ubo_intrinsic(struct tu_device *dev,
       nir_def *bindless =
          nir_bindless_resource_ir3(b, 32, descriptor_idx, .desc_set = nir_scalar_as_uint(scalar_idx));
       nir_src_rewrite(&intrin->src[buffer_src], bindless);
-      return;
+      return true;
    }

    nir_def *base_idx = nir_channel(b, scalar_idx.def, scalar_idx.comp);
@@ -361,6 +379,7 @@ lower_ssbo_ubo_intrinsic(struct tu_device *dev,
    if (info->has_dest)
       nir_def_rewrite_uses(&intrin->def, result);
    nir_instr_remove(&intrin->instr);
+   return true;
 }

 static nir_def *
@@ -461,8 +480,7 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
    case nir_intrinsic_ssbo_atomic:
    case nir_intrinsic_ssbo_atomic_swap:
    case nir_intrinsic_get_ssbo_size:
-      lower_ssbo_ubo_intrinsic(dev, b, instr);
-      return true;
+      return lower_ssbo_ubo_intrinsic(dev, b, instr);

    case nir_intrinsic_image_deref_load:
    case nir_intrinsic_image_deref_store:
@@ -473,6 +491,37 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
       lower_image_deref(dev, b, instr, shader, layout);
       return true;

+   case nir_intrinsic_load_frag_size_ir3:
+   case nir_intrinsic_load_frag_offset_ir3: {
+      if (!dev->compiler->load_shader_consts_via_preamble)
+         return false;
+
+      enum ir3_driver_param param =
+         instr->intrinsic == nir_intrinsic_load_frag_size_ir3 ?
+         IR3_DP_FS_FRAG_SIZE : IR3_DP_FS_FRAG_OFFSET;
+
+      nir_def *view = instr->src[0].ssa;
+      nir_def *result =
+         ir3_load_driver_ubo_indirect(b, 2, &shader->const_state.fdm_ubo,
+                                      param, view, nir_intrinsic_range(instr));
+
+      nir_def_rewrite_uses(&instr->def, result);
+      nir_instr_remove(&instr->instr);
+      return true;
+   }
+   case nir_intrinsic_load_frag_invocation_count: {
+      if (!dev->compiler->load_shader_consts_via_preamble)
+         return false;
+
+      nir_def *result =
+         ir3_load_driver_ubo(b, 1, &shader->const_state.fdm_ubo,
+                             IR3_DP_FS_FRAG_INVOCATION_COUNT);
+
+      nir_def_rewrite_uses(&instr->def, result);
+      nir_instr_remove(&instr->instr);
+      return true;
+   }
+
    default:
       return false;
    }
@@ -1219,6 +1268,7 @@ tu6_emit_xs(struct tu_cs *cs,
    unsigned immediate_size = tu_xs_get_immediates_packet_size_dwords(xs);

    if (immediate_size > 0) {
+      assert(!cs->device->physical_device->info->a7xx.load_shader_consts_via_preamble);
       tu_cs_emit_pkt7(cs, tu6_stage2opcode(stage), 3 + immediate_size);
       tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(base) |
                  CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
@@ -1231,13 +1281,14 @@ tu6_emit_xs(struct tu_cs *cs,
       tu_cs_emit_array(cs, const_state->immediates, immediate_size);
    }

-   if (const_state->constant_data_ubo != -1) {
+   if (const_state->consts_ubo.idx != -1) {
       uint64_t iova = binary_iova + xs->info.constant_data_offset;
+      uint32_t offset = const_state->consts_ubo.idx;

       /* Upload UBO state for the constant data. */
       tu_cs_emit_pkt7(cs, tu6_stage2opcode(stage), 5);
       tu_cs_emit(cs,
-                 CP_LOAD_STATE6_0_DST_OFF(const_state->constant_data_ubo) |
+                 CP_LOAD_STATE6_0_DST_OFF(offset) |
                  CP_LOAD_STATE6_0_STATE_TYPE(ST6_UBO)|
                  CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
                  CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(stage)) |
|
|||
/* Upload the constant data to the const file if needed. */
|
||||
const struct ir3_ubo_analysis_state *ubo_state = &const_state->ubo_state;
|
||||
|
||||
if (!cs->device->physical_device->info->a7xx.load_shader_consts_via_preamble) {
|
||||
for (int i = 0; i < ubo_state->num_enabled; i++) {
|
||||
if (ubo_state->range[i].ubo.block != const_state->constant_data_ubo ||
|
||||
if (ubo_state->range[i].ubo.block != offset ||
|
||||
ubo_state->range[i].ubo.bindless) {
|
||||
continue;
|
||||
}
|
||||
|
|
@@ -1273,9 +1325,28 @@ tu6_emit_xs(struct tu_cs *cs,
          tu_cs_emit_qw(cs, iova + start);
       }
    }
+   }

    /* emit statically-known FS driver param */
-   if (stage == MESA_SHADER_FRAGMENT && const_state->num_driver_params > 0) {
+   if (stage == MESA_SHADER_FRAGMENT && const_state->driver_params_ubo.size > 0) {
+      uint32_t data[4] = {xs->info.double_threadsize ? 128 : 64, 0, 0, 0};
+      uint32_t size = ARRAY_SIZE(data);
+
+      /* A7XX TODO: Emit data via sub_cs instead of NOP */
+      uint64_t iova = tu_cs_emit_data_nop(cs, data, size, 4);
+      uint32_t base = const_state->driver_params_ubo.idx;
+
+      tu_cs_emit_pkt7(cs, tu6_stage2opcode(stage), 5);
+      tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(base) |
+                 CP_LOAD_STATE6_0_STATE_TYPE(ST6_UBO) |
+                 CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
+                 CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(stage)) |
+                 CP_LOAD_STATE6_0_NUM_UNIT(1));
+      tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
+      tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
+      int size_vec4s = DIV_ROUND_UP(size, 4);
+      tu_cs_emit_qw(cs, iova | ((uint64_t)A6XX_UBO_1_SIZE(size_vec4s) << 32));
+   } else if (stage == MESA_SHADER_FRAGMENT && const_state->num_driver_params > 0) {
       uint32_t base = const_state->offsets.driver_param;
       int32_t size = DIV_ROUND_UP(MAX2(const_state->num_driver_params, 4), 4);
       size = MAX2(MIN2(size + base, xs->constlen) - base, 0);
@@ -2176,6 +2247,9 @@ tu_shader_init(struct tu_device *dev, const void *key_data, size_t key_size)
    vk_pipeline_cache_object_init(&dev->vk, &shader->base,
                                  &tu_shader_ops, obj_key_data, key_size);

+   shader->const_state.fdm_ubo.idx = -1;
+   shader->const_state.dynamic_offsets_ubo.idx = -1;
+
    return shader;
 }

@@ -49,6 +49,9 @@ struct tu_const_state
    uint32_t dynamic_offset_loc;
    unsigned num_inline_ubos;
    struct tu_inline_ubo ubos[MAX_INLINE_UBOS];
+
+   struct ir3_driver_ubo fdm_ubo;
+   struct ir3_driver_ubo dynamic_offsets_ubo;
 };

 struct tu_shader
@@ -208,7 +208,7 @@ fd6_emit_ubos(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring,

    for (int i = 0; i < num_ubos; i++) {
       /* NIR constant data is packed into the end of the shader. */
-      if (i == const_state->constant_data_ubo) {
+      if (i == const_state->consts_ubo.idx) {
          int size_vec4s = DIV_ROUND_UP(v->constant_data_size, 16);
          OUT_RELOC(ring, v->bo, v->info.constant_data_offset,
                    (uint64_t)A6XX_UBO_1_SIZE(size_vec4s) << 32, 0);
@@ -119,7 +119,7 @@ ir3_emit_constant_data(const struct ir3_shader_variant *v,

    for (unsigned i = 0; i < state->num_enabled; i++) {
       unsigned ubo = state->range[i].ubo.block;
-      if (ubo != const_state->constant_data_ubo)
+      if (ubo != const_state->consts_ubo.idx)
          continue;

       uint32_t size = state->range[i].end - state->range[i].start;
@@ -161,7 +161,7 @@ ir3_emit_user_consts(const struct ir3_shader_variant *v,
       assert(!state->range[i].ubo.bindless);
       unsigned ubo = state->range[i].ubo.block;
       if (!(constbuf->enabled_mask & (1 << ubo)) ||
-          ubo == const_state->constant_data_ubo) {
+          ubo == const_state->consts_ubo.idx) {
          continue;
       }
       struct pipe_constant_buffer *cb = &constbuf->cb[ubo];
@@ -218,7 +218,7 @@ ir3_emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v,
    struct fd_bo *bos[params];

    for (uint32_t i = 0; i < params; i++) {
-      if (i == const_state->constant_data_ubo) {
+      if (i == const_state->consts_ubo.idx) {
          bos[i] = v->bo;
          offsets[i] = v->info.constant_data_offset;
          continue;