turnip,ir3: Implement A7XX push consts load via preamble

New push consts loading consists of:
- Push consts are set for the entire pipeline via HLSQ_SHARED_CONSTS_IMM
  array which can fit up to 256 bytes of push consts.
- For each shader stage that uses push consts READ_IMM_SHARED_CONSTS
  should be set in HLSQ_*_CNTL, otherwise push consts may get overwritten
  by new push consts that are set after the draw.
- Push consts are loaded into consts reg file in a shader preamble via
  stsc at the very start of the preamble.

OPC_PUSH_CONSTS_LOAD_MACRO is used instead of directly translating NIR
intrinsic into stsc because: we don't want to teach legalize pass how
to set (ss) between stores and loads of consts reg file, don't want
stsc to be reordered, etc.

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25086>
This commit is contained in:
Danylo Piliaiev 2023-09-05 18:24:03 +02:00 committed by Marge Bot
parent e39b6e2b9b
commit a5f0f7d4b1
21 changed files with 215 additions and 48 deletions

View file

@ -1329,6 +1329,10 @@ store("uniform_ir3", [], indices=[BASE])
# vec4's.
intrinsic("copy_ubo_to_uniform_ir3", [1, 1], indices=[BASE, RANGE])
# IR3-specific intrinsic for stsc. Loads from push consts to constant file
# Should be used in the shader preamble.
intrinsic("copy_push_const_to_uniform_ir3", [1], indices=[BASE, RANGE])
# Intrinsics used by the Midgard/Bifrost blend pipeline. These are defined
# within a blend shader to read/write the raw value from the tile buffer,
# without applying any format conversion in the process. If the shader needs

View file

@ -191,6 +191,10 @@ struct fd_dev_info {
} a6xx;
struct {
/* stsc may need to be done twice for the same range to workaround
* _something_, observed in blob's disassembly.
*/
bool stsc_duplication_quirk;
} a7xx;
};

View file

@ -704,7 +704,9 @@ add_gpus([
a7xx_730 = A7XXProps()
a7xx_740 = A7XXProps()
a7xx_740 = A7XXProps(
stsc_duplication_quirk = True,
)
add_gpus([
GPUId(chip_id=0x07030001, name="FD730"), # KGSL, no speedbin data

View file

@ -194,6 +194,7 @@ static const struct opc_info {
OPC(1, OPC_SWZ_SHARED_MACRO, swz_shared.macro),
OPC(1, OPC_SCAN_MACRO, scan.macro),
OPC(1, OPC_SHPS_MACRO, shps.macro),
OPC(1, OPC_PUSH_CONSTS_LOAD_MACRO, push_consts_load.macro),
/* category 2: */
OPC(2, OPC_ADD_F, add.f),

View file

@ -131,6 +131,11 @@ typedef enum {
/* Macros that expand to a loop */
OPC_SCAN_MACRO = _OPC(1, 58),
/* Macros that expand to an stsc at the start of the preamble.
* It loads into const file and should not be optimized in any way.
*/
OPC_PUSH_CONSTS_LOAD_MACRO = _OPC(1, 59),
/* category 2: */
OPC_ADD_F = _OPC(2, 0),
OPC_MIN_F = _OPC(2, 1),
@ -406,7 +411,7 @@ typedef enum {
/*
* A manually encoded opcode
*/
OPC_META_RAW = _OPC(OPC_META, 7)
OPC_META_RAW = _OPC(OPC_META, 7),
} opc_t;
/* clang-format on */

View file

@ -445,6 +445,10 @@ struct ir3_instruction {
*/
gl_system_value sysval;
} input;
struct {
unsigned src_base, src_size;
unsigned dst_base;
} push_consts;
struct {
uint64_t value;
} raw;
@ -2485,6 +2489,7 @@ INSTR1(QUAD_SHUFFLE_VERT)
INSTR1(QUAD_SHUFFLE_DIAG)
INSTR2NODST(LDC_K)
INSTR2NODST(STC)
INSTR2NODST(STSC)
#ifndef GPU
#elif GPU >= 600
INSTR3NODST(STIB);

View file

@ -202,13 +202,13 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id,
compiler->shared_consts_size = 8;
compiler->geom_shared_consts_size_quirk = 16;
} else {
/* A7XX TODO: properly use new shared consts mechanism */
compiler->shared_consts_base_offset = -1;
compiler->shared_consts_size = 0;
compiler->geom_shared_consts_size_quirk = 0;
}
compiler->has_fs_tex_prefetch = dev_info->a6xx.has_fs_tex_prefetch;
compiler->stsc_duplication_quirk = dev_info->a7xx.stsc_duplication_quirk;
} else {
compiler->max_const_pipeline = 512;
compiler->max_const_geom = 512;

View file

@ -245,6 +245,8 @@ struct ir3_compiler {
uint64_t geom_shared_consts_size_quirk;
bool has_fs_tex_prefetch;
bool stsc_duplication_quirk;
};
void ir3_compiler_destroy(struct ir3_compiler *compiler);

View file

@ -2678,6 +2678,16 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
array_insert(b, b->keeps, stc);
break;
}
case nir_intrinsic_copy_push_const_to_uniform_ir3: {
struct ir3_instruction *load =
ir3_instr_create(ctx->block, OPC_PUSH_CONSTS_LOAD_MACRO, 0, 0);
array_insert(b, b->keeps, load);
load->push_consts.dst_base = nir_src_as_uint(intr->src[0]);
load->push_consts.src_base = nir_intrinsic_base(intr);
load->push_consts.src_size = nir_intrinsic_range(intr);
break;
}
default:
ir3_context_error(ctx, "Unhandled intrinsic type: %s\n",
nir_intrinsic_infos[intr->intrinsic].name);

View file

@ -58,6 +58,7 @@ struct ir3_legalize_state {
regmask_t needs_ss;
regmask_t needs_ss_war; /* write after read */
regmask_t needs_sy;
bool needs_ss_for_const;
};
struct ir3_legalize_block_data {
@ -65,6 +66,17 @@ struct ir3_legalize_block_data {
struct ir3_legalize_state state;
};
/* Set the (ss) sync bit on "instr" and clear all legalize state that the
 * sync satisfies: the RAW (needs_ss) and WAR (needs_ss_war) register masks
 * and the pending const-file sync flag.
 */
static inline void
apply_ss(struct ir3_instruction *instr,
         struct ir3_legalize_state *state,
         bool mergedregs)
{
   instr->flags |= IR3_INSTR_SS;

   /* An (ss) waits on everything tracked so far, so the accumulated
    * state can be reset wholesale.
    */
   state->needs_ss_for_const = false;
   regmask_init(&state->needs_ss, mergedregs);
   regmask_init(&state->needs_ss_war, mergedregs);
}
/* We want to evaluate each block from the position of any other
* predecessor block, in order that the flags set are the union of
* all possible program paths.
@ -109,6 +121,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
regmask_or(&state->needs_ss_war, &state->needs_ss_war,
&pstate->needs_ss_war);
regmask_or(&state->needs_sy, &state->needs_sy, &pstate->needs_sy);
state->needs_ss_for_const |= pstate->needs_ss_for_const;
}
/* We need to take physical-only edges into account when tracking shared
@ -162,17 +175,15 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
}
if ((last_n && is_barrier(last_n)) || n->opc == OPC_SHPE) {
n->flags |= IR3_INSTR_SS | IR3_INSTR_SY;
last_input_needs_ss = false;
regmask_init(&state->needs_ss_war, mergedregs);
regmask_init(&state->needs_ss, mergedregs);
apply_ss(n, state, mergedregs);
n->flags |= IR3_INSTR_SY;
regmask_init(&state->needs_sy, mergedregs);
last_input_needs_ss = false;
}
if (last_n && (last_n->opc == OPC_PREDT)) {
n->flags |= IR3_INSTR_SS;
regmask_init(&state->needs_ss_war, mergedregs);
regmask_init(&state->needs_ss, mergedregs);
apply_ss(n, state, mergedregs);
}
/* NOTE: consider dst register too.. it could happen that
@ -195,25 +206,24 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
* some tests for both this and (sy)..
*/
if (regmask_get(&state->needs_ss, reg)) {
n->flags |= IR3_INSTR_SS;
apply_ss(n, state, mergedregs);
last_input_needs_ss = false;
regmask_init(&state->needs_ss_war, mergedregs);
regmask_init(&state->needs_ss, mergedregs);
}
if (regmask_get(&state->needs_sy, reg)) {
n->flags |= IR3_INSTR_SY;
regmask_init(&state->needs_sy, mergedregs);
}
} else if ((reg->flags & IR3_REG_CONST) && state->needs_ss_for_const) {
apply_ss(n, state, mergedregs);
last_input_needs_ss = false;
}
}
foreach_dst (reg, n) {
if (regmask_get(&state->needs_ss_war, reg)) {
n->flags |= IR3_INSTR_SS;
apply_ss(n, state, mergedregs);
last_input_needs_ss = false;
regmask_init(&state->needs_ss_war, mergedregs);
regmask_init(&state->needs_ss, mergedregs);
}
}
@ -230,7 +240,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
}
/* need to be able to set (ss) on first instruction: */
if (list_is_empty(&block->instr_list) && (opc_cat(n->opc) >= 5))
if (list_is_empty(&block->instr_list) && (opc_cat(n->opc) >= 5) && !is_meta(n))
ir3_NOP(block);
if (ctx->compiler->samgq_workaround &&
@ -281,6 +291,8 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
} else {
regmask_set(&state->needs_ss, n->dsts[0]);
}
} else if (n->opc == OPC_PUSH_CONSTS_LOAD_MACRO) {
state->needs_ss_for_const = true;
}
if (is_ssbo(n->opc) || is_global_a3xx_atomic(n->opc) ||
@ -324,9 +336,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
last_input->dsts[0]->flags |= IR3_REG_EI;
if (last_input_needs_ss) {
last_input->flags |= IR3_INSTR_SS;
regmask_init(&state->needs_ss_war, mergedregs);
regmask_init(&state->needs_ss, mergedregs);
apply_ss(last_input, state, mergedregs);
}
}
}
@ -407,6 +417,36 @@ apply_fine_deriv_macro(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
return true;
}
/* Expand OPC_PUSH_CONSTS_LOAD_MACRO into an actual stsc instruction.
 * The macro is only expected as the first non-meta instruction of the
 * preamble block; the scan stops at the first real instruction either way.
 */
static void
apply_push_consts_load_macro(struct ir3_legalize_ctx *ctx,
                             struct ir3_block *block)
{
   foreach_instr (n, &block->instr_list) {
      if (n->opc == OPC_PUSH_CONSTS_LOAD_MACRO) {
         /* stsc takes two immediate srcs: destination offset in the const
          * file, then source offset in the shared consts; the copy size is
          * carried in cat6.iim_val.
          */
         struct ir3_instruction *stsc = ir3_instr_create(block, OPC_STSC, 0, 2);
         ir3_instr_move_after(stsc, n);
         ir3_src_create(stsc, 0, IR3_REG_IMMED)->iim_val =
            n->push_consts.dst_base;
         ir3_src_create(stsc, 0, IR3_REG_IMMED)->iim_val =
            n->push_consts.src_base;
         stsc->cat6.iim_val = n->push_consts.src_size;
         stsc->cat6.type = TYPE_U32;

         if (ctx->compiler->stsc_duplication_quirk) {
            /* Workaround observed in blob disassembly: emit the same stsc
             * twice with an (ss) nop in between (exact reason unknown, see
             * stsc_duplication_quirk).
             */
            struct ir3_instruction *nop = ir3_NOP(block);
            ir3_instr_move_after(nop, stsc);
            nop->flags |= IR3_INSTR_SS;
            ir3_instr_move_after(ir3_instr_clone(stsc), nop);
         }

         /* Drop the macro now that it has been expanded. */
         list_delinit(&n->node);
         break;
      } else if (!is_meta(n)) {
         break;
      }
   }
}
/* NOTE: branch instructions are always the last instruction(s)
* in the block. We take advantage of this as we resolve the
* branches, since "if (foo) break;" constructs turn into
@ -1180,6 +1220,13 @@ ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary)
progress |= apply_fine_deriv_macro(ctx, block);
}
foreach_block (block, &ir->block_list) {
if (block->brtype == IR3_BRANCH_GETONE) {
apply_push_consts_load_macro(ctx, block->successors[0]);
break;
}
}
nop_sched(ir, so);
while (opt_jump(ir))

View file

@ -747,6 +747,9 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)
progress |= OPT(s, ir3_nir_lower_ubo_loads, so);
if (so->shader_options.push_consts_type == IR3_PUSH_CONSTS_SHARED_PREAMBLE)
progress |= OPT(s, ir3_nir_lower_push_consts_to_preamble, so);
progress |= OPT(s, ir3_nir_lower_preamble, so);
OPT_V(s, nir_lower_amul, ir3_glsl_type_size);

View file

@ -40,6 +40,8 @@ bool ir3_nir_lower_imul(nir_shader *shader);
bool ir3_nir_lower_io_offsets(nir_shader *shader);
bool ir3_nir_lower_load_barycentric_at_sample(nir_shader *shader);
bool ir3_nir_lower_load_barycentric_at_offset(nir_shader *shader);
bool ir3_nir_lower_push_consts_to_preamble(nir_shader *nir,
struct ir3_shader_variant *v);
bool ir3_nir_move_varying_inputs(nir_shader *shader);
int ir3_nir_coord_offset(nir_def *ssa);
bool ir3_nir_lower_tex_prefetch(nir_shader *shader);

View file

@ -0,0 +1,28 @@
/*
* Copyright © 2023 Igalia S.L.
* SPDX-License-Identifier: MIT
*/
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "util/u_math.h"
#include "ir3_compiler.h"
#include "ir3_nir.h"
/* Insert a copy_push_const_to_uniform_ir3 intrinsic at the very start of
 * the shader preamble, loading the variant's push-const range (base/dwords
 * from shader_options) into the const file at offset 0.  Later lowered to
 * OPC_PUSH_CONSTS_LOAD_MACRO and finally an stsc.
 * Always returns true (the preamble is always modified).
 */
bool
ir3_nir_lower_push_consts_to_preamble(nir_shader *nir,
                                      struct ir3_shader_variant *v)
{
   nir_function_impl *preamble = nir_shader_get_preamble(nir);
   nir_builder _b = nir_builder_at(nir_before_impl(preamble));
   nir_builder *b = &_b;

   /* src[0] is the destination offset in the const file (0 here);
    * base/range select which push-const dwords to copy.
    */
   nir_copy_push_const_to_uniform_ir3(
      b, nir_imm_int(b, 0), .base = v->shader_options.push_consts_base,
      .range = v->shader_options.push_consts_dwords);

   nir_foreach_function_impl(impl, nir) {
      nir_metadata_preserve(impl, nir_metadata_none);
   }
   return true;
}

View file

@ -691,6 +691,10 @@ sched_block(struct ir3_postsched_ctx *ctx, struct ir3_block *block)
if (instr->opc == OPC_META_TEX_PREFETCH)
schedule(ctx, instr);
foreach_instr_safe (instr, &ctx->unscheduled_list)
if (instr->opc == OPC_PUSH_CONSTS_LOAD_MACRO)
schedule(ctx, instr);
while (!list_is_empty(&ctx->unscheduled_list)) {
struct ir3_instruction *instr = choose_instr(ctx);

View file

@ -181,7 +181,8 @@ print_instr_name(struct log_stream *stream, struct ir3_instruction *instr,
}
}
if (instr->opc != OPC_MOVMSK && instr->opc != OPC_SCAN_MACRO) {
if (instr->opc != OPC_MOVMSK && instr->opc != OPC_SCAN_MACRO &&
instr->opc != OPC_PUSH_CONSTS_LOAD_MACRO) {
mesa_log_stream_printf(stream, ".%s%s",
type_name(instr->cat1.src_type),
type_name(instr->cat1.dst_type));
@ -405,6 +406,11 @@ print_instr(struct log_stream *stream, struct ir3_instruction *instr, int lvl)
mesa_log_stream_printf(stream, ", tex=%d, samp=%d, input_offset=%d",
instr->prefetch.tex, instr->prefetch.samp,
instr->prefetch.input_offset);
} else if (instr->opc == OPC_PUSH_CONSTS_LOAD_MACRO) {
mesa_log_stream_printf(
stream, " dst_offset=%d, src_offset = %d, src_size = %d",
instr->push_consts.dst_base, instr->push_consts.src_base,
instr->push_consts.src_size);
}
if (is_flow(instr) && instr->cat0.target) {

View file

@ -1235,6 +1235,10 @@ sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block)
if (instr->opc == OPC_META_TEX_PREFETCH)
schedule(ctx, instr);
foreach_instr_safe (instr, &ctx->unscheduled_list)
if (instr->opc == OPC_PUSH_CONSTS_LOAD_MACRO)
schedule(ctx, instr);
while (!list_is_empty(&ctx->unscheduled_list)) {
struct ir3_sched_notes notes = {0};
struct ir3_instruction *instr;

View file

@ -151,6 +151,7 @@ enum ir3_push_consts_type {
IR3_PUSH_CONSTS_NONE,
IR3_PUSH_CONSTS_PER_STAGE,
IR3_PUSH_CONSTS_SHARED,
IR3_PUSH_CONSTS_SHARED_PREAMBLE,
};
/**
@ -507,6 +508,9 @@ struct ir3_shader_options {
*/
enum ir3_wavesize_option real_wavesize;
enum ir3_push_consts_type push_consts_type;
uint32_t push_consts_base;
uint32_t push_consts_dwords;
};
/**

View file

@ -95,6 +95,7 @@ libfreedreno_ir3_files = files(
'ir3_nir_lower_64b.c',
'ir3_nir_lower_load_barycentric_at_sample.c',
'ir3_nir_lower_load_barycentric_at_offset.c',
'ir3_nir_lower_push_consts_to_preamble.c',
'ir3_nir_lower_io_offsets.c',
'ir3_nir_lower_tess.c',
'ir3_nir_lower_tex_prefetch.c',

View file

@ -4256,9 +4256,10 @@ tu6_user_consts_size(const struct tu_const_state *const_state,
{
uint32_t dwords = 0;
if (const_state->push_consts.dwords > 0) {
if (const_state->push_consts.type == IR3_PUSH_CONSTS_PER_STAGE) {
unsigned num_units = const_state->push_consts.dwords;
dwords += 4 + num_units;
assert(num_units > 0);
}
dwords += 8 * const_state->num_inline_ubos;
@ -4267,12 +4268,10 @@ tu6_user_consts_size(const struct tu_const_state *const_state,
}
static void
tu6_emit_user_consts(struct tu_cs *cs,
const struct tu_const_state *const_state,
unsigned constlen,
gl_shader_stage type,
struct tu_descriptor_state *descriptors,
uint32_t *push_constants)
tu6_emit_per_stage_push_consts(struct tu_cs *cs,
const struct tu_const_state *const_state,
gl_shader_stage type,
uint32_t *push_constants)
{
if (const_state->push_consts.type == IR3_PUSH_CONSTS_PER_STAGE) {
unsigned num_units = const_state->push_consts.dwords;
@ -4291,7 +4290,15 @@ tu6_emit_user_consts(struct tu_cs *cs,
for (unsigned i = 0; i < num_units; i++)
tu_cs_emit(cs, push_constants[i + offset]);
}
}
static void
tu6_emit_inline_ubo(struct tu_cs *cs,
const struct tu_const_state *const_state,
unsigned constlen,
gl_shader_stage type,
struct tu_descriptor_state *descriptors)
{
/* Emit loads of inline uniforms. These load directly from the uniform's
* storage space inside the descriptor set.
*/
@ -4349,6 +4356,18 @@ tu6_emit_shared_consts(struct tu_cs *cs,
}
}
/* A7XX: write the push constants directly into the HLSQ_SHARED_CONSTS_IMM
 * register array; each shader's preamble then copies them into its const
 * file (via stsc).  Only the dwords in the given range are emitted.
 */
static void
tu7_emit_shared_preamble_consts(
   struct tu_cs *cs,
   const struct tu_push_constant_range *shared_consts,
   uint32_t *push_constants)
{
   tu_cs_emit_pkt4(cs, REG_A7XX_HLSQ_SHARED_CONSTS_IMM(shared_consts->lo),
                   shared_consts->dwords);
   tu_cs_emit_array(cs, push_constants + shared_consts->lo,
                    shared_consts->dwords);
}
static uint32_t
tu6_const_size(struct tu_cmd_buffer *cmd,
const struct tu_push_constant_range *shared_consts,
@ -4358,6 +4377,8 @@ tu6_const_size(struct tu_cmd_buffer *cmd,
if (shared_consts->type == IR3_PUSH_CONSTS_SHARED) {
dwords += shared_consts->dwords + 4;
} else if (shared_consts->type == IR3_PUSH_CONSTS_SHARED_PREAMBLE) {
dwords += shared_consts->dwords + 1;
}
if (compute) {
@ -4372,8 +4393,7 @@ tu6_const_size(struct tu_cmd_buffer *cmd,
}
static struct tu_draw_state
tu6_emit_consts(struct tu_cmd_buffer *cmd,
bool compute)
tu_emit_consts(struct tu_cmd_buffer *cmd, bool compute)
{
uint32_t dwords = 0;
const struct tu_push_constant_range *shared_consts =
@ -4390,24 +4410,30 @@ tu6_emit_consts(struct tu_cmd_buffer *cmd,
if (shared_consts->type == IR3_PUSH_CONSTS_SHARED) {
tu6_emit_shared_consts(&cs, shared_consts, cmd->push_constants, compute);
} else if (shared_consts->type == IR3_PUSH_CONSTS_SHARED_PREAMBLE) {
tu7_emit_shared_preamble_consts(&cs, shared_consts, cmd->push_constants);
}
if (compute) {
tu6_emit_user_consts(&cs,
&cmd->state.shaders[MESA_SHADER_COMPUTE]->const_state,
cmd->state.shaders[MESA_SHADER_COMPUTE]->variant->constlen,
MESA_SHADER_COMPUTE,
tu_get_descriptors_state(cmd, VK_PIPELINE_BIND_POINT_COMPUTE),
cmd->push_constants);
tu6_emit_per_stage_push_consts(
&cs, &cmd->state.shaders[MESA_SHADER_COMPUTE]->const_state,
MESA_SHADER_COMPUTE, cmd->push_constants);
tu6_emit_inline_ubo(
&cs, &cmd->state.shaders[MESA_SHADER_COMPUTE]->const_state,
cmd->state.shaders[MESA_SHADER_COMPUTE]->variant->constlen,
MESA_SHADER_COMPUTE,
tu_get_descriptors_state(cmd, VK_PIPELINE_BIND_POINT_COMPUTE));
} else {
struct tu_descriptor_state *descriptors =
struct tu_descriptor_state *descriptors =
tu_get_descriptors_state(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS);
for (uint32_t type = MESA_SHADER_VERTEX; type <= MESA_SHADER_FRAGMENT; type++) {
const struct tu_program_descriptor_linkage *link =
&cmd->state.program.link[type];
tu6_emit_user_consts(&cs, &link->tu_const_state, link->constlen,
(gl_shader_stage) type,
descriptors, cmd->push_constants);
tu6_emit_per_stage_push_consts(&cs, &link->tu_const_state,
(gl_shader_stage) type,
cmd->push_constants);
tu6_emit_inline_ubo(&cs, &link->tu_const_state, link->constlen,
(gl_shader_stage) type, descriptors);
}
}
@ -4751,7 +4777,7 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
}
if (dirty & TU_CMD_DIRTY_SHADER_CONSTS)
cmd->state.shader_const = tu6_emit_consts(cmd, false);
cmd->state.shader_const = tu_emit_consts(cmd, false);
if (dirty & TU_CMD_DIRTY_DESC_SETS)
tu6_emit_descriptor_sets<CHIP>(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS);
@ -5502,7 +5528,7 @@ tu_dispatch(struct tu_cmd_buffer *cmd,
tu_emit_cache_flush<CHIP>(cmd);
/* note: no reason to have this in a separate IB */
tu_cs_emit_state_ib(cs, tu6_emit_consts(cmd, true));
tu_cs_emit_state_ib(cs, tu_emit_consts(cmd, true));
tu_emit_compute_driver_params<CHIP>(cmd, cs, info);

View file

@ -318,7 +318,11 @@ tu_push_consts_type(const struct tu_pipeline_layout *layout,
if (tu6_shared_constants_enable(layout, compiler)) {
return IR3_PUSH_CONSTS_SHARED;
} else {
return IR3_PUSH_CONSTS_PER_STAGE;
if (compiler->gen >= 7) {
return IR3_PUSH_CONSTS_SHARED_PREAMBLE;
} else {
return IR3_PUSH_CONSTS_PER_STAGE;
}
}
}
@ -385,7 +389,9 @@ tu6_emit_xs_config(struct tu_cs *cs,
tu_cs_emit_pkt4(cs, cfg->reg_hlsq_xs_ctrl, 1);
tu_cs_emit(cs, A6XX_HLSQ_VS_CNTL_CONSTLEN(xs->constlen) |
A6XX_HLSQ_VS_CNTL_ENABLED);
A6XX_HLSQ_VS_CNTL_ENABLED |
COND(xs->shader_options.push_consts_type == IR3_PUSH_CONSTS_SHARED_PREAMBLE,
A7XX_HLSQ_VS_CNTL_READ_IMM_SHARED_CONSTS));
}
TU_GENX(tu6_emit_xs_config);
@ -2335,10 +2341,11 @@ tu_pipeline_builder_parse_shader_stages(struct tu_pipeline_builder *builder,
&pipeline->shaders[i]->const_state,
variants[i]);
if (pipeline->shaders[i]->const_state.push_consts.type ==
IR3_PUSH_CONSTS_SHARED) {
pipeline->program.shared_consts =
pipeline->shaders[i]->const_state.push_consts;
struct tu_push_constant_range *push_consts =
&pipeline->shaders[i]->const_state.push_consts;
if (push_consts->type == IR3_PUSH_CONSTS_SHARED ||
push_consts->type == IR3_PUSH_CONSTS_SHARED_PREAMBLE) {
pipeline->program.shared_consts = *push_consts;
}
}

View file

@ -2286,6 +2286,8 @@ tu_shader_create(struct tu_device *dev,
.api_wavesize = key->api_wavesize,
.real_wavesize = key->real_wavesize,
.push_consts_type = shader->const_state.push_consts.type,
.push_consts_base = shader->const_state.push_consts.lo,
.push_consts_dwords = shader->const_state.push_consts.dwords,
};
struct ir3_shader *ir3_shader =