ir3,tu: Refactor push consts info plumbing

In preparation for a new way to pass push consts into a shader,
introduced in a7xx.

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25086>
This commit is contained in:
Danylo Piliaiev 2023-09-05 11:26:16 +02:00 committed by Marge Bot
parent d5d7631060
commit 823b3bfeea
15 changed files with 89 additions and 84 deletions

View file

@ -72,7 +72,8 @@ is_shared_consts(struct ir3_compiler *compiler,
struct ir3_const_state *const_state,
struct ir3_register *reg)
{
if (const_state->shared_consts_enable && reg->flags & IR3_REG_CONST) {
if (const_state->push_consts_type == IR3_PUSH_CONSTS_SHARED &&
reg->flags & IR3_REG_CONST) {
uint32_t min_const_reg = regid(compiler->shared_consts_base_offset, 0);
uint32_t max_const_reg =
regid(compiler->shared_consts_base_offset +
@ -136,9 +137,9 @@ ir3_should_double_threadsize(struct ir3_shader_variant *v, unsigned regs_count)
const struct ir3_compiler *compiler = v->compiler;
/* If the user forced a particular wavesize respect that. */
if (v->real_wavesize == IR3_SINGLE_ONLY)
if (v->shader_options.real_wavesize == IR3_SINGLE_ONLY)
return false;
if (v->real_wavesize == IR3_DOUBLE_ONLY)
if (v->shader_options.real_wavesize == IR3_DOUBLE_ONLY)
return true;
/* We can't support more than compiler->branchstack_size diverging threads

View file

@ -2087,7 +2087,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
*/
ctx->so->constlen =
MAX2(ctx->so->constlen,
ctx->so->num_reserved_user_consts +
ctx->so->shader_options.num_reserved_user_consts +
const_state->ubo_state.size / 16);
}
break;

View file

@ -90,10 +90,10 @@ ir3_disk_cache_init_shader_key(struct ir3_compiler *compiler,
_mesa_sha1_update(&ctx, blob.data, blob.size);
blob_finish(&blob);
_mesa_sha1_update(&ctx, &shader->api_wavesize,
sizeof(shader->api_wavesize));
_mesa_sha1_update(&ctx, &shader->real_wavesize,
sizeof(shader->real_wavesize));
_mesa_sha1_update(&ctx, &shader->options.api_wavesize,
sizeof(shader->options.api_wavesize));
_mesa_sha1_update(&ctx, &shader->options.real_wavesize,
sizeof(shader->options.real_wavesize));
/* Note that on some gens stream-out is lowered in ir3 to stg. For later
* gens we maybe don't need to include stream-out in the cache key.

View file

@ -531,7 +531,7 @@ ir3_nir_post_finalize(struct ir3_shader *shader)
* the "real" subgroup size.
*/
unsigned subgroup_size = 0, max_subgroup_size = 0;
switch (shader->api_wavesize) {
switch (shader->options.api_wavesize) {
case IR3_SINGLE_ONLY:
subgroup_size = max_subgroup_size = compiler->threadsize_base;
break;
@ -981,7 +981,7 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
const_state->num_ubos = nir->info.num_ubos;
assert((const_state->ubo_state.size % 16) == 0);
unsigned constoff = v->num_reserved_user_consts +
unsigned constoff = v->shader_options.num_reserved_user_consts +
const_state->ubo_state.size / 16 +
const_state->preamble_size;
unsigned ptrsz = ir3_pointer_size(compiler);

View file

@ -450,7 +450,7 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader_variant *v)
uint32_t range_size = state->range[i].end - state->range[i].start;
assert(offset <= max_upload);
state->range[i].offset = offset + v->num_reserved_user_consts * 16;
state->range[i].offset = offset + v->shader_options.num_reserved_user_consts * 16;
assert(offset <= max_upload);
offset += range_size;
}

View file

@ -303,7 +303,7 @@ ir3_nir_lower_preamble(nir_shader *nir, struct ir3_shader_variant *v)
/* First, lower load/store_preamble. */
const struct ir3_const_state *const_state = ir3_const_state(v);
unsigned preamble_base = v->num_reserved_user_consts * 4 +
unsigned preamble_base = v->shader_options.num_reserved_user_consts * 4 +
const_state->ubo_state.size / 4;
unsigned preamble_size = const_state->preamble_size * 4;

View file

@ -2577,7 +2577,7 @@ ir3_ra(struct ir3_shader_variant *v)
* because on some gens the register file is not big enough to hold a
* double-size wave with all 48 registers in use.
*/
if (v->real_wavesize == IR3_DOUBLE_ONLY) {
if (v->shader_options.real_wavesize == IR3_DOUBLE_ONLY) {
limit_pressure.full =
MAX2(limit_pressure.full, ctx->compiler->reg_size_vec4 / 2 * 16);
}

View file

@ -298,13 +298,11 @@ alloc_variant(struct ir3_shader *shader, const struct ir3_shader_key *key,
v->num_ssbos = info->num_ssbos;
v->num_ibos = info->num_ssbos + info->num_images;
v->num_reserved_user_consts = shader->num_reserved_user_consts;
v->api_wavesize = shader->api_wavesize;
v->real_wavesize = shader->real_wavesize;
v->shader_options = shader->options;
if (!v->binning_pass) {
v->const_state = rzalloc_size(v, sizeof(*v->const_state));
v->const_state->shared_consts_enable = shader->shared_consts_enable;
v->const_state->push_consts_type = shader->options.push_consts_type;
}
return v;
@ -589,7 +587,7 @@ ir3_trim_constlen(const struct ir3_shader_variant **variants,
if (variants[i]) {
constlens[i] = variants[i]->constlen;
shared_consts_enable =
ir3_const_state(variants[i])->shared_consts_enable;
ir3_const_state(variants[i])->push_consts_type == IR3_PUSH_CONSTS_SHARED;
}
}
@ -641,10 +639,7 @@ ir3_shader_from_nir(struct ir3_compiler *compiler, nir_shader *nir,
if (stream_output)
memcpy(&shader->stream_output, stream_output,
sizeof(shader->stream_output));
shader->num_reserved_user_consts = options->reserved_user_consts;
shader->api_wavesize = options->api_wavesize;
shader->real_wavesize = options->real_wavesize;
shader->shared_consts_enable = options->shared_consts_enable;
shader->options = *options;
shader->nir = nir;
ir3_disk_cache_init_shader_key(compiler, shader);

View file

@ -147,6 +147,12 @@ struct ir3_ubo_analysis_state {
uint32_t size;
};
/* Selects how (and whether) push constants are made available to a shader. */
enum ir3_push_consts_type {
/* No push constants; the driver picks this when the pipeline layout's
 * push constant size is zero.
 */
IR3_PUSH_CONSTS_NONE,
/* Push constants are emitted separately for each shader stage, covering
 * only the range used by that shader.
 */
IR3_PUSH_CONSTS_PER_STAGE,
/* Push constants are emitted once as "shared consts", covering the overall
 * range provided by the pipeline layout.
 */
IR3_PUSH_CONSTS_SHARED,
};
/**
* Describes the layout of shader consts in the const register file.
*
@ -213,7 +219,7 @@ struct ir3_const_state {
/* State of ubo access lowered to push consts: */
struct ir3_ubo_analysis_state ubo_state;
bool shared_consts_enable;
enum ir3_push_consts_type push_consts_type;
};
/**
@ -489,6 +495,20 @@ struct ir3_disasm_info {
/* Represents half register in regid */
#define HALF_REG_ID 0x100
/* Options supplied by the driver when creating an ir3_shader. The whole
 * struct is stored in the shader and copied into each of its variants.
 */
struct ir3_shader_options {
/* Amount of const space reserved for the driver; UBO ranges and preamble
 * consts are placed after it.
 * NOTE(review): appears to be in vec4 units -- confirm against callers.
 */
unsigned num_reserved_user_consts;
/* What API-visible wavesizes are allowed. Even if only double wavesize is
* allowed, we may still use the smaller wavesize "under the hood" and the
* application simply sees the upper half as always disabled.
*/
enum ir3_wavesize_option api_wavesize;
/* What wavesizes we're allowed to actually use. If the API wavesize is
* single-only, then this must be single-only too.
*/
enum ir3_wavesize_option real_wavesize;
/* How push constants are passed to the shader. Also copied into the
 * ir3_const_state of non-binning variants when they are allocated.
 */
enum ir3_push_consts_type push_consts_type;
};
/**
* Shader variant which contains the actual hw shader instructions,
* and necessary info for shader state setup.
@ -554,6 +574,8 @@ struct ir3_shader_variant {
struct ir3_info info;
struct ir3_shader_options shader_options;
uint32_t constant_data_size;
/* Levels of nesting of flow control:
@ -751,8 +773,6 @@ struct ir3_shader_variant {
/* The total number of SSBOs and images, i.e. the number of hardware IBOs. */
unsigned num_ibos;
unsigned num_reserved_user_consts;
union {
struct {
enum tess_primitive_mode primitive_mode;
@ -790,8 +810,6 @@ struct ir3_shader_variant {
} cs;
};
enum ir3_wavesize_option api_wavesize, real_wavesize;
/* For when we don't have a shader, variant's copy of streamout state */
struct ir3_stream_output_info stream_output;
};
@ -849,18 +867,7 @@ struct ir3_shader {
struct ir3_compiler *compiler;
unsigned num_reserved_user_consts;
/* What API-visible wavesizes are allowed. Even if only double wavesize is
* allowed, we may still use the smaller wavesize "under the hood" and the
* application simply sees the upper half as always disabled.
*/
enum ir3_wavesize_option api_wavesize;
/* What wavesizes we're allowed to actually use. If the API wavesize is
* single-only, then this must be single-only too.
*/
enum ir3_wavesize_option real_wavesize;
struct ir3_shader_options options;
bool nir_finalized;
struct nir_shader *nir;
@ -893,8 +900,6 @@ struct ir3_shader {
* recompiles for GL NOS that doesn't actually apply to the shader.
*/
struct ir3_shader_key key_mask;
bool shared_consts_enable;
};
/**
@ -914,7 +919,8 @@ static inline unsigned
_ir3_max_const(const struct ir3_shader_variant *v, bool safe_constlen)
{
const struct ir3_compiler *compiler = v->compiler;
bool shared_consts_enable = ir3_const_state(v)->shared_consts_enable;
bool shared_consts_enable =
ir3_const_state(v)->push_consts_type == IR3_PUSH_CONSTS_SHARED;
/* Shared consts size for CS and FS matches with what's actually used,
* but the size of shared consts for geometry stages doesn't.
@ -969,13 +975,6 @@ ir3_shader_get_variant(struct ir3_shader *shader,
const struct ir3_shader_key *key, bool binning_pass,
bool keep_ir, bool *created);
/* Options passed by the driver to ir3_shader_from_nir(). */
struct ir3_shader_options {
unsigned reserved_user_consts;
enum ir3_wavesize_option api_wavesize, real_wavesize;
bool shared_consts_enable;
};
struct ir3_shader *
ir3_shader_from_nir(struct ir3_compiler *compiler, nir_shader *nir,
const struct ir3_shader_options *options,

View file

@ -741,7 +741,7 @@ compile_shader(struct tu_device *dev, struct nir_shader *nir,
ir3_finalize_nir(dev->compiler, nir);
const struct ir3_shader_options options = {
.reserved_user_consts = align(consts, 4),
.num_reserved_user_consts = align(consts, 4),
.api_wavesize = IR3_SINGLE_OR_DOUBLE,
.real_wavesize = IR3_SINGLE_OR_DOUBLE,
};

View file

@ -4274,9 +4274,10 @@ tu6_emit_user_consts(struct tu_cs *cs,
struct tu_descriptor_state *descriptors,
uint32_t *push_constants)
{
if (const_state->push_consts.dwords > 0) {
if (const_state->push_consts.type == IR3_PUSH_CONSTS_PER_STAGE) {
unsigned num_units = const_state->push_consts.dwords;
unsigned offset = const_state->push_consts.lo;
assert(num_units > 0);
/* DST_OFF and NUM_UNIT require vec4 units */
tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3 + num_units);
@ -4355,7 +4356,7 @@ tu6_const_size(struct tu_cmd_buffer *cmd,
{
uint32_t dwords = 0;
if (shared_consts->dwords > 0) {
if (shared_consts->type == IR3_PUSH_CONSTS_SHARED) {
dwords += shared_consts->dwords + 4;
}
@ -4376,7 +4377,7 @@ tu6_emit_consts(struct tu_cmd_buffer *cmd,
{
uint32_t dwords = 0;
const struct tu_push_constant_range *shared_consts =
compute ? &cmd->state.shaders[MESA_SHADER_COMPUTE]->shared_consts :
compute ? &cmd->state.shaders[MESA_SHADER_COMPUTE]->const_state.push_consts :
&cmd->state.program.shared_consts;
dwords = tu6_const_size(cmd, shared_consts, compute);
@ -4387,14 +4388,8 @@ tu6_emit_consts(struct tu_cmd_buffer *cmd,
struct tu_cs cs;
tu_cs_begin_sub_stream(&cmd->sub_cs, dwords, &cs);
if (shared_consts->dwords > 0) {
if (shared_consts->type == IR3_PUSH_CONSTS_SHARED) {
tu6_emit_shared_consts(&cs, shared_consts, cmd->push_constants, compute);
for (uint32_t i = 0; i < ARRAY_SIZE(cmd->state.program.link); i++) {
const struct tu_program_descriptor_linkage *link =
&cmd->state.program.link[i];
assert(!link->tu_const_state.push_consts.dwords);
}
}
if (compute) {

View file

@ -308,6 +308,20 @@ tu_blend_state_is_dual_src(const struct vk_color_blend_state *cb)
return false;
}
/* Decide how push constants are delivered for the given pipeline layout:
 * not at all when the layout declares none, through shared consts when
 * tu6_shared_constants_enable() allows it, and per stage otherwise.
 */
enum ir3_push_consts_type
tu_push_consts_type(const struct tu_pipeline_layout *layout,
                    const struct ir3_compiler *compiler)
{
   /* A layout without push constants needs no upload path. */
   if (layout->push_constant_size == 0)
      return IR3_PUSH_CONSTS_NONE;

   return tu6_shared_constants_enable(layout, compiler)
             ? IR3_PUSH_CONSTS_SHARED
             : IR3_PUSH_CONSTS_PER_STAGE;
}
template <chip CHIP>
struct xs_config {
uint16_t reg_sp_xs_config;
@ -2321,9 +2335,10 @@ tu_pipeline_builder_parse_shader_stages(struct tu_pipeline_builder *builder,
&pipeline->shaders[i]->const_state,
variants[i]);
if (pipeline->shaders[i]->shared_consts.dwords != 0) {
if (pipeline->shaders[i]->const_state.push_consts.type ==
IR3_PUSH_CONSTS_SHARED) {
pipeline->program.shared_consts =
pipeline->shaders[i]->shared_consts;
pipeline->shaders[i]->const_state.push_consts;
}
}

View file

@ -70,6 +70,10 @@ tu6_shared_constants_enable(const struct tu_pipeline_layout *layout,
layout->push_constant_size <= (compiler->shared_consts_size * 16);
}
/* Returns how push constants are passed to shaders built against "layout":
 * none, shared consts, or per-stage consts.
 */
enum ir3_push_consts_type
tu_push_consts_type(const struct tu_pipeline_layout *layout,
const struct ir3_compiler *compiler);
struct tu_program_descriptor_linkage
{
struct ir3_const_state const_state;

View file

@ -682,8 +682,7 @@ gather_push_constants(nir_shader *shader, struct tu_shader *tu_shader)
}
if (min >= max) {
tu_shader->const_state.push_consts.lo = 0;
tu_shader->const_state.push_consts.dwords = 0;
tu_shader->const_state.push_consts = (struct tu_push_constant_range) {};
return;
}
@ -706,7 +705,7 @@ tu_lower_io(nir_shader *shader, struct tu_device *dev,
const struct tu_pipeline_layout *layout,
unsigned *reserved_consts_vec4_out)
{
if (!tu6_shared_constants_enable(layout, dev->compiler))
if (tu_shader->const_state.push_consts.type == IR3_PUSH_CONSTS_PER_STAGE)
gather_push_constants(shader, tu_shader);
struct tu_const_state *const_state = &tu_shader->const_state;
@ -1227,7 +1226,8 @@ tu6_emit_cs_config(struct tu_cs *cs,
const struct tu_pvtmem_config *pvtmem,
uint64_t binary_iova)
{
bool shared_consts_enable = ir3_const_state(v)->shared_consts_enable;
bool shared_consts_enable =
ir3_const_state(v)->push_consts_type == IR3_PUSH_CONSTS_SHARED;
tu6_emit_shared_consts_enable<CHIP>(cs, shared_consts_enable);
tu_cs_emit_regs(cs, HLSQ_INVALIDATE_CMD(CHIP,
@ -2084,7 +2084,6 @@ tu_shader_serialize(struct vk_pipeline_cache_object *object,
container_of(object, struct tu_shader, base);
blob_write_bytes(blob, &shader->const_state, sizeof(shader->const_state));
blob_write_bytes(blob, &shader->shared_consts, sizeof(shader->shared_consts));
blob_write_uint32(blob, shader->view_mask);
blob_write_uint8(blob, shader->active_desc_sets);
@ -2126,7 +2125,6 @@ tu_shader_deserialize(struct vk_pipeline_cache *cache,
return NULL;
blob_copy_bytes(blob, &shader->const_state, sizeof(shader->const_state));
blob_copy_bytes(blob, &shader->shared_consts, sizeof(shader->shared_consts));
shader->view_mask = blob_read_uint32(blob);
shader->active_desc_sets = blob_read_uint8(blob);
@ -2270,6 +2268,12 @@ tu_shader_create(struct tu_device *dev,
nir->info.stage == MESA_SHADER_GEOMETRY)
tu_gather_xfb_info(nir, &so_info);
shader->const_state.push_consts = (struct tu_push_constant_range) {
.lo = 0,
.dwords = layout->push_constant_size / 4,
.type = tu_push_consts_type(layout, dev->compiler),
};
unsigned reserved_consts_vec4 = 0;
NIR_PASS_V(nir, tu_lower_io, dev, shader, layout, &reserved_consts_vec4);
@ -2277,20 +2281,11 @@ tu_shader_create(struct tu_device *dev,
ir3_finalize_nir(dev->compiler, nir);
bool shared_consts_enable = tu6_shared_constants_enable(layout, dev->compiler);
if (shared_consts_enable) {
assert(!shader->const_state.push_consts.dwords);
shader->shared_consts = (struct tu_push_constant_range) {
.lo = 0,
.dwords = layout->push_constant_size / 4,
};
}
const struct ir3_shader_options options = {
.reserved_user_consts = reserved_consts_vec4,
.num_reserved_user_consts = reserved_consts_vec4,
.api_wavesize = key->api_wavesize,
.real_wavesize = key->real_wavesize,
.shared_consts_enable = shared_consts_enable,
.push_consts_type = shader->const_state.push_consts.type,
};
struct ir3_shader *ir3_shader =

View file

@ -30,10 +30,16 @@ struct tu_inline_ubo
unsigned size_vec4;
};
/* The meaning of the range depends on "type". If it's
* IR3_PUSH_CONSTS_PER_STAGE, then it's the range used by this shader. If
* it's IR3_PUSH_CONSTS_SHARED then it's the overall range as provided by
* the pipeline layout and must match between shaders where it's non-zero.
*/
struct tu_push_constant_range
{
/* Start of the range; used as the destination offset when emitting
 * per-stage push consts. NOTE(review): units not visible here -- confirm.
 */
uint32_t lo;
/* Length of the range. Initialized from the layout's push constant size
 * divided by 4 when the shader is created.
 */
uint32_t dwords;
/* Selects which of the interpretations described above applies. */
enum ir3_push_consts_type type;
};
struct tu_const_state
@ -63,11 +69,6 @@ struct tu_shader
uint32_t view_mask;
uint8_t active_desc_sets;
/* This is the range of shared consts used by all shaders. It must be the
* same between shaders.
*/
struct tu_push_constant_range shared_consts;
union {
struct {
unsigned patch_type;