mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 19:40:10 +01:00
nir: Add a .base field to nir_load_ubo_vec4.
This lets nir-to-tgsi fold the constant offset of addressing calculations into the CONST[] reference, which is important for D3D9-era compatibility: HW of that age has limited uniform space, and if we do the addressing math as math in the shader for dynamic indexing, the nir_load_consts end up taking up uniforms we don't have available.

r300:
total instructions in shared programs: 1279699 -> 1279167 (-0.04%)
instructions in affected programs: 134796 -> 134264 (-0.39%)
total instructions in shared programs: 1279699 -> 1279167 (-0.04%)
instructions in affected programs: 134796 -> 134264 (-0.39%)
total temps in shared programs: 213912 -> 213736 (-0.08%)
temps in affected programs: 2166 -> 1990 (-8.13%)
total consts in shared programs: 953237 -> 952973 (-0.03%)
consts in affected programs: 45980 -> 45716 (-0.57%)

Acked-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14309>
This commit is contained in:
parent
a98103c55d
commit
700d2fbd0a
7 changed files with 20 additions and 8 deletions
|
|
@ -5257,6 +5257,9 @@ typedef struct {
|
|||
/** nir_load_uniform max base offset */
|
||||
uint32_t uniform_max;
|
||||
|
||||
/** nir_load_ubo_vec4 max base offset */
|
||||
uint32_t ubo_vec4_max;
|
||||
|
||||
/** nir_var_mem_shared max base offset */
|
||||
uint32_t shared_max;
|
||||
|
||||
|
|
|
|||
|
|
@ -934,8 +934,8 @@ def load(name, src_comp, indices=[], flags=[]):
|
|||
load("uniform", [1], [BASE, RANGE, DEST_TYPE], [CAN_ELIMINATE, CAN_REORDER])
|
||||
# src[] = { buffer_index, offset }.
|
||||
load("ubo", [-1, 1], [ACCESS, ALIGN_MUL, ALIGN_OFFSET, RANGE_BASE, RANGE], flags=[CAN_ELIMINATE, CAN_REORDER])
|
||||
# src[] = { buffer_index, offset in vec4 units }
|
||||
load("ubo_vec4", [-1, 1], [ACCESS, COMPONENT], flags=[CAN_ELIMINATE, CAN_REORDER])
|
||||
# src[] = { buffer_index, offset in vec4 units }. base is also in vec4 units.
|
||||
load("ubo_vec4", [-1, 1], [ACCESS, BASE, COMPONENT], flags=[CAN_ELIMINATE, CAN_REORDER])
|
||||
# src[] = { offset }.
|
||||
load("input", [1], [BASE, COMPONENT, DEST_TYPE, IO_SEMANTICS], [CAN_ELIMINATE, CAN_REORDER])
|
||||
# src[] = { vertex_id, offset }.
|
||||
|
|
|
|||
|
|
@ -65,8 +65,7 @@ lower_instr(nir_intrinsic_instr *instr, nir_builder *b, bool dword_packed, bool
|
|||
*/
|
||||
assert(!dword_packed);
|
||||
load_result = nir_load_ubo_vec4(b, instr->num_components, instr->dest.ssa.bit_size,
|
||||
ubo_idx,
|
||||
nir_iadd_imm(b, uniform_offset, nir_intrinsic_base(instr)));
|
||||
ubo_idx, uniform_offset, .base=nir_intrinsic_base(instr));
|
||||
} else {
|
||||
/* For PIPE_CAP_PACKED_UNIFORMS, the uniforms are packed with the
|
||||
* base/offset in dword units instead of vec4 units.
|
||||
|
|
|
|||
|
|
@ -135,6 +135,8 @@ process_instr(nir_builder *b, nir_instr *instr, void *s)
|
|||
switch (intrin->intrinsic) {
|
||||
case nir_intrinsic_load_uniform:
|
||||
return try_fold_load_store(b, intrin, state, 0, state->options->uniform_max);
|
||||
case nir_intrinsic_load_ubo_vec4:
|
||||
return try_fold_load_store(b, intrin, state, 1, state->options->ubo_vec4_max);
|
||||
case nir_intrinsic_load_shared:
|
||||
case nir_intrinsic_load_shared_ir3:
|
||||
return try_fold_load_store(b, intrin, state, 0, state->options->shared_max);
|
||||
|
|
|
|||
|
|
@ -873,6 +873,11 @@ emit_intrinsic_load_ubo_ldc(struct ir3_context *ctx, nir_intrinsic_instr *intr,
|
|||
{
|
||||
struct ir3_block *b = ctx->block;
|
||||
|
||||
/* This is only generated for us by nir_lower_ubo_vec4, which leaves base =
|
||||
* 0.
|
||||
*/
|
||||
assert(nir_intrinsic_base(intr) == 0);
|
||||
|
||||
unsigned ncomp = intr->num_components;
|
||||
struct ir3_instruction *offset = ir3_get_src(ctx, &intr->src[1])[0];
|
||||
struct ir3_instruction *idx = ir3_get_src(ctx, &intr->src[0])[0];
|
||||
|
|
|
|||
|
|
@ -1338,6 +1338,7 @@ ntt_emit_load_ubo(struct ntt_compile *c, nir_intrinsic_instr *instr)
|
|||
/* !PIPE_CAP_LOAD_CONSTBUF: Just emit it as a vec4 reference to the const
|
||||
* file.
|
||||
*/
|
||||
src.Index = nir_intrinsic_base(instr);
|
||||
|
||||
if (nir_src_is_const(instr->src[1])) {
|
||||
src.Index += ntt_src_as_uint(c, instr->src[1]);
|
||||
|
|
|
|||
|
|
@ -872,6 +872,7 @@ bool ShaderFromNirProcessor::emit_load_ubo_vec4(nir_intrinsic_instr* instr)
|
|||
{
|
||||
auto bufid = nir_src_as_const_value(instr->src[0]);
|
||||
auto buf_offset = nir_src_as_const_value(instr->src[1]);
|
||||
auto base = nir_intrinsic_base(instr);
|
||||
|
||||
if (!buf_offset) {
|
||||
/* TODO: if buf_offset is constant then this can also be solved by using the CF index
|
||||
|
|
@ -892,11 +893,11 @@ bool ShaderFromNirProcessor::emit_load_ubo_vec4(nir_intrinsic_instr* instr)
|
|||
|
||||
FetchInstruction *ir;
|
||||
if (bufid) {
|
||||
ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0,
|
||||
ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, base,
|
||||
1 + bufid->u32, nullptr, bim_none);
|
||||
} else {
|
||||
PValue bufid = from_nir(instr->src[0], 0, 0);
|
||||
ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0,
|
||||
ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, base,
|
||||
1, bufid, bim_zero);
|
||||
}
|
||||
ir->set_dest_swizzle(swz);
|
||||
|
|
@ -905,6 +906,7 @@ bool ShaderFromNirProcessor::emit_load_ubo_vec4(nir_intrinsic_instr* instr)
|
|||
return true;
|
||||
}
|
||||
|
||||
uint32_t offset = 512 + base + buf_offset->u32;
|
||||
|
||||
if (bufid) {
|
||||
int buf_cmp = nir_intrinsic_component(instr);
|
||||
|
|
@ -912,7 +914,7 @@ bool ShaderFromNirProcessor::emit_load_ubo_vec4(nir_intrinsic_instr* instr)
|
|||
for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
|
||||
int cmp = buf_cmp + i;
|
||||
assert(cmp < 4);
|
||||
auto u = PValue(new UniformValue(512 + buf_offset->u32, cmp, bufid->u32 + 1));
|
||||
auto u = PValue(new UniformValue(offset, cmp, bufid->u32 + 1));
|
||||
if (instr->dest.is_ssa)
|
||||
load_preloaded_value(instr->dest, i, u);
|
||||
else {
|
||||
|
|
@ -930,7 +932,7 @@ bool ShaderFromNirProcessor::emit_load_ubo_vec4(nir_intrinsic_instr* instr)
|
|||
auto kc_id = from_nir(instr->src[0], 0);
|
||||
for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
|
||||
int cmp = buf_cmp + i;
|
||||
auto u = PValue(new UniformValue(512 + buf_offset->u32, cmp, kc_id));
|
||||
auto u = PValue(new UniformValue(offset, cmp, kc_id));
|
||||
if (instr->dest.is_ssa)
|
||||
load_preloaded_value(instr->dest, i, u);
|
||||
else {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue