diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 0c5b82cfd3c..aae86579bc7 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -5257,6 +5257,9 @@ typedef struct { /** nir_load_uniform max base offset */ uint32_t uniform_max; + /** nir_load_ubo_vec4 max base offset */ + uint32_t ubo_vec4_max; + /** nir_var_mem_shared max base offset */ uint32_t shared_max; diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 24f47f6279b..8f59443f8ea 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -934,8 +934,8 @@ def load(name, src_comp, indices=[], flags=[]): load("uniform", [1], [BASE, RANGE, DEST_TYPE], [CAN_ELIMINATE, CAN_REORDER]) # src[] = { buffer_index, offset }. load("ubo", [-1, 1], [ACCESS, ALIGN_MUL, ALIGN_OFFSET, RANGE_BASE, RANGE], flags=[CAN_ELIMINATE, CAN_REORDER]) -# src[] = { buffer_index, offset in vec4 units } -load("ubo_vec4", [-1, 1], [ACCESS, COMPONENT], flags=[CAN_ELIMINATE, CAN_REORDER]) +# src[] = { buffer_index, offset in vec4 units }. base is also in vec4 units. +load("ubo_vec4", [-1, 1], [ACCESS, BASE, COMPONENT], flags=[CAN_ELIMINATE, CAN_REORDER]) # src[] = { offset }. load("input", [1], [BASE, COMPONENT, DEST_TYPE, IO_SEMANTICS], [CAN_ELIMINATE, CAN_REORDER]) # src[] = { vertex_id, offset }. diff --git a/src/compiler/nir/nir_lower_uniforms_to_ubo.c b/src/compiler/nir/nir_lower_uniforms_to_ubo.c index 12f835c3f0e..8fefcfd8f33 100644 --- a/src/compiler/nir/nir_lower_uniforms_to_ubo.c +++ b/src/compiler/nir/nir_lower_uniforms_to_ubo.c @@ -65,8 +65,7 @@ lower_instr(nir_intrinsic_instr *instr, nir_builder *b, bool dword_packed, bool */ assert(!dword_packed); load_result = nir_load_ubo_vec4(b, instr->num_components, instr->dest.ssa.bit_size, - ubo_idx, - nir_iadd_imm(b, uniform_offset, nir_intrinsic_base(instr))); + ubo_idx, uniform_offset, .base=nir_intrinsic_base(instr)); } else { /* For PIPE_CAP_PACKED_UNIFORMS, the uniforms are packed with the * base/offset in dword units instead of vec4 units. diff --git a/src/compiler/nir/nir_opt_offsets.c b/src/compiler/nir/nir_opt_offsets.c index 58cfa98a1ac..0d10fc93e46 100644 --- a/src/compiler/nir/nir_opt_offsets.c +++ b/src/compiler/nir/nir_opt_offsets.c @@ -135,6 +135,8 @@ process_instr(nir_builder *b, nir_instr *instr, void *s) switch (intrin->intrinsic) { case nir_intrinsic_load_uniform: return try_fold_load_store(b, intrin, state, 0, state->options->uniform_max); + case nir_intrinsic_load_ubo_vec4: + return try_fold_load_store(b, intrin, state, 1, state->options->ubo_vec4_max); case nir_intrinsic_load_shared: case nir_intrinsic_load_shared_ir3: return try_fold_load_store(b, intrin, state, 0, state->options->shared_max); diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index 17449664e40..9611e743e81 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -873,6 +873,11 @@ emit_intrinsic_load_ubo_ldc(struct ir3_context *ctx, nir_intrinsic_instr *intr, { struct ir3_block *b = ctx->block; + /* This is only generated for us by nir_lower_ubo_vec4, which leaves base = + * 0. + */ + assert(nir_intrinsic_base(intr) == 0); + unsigned ncomp = intr->num_components; struct ir3_instruction *offset = ir3_get_src(ctx, &intr->src[1])[0]; struct ir3_instruction *idx = ir3_get_src(ctx, &intr->src[0])[0]; diff --git a/src/gallium/auxiliary/nir/nir_to_tgsi.c b/src/gallium/auxiliary/nir/nir_to_tgsi.c index 6281c197edf..df76d50af51 100644 --- a/src/gallium/auxiliary/nir/nir_to_tgsi.c +++ b/src/gallium/auxiliary/nir/nir_to_tgsi.c @@ -1338,6 +1338,7 @@ ntt_emit_load_ubo(struct ntt_compile *c, nir_intrinsic_instr *instr) /* !PIPE_CAP_LOAD_CONSTBUF: Just emit it as a vec4 reference to the const * file. */ + src.Index = nir_intrinsic_base(instr); if (nir_src_is_const(instr->src[1])) { src.Index += ntt_src_as_uint(c, instr->src[1]); diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp index ed3f1b57a31..3f8ef12db9f 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp @@ -872,6 +872,7 @@ bool ShaderFromNirProcessor::emit_load_ubo_vec4(nir_intrinsic_instr* instr) { auto bufid = nir_src_as_const_value(instr->src[0]); auto buf_offset = nir_src_as_const_value(instr->src[1]); + auto base = nir_intrinsic_base(instr); if (!buf_offset) { /* TODO: if buf_offset is constant then this can also be solved by using the CF indes @@ -892,11 +893,11 @@ bool ShaderFromNirProcessor::emit_load_ubo_vec4(nir_intrinsic_instr* instr) FetchInstruction *ir; if (bufid) { - ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0, + ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, base, 1 + bufid->u32, nullptr, bim_none); } else { PValue bufid = from_nir(instr->src[0], 0, 0); - ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0, + ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, base, 1, bufid, bim_zero); } ir->set_dest_swizzle(swz); @@ -905,6 +906,7 @@ bool ShaderFromNirProcessor::emit_load_ubo_vec4(nir_intrinsic_instr* instr) return true; } + uint32_t offset = 512 + base + buf_offset->u32; if (bufid) { int buf_cmp = nir_intrinsic_component(instr); @@ -912,7 +914,7 @@ bool ShaderFromNirProcessor::emit_load_ubo_vec4(nir_intrinsic_instr* instr) for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) { int cmp = buf_cmp + i; assert(cmp < 4); - auto u = PValue(new UniformValue(512 + buf_offset->u32, cmp, bufid->u32 + 1)); + auto u = PValue(new UniformValue(offset, cmp, bufid->u32 + 1)); if (instr->dest.is_ssa) load_preloaded_value(instr->dest, i, u); else { @@ -930,7 +932,7 @@ bool ShaderFromNirProcessor::emit_load_ubo_vec4(nir_intrinsic_instr* instr) auto kc_id = from_nir(instr->src[0], 0); for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) { int cmp = buf_cmp + i; - auto u = PValue(new UniformValue(512 + buf_offset->u32, cmp, kc_id)); + auto u = PValue(new UniformValue(offset, cmp, kc_id)); if (instr->dest.is_ssa) load_preloaded_value(instr->dest, i, u); else {