ir3: always use byte offset for @load/store_global_ir3

Before a7xx, ldg/stg.a use an offset in units of their type size while
on a7xx and later, the offset is always in bytes. Currently,
@load/store_global_ir3 take their offset in dwords (32-bits). This has a
few downsides: offsets need an extra shl during codegen on a7xx and
addressing sub-dword-aligned addresses is only possible by doing 64-bit
math on the base address.

Improve the situation by always using a byte offset for
@load/store_global_ir3 and adding the offset_shift index to support type
units pre-a7xx. While we're at it, add the base index as well to support
all ldg.a/stg.a features in @load/store_global_ir3.
all ldg/stg.g features in @load/store_global_ir3.

Supporting these renewed intrinsics consists of two parts:
- ir3_nir_lower_io_offsets legalizes the offset_shift on a6xx: for
  ldg.a/stg.a, the offset has to be in units of the type size so extra
  shifts are inserted to accomplish this if necessary. On a7xx, offsets
  are always in bytes so nothing needs to be done.
- The intrinsics are emitted as ldg/stg if the offset is a small enough
  constant and as ldg.a/stg.a otherwise. a6xx supports an extra shift
  for ldg.a/stg.a that only applies to the GPR offset (not the immediate
  base); NIR is pattern matched at this point to extract this if
  possible.

All users of @load/store_global_ir3 are updated to generate the offset
in units of bytes. ir3_nir_analyze_ubo_ranges is updated to take the new
offset_shift into account.

Totals from 2029 (1.15% of 176266) affected shaders:
MaxWaves: 26728 -> 26660 (-0.25%); split: +0.01%, -0.26%
Instrs: 1314089 -> 1278603 (-2.70%); split: -2.72%, +0.02%
CodeSize: 2739108 -> 2633236 (-3.87%); split: -3.87%, +0.01%
NOPs: 197537 -> 200843 (+1.67%); split: -1.62%, +3.30%
MOVs: 43771 -> 44025 (+0.58%); split: -1.11%, +1.69%
Full: 31849 -> 31948 (+0.31%); split: -0.03%, +0.34%
(ss): 37965 -> 42027 (+10.70%); split: -3.47%, +14.17%
(sy): 13752 -> 13566 (-1.35%); split: -4.04%, +2.68%
(ss)-stall: 154238 -> 170353 (+10.45%); split: -1.72%, +12.16%
(sy)-stall: 804442 -> 806518 (+0.26%); split: -4.65%, +4.91%
Preamble Instrs: 326728 -> 293488 (-10.17%)
Cat0: 217926 -> 220947 (+1.39%); split: -1.58%, +2.96%
Cat1: 50182 -> 50446 (+0.53%); split: -0.97%, +1.49%
Cat2: 460987 -> 452101 (-1.93%); split: -2.26%, +0.33%
Cat3: 390696 -> 361271 (-7.53%)
Cat7: 39148 -> 38688 (-1.18%); split: -1.24%, +0.06%

Signed-off-by: Job Noorman <jnoorman@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41342>
This commit is contained in:
Job Noorman 2026-05-05 06:25:49 +02:00
parent 6158072e6f
commit c784af5ca0
9 changed files with 288 additions and 84 deletions

View file

@ -1594,11 +1594,17 @@ load("shared_ir3", [1], [BASE, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE])
# src[] = { value, address(vec2 of hi+lo uint32_t), offset }.
# const_index[] = { write_mask, align_mul, align_offset }
store("global_ir3", [1, 1], indices=[ACCESS, ALIGN_MUL, ALIGN_OFFSET])
# Final address is calculated as `address + ((offset + BASE) << OFFSET_SHIFT)`
# `offset` is sign-extended to 64-bits first so the offset calculation does not
# cause 32-bit overflows.
# a6xx has another shift field which only applies to `offset`; this is not
# represented here.
store("global_ir3", [1, 1], indices=[ACCESS, ALIGN_MUL, ALIGN_OFFSET, OFFSET_SHIFT, BASE])
# src[] = { address(vec2 of hi+lo uint32_t), offset }.
# const_index[] = { access, align_mul, align_offset }
# the alignment applies to the base address
load("global_ir3", [1, 1], indices=[ACCESS, ALIGN_MUL, ALIGN_OFFSET, RANGE_BASE, RANGE], flags=[CAN_ELIMINATE])
# Final address is calculated as for @store_global_ir3
load("global_ir3", [1, 1], indices=[ACCESS, ALIGN_MUL, ALIGN_OFFSET, RANGE_BASE, RANGE, OFFSET_SHIFT, BASE], flags=[CAN_ELIMINATE])
# Etnaviv-specific load/global intrinsics. They take a 32-bit base address and
# a 32-bit offset, which doesn't need to be an immediate.

View file

@ -431,6 +431,132 @@ emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr,
ir3_split_dest(b, dst, resinfo, 0, intr->num_components);
}
/* On a6xx, on top of the offset_shift that applies to the whole offset, there's
* a second shift that only applies to the GPR part of the offset (so not to the
* immediate part). We extract that here by simply pattern matching for ishl on
* the offset src. Returns the shift if a match is found and it fits in the
* 2-bit field, in which case *offset_src is set to the src of ishl and
* *offset_src_comp to the component of *offset_src.
*/
static unsigned
parse_src_shift(struct ir3_context *ctx, nir_src **offset_src,
unsigned *offset_src_comp)
{
*offset_src_comp = 0;
/* a7xx and later have no GPR-only shift field, so there is nothing to
 * extract there.
 */
if (ctx->compiler->gen >= 7) {
return 0;
}
/* Look through movs to the instruction that actually produces the offset;
 * only an ishl can be folded into the shift field.
 */
nir_scalar offset =
nir_scalar_chase_movs(nir_get_scalar((*offset_src)->ssa, 0));
if (!nir_scalar_is_alu(offset) || nir_scalar_alu_op(offset) != nir_op_ishl) {
return 0;
}
/* Only a constant shift amount can be encoded in the instruction. */
nir_scalar shift_src = nir_scalar_chase_alu_src(offset, 1);
if (!nir_scalar_is_const(shift_src)) {
return 0;
}
unsigned shift = nir_scalar_as_uint(shift_src);
/* The hardware shift field is 2 bits wide; bail if the constant does not
 * fit.
 */
if (shift >= (1 << 2)) {
return 0;
}
/* Match found: redirect *offset_src/*offset_src_comp to the pre-shift
 * value so the caller reads the ishl's first source directly.
 */
nir_alu_instr *offset_alu = nir_def_as_alu(offset.def);
*offset_src = &offset_alu->src[0].src;
*offset_src_comp = offset_alu->src[0].swizzle[offset.comp];
return shift;
}
/* Whether `base` fits in the immediate base field of ldg.a/stg.a: the field
 * is 8 bits wide on a7xx and later, 2 bits before that.
 */
static bool
base_fits_ldg_stg_a(struct ir3_compiler *compiler, unsigned base)
{
   const unsigned field_bits = compiler->gen >= 7 ? 8 : 2;
   return base < (1u << field_bits);
}
/* Represents an offset for ldg/stg(.a):
* - src == NULL: ldg/stg base_address + imm
* - src != NULL:
* - a6xx: ldg/stg.a base_addr + (src << src_shift) + imm
* - a7xx: ldg/stg.a base_addr + src + imm
*/
struct ldg_stg_offset {
struct ir3_instruction *src;
struct ir3_instruction *src_shift;
struct ir3_instruction *imm;
};
/* Build the offset operands for a @load/store_global_ir3 intrinsic.
 * Chooses between the plain ldg/stg form (src == NULL, 13-bit byte
 * immediate) and the ldg.a/stg.a form (GPR offset plus shift and immediate
 * base), per the struct ldg_stg_offset contract above.
 */
static struct ldg_stg_offset
ldg_stg_offset(struct ir3_context *ctx, nir_intrinsic_instr *intr)
{
assert(intr->intrinsic == nir_intrinsic_load_global_ir3 ||
intr->intrinsic == nir_intrinsic_store_global_ir3);
/* ir3_nir_lower_io_offsets must have legalized offset_shift already:
 * zero (byte offsets) on a7xx+, units of the access type size before.
 */
if (ctx->compiler->gen >= 7) {
assert(nir_intrinsic_offset_shift(intr) == 0);
} else {
ASSERTED unsigned bit_size =
intr->intrinsic == nir_intrinsic_load_global_ir3
? intr->def.bit_size
: intr->src[0].ssa->bit_size;
assert(nir_intrinsic_offset_shift(intr) == ffs(bit_size / 8) - 1);
}
struct ldg_stg_offset offset = {};
nir_src *offset_src = nir_get_io_offset_src(intr);
int32_t base = nir_intrinsic_base(intr);
unsigned offset_shift = nir_intrinsic_offset_shift(intr);
struct ir3_builder *b = &ctx->build;
if (nir_src_is_const(*offset_src)) {
/* Fold base into the constant offset, then convert to bytes. */
int32_t full_imm_offset = base + nir_src_as_int(*offset_src);
int32_t full_imm_offset_bytes = full_imm_offset << offset_shift;
/* ldg/stg offset immediate is 13 bits. Note that ldg/stg use byte offsets
 * even on a6xx.
 */
if (full_imm_offset_bytes < (1 << 12) &&
full_imm_offset_bytes >= -(1 << 12)) {
offset.imm = create_immed(b, full_imm_offset_bytes);
} else {
/* The immediate offset does not fit. Generate ldg/stg.a with the
 * immediate in a GPR.
 */
offset.src = create_immed(b, full_imm_offset);
offset.src_shift = create_immed(b, 0);
offset.imm = create_immed(b, 0);
}
} else {
if (base_fits_ldg_stg_a(ctx->compiler, base)) {
/* Try to fold an ishl on the offset into the a6xx GPR shift field
 * (parse_src_shift returns 0 on a7xx+).
 */
unsigned offset_src_comp;
unsigned shift = parse_src_shift(ctx, &offset_src, &offset_src_comp);
offset.src = ir3_get_src(ctx, offset_src)[offset_src_comp];
offset.src_shift = create_immed(b, shift);
offset.imm = create_immed(b, base);
} else {
/* This should be rare, but various passes might update
 * base/offset_shift in a way that makes the combination illegal.
 * Detect that here and replace base by an add.
 */
offset.src = ir3_ADD_U(b, ir3_get_src(ctx, offset_src)[0], 0,
create_immed(b, base), 0);
offset.src_shift = create_immed(b, 0);
offset.imm = create_immed(b, 0);
}
}
return offset;
}
static void
emit_intrinsic_load_global_ir3(struct ir3_context *ctx,
nir_intrinsic_instr *intr,
@ -438,31 +564,19 @@ emit_intrinsic_load_global_ir3(struct ir3_context *ctx,
{
struct ir3_builder *b = &ctx->build;
unsigned dest_components = nir_intrinsic_dest_components(intr);
struct ir3_instruction *addr, *offset;
struct ir3_instruction *addr;
addr = ir3_collect(b, ir3_get_src(ctx, &intr->src[0])[0]);
struct ldg_stg_offset offset = ldg_stg_offset(ctx, intr);
struct ir3_instruction *load;
bool const_offset_in_bounds =
nir_src_is_const(intr->src[1]) &&
nir_src_as_int(intr->src[1]) < (1 << 8) &&
nir_src_as_int(intr->src[1]) > -(1 << 8);
if (const_offset_in_bounds) {
load = ir3_LDG(b, addr, 0,
create_immed(b, nir_src_as_int(intr->src[1]) * 4),
0, create_immed(b, dest_components), 0);
if (!offset.src) {
load = ir3_LDG(b, addr, 0, offset.imm, 0,
create_immed(b, dest_components), 0);
} else {
unsigned shift = ctx->compiler->gen >= 7 ? 2 : 0;
offset = ir3_get_src(ctx, &intr->src[1])[0];
if (shift) {
/* A7XX TODO: Move to NIR for it to be properly optimized? */
offset = ir3_SHL_B(b, offset, 0, create_immed(b, shift), 0);
}
load =
ir3_LDG_A(b, addr, 0, offset, 0, create_immed(b, 0), 0,
create_immed(b, 0), 0, create_immed(b, dest_components), 0);
load = ir3_LDG_A(b, addr, 0, offset.src, 0, offset.src_shift, 0,
offset.imm, 0, create_immed(b, dest_components), 0);
}
load->cat6.type = type_uint_size(intr->def.bit_size);
@ -479,33 +593,22 @@ emit_intrinsic_store_global_ir3(struct ir3_context *ctx,
nir_intrinsic_instr *intr)
{
struct ir3_builder *b = &ctx->build;
struct ir3_instruction *value, *addr, *offset;
struct ir3_instruction *value, *addr;
unsigned ncomp = nir_intrinsic_src_components(intr, 0);
addr = ir3_collect(b, ir3_get_src(ctx, &intr->src[1])[0]);
value = ir3_create_collect(b, ir3_get_src(ctx, &intr->src[0]), ncomp);
struct ldg_stg_offset offset = ldg_stg_offset(ctx, intr);
struct ir3_instruction *stg;
bool const_offset_in_bounds = nir_src_is_const(intr->src[2]) &&
nir_src_as_int(intr->src[2]) < (1 << 10) &&
nir_src_as_int(intr->src[2]) > -(1 << 10);
if (const_offset_in_bounds) {
stg = ir3_STG(b, addr, 0,
create_immed(b, nir_src_as_int(intr->src[2]) * 4), 0,
value, 0,
create_immed(b, ncomp), 0);
if (!offset.src) {
stg = ir3_STG(b, addr, 0, offset.imm, 0, value, 0, create_immed(b, ncomp),
0);
} else {
offset = ir3_get_src(ctx, &intr->src[2])[0];
if (ctx->compiler->gen >= 7) {
/* A7XX TODO: Move to NIR for it to be properly optimized? */
offset = ir3_SHL_B(b, offset, 0, create_immed(b, 2), 0);
}
stg =
ir3_STG_A(b, addr, 0, offset, 0, create_immed(b, 0), 0,
create_immed(b, 0), 0, value, 0, create_immed(b, ncomp), 0);
stg = ir3_STG_A(b, addr, 0, offset.src, 0, offset.src_shift, 0,
offset.imm, 0, value, 0, create_immed(b, ncomp), 0);
}
stg->cat6.type = type_uint_size(intr->src[0].ssa->bit_size);

View file

@ -607,7 +607,7 @@ lower_shader_clock(struct nir_builder *b, nir_intrinsic_instr *instr, void *data
nir_def *clock_lo =
nir_load_global_ir3(b, 1, 32, base_addr, nir_imm_int(b, 0));
nir_def *clock_hi =
nir_load_global_ir3(b, 1, 32, base_addr, nir_imm_int(b, 1));
nir_load_global_ir3(b, 1, 32, base_addr, nir_imm_int(b, 4));
clock = nir_vec2(b, clock_lo, clock_hi);
}
nir_push_else(b, NULL);
@ -2052,3 +2052,20 @@ ir3_nir_intrinsic_barycentric_sysval(nir_intrinsic_instr *intr)
return sysval;
}
/* Convert an offset expressed in (1 << offset_shift)-byte units into the
 * form @load/store_global_ir3 expects: on a7xx+ offsets are always in
 * bytes, so the shift is folded into the NIR def; before a7xx the shift is
 * carried in the intrinsic's offset_shift index instead.
 */
nir_io_offset
ir3_nir_get_global_offset(nir_builder *b, struct ir3_compiler *compiler,
                          nir_def *offset, unsigned offset_shift)
{
   const bool byte_offsets = compiler->gen >= 7;
   nir_io_offset result = {
      .def = byte_offsets ? nir_ishl_imm(b, offset, offset_shift) : offset,
      .shift = byte_offsets ? 0 : offset_shift,
   };
   return result;
}

View file

@ -212,6 +212,10 @@ unsigned ir3_nir_max_offset_shift(nir_intrinsic_instr *intr, const void *data);
gl_system_value
ir3_nir_intrinsic_barycentric_sysval(nir_intrinsic_instr *intr);
nir_io_offset ir3_nir_get_global_offset(nir_builder *b,
struct ir3_compiler *compiler,
nir_def *offset, unsigned offset_shift);
ENDC;
#endif /* IR3_NIR_H_ */

View file

@ -16,18 +16,13 @@ get_ubo_load_range(nir_shader *nir, nir_intrinsic_instr *instr,
uint32_t offset = nir_intrinsic_range_base(instr);
uint32_t size = nir_intrinsic_range(instr);
if (instr->intrinsic == nir_intrinsic_load_global_ir3) {
offset *= 4;
size *= 4;
}
/* If the offset is constant, the range is trivial (and NIR may not have
* figured it out).
*/
if (nir_src_is_const(instr->src[1])) {
offset = nir_src_as_uint(instr->src[1]);
if (instr->intrinsic == nir_intrinsic_load_global_ir3)
offset *= 4;
offset <<= nir_intrinsic_offset_shift(instr);
size = nir_intrinsic_dest_components(instr) * 4;
}
@ -297,25 +292,30 @@ lower_ubo_load_to_uniform(nir_intrinsic_instr *instr, nir_builder *b,
nir_def *uniform_offset = ubo_offset;
if (instr->intrinsic == nir_intrinsic_load_ubo) {
/* UBO offset is in bytes, but uniform offset is in units of
* dwords, so we need to divide by 4 (right-shift by 2). For ldc the
* offset is in units of 16 bytes, so we need to multiply by 4. And
* also the same for the constant part of the offset:
*/
const int shift = -2;
nir_def *new_offset = ir3_nir_try_propagate_bit_shift(b, ubo_offset, -2);
if (new_offset) {
uniform_offset = new_offset;
} else {
uniform_offset = shift > 0
? nir_ishl_imm(b, ubo_offset, shift)
: nir_ushr_imm(b, ubo_offset, -shift);
}
/* UBO/global offset is in bytes, but uniform offset is in units of
* dwords, so we need to divide by 4 (right-shift by 2). For ldc the
* offset is in units of 16 bytes, so we need to multiply by 4. And
* also the same for the constant part of the offset:
*/
int shift = -2;
if (instr->intrinsic == nir_intrinsic_load_global_ir3) {
unsigned offset_shift = nir_intrinsic_offset_shift(instr);
assert(offset_shift <= 2);
shift = -(2 - offset_shift);
}
nir_def *new_offset = ir3_nir_try_propagate_bit_shift(b, ubo_offset, shift);
if (new_offset) {
uniform_offset = new_offset;
} else {
uniform_offset = shift > 0 ? nir_ishl_imm(b, ubo_offset, shift)
: nir_ushr_imm(b, ubo_offset, -shift);
}
assert(!(const_offset & 0x3));
const_offset >>= 2;
const_offset >>= -shift;
const int range_offset = ((int)range->offset - (int)range->start) / 4;
const_offset += range_offset;

View file

@ -258,6 +258,52 @@ lower_offset_for_ssbo(nir_intrinsic_instr *intrinsic, nir_builder *b,
return true;
}
/* On a6xx, global memory is accessed in units of the type size. Legalize
* offset_shift to correspond to this.
*/
static bool
lower_offset_for_global(nir_builder *b, nir_intrinsic_instr *intr,
struct ir3_compiler *compiler)
{
/* a7xx+ always uses byte offsets; offset_shift must already be zero. */
if (compiler->gen >= 7) {
assert(nir_intrinsic_offset_shift(intr) == 0);
return false;
}
/* Required shift on a6xx: offsets are in units of the access type size. */
unsigned bit_size = intr->intrinsic == nir_intrinsic_load_global_ir3
? intr->def.bit_size
: intr->src[0].ssa->bit_size;
assert(bit_size < 64);
int shift = ffs(bit_size / 8) - 1;
int cur_shift = nir_intrinsic_offset_shift(intr);
/* extra_shift > 0: offset must shrink (ushr); < 0: grow (ishl). */
int extra_shift = shift - cur_shift;
if (extra_shift == 0) {
return false;
}
b->cursor = nir_before_instr(&intr->instr);
nir_src *offset_src = nir_get_io_offset_src(intr);
/* Prefer folding the shift into an existing shift on the offset; fall
 * back to emitting an explicit ushr/ishl if that fails (NULL def).
 */
nir_io_offset new_offset = {
.def = ir3_nir_try_propagate_bit_shift(b, offset_src->ssa, -extra_shift),
.shift = shift,
};
if (!new_offset.def) {
if (extra_shift > 0) {
new_offset.def = nir_ushr_imm(b, offset_src->ssa, extra_shift);
} else {
new_offset.def = nir_ishl_imm(b, offset_src->ssa, -extra_shift);
}
}
nir_set_io_offset(intr, new_offset);
return true;
}
static bool
lower_io_offsets_block(nir_block *block, nir_builder *b, void *mem_ctx,
struct ir3_compiler *c)
@ -288,6 +334,11 @@ lower_io_offsets_block(nir_block *block, nir_builder *b, void *mem_ctx,
scalarize_load(intr, b);
progress = true;
}
if (intr->intrinsic == nir_intrinsic_load_global_ir3 ||
intr->intrinsic == nir_intrinsic_store_global_ir3) {
progress |= lower_offset_for_global(b, intr, c);
}
}
return progress;

View file

@ -8,6 +8,7 @@
#include "ir3_nir.h"
struct state {
struct ir3_compiler *compiler;
uint32_t topology;
struct primitive_map {
@ -190,6 +191,33 @@ replace_intrinsic(nir_builder *b, nir_intrinsic_instr *intr,
return new_intr;
}
/* Replace `intr` with a @load_global_ir3 of `addr` + `offset`, where
 * `offset` is expressed in dwords (units of 4 bytes).
 */
static void
replace_with_load_global(nir_builder *b, struct ir3_compiler *compiler,
                         nir_intrinsic_instr *intr, nir_def *addr,
                         nir_def *offset)
{
   /* Our offsets are in units of 4B; legalize them per-gen. */
   nir_io_offset off = ir3_nir_get_global_offset(b, compiler, offset, 2);
   nir_def *load =
      nir_load_global_ir3(b, intr->def.num_components, intr->def.bit_size,
                          addr, off.def, .align_mul = 4, .align_offset = 0,
                          .offset_shift = off.shift);
   nir_def_replace(&intr->def, load);
}
/* Replace `intr` with a @store_global_ir3 of `val` to `addr` + `offset`,
 * where `offset` is expressed in dwords (units of 4 bytes).
 */
static void
replace_with_store_global(nir_builder *b, struct ir3_compiler *compiler,
                          nir_intrinsic_instr *intr, nir_def *val,
                          nir_def *addr, nir_def *offset)
{
   /* Our offsets are in units of 4B; legalize them per-gen. */
   nir_io_offset off = ir3_nir_get_global_offset(b, compiler, offset, 2);
   nir_store_global_ir3(b, val, addr, off.def, .align_mul = 4,
                        .align_offset = 0, .offset_shift = off.shift);
   nir_instr_remove(&intr->instr);
}
static void
build_primitive_map(nir_shader *shader, struct primitive_map *map)
{
@ -577,8 +605,7 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
nir_intrinsic_io_semantics(intr).location,
nir_intrinsic_component(intr), intr->src[1].ssa);
replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address,
offset, NULL);
replace_with_load_global(b, state->compiler, intr, address, offset);
break;
}
@ -598,8 +625,8 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
nir_intrinsic_io_semantics(intr).location,
nir_intrinsic_component(intr), intr->src[2].ssa);
replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3, value,
address, offset);
replace_with_store_global(b, state->compiler, intr, value, address,
offset);
break;
}
@ -623,8 +650,7 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
intr->src[0].ssa);
}
replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address,
offset, NULL);
replace_with_load_global(b, state->compiler, intr, address, offset);
break;
}
@ -664,10 +690,9 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
nir_def *offset = build_tessfactor_base(
b, location, nir_intrinsic_component(intr), state);
replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3,
intr->src[0].ssa,
load_tess_factor_base(b),
nir_iadd(b, intr->src[1].ssa, offset));
replace_with_store_global(
b, state->compiler, intr, intr->src[0].ssa,
load_tess_factor_base(b), nir_iadd(b, intr->src[1].ssa, offset));
if (location != VARYING_SLOT_PRIMITIVE_ID) {
nir_pop_if(b, nif);
@ -678,8 +703,8 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
b, state, location, nir_intrinsic_component(intr),
intr->src[1].ssa);
replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3,
intr->src[0].ssa, address, offset);
replace_with_store_global(b, state->compiler, intr,
intr->src[0].ssa, address, offset);
}
break;
}
@ -694,7 +719,7 @@ bool
ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader_variant *v,
unsigned topology)
{
struct state state = {.topology = topology};
struct state state = {.topology = topology, .compiler = v->compiler};
if (shader_debug_enabled(shader->info.stage, shader->info.internal)) {
mesa_logi("NIR (before tess lowering) for %s shader:",
@ -787,8 +812,7 @@ lower_tess_eval_block(nir_block *block, nir_builder *b, struct state *state)
nir_intrinsic_io_semantics(intr).location,
nir_intrinsic_component(intr), intr->src[1].ssa);
replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address,
offset, NULL);
replace_with_load_global(b, state->compiler, intr, address, offset);
break;
}
@ -811,8 +835,7 @@ lower_tess_eval_block(nir_block *block, nir_builder *b, struct state *state)
intr->src[0].ssa);
}
replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address,
offset, NULL);
replace_with_load_global(b, state->compiler, intr, address, offset);
break;
}
@ -826,7 +849,7 @@ bool
ir3_nir_lower_tess_eval(nir_shader *shader, struct ir3_shader_variant *v,
unsigned topology)
{
struct state state = {.topology = topology};
struct state state = {.topology = topology, .compiler = v->compiler};
if (shader_debug_enabled(shader->info.stage, shader->info.internal)) {
mesa_logi("NIR (before tess lowering) for %s shader:",

View file

@ -262,8 +262,8 @@ load_tlas(nir_builder *b, nir_def *tlas,
} else {
return nir_load_global_ir3(b, components, 32,
nir_pack_64_2x32(b, tlas),
nir_iadd_imm(b, nir_imul_imm(b, index, AS_RECORD_SIZE / 4),
offset / 4),
nir_iadd_imm(b, nir_imul_imm(b, index, AS_RECORD_SIZE),
offset),
/* The required alignment of the
* user-specified base from the Vulkan spec.
*/

View file

@ -1024,7 +1024,7 @@ lower_inline_ubo(nir_builder *b, nir_intrinsic_instr *intrin, void *cb_data)
val = nir_load_global_ir3(b, intrin->num_components,
intrin->def.bit_size,
nir_pack_64_2x32(b, base_addr),
nir_ishr_imm(b, offset, 2),
offset,
.access =
(enum gl_access_qualifier)(
(enum gl_access_qualifier)(ACCESS_NON_WRITEABLE | ACCESS_CAN_REORDER) |
@ -1032,7 +1032,7 @@ lower_inline_ubo(nir_builder *b, nir_intrinsic_instr *intrin, void *cb_data)
.align_mul = 16,
.align_offset = 0,
.range_base = 0,
.range = range);
.range = range * 4);
} else {
val =
nir_load_const_ir3(b, intrin->num_components, intrin->def.bit_size,