diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 9381f728fea..148c59804b4 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -1594,11 +1594,17 @@ load("shared_ir3", [1], [BASE, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE]) # src[] = { value, address(vec2 of hi+lo uint32_t), offset }. # const_index[] = { write_mask, align_mul, align_offset } -store("global_ir3", [1, 1], indices=[ACCESS, ALIGN_MUL, ALIGN_OFFSET]) +# Final address is calculated as `address + ((offset + BASE) << OFFSET_SHIFT)`. +# `offset` is sign-extended to 64-bits first so the offset calculation does not +# cause 32-bit overflows. +# a6xx has another shift field which only applies to `offset`; this is not +# represented here. +store("global_ir3", [1, 1], indices=[ACCESS, ALIGN_MUL, ALIGN_OFFSET, OFFSET_SHIFT, BASE]) # src[] = { address(vec2 of hi+lo uint32_t), offset }. # const_index[] = { access, align_mul, align_offset } # the alignment applies to the base address +# Final address is calculated as for @store_global_ir3 +load("global_ir3", [1, 1], indices=[ACCESS, ALIGN_MUL, ALIGN_OFFSET, RANGE_BASE, RANGE, OFFSET_SHIFT, BASE], flags=[CAN_ELIMINATE]) # Etnaviv-specific load/glboal intrinsics. They take a 32-bit base address and # a 32-bit offset, which doesn't need to be an immediate. diff --git a/src/freedreno/ir3/ir3_a6xx.c b/src/freedreno/ir3/ir3_a6xx.c index badc964f367..bd6df930149 100644 --- a/src/freedreno/ir3/ir3_a6xx.c +++ b/src/freedreno/ir3/ir3_a6xx.c @@ -431,6 +431,132 @@ emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr, ir3_split_dest(b, dst, resinfo, 0, intr->num_components); } +/* On a6xx, on top of the offset_shift that applies to the whole offset, there's + * a second shift that only applies to the GPR part of the offset (so not to the + * immediate part). 
We extract that here by simply pattern matching for ishl on + * the offset src. Returns the shift if a match is found and it fits in the + * 2-bit field, in which case *offset_src is set to the src of ishl and + * *offset_src_comp to the component of *offset_src. + */ +static unsigned +parse_src_shift(struct ir3_context *ctx, nir_src **offset_src, + unsigned *offset_src_comp) +{ + *offset_src_comp = 0; + + if (ctx->compiler->gen >= 7) { + return 0; + } + + nir_scalar offset = + nir_scalar_chase_movs(nir_get_scalar((*offset_src)->ssa, 0)); + + if (!nir_scalar_is_alu(offset) || nir_scalar_alu_op(offset) != nir_op_ishl) { + return 0; + } + + nir_scalar shift_src = nir_scalar_chase_alu_src(offset, 1); + + if (!nir_scalar_is_const(shift_src)) { + return 0; + } + + unsigned shift = nir_scalar_as_uint(shift_src); + + if (shift >= (1 << 2)) { + return 0; + } + + nir_alu_instr *offset_alu = nir_def_as_alu(offset.def); + *offset_src = &offset_alu->src[0].src; + *offset_src_comp = offset_alu->src[0].swizzle[offset.comp]; + return shift; +} + +static bool +base_fits_ldg_stg_a(struct ir3_compiler *compiler, unsigned base) +{ + if (compiler->gen >= 7) { + return base < (1 << 8); + } + + return base < (1 << 2); +} + +/* Represents an offset for ldg/stg(.a): + * - src == NULL: ldg/stg base_address + imm + * - src != NULL: + * - a6xx: ldg/stg.a base_addr + (src << src_shift) + imm + * - a7xx: ldg/stg.a base_addr + src + imm + */ +struct ldg_stg_offset { + struct ir3_instruction *src; + struct ir3_instruction *src_shift; + struct ir3_instruction *imm; +}; + +static struct ldg_stg_offset +ldg_stg_offset(struct ir3_context *ctx, nir_intrinsic_instr *intr) +{ + assert(intr->intrinsic == nir_intrinsic_load_global_ir3 || + intr->intrinsic == nir_intrinsic_store_global_ir3); + + if (ctx->compiler->gen >= 7) { + assert(nir_intrinsic_offset_shift(intr) == 0); + } else { + ASSERTED unsigned bit_size = + intr->intrinsic == nir_intrinsic_load_global_ir3 + ? 
intr->def.bit_size + : intr->src[0].ssa->bit_size; + assert(nir_intrinsic_offset_shift(intr) == ffs(bit_size / 8) - 1); + } + + struct ldg_stg_offset offset = {}; + nir_src *offset_src = nir_get_io_offset_src(intr); + int32_t base = nir_intrinsic_base(intr); + unsigned offset_shift = nir_intrinsic_offset_shift(intr); + struct ir3_builder *b = &ctx->build; + + if (nir_src_is_const(*offset_src)) { + int32_t full_imm_offset = base + nir_src_as_int(*offset_src); + int32_t full_imm_offset_bytes = full_imm_offset << offset_shift; + + /* ldg/stg offset immediate is 13 bits. Note that ldg/stg use byte offsets + * even on a6xx. + */ + if (full_imm_offset_bytes < (1 << 12) && + full_imm_offset_bytes >= -(1 << 12)) { + offset.imm = create_immed(b, full_imm_offset_bytes); + } else { + /* The immediate offset does not fit. Generate ldg/stg.a with the + * immediate in a GPR. + */ + offset.src = create_immed(b, full_imm_offset); + offset.src_shift = create_immed(b, 0); + offset.imm = create_immed(b, 0); + } + } else { + if (base_fits_ldg_stg_a(ctx->compiler, base)) { + unsigned offset_src_comp; + unsigned shift = parse_src_shift(ctx, &offset_src, &offset_src_comp); + offset.src = ir3_get_src(ctx, offset_src)[offset_src_comp]; + offset.src_shift = create_immed(b, shift); + offset.imm = create_immed(b, base); + } else { + /* This should be rare, but various passes might update + * base/offset_shift in a way that makes the combination illegal. + * Detect that here and replace base by an add. 
+ */ + offset.src = ir3_ADD_U(b, ir3_get_src(ctx, offset_src)[0], 0, + create_immed(b, base), 0); + offset.src_shift = create_immed(b, 0); + offset.imm = create_immed(b, 0); + } + } + + return offset; +} + static void emit_intrinsic_load_global_ir3(struct ir3_context *ctx, nir_intrinsic_instr *intr, @@ -438,31 +564,19 @@ emit_intrinsic_load_global_ir3(struct ir3_context *ctx, { struct ir3_builder *b = &ctx->build; unsigned dest_components = nir_intrinsic_dest_components(intr); - struct ir3_instruction *addr, *offset; + struct ir3_instruction *addr; addr = ir3_collect(b, ir3_get_src(ctx, &intr->src[0])[0]); + struct ldg_stg_offset offset = ldg_stg_offset(ctx, intr); struct ir3_instruction *load; - bool const_offset_in_bounds = - nir_src_is_const(intr->src[1]) && - nir_src_as_int(intr->src[1]) < (1 << 8) && - nir_src_as_int(intr->src[1]) > -(1 << 8); - - if (const_offset_in_bounds) { - load = ir3_LDG(b, addr, 0, - create_immed(b, nir_src_as_int(intr->src[1]) * 4), - 0, create_immed(b, dest_components), 0); + if (!offset.src) { + load = ir3_LDG(b, addr, 0, offset.imm, 0, + create_immed(b, dest_components), 0); } else { - unsigned shift = ctx->compiler->gen >= 7 ? 2 : 0; - offset = ir3_get_src(ctx, &intr->src[1])[0]; - if (shift) { - /* A7XX TODO: Move to NIR for it to be properly optimized? 
*/ - offset = ir3_SHL_B(b, offset, 0, create_immed(b, shift), 0); - } - load = - ir3_LDG_A(b, addr, 0, offset, 0, create_immed(b, 0), 0, - create_immed(b, 0), 0, create_immed(b, dest_components), 0); + load = ir3_LDG_A(b, addr, 0, offset.src, 0, offset.src_shift, 0, + offset.imm, 0, create_immed(b, dest_components), 0); } load->cat6.type = type_uint_size(intr->def.bit_size); @@ -479,33 +593,22 @@ emit_intrinsic_store_global_ir3(struct ir3_context *ctx, nir_intrinsic_instr *intr) { struct ir3_builder *b = &ctx->build; - struct ir3_instruction *value, *addr, *offset; + struct ir3_instruction *value, *addr; unsigned ncomp = nir_intrinsic_src_components(intr, 0); addr = ir3_collect(b, ir3_get_src(ctx, &intr->src[1])[0]); value = ir3_create_collect(b, ir3_get_src(ctx, &intr->src[0]), ncomp); + struct ldg_stg_offset offset = ldg_stg_offset(ctx, intr); struct ir3_instruction *stg; - bool const_offset_in_bounds = nir_src_is_const(intr->src[2]) && - nir_src_as_int(intr->src[2]) < (1 << 10) && - nir_src_as_int(intr->src[2]) > -(1 << 10); - - if (const_offset_in_bounds) { - stg = ir3_STG(b, addr, 0, - create_immed(b, nir_src_as_int(intr->src[2]) * 4), 0, - value, 0, - create_immed(b, ncomp), 0); + if (!offset.src) { + stg = ir3_STG(b, addr, 0, offset.imm, 0, value, 0, create_immed(b, ncomp), + 0); } else { - offset = ir3_get_src(ctx, &intr->src[2])[0]; - if (ctx->compiler->gen >= 7) { - /* A7XX TODO: Move to NIR for it to be properly optimized? 
*/ - offset = ir3_SHL_B(b, offset, 0, create_immed(b, 2), 0); - } - stg = - ir3_STG_A(b, addr, 0, offset, 0, create_immed(b, 0), 0, - create_immed(b, 0), 0, value, 0, create_immed(b, ncomp), 0); + stg = ir3_STG_A(b, addr, 0, offset.src, 0, offset.src_shift, 0, + offset.imm, 0, value, 0, create_immed(b, ncomp), 0); } stg->cat6.type = type_uint_size(intr->src[0].ssa->bit_size); diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index 200e53e6997..a04061e8106 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -607,7 +607,7 @@ lower_shader_clock(struct nir_builder *b, nir_intrinsic_instr *instr, void *data nir_def *clock_lo = nir_load_global_ir3(b, 1, 32, base_addr, nir_imm_int(b, 0)); nir_def *clock_hi = - nir_load_global_ir3(b, 1, 32, base_addr, nir_imm_int(b, 1)); + nir_load_global_ir3(b, 1, 32, base_addr, nir_imm_int(b, 4)); clock = nir_vec2(b, clock_lo, clock_hi); } nir_push_else(b, NULL); @@ -2052,3 +2052,20 @@ ir3_nir_intrinsic_barycentric_sysval(nir_intrinsic_instr *intr) return sysval; } + +nir_io_offset +ir3_nir_get_global_offset(nir_builder *b, struct ir3_compiler *compiler, + nir_def *offset, unsigned offset_shift) +{ + if (compiler->gen >= 7) { + return (nir_io_offset){ + .def = nir_ishl_imm(b, offset, offset_shift), + .shift = 0, + }; + } + + return (nir_io_offset){ + .def = offset, + .shift = offset_shift, + }; +} diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h index 887170d68cf..4797fc3b4c1 100644 --- a/src/freedreno/ir3/ir3_nir.h +++ b/src/freedreno/ir3/ir3_nir.h @@ -212,6 +212,10 @@ unsigned ir3_nir_max_offset_shift(nir_intrinsic_instr *intr, const void *data); gl_system_value ir3_nir_intrinsic_barycentric_sysval(nir_intrinsic_instr *intr); +nir_io_offset ir3_nir_get_global_offset(nir_builder *b, + struct ir3_compiler *compiler, + nir_def *offset, unsigned offset_shift); + ENDC; #endif /* IR3_NIR_H_ */ diff --git a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c 
b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c index 3e83573d0eb..aa6b04693f3 100644 --- a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c +++ b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c @@ -16,18 +16,13 @@ get_ubo_load_range(nir_shader *nir, nir_intrinsic_instr *instr, uint32_t offset = nir_intrinsic_range_base(instr); uint32_t size = nir_intrinsic_range(instr); - if (instr->intrinsic == nir_intrinsic_load_global_ir3) { - offset *= 4; - size *= 4; - } - /* If the offset is constant, the range is trivial (and NIR may not have * figured it out). */ if (nir_src_is_const(instr->src[1])) { offset = nir_src_as_uint(instr->src[1]); if (instr->intrinsic == nir_intrinsic_load_global_ir3) - offset *= 4; + offset <<= nir_intrinsic_offset_shift(instr); size = nir_intrinsic_dest_components(instr) * 4; } @@ -297,25 +292,30 @@ lower_ubo_load_to_uniform(nir_intrinsic_instr *instr, nir_builder *b, nir_def *uniform_offset = ubo_offset; - if (instr->intrinsic == nir_intrinsic_load_ubo) { - /* UBO offset is in bytes, but uniform offset is in units of - * dwords, so we need to divide by 4 (right-shift by 2). For ldc the - * offset is in units of 16 bytes, so we need to multiply by 4. And - * also the same for the constant part of the offset: - */ - const int shift = -2; - nir_def *new_offset = ir3_nir_try_propagate_bit_shift(b, ubo_offset, -2); - if (new_offset) { - uniform_offset = new_offset; - } else { - uniform_offset = shift > 0 - ? nir_ishl_imm(b, ubo_offset, shift) - : nir_ushr_imm(b, ubo_offset, -shift); - } + /* UBO/global offset is in bytes, but uniform offset is in units of + * dwords, so we need to divide by 4 (right-shift by 2). For ldc the + * offset is in units of 16 bytes, so we need to multiply by 4. 
And + * also the same for the constant part of the offset: + */ + int shift = -2; + + if (instr->intrinsic == nir_intrinsic_load_global_ir3) { + unsigned offset_shift = nir_intrinsic_offset_shift(instr); + assert(offset_shift <= 2); + + shift = -(2 - offset_shift); + } + + nir_def *new_offset = ir3_nir_try_propagate_bit_shift(b, ubo_offset, shift); + if (new_offset) { + uniform_offset = new_offset; + } else { + uniform_offset = shift > 0 ? nir_ishl_imm(b, ubo_offset, shift) + : nir_ushr_imm(b, ubo_offset, -shift); } assert(!(const_offset & 0x3)); - const_offset >>= 2; + const_offset >>= -shift; const int range_offset = ((int)range->offset - (int)range->start) / 4; const_offset += range_offset; diff --git a/src/freedreno/ir3/ir3_nir_lower_io_offsets.c b/src/freedreno/ir3/ir3_nir_lower_io_offsets.c index dd89a6e7efe..e01eb2fd336 100644 --- a/src/freedreno/ir3/ir3_nir_lower_io_offsets.c +++ b/src/freedreno/ir3/ir3_nir_lower_io_offsets.c @@ -258,6 +258,52 @@ lower_offset_for_ssbo(nir_intrinsic_instr *intrinsic, nir_builder *b, return true; } +/* On a6xx, global memory is accessed in units of the type size. Legalize + * offset_shift to correspond to this. + */ +static bool +lower_offset_for_global(nir_builder *b, nir_intrinsic_instr *intr, + struct ir3_compiler *compiler) +{ + if (compiler->gen >= 7) { + assert(nir_intrinsic_offset_shift(intr) == 0); + return false; + } + + unsigned bit_size = intr->intrinsic == nir_intrinsic_load_global_ir3 + ? 
intr->def.bit_size + : intr->src[0].ssa->bit_size; + + assert(bit_size < 64); + + int shift = ffs(bit_size / 8) - 1; + int cur_shift = nir_intrinsic_offset_shift(intr); + int extra_shift = shift - cur_shift; + + if (extra_shift == 0) { + return false; + } + + b->cursor = nir_before_instr(&intr->instr); + + nir_src *offset_src = nir_get_io_offset_src(intr); + nir_io_offset new_offset = { + .def = ir3_nir_try_propagate_bit_shift(b, offset_src->ssa, -extra_shift), + .shift = shift, + }; + + if (!new_offset.def) { + if (extra_shift > 0) { + new_offset.def = nir_ushr_imm(b, offset_src->ssa, extra_shift); + } else { + new_offset.def = nir_ishl_imm(b, offset_src->ssa, -extra_shift); + } + } + + nir_set_io_offset(intr, new_offset); + return true; +} + static bool lower_io_offsets_block(nir_block *block, nir_builder *b, void *mem_ctx, struct ir3_compiler *c) @@ -288,6 +334,11 @@ lower_io_offsets_block(nir_block *block, nir_builder *b, void *mem_ctx, scalarize_load(intr, b); progress = true; } + + if (intr->intrinsic == nir_intrinsic_load_global_ir3 || + intr->intrinsic == nir_intrinsic_store_global_ir3) { + progress |= lower_offset_for_global(b, intr, c); + } } return progress; diff --git a/src/freedreno/ir3/ir3_nir_lower_tess.c b/src/freedreno/ir3/ir3_nir_lower_tess.c index 6b5d66a77b7..b961b92b6e5 100644 --- a/src/freedreno/ir3/ir3_nir_lower_tess.c +++ b/src/freedreno/ir3/ir3_nir_lower_tess.c @@ -8,6 +8,7 @@ #include "ir3_nir.h" struct state { + struct ir3_compiler *compiler; uint32_t topology; struct primitive_map { @@ -190,6 +191,33 @@ replace_intrinsic(nir_builder *b, nir_intrinsic_instr *intr, return new_intr; } +static void +replace_with_load_global(nir_builder *b, struct ir3_compiler *compiler, + nir_intrinsic_instr *intr, nir_def *addr, + nir_def *offset) +{ + /* Our offsets are in units of 4B. 
*/ + nir_io_offset global_offset = + ir3_nir_get_global_offset(b, compiler, offset, 2); + nir_def *load = nir_load_global_ir3( + b, intr->def.num_components, intr->def.bit_size, addr, global_offset.def, + .align_mul = 4, .align_offset = 0, .offset_shift = global_offset.shift); + nir_def_replace(&intr->def, load); +} + +static void +replace_with_store_global(nir_builder *b, struct ir3_compiler *compiler, + nir_intrinsic_instr *intr, nir_def *val, + nir_def *addr, nir_def *offset) +{ + /* Our offsets are in units of 4B. */ + nir_io_offset global_offset = + ir3_nir_get_global_offset(b, compiler, offset, 2); + nir_store_global_ir3(b, val, addr, global_offset.def, .align_mul = 4, + .align_offset = 0, .offset_shift = global_offset.shift); + nir_instr_remove(&intr->instr); +} + static void build_primitive_map(nir_shader *shader, struct primitive_map *map) { @@ -577,8 +605,7 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state) nir_intrinsic_io_semantics(intr).location, nir_intrinsic_component(intr), intr->src[1].ssa); - replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, - offset, NULL); + replace_with_load_global(b, state->compiler, intr, address, offset); break; } @@ -598,8 +625,8 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state) nir_intrinsic_io_semantics(intr).location, nir_intrinsic_component(intr), intr->src[2].ssa); - replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3, value, - address, offset); + replace_with_store_global(b, state->compiler, intr, value, address, + offset); break; } @@ -623,8 +650,7 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state) intr->src[0].ssa); } - replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, - offset, NULL); + replace_with_load_global(b, state->compiler, intr, address, offset); break; } @@ -664,10 +690,9 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state) nir_def *offset = 
build_tessfactor_base( b, location, nir_intrinsic_component(intr), state); - replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3, - intr->src[0].ssa, - load_tess_factor_base(b), - nir_iadd(b, intr->src[1].ssa, offset)); + replace_with_store_global( + b, state->compiler, intr, intr->src[0].ssa, + load_tess_factor_base(b), nir_iadd(b, intr->src[1].ssa, offset)); if (location != VARYING_SLOT_PRIMITIVE_ID) { nir_pop_if(b, nif); @@ -678,8 +703,8 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state) b, state, location, nir_intrinsic_component(intr), intr->src[1].ssa); - replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3, - intr->src[0].ssa, address, offset); + replace_with_store_global(b, state->compiler, intr, + intr->src[0].ssa, address, offset); } break; } @@ -694,7 +719,7 @@ bool ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader_variant *v, unsigned topology) { - struct state state = {.topology = topology}; + struct state state = {.topology = topology, .compiler = v->compiler}; if (shader_debug_enabled(shader->info.stage, shader->info.internal)) { mesa_logi("NIR (before tess lowering) for %s shader:", @@ -787,8 +812,7 @@ lower_tess_eval_block(nir_block *block, nir_builder *b, struct state *state) nir_intrinsic_io_semantics(intr).location, nir_intrinsic_component(intr), intr->src[1].ssa); - replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, - offset, NULL); + replace_with_load_global(b, state->compiler, intr, address, offset); break; } @@ -811,8 +835,7 @@ lower_tess_eval_block(nir_block *block, nir_builder *b, struct state *state) intr->src[0].ssa); } - replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, - offset, NULL); + replace_with_load_global(b, state->compiler, intr, address, offset); break; } @@ -826,7 +849,7 @@ bool ir3_nir_lower_tess_eval(nir_shader *shader, struct ir3_shader_variant *v, unsigned topology) { - struct state state = {.topology = topology}; + struct state state = 
{.topology = topology, .compiler = v->compiler}; if (shader_debug_enabled(shader->info.stage, shader->info.internal)) { mesa_logi("NIR (before tess lowering) for %s shader:", diff --git a/src/freedreno/vulkan/tu_nir_lower_ray_query.cc b/src/freedreno/vulkan/tu_nir_lower_ray_query.cc index 8c506fb8515..6dc3acf7925 100644 --- a/src/freedreno/vulkan/tu_nir_lower_ray_query.cc +++ b/src/freedreno/vulkan/tu_nir_lower_ray_query.cc @@ -262,8 +262,8 @@ load_tlas(nir_builder *b, nir_def *tlas, } else { return nir_load_global_ir3(b, components, 32, nir_pack_64_2x32(b, tlas), - nir_iadd_imm(b, nir_imul_imm(b, index, AS_RECORD_SIZE / 4), - offset / 4), + nir_iadd_imm(b, nir_imul_imm(b, index, AS_RECORD_SIZE), + offset), /* The required alignment of the * user-specified base from the Vulkan spec. */ diff --git a/src/freedreno/vulkan/tu_shader.cc b/src/freedreno/vulkan/tu_shader.cc index 1b604c484b8..7a37e60aab5 100644 --- a/src/freedreno/vulkan/tu_shader.cc +++ b/src/freedreno/vulkan/tu_shader.cc @@ -1024,7 +1024,7 @@ lower_inline_ubo(nir_builder *b, nir_intrinsic_instr *intrin, void *cb_data) val = nir_load_global_ir3(b, intrin->num_components, intrin->def.bit_size, nir_pack_64_2x32(b, base_addr), - nir_ishr_imm(b, offset, 2), + offset, .access = (enum gl_access_qualifier)( (enum gl_access_qualifier)(ACCESS_NON_WRITEABLE | ACCESS_CAN_REORDER) | @@ -1032,7 +1032,7 @@ lower_inline_ubo(nir_builder *b, nir_intrinsic_instr *intrin, void *cb_data) .align_mul = 16, .align_offset = 0, .range_base = 0, - .range = range); + .range = range * 4); } else { val = nir_load_const_ir3(b, intrin->num_components, intrin->def.bit_size,