diff --git a/src/nouveau/codegen/nv50_ir_from_nir.cpp b/src/nouveau/codegen/nv50_ir_from_nir.cpp
index 1f217e366d5..0e5ebc64a11 100644
--- a/src/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -2022,7 +2022,8 @@ Converter::visit(nir_intrinsic_instr *insn)
       mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;
       break;
    }
-   case nir_intrinsic_load_ubo: {
+   case nir_intrinsic_load_ubo:
+   case nir_intrinsic_ldc_nv: {
      const DataType dType = getDType(insn);
      LValues &newDefs = convert(&insn->def);
      Value *indirectIndex;
diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs
index 64604d88d3a..ec5d8ec39f5 100644
--- a/src/nouveau/compiler/nak/from_nir.rs
+++ b/src/nouveau/compiler/nak/from_nir.rs
@@ -2606,7 +2606,7 @@ impl<'a> ShaderFromNir<'a> {
                 }
                 self.set_dst(&intrin.def, dst);
             }
-            nir_intrinsic_load_ubo => {
+            nir_intrinsic_ldc_nv => {
                 let size_B =
                     (intrin.def.bit_size() / 8) * intrin.def.num_components();
                 let idx = &srcs[0];
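
Review note: with both backends now matching the new intrinsic, it helps to keep the hardware semantics in mind. A minimal C model (illustration only; the struct and names here are hypothetical, not driver code) of what an ldc-style constant-buffer load provides, and why no NIR-level .range survives the translation:

   #include <stdint.h>
   #include <string.h>

   /* Hypothetical model: ldc reads a bound constant buffer (cbuf) at a
    * dynamic byte offset.  The instruction encodes a cbuf index and an
    * offset, but no range, which is why nir_ldc_nv carries no .range. */
   struct cbuf_binding_model {
      const uint8_t *map;   /* stand-in for the bound cbuf contents */
   };

   static uint32_t
   ldc_u32_model(const struct cbuf_binding_model *cbufs, uint32_t idx,
                 uint32_t offset)
   {
      uint32_t v;
      memcpy(&v, cbufs[idx].map + offset, sizeof(v));  /* 4B-aligned read */
      return v;
   }

The lowering passes below therefore simply drop the .range they used to pass to nir_load_ubo.
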
diff --git a/src/nouveau/compiler/nak_nir.c b/src/nouveau/compiler/nak_nir.c
index fbf101b8db9..d21ddf4da1d 100644
--- a/src/nouveau/compiler/nak_nir.c
+++ b/src/nouveau/compiler/nak_nir.c
@@ -803,7 +803,7 @@ nak_mem_vectorize_cb(unsigned align_mul, unsigned align_offset,
    assert(util_is_power_of_two_nonzero(align_mul));
 
    unsigned max_bytes = 128u / 8u;
-   if (low->intrinsic == nir_intrinsic_load_ubo)
+   if (low->intrinsic == nir_intrinsic_ldc_nv)
       max_bytes = 64u / 8u;
 
    align_mul = MIN2(align_mul, max_bytes);
@@ -830,10 +830,10 @@ nak_mem_access_size_align(nir_intrinsic_op intrin,
    unsigned chunk_bytes = MIN3(bytes_pow2, align, 16);
    assert(util_is_power_of_two_nonzero(chunk_bytes));
 
-   if (intrin == nir_intrinsic_load_ubo)
+   if (intrin == nir_intrinsic_ldc_nv)
       chunk_bytes = MIN2(chunk_bytes, 8);
 
-   if (intrin == nir_intrinsic_load_ubo && align < 4) {
+   if (intrin == nir_intrinsic_ldc_nv && align < 4) {
       /* CBufs require 4B alignment unless we're doing a ldc.u8 or ldc.i8.
        * In particular, this applies to ldc.u16 which means we either have to
        * fall back to two ldc.u8 or use ldc.u32 and shift stuff around to get
diff --git a/src/nouveau/compiler/nak_nir_lower_fs_inputs.c b/src/nouveau/compiler/nak_nir_lower_fs_inputs.c
index 18c28a26690..05032fddd6a 100644
--- a/src/nouveau/compiler/nak_nir_lower_fs_inputs.c
+++ b/src/nouveau/compiler/nak_nir_lower_fs_inputs.c
@@ -107,12 +107,10 @@ static nir_def *
 load_sample_pos_at(nir_builder *b, nir_def *sample_id,
                    const struct nak_fs_key *fs_key)
 {
-   nir_def *loc = nir_load_ubo(b, 1, 64,
-                               nir_imm_int(b, fs_key->sample_locations_cb),
-                               nir_imm_int(b, fs_key->sample_locations_offset),
-                               .align_mul = 8,
-                               .align_offset = 0,
-                               .range = fs_key->sample_locations_offset + 8);
+   nir_def *loc = nir_ldc_nv(b, 1, 64,
+                             nir_imm_int(b, fs_key->sample_locations_cb),
+                             nir_imm_int(b, fs_key->sample_locations_offset),
+                             .align_mul = 8, .align_offset = 0);
 
    /* Yay little endian */
    loc = nir_ushr(b, loc, nir_imul_imm(b, sample_id, 8));
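
Review note: the fallback described in the nak_nir.c comment above is easy to get wrong, so here is a minimal sketch of the ldc.u32-plus-shift path for a 2B-aligned 16-bit read (illustration only; the helper is hypothetical, and NAK's actual lowering is driven by callbacks like nak_mem_access_size_align above):

   #include <stdint.h>
   #include <string.h>

   /* Sketch: emulate a 16-bit constant-buffer read at a 2B-aligned offset
    * with the 4B-aligned 32-bit read the hardware supports, plus a
    * little-endian shift.  2B alignment guarantees the halfword never
    * straddles the 32-bit word. */
   static uint16_t
   emulated_ldc_u16(const uint8_t *cbuf, uint32_t offset)
   {
      uint32_t word;
      memcpy(&word, cbuf + (offset & ~3u), sizeof(word)); /* the ldc.u32 */
      return (uint16_t)(word >> ((offset & 3u) * 8));     /* shift into place */
   }

The same little-endian shift trick is what load_sample_pos_at uses just above to move the per-sample byte of the packed 64-bit sample-locations word into the low bits.
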
diff --git a/src/nouveau/vulkan/nvk_nir_lower_descriptors.c b/src/nouveau/vulkan/nvk_nir_lower_descriptors.c
index a4a5d2d6862..1072ffd13f5 100644
--- a/src/nouveau/vulkan/nvk_nir_lower_descriptors.c
+++ b/src/nouveau/vulkan/nvk_nir_lower_descriptors.c
@@ -512,16 +512,14 @@ lower_load_constant(nir_builder *b, nir_intrinsic_instr *load,
    assert(cbuf_idx >= 0);
 
    uint32_t base = nir_intrinsic_base(load);
-   uint32_t range = nir_intrinsic_range(load);
 
    b->cursor = nir_before_instr(&load->instr);
 
    nir_def *offset = nir_iadd_imm(b, load->src[0].ssa, base);
-   nir_def *data = nir_load_ubo(b, load->def.num_components, load->def.bit_size,
-                                nir_imm_int(b, cbuf_idx), offset,
-                                .align_mul = nir_intrinsic_align_mul(load),
-                                .align_offset = nir_intrinsic_align_offset(load),
-                                .range_base = base, .range = range);
+   nir_def *data = nir_ldc_nv(b, load->def.num_components, load->def.bit_size,
+                              nir_imm_int(b, cbuf_idx), offset,
+                              .align_mul = nir_intrinsic_align_mul(load),
+                              .align_offset = nir_intrinsic_align_offset(load));
 
    nir_def_rewrite_uses(&load->def, data);
 
@@ -535,9 +533,9 @@ load_descriptor_set_addr(nir_builder *b, uint32_t set,
    uint32_t set_addr_offset = nvk_root_descriptor_offset(sets) +
      set * sizeof(struct nvk_buffer_address);
 
-   return nir_load_ubo(b, 1, 64, nir_imm_int(b, 0),
-                       nir_imm_int(b, set_addr_offset),
-                       .align_mul = 8, .align_offset = 0, .range = ~0);
+   return nir_ldc_nv(b, 1, 64, nir_imm_int(b, 0),
+                     nir_imm_int(b, set_addr_offset),
+                     .align_mul = 8, .align_offset = 0);
 }
 
 static nir_def *
@@ -560,10 +558,9 @@ load_dynamic_buffer_start(nir_builder *b, uint32_t set,
       uint32_t root_offset =
         nvk_root_descriptor_offset(set_dynamic_buffer_start) + set;
 
-      return nir_u2u32(b, nir_load_ubo(b, 1, 8, nir_imm_int(b, 0),
-                                       nir_imm_int(b, root_offset),
-                                       .align_mul = 1, .align_offset = 0,
-                                       .range = ~0));
+      return nir_u2u32(b, nir_ldc_nv(b, 1, 8, nir_imm_int(b, 0),
+                                     nir_imm_int(b, root_offset),
+                                     .align_mul = 1, .align_offset = 0));
    }
 }
 
@@ -594,8 +591,8 @@ load_descriptor(nir_builder *b, unsigned num_components, unsigned bit_size,
      assert(num_components == 4 && bit_size == 32);
 
      nir_def *desc =
-        nir_load_ubo(b, 4, 32, nir_imm_int(b, 0), root_desc_offset,
-                     .align_mul = 16, .align_offset = 0, .range = ~0);
+        nir_ldc_nv(b, 4, 32, nir_imm_int(b, 0), root_desc_offset,
+                   .align_mul = 16, .align_offset = 0);
      /* We know a priori that the .w component (offset) is zero */
      return nir_vec4(b, nir_channel(b, desc, 0), nir_channel(b, desc, 1),
                         nir_channel(b, desc, 2), nir_imm_int(b, 0));
@@ -641,12 +638,11 @@ load_descriptor(nir_builder *b, unsigned num_components, unsigned bit_size,
 
    nir_def *desc;
    if (cbuf_idx >= 0 && max_desc_ubo_offset <= NVK_MAX_CBUF_SIZE) {
-      desc = nir_load_ubo(b, num_components, bit_size,
-                          nir_imm_int(b, cbuf_idx),
-                          desc_ubo_offset,
-                          .align_mul = desc_align_mul,
-                          .align_offset = desc_align_offset,
-                          .range = ~0);
+      desc = nir_ldc_nv(b, num_components, bit_size,
+                        nir_imm_int(b, cbuf_idx),
+                        desc_ubo_offset,
+                        .align_mul = desc_align_mul,
+                        .align_offset = desc_align_offset);
    } else {
       nir_def *set_addr = load_descriptor_set_addr(b, set, ctx);
       desc = nir_load_global_constant_offset(b, num_components, bit_size,
@@ -727,13 +723,12 @@ _lower_sysval_to_root_table(nir_builder *b, nir_intrinsic_instr *intrin,
 {
    b->cursor = nir_instr_remove(&intrin->instr);
 
-   nir_def *val = nir_load_ubo(b, intrin->def.num_components,
-                               intrin->def.bit_size,
-                               nir_imm_int(b, 0), /* Root table */
-                               nir_imm_int(b, root_table_offset),
-                               .align_mul = 4,
-                               .align_offset = 0,
-                               .range = root_table_offset + 3 * 4);
+   nir_def *val = nir_ldc_nv(b, intrin->def.num_components,
+                             intrin->def.bit_size,
+                             nir_imm_int(b, 0), /* Root table */
+                             nir_imm_int(b, root_table_offset),
+                             .align_mul = 4,
+                             .align_offset = 0);
 
    nir_def_rewrite_uses(&intrin->def, val);
 
@@ -759,12 +754,10 @@ lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *load,
                                      push_region_offset + base);
 
    nir_def *val =
-      nir_load_ubo(b, load->def.num_components, load->def.bit_size,
-                   nir_imm_int(b, 0), offset,
-                   .align_mul = load->def.bit_size / 8,
-                   .align_offset = 0,
-                   .range = push_region_offset + base +
-                            nir_intrinsic_range(load));
+      nir_ldc_nv(b, load->def.num_components, load->def.bit_size,
+                 nir_imm_int(b, 0), offset,
+                 .align_mul = load->def.bit_size / 8,
+                 .align_offset = 0);
 
    nir_def_rewrite_uses(&load->def, val);
 
@@ -903,12 +896,11 @@ lower_interp_at_sample(nir_builder *b, nir_intrinsic_instr *interp,
 
    b->cursor = nir_before_instr(&interp->instr);
 
-   nir_def *loc = nir_load_ubo(b, 1, 64,
-                               nir_imm_int(b, 0), /* Root table */
-                               nir_imm_int(b, root_table_offset),
-                               .align_mul = 8,
-                               .align_offset = 0,
-                               .range = root_table_offset + 8);
+   nir_def *loc = nir_ldc_nv(b, 1, 64,
+                             nir_imm_int(b, 0), /* Root table */
+                             nir_imm_int(b, root_table_offset),
+                             .align_mul = 8,
+                             .align_offset = 0);
 
    /* Yay little endian */
    loc = nir_ushr(b, loc, nir_imul_imm(b, sample, 8));
@@ -1092,9 +1084,9 @@ lower_ssbo_resource_index(nir_builder *b, nir_intrinsic_instr *intrin,
         nvk_root_descriptor_offset(root_desc_addr);
 
      nir_def *root_desc_addr =
-        nir_load_ubo(b, 1, 64, nir_imm_int(b, 0),
-                     nir_imm_int(b, root_desc_addr_offset),
-                     .align_mul = 8, .align_offset = 0, .range = ~0);
+        nir_ldc_nv(b, 1, 64, nir_imm_int(b, 0),
+                   nir_imm_int(b, root_desc_addr_offset),
+                   .align_mul = 8, .align_offset = 0);
 
      nir_def *dynamic_buffer_start =
         nir_iadd_imm(b, load_dynamic_buffer_start(b, set, ctx),
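
Review note: every cbuf-0 load in this file reads a fixed field of the root descriptor table that NVK binds as constant buffer 0, with nvk_root_descriptor_offset() producing the field's byte offset. A sketch of the idea (field names and sizes here are illustrative, not NVK's actual layout):

   #include <stdint.h>
   #include <stddef.h>

   /* Illustrative stand-in for NVK's root descriptor table; only the three
    * members used in this file are sketched. */
   struct example_root_table {
      uint64_t root_desc_addr;                       /* GPU address of this table */
      struct { uint64_t addr; uint64_t size; } sets[8];  /* per-set buffer address */
      uint8_t set_dynamic_buffer_start[8];           /* 8-bit start indices */
   };

   /* nvk_root_descriptor_offset(member) behaves like offsetof() into that
    * table, so a descriptor-set address load becomes an ldc from cbuf 0. */
   static uint32_t
   example_set_addr_offset(uint32_t set)
   {
      return (uint32_t)(offsetof(struct example_root_table, sets) +
                        set * sizeof(((struct example_root_table *)0)->sets[0]));
   }
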
diff --git a/src/nouveau/vulkan/nvk_shader.c b/src/nouveau/vulkan/nvk_shader.c
index 74b64dd166c..cf60f09760e 100644
--- a/src/nouveau/vulkan/nvk_shader.c
+++ b/src/nouveau/vulkan/nvk_shader.c
@@ -215,54 +215,73 @@ nvk_hash_graphics_state(struct vk_physical_device *device,
 }
 
 static bool
-lower_load_global_constant_offset_instr(nir_builder *b,
-                                        nir_intrinsic_instr *intrin,
-                                        UNUSED void *_data)
+lower_load_intrinsic(nir_builder *b, nir_intrinsic_instr *load,
+                     UNUSED void *_data)
 {
-   if (intrin->intrinsic != nir_intrinsic_load_global_constant_offset &&
-       intrin->intrinsic != nir_intrinsic_load_global_constant_bounded)
+   switch (load->intrinsic) {
+   case nir_intrinsic_load_ubo: {
+      b->cursor = nir_before_instr(&load->instr);
+
+      nir_def *index = load->src[0].ssa;
+      nir_def *offset = load->src[1].ssa;
+      const enum gl_access_qualifier access = nir_intrinsic_access(load);
+      const uint32_t align_mul = nir_intrinsic_align_mul(load);
+      const uint32_t align_offset = nir_intrinsic_align_offset(load);
+
+      nir_def *val = nir_ldc_nv(b, load->num_components, load->def.bit_size,
+                                index, offset, .access = access,
+                                .align_mul = align_mul,
+                                .align_offset = align_offset);
+      nir_def_rewrite_uses(&load->def, val);
+      return true;
+   }
+
+   case nir_intrinsic_load_global_constant_offset:
+   case nir_intrinsic_load_global_constant_bounded: {
+      b->cursor = nir_before_instr(&load->instr);
+
+      nir_def *base_addr = load->src[0].ssa;
+      nir_def *offset = load->src[1].ssa;
+
+      nir_def *zero = NULL;
+      if (load->intrinsic == nir_intrinsic_load_global_constant_bounded) {
+         nir_def *bound = load->src[2].ssa;
+
+         unsigned bit_size = load->def.bit_size;
+         assert(bit_size >= 8 && bit_size % 8 == 0);
+         unsigned byte_size = bit_size / 8;
+
+         zero = nir_imm_zero(b, load->num_components, bit_size);
+
+         unsigned load_size = byte_size * load->num_components;
+
+         nir_def *sat_offset =
+            nir_umin(b, offset, nir_imm_int(b, UINT32_MAX - (load_size - 1)));
+         nir_def *in_bounds =
+            nir_ilt(b, nir_iadd_imm(b, sat_offset, load_size - 1), bound);
+
+         nir_push_if(b, in_bounds);
+      }
+
+      nir_def *val =
+         nir_build_load_global_constant(b, load->def.num_components,
+                                        load->def.bit_size,
+                                        nir_iadd(b, base_addr, nir_u2u64(b, offset)),
+                                        .align_mul = nir_intrinsic_align_mul(load),
+                                        .align_offset = nir_intrinsic_align_offset(load));
+
+      if (load->intrinsic == nir_intrinsic_load_global_constant_bounded) {
+         nir_pop_if(b, NULL);
+         val = nir_if_phi(b, val, zero);
+      }
+
+      nir_def_rewrite_uses(&load->def, val);
+      return true;
+   }
+
+   default:
       return false;
-
-   b->cursor = nir_before_instr(&intrin->instr);
-
-   nir_def *base_addr = intrin->src[0].ssa;
-   nir_def *offset = intrin->src[1].ssa;
-
-   nir_def *zero = NULL;
-   if (intrin->intrinsic == nir_intrinsic_load_global_constant_bounded) {
-      nir_def *bound = intrin->src[2].ssa;
-
-      unsigned bit_size = intrin->def.bit_size;
-      assert(bit_size >= 8 && bit_size % 8 == 0);
-      unsigned byte_size = bit_size / 8;
-
-      zero = nir_imm_zero(b, intrin->num_components, bit_size);
-
-      unsigned load_size = byte_size * intrin->num_components;
-
-      nir_def *sat_offset =
-         nir_umin(b, offset, nir_imm_int(b, UINT32_MAX - (load_size - 1)));
-      nir_def *in_bounds =
-         nir_ilt(b, nir_iadd_imm(b, sat_offset, load_size - 1), bound);
-
-      nir_push_if(b, in_bounds);
    }
-
-   nir_def *val =
-      nir_build_load_global_constant(b, intrin->def.num_components,
-                                     intrin->def.bit_size,
-                                     nir_iadd(b, base_addr, nir_u2u64(b, offset)),
-                                     .align_mul = nir_intrinsic_align_mul(intrin),
-                                     .align_offset = nir_intrinsic_align_offset(intrin));
-
-   if (intrin->intrinsic == nir_intrinsic_load_global_constant_bounded) {
-      nir_pop_if(b, NULL);
-      val = nir_if_phi(b, val, zero);
-   }
-
-   nir_def_rewrite_uses(&intrin->def, val);
-
-   return true;
 }
 
 struct lower_ycbcr_state {
@@ -402,7 +421,7 @@ nvk_lower_nir(struct nvk_device *dev, nir_shader *nir,
    NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ubo,
             nvk_buffer_addr_format(rs->uniform_buffers));
    NIR_PASS(_, nir, nir_shader_intrinsics_pass,
-            lower_load_global_constant_offset_instr, nir_metadata_none, NULL);
+            lower_load_intrinsic, nir_metadata_none, NULL);
 
    if (!nir->info.shared_memory_explicit_layout) {
       NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
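
Review note: the bounds check carried over into lower_load_intrinsic is easiest to read as scalar code. A C model of the NIR built above (illustration only; the pass emits NIR, and this helper is hypothetical):

   #include <stdint.h>
   #include <string.h>

   /* Scalar model of the load_global_constant_bounded lowering above. */
   static uint32_t
   bounded_load_u32_model(const uint8_t *base_addr, uint32_t offset,
                          uint32_t bound)
   {
      const uint32_t load_size = 4;  /* byte_size * num_components */

      /* nir_umin: clamp so the + (load_size - 1) below cannot wrap at 2^32 */
      uint32_t sat_offset = offset;
      if (sat_offset > UINT32_MAX - (load_size - 1))
         sat_offset = UINT32_MAX - (load_size - 1);

      /* nir_ilt: signed compare of the last loaded byte against the bound */
      if ((int32_t)(sat_offset + load_size - 1) < (int32_t)bound) {
         uint32_t val;  /* the nir_build_load_global_constant */
         memcpy(&val, base_addr + offset, sizeof(val));
         return val;
      }

      /* out of bounds: the nir_if_phi selects the zero immediate */
      return 0;
   }

The clamp exists so that offset + (load_size - 1) cannot wrap around 2^32 before the comparison; the taken/not-taken branches then become the nir_push_if/nir_pop_if region whose results merge in the nir_if_phi.
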