diff --git a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c
index e3b8a27a42b..30f4b6bd267 100644
--- a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c
+++ b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c
@@ -49,10 +49,10 @@ struct ntv_context {
    gl_shader_stage stage;
    const struct zink_so_info *so_info;

-   SpvId ubos[PIPE_MAX_CONSTANT_BUFFERS][3]; //8, 16, 32
+   SpvId ubos[PIPE_MAX_CONSTANT_BUFFERS][5]; //8, 16, 32, unused, 64
    nir_variable *ubo_vars[PIPE_MAX_CONSTANT_BUFFERS];

-   SpvId ssbos[PIPE_MAX_SHADER_BUFFERS][3]; //8, 16, 32
+   SpvId ssbos[PIPE_MAX_SHADER_BUFFERS][5]; //8, 16, 32, unused, 64
    nir_variable *ssbo_vars[PIPE_MAX_SHADER_BUFFERS];
    SpvId image_types[PIPE_MAX_SAMPLERS];
    SpvId images[PIPE_MAX_SAMPLERS];
@@ -1915,9 +1915,9 @@ emit_load_bo(struct ntv_context *ctx, nir_intrinsic_instr *intr)
    bool ssbo = intr->intrinsic == nir_intrinsic_load_ssbo;
    assert(const_block_index); // no dynamic indexing for now

-   unsigned idx = 0;
    unsigned bit_size = nir_dest_bit_size(intr->dest);
-   idx = MIN2(bit_size, 32) >> 4;
+   assert(bit_size <= 64);
+   unsigned idx = bit_size >> 4;
    if (ssbo) {
       assert(idx < ARRAY_SIZE(ctx->ssbos[0]));
       if (!ctx->ssbos[const_block_index->u32][idx])
@@ -1928,15 +1928,12 @@ emit_load_bo(struct ntv_context *ctx, nir_intrinsic_instr *intr)
          emit_bo(ctx, ctx->ubo_vars[const_block_index->u32], nir_dest_bit_size(intr->dest));
    }
    SpvId bo = ssbo ? ctx->ssbos[const_block_index->u32][idx] : ctx->ubos[const_block_index->u32][idx];
-   SpvId uint_type = get_uvec_type(ctx, MIN2(bit_size, 32), 1);
+   SpvId uint_type = get_uvec_type(ctx, bit_size, 1);
    SpvId one = emit_uint_const(ctx, 32, 1);

    /* number of components being loaded */
    unsigned num_components = nir_dest_num_components(intr->dest);
-   /* we need to grab 2x32 to fill the 64bit value */
-   if (bit_size == 64)
-      num_components *= 2;
-   SpvId constituents[NIR_MAX_VEC_COMPONENTS * 2];
+   SpvId constituents[NIR_MAX_VEC_COMPONENTS];
    SpvId result;

    /* destination type for the load */
@@ -1950,7 +1947,7 @@ emit_load_bo(struct ntv_context *ctx, nir_intrinsic_instr *intr)
    /* our generated uniform has a memory layout like
     *
     * struct {
-    *    uint base[array_size];
+    *    uintN base[array_size];
     * };
     *
     * first, access 'base'
@@ -1983,18 +1980,6 @@ emit_load_bo(struct ntv_context *ctx, nir_intrinsic_instr *intr)
       offset = emit_binop(ctx, SpvOpIAdd, uint_type, offset, one);
    }

-   /* if we're loading a 64bit value, we have to reassemble all the u32 values we've loaded into u64 values
-    * by creating uvec2 composites and bitcasting them to u64 values
-    */
-   if (bit_size == 64) {
-      num_components /= 2;
-      type = get_uvec_type(ctx, 64, num_components);
-      SpvId u64_type = get_uvec_type(ctx, 64, 1);
-      for (unsigned i = 0; i < num_components; i++) {
-         constituents[i] = spirv_builder_emit_composite_construct(&ctx->builder, get_uvec_type(ctx, 32, 2), constituents + i * 2, 2);
-         constituents[i] = emit_bitcast(ctx, u64_type, constituents[i]);
-      }
-   }
    /* if loading more than 1 value, reassemble the results into the desired type,
     * otherwise just use the loaded result
     */
@@ -2194,7 +2179,6 @@ emit_load_shared(struct ntv_context *ctx, nir_intrinsic_instr *intr)
    SpvId dest_type = get_dest_type(ctx, &intr->dest, nir_type_uint);
    unsigned num_components = nir_dest_num_components(intr->dest);
    unsigned bit_size = nir_dest_bit_size(intr->dest);
-   bool qword = bit_size == 64;
    SpvId uint_type = get_uvec_type(ctx, 32, 1);
    SpvId ptr_type = spirv_builder_type_pointer(&ctx->builder,
                                                SpvStorageClassWorkgroup,
@@ -2203,17 +2187,10 @@ emit_load_shared(struct ntv_context *ctx, nir_intrinsic_instr *intr)
    SpvId constituents[NIR_MAX_VEC_COMPONENTS];
    /* need to convert array -> vec */
    for (unsigned i = 0; i < num_components; i++) {
-      SpvId parts[2];
-      for (unsigned j = 0; j < 1 + !!qword; j++) {
-         SpvId member = spirv_builder_emit_access_chain(&ctx->builder, ptr_type,
-                                                        ctx->shared_block_var, &offset, 1);
-         parts[j] = spirv_builder_emit_load(&ctx->builder, uint_type, member);
-         offset = emit_binop(ctx, SpvOpIAdd, uint_type, offset, emit_uint_const(ctx, 32, 1));
-      }
-      if (qword)
-         constituents[i] = spirv_builder_emit_composite_construct(&ctx->builder, get_uvec_type(ctx, 64, 1), parts, 2);
-      else
-         constituents[i] = parts[0];
+      SpvId member = spirv_builder_emit_access_chain(&ctx->builder, ptr_type,
+                                                     ctx->shared_block_var, &offset, 1);
+      constituents[i] = spirv_builder_emit_load(&ctx->builder, uint_type, member);
+      offset = emit_binop(ctx, SpvOpIAdd, uint_type, offset, emit_uint_const(ctx, 32, 1));
    }
    SpvId result;
    if (num_components > 1)
@@ -2258,15 +2235,11 @@ emit_store_shared(struct ntv_context *ctx, nir_intrinsic_instr *intr)
 static void
 emit_load_push_const(struct ntv_context *ctx, nir_intrinsic_instr *intr)
 {
-   unsigned bit_size = nir_dest_bit_size(intr->dest);
    SpvId uint_type = get_uvec_type(ctx, 32, 1);
    SpvId load_type = get_uvec_type(ctx, 32, 1);

    /* number of components being loaded */
    unsigned num_components = nir_dest_num_components(intr->dest);
-   /* we need to grab 2x32 to fill the 64bit value */
-   if (bit_size == 64)
-      num_components *= 2;
    SpvId constituents[NIR_MAX_VEC_COMPONENTS * 2];
    SpvId result;

@@ -2298,18 +2271,6 @@ emit_load_push_const(struct ntv_context *ctx, nir_intrinsic_instr *intr)
       offset = emit_binop(ctx, SpvOpIAdd, uint_type, offset, one);
    }

-   /* if we're loading a 64bit value, we have to reassemble all the u32 values we've loaded into u64 values
-    * by creating uvec2 composites and bitcasting them to u64 values
-    */
-   if (bit_size == 64) {
-      num_components /= 2;
-      type = get_uvec_type(ctx, 64, num_components);
-      SpvId u64_type = get_uvec_type(ctx, 64, 1);
-      for (unsigned i = 0; i < num_components; i++) {
-         constituents[i] = spirv_builder_emit_composite_construct(&ctx->builder, get_uvec_type(ctx, 32, 2), constituents + i * 2, 2);
-         constituents[i] = emit_bitcast(ctx, u64_type, constituents[i]);
-      }
-   }
    /* if loading more than 1 value, reassemble the results into the desired type,
     * otherwise just use the loaded result
     */
diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c
index be08bf811aa..73443e27933 100644
--- a/src/gallium/drivers/zink/zink_compiler.c
+++ b/src/gallium/drivers/zink/zink_compiler.c
@@ -640,16 +640,54 @@ decompose_attribs(nir_shader *nir, uint32_t decomposed_attrs, uint32_t decompose
 static bool
 rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
 {
+   struct zink_screen *screen = data;
+   const bool has_int64 = screen->info.feats.features.shaderInt64;
    if (instr->type != nir_instr_type_intrinsic)
       return false;
    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+   b->cursor = nir_before_instr(instr);
    switch (intr->intrinsic) {
    case nir_intrinsic_load_ssbo:
-   case nir_intrinsic_load_ubo:
-   case nir_intrinsic_load_ubo_vec4:
-      b->cursor = nir_before_instr(instr);
-      nir_instr_rewrite_src_ssa(instr, &intr->src[1], nir_udiv_imm(b, intr->src[1].ssa, MIN2(nir_dest_bit_size(intr->dest), 32) / 8));
+   case nir_intrinsic_load_ubo: {
+      /* ubo0 can have unaligned 64bit loads, particularly for bindless texture ids */
+      bool force_2x32 = intr->intrinsic == nir_intrinsic_load_ubo &&
+                        nir_src_as_uint(intr->src[0]) == 0 &&
+                        nir_dest_bit_size(intr->dest) == 64 &&
+                        nir_intrinsic_align_offset(intr) % 8 != 0;
+      nir_instr_rewrite_src_ssa(instr, &intr->src[1], nir_udiv_imm(b, intr->src[1].ssa,
+                                (force_2x32 ? 32 : nir_dest_bit_size(intr->dest)) / 8));
+      /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
+      if (force_2x32 || (nir_dest_bit_size(intr->dest) == 64 && !has_int64)) {
+         /* this is always scalarized */
+         assert(intr->dest.ssa.num_components == 1);
+         /* rewrite as 2x32 */
+         nir_ssa_def *load;
+         if (intr->intrinsic == nir_intrinsic_load_ssbo)
+            load = nir_load_ssbo(b, 2, 32, intr->src[0].ssa, intr->src[1].ssa, .align_mul = 4, .align_offset = 0);
+         else
+            load = nir_load_ubo(b, 2, 32, intr->src[0].ssa, intr->src[1].ssa, .align_mul = 4, .align_offset = 0, .range = 4);
+         nir_intrinsic_set_access(nir_instr_as_intrinsic(load->parent_instr), nir_intrinsic_access(intr));
+         /* cast back to 64bit */
+         nir_ssa_def *casted = nir_pack_64_2x32(b, load);
+         nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted);
+         nir_instr_remove(instr);
+      }
       return true;
+   }
+   case nir_intrinsic_load_shared:
+      /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
+      if (nir_dest_bit_size(intr->dest) == 64 && !has_int64) {
+         /* this is always scalarized */
+         assert(intr->dest.ssa.num_components == 1);
+         /* rewrite as 2x32 */
+         nir_ssa_def *load = nir_load_shared(b, 2, 32, intr->src[0].ssa, .align_mul = 4, .align_offset = 0);
+         /* cast back to 64bit */
+         nir_ssa_def *casted = nir_pack_64_2x32(b, load);
+         nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted);
+         nir_instr_remove(instr);
+         return true;
+      }
+      break;
    case nir_intrinsic_store_ssbo:
    default:
       break;
@@ -658,9 +696,9 @@ rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
 }

 static bool
-rewrite_bo_access(nir_shader *shader)
+rewrite_bo_access(nir_shader *shader, struct zink_screen *screen)
 {
-   return nir_shader_instructions_pass(shader, rewrite_bo_access_instr, nir_metadata_dominance, NULL);
+   return nir_shader_instructions_pass(shader, rewrite_bo_access_instr, nir_metadata_dominance, screen);
 }

 static void
@@ -900,7 +938,7 @@ zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, nir_shad
    }
    if (screen->driconf.inline_uniforms) {
       NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
-      NIR_PASS_V(nir, rewrite_bo_access);
+      NIR_PASS_V(nir, rewrite_bo_access, screen);
    }
    if (inlined_uniforms) {
       optimize_nir(nir);
@@ -1417,7 +1455,7 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
    /* run in compile if there could be inlined uniforms */
    if (!screen->driconf.inline_uniforms) {
       NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
-      NIR_PASS_V(nir, rewrite_bo_access);
+      NIR_PASS_V(nir, rewrite_bo_access, screen);
    }

    if (zink_debug & ZINK_DEBUG_NIR) {
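
Note on the ubos/ssbos arrays growing from [3] to [5]: emit_load_bo now derives the per-bit-size slot directly as idx = bit_size >> 4, which maps 8 -> 0, 16 -> 1, 32 -> 2, and 64 -> 4. Slot 3 is the "unused" hole called out in the updated comment, since no 48bit type exists to fill it.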
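
For reference, here is the shared-memory half of the 2x32 fallback isolated into a minimal standalone sketch. lower_64bit_shared_load is a hypothetical helper name, not part of the patch; it assumes the same invariants the pass asserts above (a scalarized 64bit destination and shaderInt64 unavailable) and uses only builder calls that appear in the patch itself.

#include "nir_builder.h"

/* Sketch: replace a scalar 64bit load_shared with a 2x32 load plus a repack.
 * Mirrors the load_shared case in rewrite_bo_access_instr above. */
static bool
lower_64bit_shared_load(nir_builder *b, nir_intrinsic_instr *intr)
{
   /* only scalar 64bit destinations are expected after nir_lower_io_to_scalar */
   if (nir_dest_bit_size(intr->dest) != 64 || intr->dest.ssa.num_components != 1)
      return false;
   b->cursor = nir_before_instr(&intr->instr);
   /* load the same 8 bytes as a uvec2 at the original offset */
   nir_ssa_def *load = nir_load_shared(b, 2, 32, intr->src[0].ssa,
                                       .align_mul = 4, .align_offset = 0);
   /* fuse the two 32bit halves back into one 64bit value */
   nir_ssa_def *casted = nir_pack_64_2x32(b, load);
   /* replace all uses and drop the original 64bit load */
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted);
   nir_instr_remove(&intr->instr);
   return true;
}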