Mirror of https://gitlab.freedesktop.org/mesa/mesa.git (synced 2025-12-24 17:30:12 +01:00)
zink: move all 64-32bit shader load rewriting to nir pass

this also enables natural 64bit loads on drivers that support it

Reviewed-by: Dave Airlie <airlied@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13484>
This commit is contained in:
parent 3a1ecd1e8c
commit 150d6ee97e
2 changed files with 57 additions and 58 deletions
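For context: the heart of this change is that 64bit loads are now either emitted natively (when the driver exposes shaderInt64) or split into 2x32 loads and repacked with nir_pack_64_2x32 inside the NIR pass, rather than being reassembled later during SPIR-V emission. A CPU-side illustration of what that repack computes (not code from the commit):

```c
#include <assert.h>
#include <stdint.h>

/* Illustration only: the CPU equivalent of nir_pack_64_2x32, which the
 * reworked pass uses to fuse a 2x32 load back into one 64bit value.
 * Component 0 of the vec2 supplies the low 32 bits. */
static uint64_t pack_64_2x32(uint32_t lo, uint32_t hi)
{
   return (uint64_t)lo | ((uint64_t)hi << 32);
}

int main(void)
{
   assert(pack_64_2x32(0x89abcdefu, 0x01234567u) == 0x0123456789abcdefull);
   return 0;
}
```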
```diff
@@ -49,10 +49,10 @@ struct ntv_context {
    gl_shader_stage stage;
    const struct zink_so_info *so_info;
 
-   SpvId ubos[PIPE_MAX_CONSTANT_BUFFERS][3]; //8, 16, 32
+   SpvId ubos[PIPE_MAX_CONSTANT_BUFFERS][5]; //8, 16, 32, unused, 64
    nir_variable *ubo_vars[PIPE_MAX_CONSTANT_BUFFERS];
 
-   SpvId ssbos[PIPE_MAX_SHADER_BUFFERS][3]; //8, 16, 32
+   SpvId ssbos[PIPE_MAX_SHADER_BUFFERS][5]; //8, 16, 32, unused, 64
    nir_variable *ssbo_vars[PIPE_MAX_SHADER_BUFFERS];
    SpvId image_types[PIPE_MAX_SAMPLERS];
    SpvId images[PIPE_MAX_SAMPLERS];
```
```diff
@@ -1915,9 +1915,9 @@ emit_load_bo(struct ntv_context *ctx, nir_intrinsic_instr *intr)
    bool ssbo = intr->intrinsic == nir_intrinsic_load_ssbo;
    assert(const_block_index); // no dynamic indexing for now
 
-   unsigned idx = 0;
    unsigned bit_size = nir_dest_bit_size(intr->dest);
-   idx = MIN2(bit_size, 32) >> 4;
+   assert(bit_size <= 64);
+   unsigned idx = bit_size >> 4;
    if (ssbo) {
       assert(idx < ARRAY_SIZE(ctx->ssbos[0]));
       if (!ctx->ssbos[const_block_index->u32][idx])
```
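For context on the indexing above: `bit_size >> 4` maps each supported bit size to its own slot, which leaves slot 3 dead and is why the `ubos`/`ssbos` arrays in the first hunk grow from `[3]` to `[5]` with an "unused" entry. A minimal check of that mapping (illustration, not code from the commit):

```c
#include <assert.h>

/* bit_size >> 4 gives 8->0, 16->1, 32->2, 64->4, so a five-entry
 * array is needed and entry 3 can never be hit. */
int main(void)
{
   assert((8 >> 4) == 0);
   assert((16 >> 4) == 1);
   assert((32 >> 4) == 2);
   assert((64 >> 4) == 4);
   return 0;
}
```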
```diff
@@ -1928,15 +1928,12 @@ emit_load_bo(struct ntv_context *ctx, nir_intrinsic_instr *intr)
       emit_bo(ctx, ctx->ubo_vars[const_block_index->u32], nir_dest_bit_size(intr->dest));
    }
    SpvId bo = ssbo ? ctx->ssbos[const_block_index->u32][idx] : ctx->ubos[const_block_index->u32][idx];
-   SpvId uint_type = get_uvec_type(ctx, MIN2(bit_size, 32), 1);
+   SpvId uint_type = get_uvec_type(ctx, bit_size, 1);
    SpvId one = emit_uint_const(ctx, 32, 1);
 
    /* number of components being loaded */
    unsigned num_components = nir_dest_num_components(intr->dest);
-   /* we need to grab 2x32 to fill the 64bit value */
-   if (bit_size == 64)
-      num_components *= 2;
-   SpvId constituents[NIR_MAX_VEC_COMPONENTS * 2];
+   SpvId constituents[NIR_MAX_VEC_COMPONENTS];
    SpvId result;
 
    /* destination type for the load */
```
```diff
@@ -1950,7 +1947,7 @@ emit_load_bo(struct ntv_context *ctx, nir_intrinsic_instr *intr)
    /* our generated uniform has a memory layout like
     *
     * struct {
-    *    uint base[array_size];
+    *    uintN base[array_size];
     * };
     *
     * first, access 'base'
```
```diff
@@ -1983,18 +1980,6 @@ emit_load_bo(struct ntv_context *ctx, nir_intrinsic_instr *intr)
       offset = emit_binop(ctx, SpvOpIAdd, uint_type, offset, one);
    }
 
-   /* if we're loading a 64bit value, we have to reassemble all the u32 values we've loaded into u64 values
-    * by creating uvec2 composites and bitcasting them to u64 values
-    */
-   if (bit_size == 64) {
-      num_components /= 2;
-      type = get_uvec_type(ctx, 64, num_components);
-      SpvId u64_type = get_uvec_type(ctx, 64, 1);
-      for (unsigned i = 0; i < num_components; i++) {
-         constituents[i] = spirv_builder_emit_composite_construct(&ctx->builder, get_uvec_type(ctx, 32, 2), constituents + i * 2, 2);
-         constituents[i] = emit_bitcast(ctx, u64_type, constituents[i]);
-      }
-   }
    /* if loading more than 1 value, reassemble the results into the desired type,
    * otherwise just use the loaded result
    */
```
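The deleted block above built a uvec2 composite and bitcast it to u64 at SPIR-V emission time. Bit for bit, that is the same operation as the `nir_pack_64_2x32` now emitted by the NIR pass, which is what makes the move safe. A CPU-side sketch of the equivalence (illustration only, little-endian host assumed):

```c
#include <assert.h>
#include <stdint.h>
#include <string.h>

int main(void)
{
   /* uvec2 composite: component 0 holds the low dword */
   uint32_t vec2[2] = { 0x89abcdefu, 0x01234567u };
   uint64_t u64;
   /* the bitcast the deleted SPIR-V path performed */
   memcpy(&u64, vec2, sizeof(u64));
   assert(u64 == 0x0123456789abcdefull); /* matches pack_64_2x32 */
   return 0;
}
```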
```diff
@@ -2194,7 +2179,6 @@ emit_load_shared(struct ntv_context *ctx, nir_intrinsic_instr *intr)
    SpvId dest_type = get_dest_type(ctx, &intr->dest, nir_type_uint);
    unsigned num_components = nir_dest_num_components(intr->dest);
    unsigned bit_size = nir_dest_bit_size(intr->dest);
-   bool qword = bit_size == 64;
    SpvId uint_type = get_uvec_type(ctx, 32, 1);
    SpvId ptr_type = spirv_builder_type_pointer(&ctx->builder,
                                                SpvStorageClassWorkgroup,
```
```diff
@@ -2203,17 +2187,10 @@ emit_load_shared(struct ntv_context *ctx, nir_intrinsic_instr *intr)
    SpvId constituents[NIR_MAX_VEC_COMPONENTS];
    /* need to convert array -> vec */
    for (unsigned i = 0; i < num_components; i++) {
-      SpvId parts[2];
-      for (unsigned j = 0; j < 1 + !!qword; j++) {
-         SpvId member = spirv_builder_emit_access_chain(&ctx->builder, ptr_type,
-                                                        ctx->shared_block_var, &offset, 1);
-         parts[j] = spirv_builder_emit_load(&ctx->builder, uint_type, member);
-         offset = emit_binop(ctx, SpvOpIAdd, uint_type, offset, emit_uint_const(ctx, 32, 1));
-      }
-      if (qword)
-         constituents[i] = spirv_builder_emit_composite_construct(&ctx->builder, get_uvec_type(ctx, 64, 1), parts, 2);
-      else
-         constituents[i] = parts[0];
+      SpvId member = spirv_builder_emit_access_chain(&ctx->builder, ptr_type,
+                                                     ctx->shared_block_var, &offset, 1);
+      constituents[i] = spirv_builder_emit_load(&ctx->builder, uint_type, member);
+      offset = emit_binop(ctx, SpvOpIAdd, uint_type, offset, emit_uint_const(ctx, 32, 1));
    }
    SpvId result;
    if (num_components > 1)
```
```diff
@@ -2258,15 +2235,11 @@ emit_store_shared(struct ntv_context *ctx, nir_intrinsic_instr *intr)
 static void
 emit_load_push_const(struct ntv_context *ctx, nir_intrinsic_instr *intr)
 {
-   unsigned bit_size = nir_dest_bit_size(intr->dest);
    SpvId uint_type = get_uvec_type(ctx, 32, 1);
+   SpvId load_type = get_uvec_type(ctx, 32, 1);
 
    /* number of components being loaded */
    unsigned num_components = nir_dest_num_components(intr->dest);
-   /* we need to grab 2x32 to fill the 64bit value */
-   if (bit_size == 64)
-      num_components *= 2;
-   SpvId constituents[NIR_MAX_VEC_COMPONENTS * 2];
+   SpvId constituents[NIR_MAX_VEC_COMPONENTS];
    SpvId result;
 
```
```diff
@@ -2298,18 +2271,6 @@ emit_load_push_const(struct ntv_context *ctx, nir_intrinsic_instr *intr)
       offset = emit_binop(ctx, SpvOpIAdd, uint_type, offset, one);
    }
 
-   /* if we're loading a 64bit value, we have to reassemble all the u32 values we've loaded into u64 values
-    * by creating uvec2 composites and bitcasting them to u64 values
-    */
-   if (bit_size == 64) {
-      num_components /= 2;
-      type = get_uvec_type(ctx, 64, num_components);
-      SpvId u64_type = get_uvec_type(ctx, 64, 1);
-      for (unsigned i = 0; i < num_components; i++) {
-         constituents[i] = spirv_builder_emit_composite_construct(&ctx->builder, get_uvec_type(ctx, 32, 2), constituents + i * 2, 2);
-         constituents[i] = emit_bitcast(ctx, u64_type, constituents[i]);
-      }
-   }
    /* if loading more than 1 value, reassemble the results into the desired type,
    * otherwise just use the loaded result
    */
```
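The next hunk, in the second changed file, gates the rewrite on `shaderInt64`, which is zink's cached copy of the standard Vulkan physical-device feature bit. A minimal sketch of the underlying query, with zink's own caching layer (`screen->info.feats`) omitted and the helper name being illustrative:

```c
#include <vulkan/vulkan.h>

/* Sketch: where has_int64 ultimately comes from.  zink caches
 * VkPhysicalDeviceFeatures at screen creation; this shows the raw
 * Vulkan query for the shaderInt64 feature bit. */
static VkBool32 supports_shader_int64(VkPhysicalDevice pdev)
{
   VkPhysicalDeviceFeatures feats;
   vkGetPhysicalDeviceFeatures(pdev, &feats);
   return feats.shaderInt64;
}
```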
```diff
@@ -640,16 +640,54 @@ decompose_attribs(nir_shader *nir, uint32_t decomposed_attrs, uint32_t decompose
 static bool
 rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
 {
+   struct zink_screen *screen = data;
+   const bool has_int64 = screen->info.feats.features.shaderInt64;
    if (instr->type != nir_instr_type_intrinsic)
       return false;
    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+   b->cursor = nir_before_instr(instr);
    switch (intr->intrinsic) {
    case nir_intrinsic_load_ssbo:
-   case nir_intrinsic_load_ubo:
    case nir_intrinsic_load_ubo_vec4:
-      b->cursor = nir_before_instr(instr);
-      nir_instr_rewrite_src_ssa(instr, &intr->src[1], nir_udiv_imm(b, intr->src[1].ssa, MIN2(nir_dest_bit_size(intr->dest), 32) / 8));
+   case nir_intrinsic_load_ubo: {
+      /* ubo0 can have unaligned 64bit loads, particularly for bindless texture ids */
+      bool force_2x32 = intr->intrinsic == nir_intrinsic_load_ubo &&
+                        nir_src_as_uint(intr->src[0]) == 0 &&
+                        nir_dest_bit_size(intr->dest) == 64 &&
+                        nir_intrinsic_align_offset(intr) % 8 != 0;
+      nir_instr_rewrite_src_ssa(instr, &intr->src[1], nir_udiv_imm(b, intr->src[1].ssa,
+                                                                   (force_2x32 ? 32 : nir_dest_bit_size(intr->dest)) / 8));
+      /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
+      if (force_2x32 || (nir_dest_bit_size(intr->dest) == 64 && !has_int64)) {
+         /* this is always scalarized */
+         assert(intr->dest.ssa.num_components == 1);
+         /* rewrite as 2x32 */
+         nir_ssa_def *load;
+         if (intr->intrinsic == nir_intrinsic_load_ssbo)
+            load = nir_load_ssbo(b, 2, 32, intr->src[0].ssa, intr->src[1].ssa, .align_mul = 4, .align_offset = 0);
+         else
+            load = nir_load_ubo(b, 2, 32, intr->src[0].ssa, intr->src[1].ssa, .align_mul = 4, .align_offset = 0, .range = 4);
+         nir_intrinsic_set_access(nir_instr_as_intrinsic(load->parent_instr), nir_intrinsic_access(intr));
+         /* cast back to 64bit */
+         nir_ssa_def *casted = nir_pack_64_2x32(b, load);
+         nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted);
+         nir_instr_remove(instr);
+      }
       return true;
+   }
+   case nir_intrinsic_load_shared:
+      /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
+      if (nir_dest_bit_size(intr->dest) == 64 && !has_int64) {
+         /* this is always scalarized */
+         assert(intr->dest.ssa.num_components == 1);
+         /* rewrite as 2x32 */
+         nir_ssa_def *load = nir_load_shared(b, 2, 32, intr->src[0].ssa, .align_mul = 4, .align_offset = 0);
+         /* cast back to 64bit */
+         nir_ssa_def *casted = nir_pack_64_2x32(b, load);
+         nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted);
+         nir_instr_remove(instr);
+         return true;
+      }
+      break;
    case nir_intrinsic_store_ssbo:
    default:
       break;
```
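One subtlety in the hunk above: the pass also changes the units of `src[1]`. Offsets were previously always divided down to 32bit units (`MIN2(bit_size, 32) / 8`); now they are divided by the full load size, except when `force_2x32` keeps an unaligned ubo0 64bit load in 32bit units. A small self-contained check of that divisor logic (helper name is illustrative, not from the commit):

```c
#include <assert.h>
#include <stdbool.h>

/* Mirrors the divisor picked by rewrite_bo_access_instr: natural loads
 * index in bit_size/8-byte units, forced 2x32 loads stay in 4-byte units. */
static unsigned offset_divisor(unsigned bit_size, bool force_2x32)
{
   return (force_2x32 ? 32 : bit_size) / 8;
}

int main(void)
{
   assert(offset_divisor(64, false) == 8); /* natural 64bit load */
   assert(offset_divisor(64, true) == 4);  /* unaligned ubo0: 2x32 */
   assert(offset_divisor(32, false) == 4);
   return 0;
}
```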
```diff
@@ -658,9 +696,9 @@ rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
 }
 
 static bool
-rewrite_bo_access(nir_shader *shader)
+rewrite_bo_access(nir_shader *shader, struct zink_screen *screen)
 {
-   return nir_shader_instructions_pass(shader, rewrite_bo_access_instr, nir_metadata_dominance, NULL);
+   return nir_shader_instructions_pass(shader, rewrite_bo_access_instr, nir_metadata_dominance, screen);
 }
 
 static void
```
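The wrapper change above is how the screen reaches the per-instruction callback: `nir_shader_instructions_pass` forwards its final argument verbatim as the callback's `data` pointer, which previously was `NULL`. A toy model of that pattern (names are illustrative, not the real NIR API):

```c
#include <stdbool.h>
#include <stddef.h>

/* Toy model: the walker forwards an opaque data pointer to the
 * callback, which is how rewrite_bo_access now hands the zink_screen
 * through instead of NULL. */
typedef bool (*instr_cb)(int instr, void *data);

static bool walk(const int *instrs, size_t n, instr_cb cb, void *data)
{
   bool progress = false;
   for (size_t i = 0; i < n; i++)
      progress |= cb(instrs[i], data);
   return progress;
}

static bool count_positive(int instr, void *data)
{
   int *count = data;
   if (instr <= 0)
      return false;
   (*count)++;
   return true;
}

int main(void)
{
   int instrs[] = { 3, -1, 7 };
   int count = 0;
   walk(instrs, 3, count_positive, &count);
   return count == 2 ? 0 : 1;
}
```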
```diff
@@ -900,7 +938,7 @@ zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, nir_shad
    }
    if (screen->driconf.inline_uniforms) {
       NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
-      NIR_PASS_V(nir, rewrite_bo_access);
+      NIR_PASS_V(nir, rewrite_bo_access, screen);
    }
    if (inlined_uniforms) {
       optimize_nir(nir);
```
```diff
@@ -1417,7 +1455,7 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
    /* run in compile if there could be inlined uniforms */
    if (!screen->driconf.inline_uniforms) {
       NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
-      NIR_PASS_V(nir, rewrite_bo_access);
+      NIR_PASS_V(nir, rewrite_bo_access, screen);
    }
 
    if (zink_debug & ZINK_DEBUG_NIR) {
```