zink: move all 64-32bit shader load rewriting to nir pass

this also enables natural 64bit loads on drivers that support it

Reviewed-by: Dave Airlie <airlied@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13484>
Author: Mike Blumenkrantz, 2021-10-20 10:02:08 -04:00 (committed by Marge Bot)
parent 3a1ecd1e8c
commit 150d6ee97e
2 changed files with 57 additions and 58 deletions
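The core of the change: instead of nir_to_spirv open-coding 64bit loads as pairs of 32bit loads, a NIR pass now does the split up front. Conceptually it reduces to the following (a minimal sketch using the same builder helpers as the pass below; lower_64bit_ubo_load is an illustrative name, not a function from this patch):

/* illustrative only: a scalar 64bit UBO load becomes a 2x32 load at the
 * same (already 32bit-scaled) offset, packed back into one 64bit value;
 * the real pass below also handles SSBO and shared-memory loads
 */
static nir_ssa_def *
lower_64bit_ubo_load(nir_builder *b, nir_intrinsic_instr *intr)
{
   b->cursor = nir_before_instr(&intr->instr);
   /* load two 32bit words instead of one 64bit word */
   nir_ssa_def *load = nir_load_ubo(b, 2, 32, intr->src[0].ssa, intr->src[1].ssa,
                                    .align_mul = 4, .align_offset = 0, .range = 4);
   /* fuse the low/high words back into a single 64bit value */
   return nir_pack_64_2x32(b, load);
}

On drivers exposing shaderInt64 the pass leaves 64bit loads untouched, so the SPIR-V backend can emit them natively.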

src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c

@@ -49,10 +49,10 @@ struct ntv_context {
    gl_shader_stage stage;
    const struct zink_so_info *so_info;
 
-   SpvId ubos[PIPE_MAX_CONSTANT_BUFFERS][3]; //8, 16, 32
+   SpvId ubos[PIPE_MAX_CONSTANT_BUFFERS][5]; //8, 16, 32, unused, 64
    nir_variable *ubo_vars[PIPE_MAX_CONSTANT_BUFFERS];
 
-   SpvId ssbos[PIPE_MAX_SHADER_BUFFERS][3]; //8, 16, 32
+   SpvId ssbos[PIPE_MAX_SHADER_BUFFERS][5]; //8, 16, 32, unused, 64
    nir_variable *ssbo_vars[PIPE_MAX_SHADER_BUFFERS];
    SpvId image_types[PIPE_MAX_SAMPLERS];
    SpvId images[PIPE_MAX_SAMPLERS];
@@ -1915,9 +1915,9 @@ emit_load_bo(struct ntv_context *ctx, nir_intrinsic_instr *intr)
    bool ssbo = intr->intrinsic == nir_intrinsic_load_ssbo;
    assert(const_block_index); // no dynamic indexing for now
 
-   unsigned idx = 0;
    unsigned bit_size = nir_dest_bit_size(intr->dest);
-   idx = MIN2(bit_size, 32) >> 4;
+   assert(bit_size <= 64);
+   unsigned idx = bit_size >> 4;
    if (ssbo) {
       assert(idx < ARRAY_SIZE(ctx->ssbos[0]));
       if (!ctx->ssbos[const_block_index->u32][idx])
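Why the alias arrays above grew from [3] to [5]: the slot index is now derived directly from the bit size, which leaves one slot empty (a note, not code from the patch):

/* idx = bit_size >> 4 maps the supported sizes onto the [5] alias arrays:
 *    8 >> 4 == 0,  16 >> 4 == 1,  32 >> 4 == 2,  64 >> 4 == 4
 * slot 3 (a would-be 48bit type) is the "unused" entry in the comment above
 */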
@@ -1928,15 +1928,12 @@ emit_load_bo(struct ntv_context *ctx, nir_intrinsic_instr *intr)
          emit_bo(ctx, ctx->ubo_vars[const_block_index->u32], nir_dest_bit_size(intr->dest));
    }
    SpvId bo = ssbo ? ctx->ssbos[const_block_index->u32][idx] : ctx->ubos[const_block_index->u32][idx];
-   SpvId uint_type = get_uvec_type(ctx, MIN2(bit_size, 32), 1);
+   SpvId uint_type = get_uvec_type(ctx, bit_size, 1);
    SpvId one = emit_uint_const(ctx, 32, 1);
 
    /* number of components being loaded */
    unsigned num_components = nir_dest_num_components(intr->dest);
-   /* we need to grab 2x32 to fill the 64bit value */
-   if (bit_size == 64)
-      num_components *= 2;
-   SpvId constituents[NIR_MAX_VEC_COMPONENTS * 2];
+   SpvId constituents[NIR_MAX_VEC_COMPONENTS];
    SpvId result;
 
    /* destination type for the load */
@@ -1950,7 +1947,7 @@ emit_load_bo(struct ntv_context *ctx, nir_intrinsic_instr *intr)
    /* our generated uniform has a memory layout like
     *
     * struct {
-    *    uint base[array_size];
+    *    uintN base[array_size];
     * };
     *
     * first, access 'base'
@@ -1983,18 +1980,6 @@ emit_load_bo(struct ntv_context *ctx, nir_intrinsic_instr *intr)
       offset = emit_binop(ctx, SpvOpIAdd, uint_type, offset, one);
    }
 
-   /* if we're loading a 64bit value, we have to reassemble all the u32 values we've loaded into u64 values
-    * by creating uvec2 composites and bitcasting them to u64 values
-    */
-   if (bit_size == 64) {
-      num_components /= 2;
-      type = get_uvec_type(ctx, 64, num_components);
-      SpvId u64_type = get_uvec_type(ctx, 64, 1);
-      for (unsigned i = 0; i < num_components; i++) {
-         constituents[i] = spirv_builder_emit_composite_construct(&ctx->builder, get_uvec_type(ctx, 32, 2), constituents + i * 2, 2);
-         constituents[i] = emit_bitcast(ctx, u64_type, constituents[i]);
-      }
-   }
    /* if loading more than 1 value, reassemble the results into the desired type,
     * otherwise just use the loaded result
     */
@@ -2194,7 +2179,6 @@ emit_load_shared(struct ntv_context *ctx, nir_intrinsic_instr *intr)
    SpvId dest_type = get_dest_type(ctx, &intr->dest, nir_type_uint);
    unsigned num_components = nir_dest_num_components(intr->dest);
    unsigned bit_size = nir_dest_bit_size(intr->dest);
-   bool qword = bit_size == 64;
    SpvId uint_type = get_uvec_type(ctx, 32, 1);
    SpvId ptr_type = spirv_builder_type_pointer(&ctx->builder,
                                                SpvStorageClassWorkgroup,
@@ -2203,17 +2187,10 @@ emit_load_shared(struct ntv_context *ctx, nir_intrinsic_instr *intr)
    SpvId constituents[NIR_MAX_VEC_COMPONENTS];
    /* need to convert array -> vec */
    for (unsigned i = 0; i < num_components; i++) {
-      SpvId parts[2];
-      for (unsigned j = 0; j < 1 + !!qword; j++) {
-         SpvId member = spirv_builder_emit_access_chain(&ctx->builder, ptr_type,
-                                                        ctx->shared_block_var, &offset, 1);
-         parts[j] = spirv_builder_emit_load(&ctx->builder, uint_type, member);
-         offset = emit_binop(ctx, SpvOpIAdd, uint_type, offset, emit_uint_const(ctx, 32, 1));
-      }
-      if (qword)
-         constituents[i] = spirv_builder_emit_composite_construct(&ctx->builder, get_uvec_type(ctx, 64, 1), parts, 2);
-      else
-         constituents[i] = parts[0];
+      SpvId member = spirv_builder_emit_access_chain(&ctx->builder, ptr_type,
+                                                     ctx->shared_block_var, &offset, 1);
+      constituents[i] = spirv_builder_emit_load(&ctx->builder, uint_type, member);
+      offset = emit_binop(ctx, SpvOpIAdd, uint_type, offset, emit_uint_const(ctx, 32, 1));
    }
    SpvId result;
    if (num_components > 1)
@@ -2258,15 +2235,11 @@ emit_store_shared(struct ntv_context *ctx, nir_intrinsic_instr *intr)
 static void
 emit_load_push_const(struct ntv_context *ctx, nir_intrinsic_instr *intr)
 {
-   unsigned bit_size = nir_dest_bit_size(intr->dest);
    SpvId uint_type = get_uvec_type(ctx, 32, 1);
    SpvId load_type = get_uvec_type(ctx, 32, 1);
 
    /* number of components being loaded */
    unsigned num_components = nir_dest_num_components(intr->dest);
-   /* we need to grab 2x32 to fill the 64bit value */
-   if (bit_size == 64)
-      num_components *= 2;
    SpvId constituents[NIR_MAX_VEC_COMPONENTS * 2];
    SpvId result;
@@ -2298,18 +2271,6 @@ emit_load_push_const(struct ntv_context *ctx, nir_intrinsic_instr *intr)
       offset = emit_binop(ctx, SpvOpIAdd, uint_type, offset, one);
    }
 
-   /* if we're loading a 64bit value, we have to reassemble all the u32 values we've loaded into u64 values
-    * by creating uvec2 composites and bitcasting them to u64 values
-    */
-   if (bit_size == 64) {
-      num_components /= 2;
-      type = get_uvec_type(ctx, 64, num_components);
-      SpvId u64_type = get_uvec_type(ctx, 64, 1);
-      for (unsigned i = 0; i < num_components; i++) {
-         constituents[i] = spirv_builder_emit_composite_construct(&ctx->builder, get_uvec_type(ctx, 32, 2), constituents + i * 2, 2);
-         constituents[i] = emit_bitcast(ctx, u64_type, constituents[i]);
-      }
-   }
    /* if loading more than 1 value, reassemble the results into the desired type,
     * otherwise just use the loaded result
     */
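With the reassembly blocks gone, a 64bit load on a shaderInt64-capable driver just runs the existing per-component loop at 64bit width, since uint_type is now built from the real bit_size. Roughly (a sketch; bo_ptr_type and indices stand in for the surrounding setup code):

/* each 64bit component is a single direct load -- no more
 * uvec2 OpCompositeConstruct + OpBitcast reassembly step */
SpvId member = spirv_builder_emit_access_chain(&ctx->builder, bo_ptr_type,
                                               bo, indices, 2);
constituents[i] = spirv_builder_emit_load(&ctx->builder, uint_type, member);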

src/gallium/drivers/zink/zink_compiler.c

@@ -640,16 +640,54 @@ decompose_attribs(nir_shader *nir, uint32_t decomposed_attrs, uint32_t decompose
 static bool
 rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
 {
+   struct zink_screen *screen = data;
+   const bool has_int64 = screen->info.feats.features.shaderInt64;
    if (instr->type != nir_instr_type_intrinsic)
       return false;
    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+   b->cursor = nir_before_instr(instr);
    switch (intr->intrinsic) {
    case nir_intrinsic_load_ssbo:
-   case nir_intrinsic_load_ubo:
-   case nir_intrinsic_load_ubo_vec4:
-      b->cursor = nir_before_instr(instr);
-      nir_instr_rewrite_src_ssa(instr, &intr->src[1], nir_udiv_imm(b, intr->src[1].ssa, MIN2(nir_dest_bit_size(intr->dest), 32) / 8));
+   case nir_intrinsic_load_ubo: {
+      /* ubo0 can have unaligned 64bit loads, particularly for bindless texture ids */
+      bool force_2x32 = intr->intrinsic == nir_intrinsic_load_ubo &&
+                        nir_src_as_uint(intr->src[0]) == 0 &&
+                        nir_dest_bit_size(intr->dest) == 64 &&
+                        nir_intrinsic_align_offset(intr) % 8 != 0;
+      nir_instr_rewrite_src_ssa(instr, &intr->src[1], nir_udiv_imm(b, intr->src[1].ssa,
+                                (force_2x32 ? 32 : nir_dest_bit_size(intr->dest)) / 8));
+      /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
+      if (force_2x32 || (nir_dest_bit_size(intr->dest) == 64 && !has_int64)) {
+         /* this is always scalarized */
+         assert(intr->dest.ssa.num_components == 1);
+         /* rewrite as 2x32 */
+         nir_ssa_def *load;
+         if (intr->intrinsic == nir_intrinsic_load_ssbo)
+            load = nir_load_ssbo(b, 2, 32, intr->src[0].ssa, intr->src[1].ssa, .align_mul = 4, .align_offset = 0);
+         else
+            load = nir_load_ubo(b, 2, 32, intr->src[0].ssa, intr->src[1].ssa, .align_mul = 4, .align_offset = 0, .range = 4);
+         nir_intrinsic_set_access(nir_instr_as_intrinsic(load->parent_instr), nir_intrinsic_access(intr));
+         /* cast back to 64bit */
+         nir_ssa_def *casted = nir_pack_64_2x32(b, load);
+         nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted);
+         nir_instr_remove(instr);
+      }
       return true;
+   }
+   case nir_intrinsic_load_shared:
+      /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
+      if (nir_dest_bit_size(intr->dest) == 64 && !has_int64) {
+         /* this is always scalarized */
+         assert(intr->dest.ssa.num_components == 1);
+         /* rewrite as 2x32 */
+         nir_ssa_def *load = nir_load_shared(b, 2, 32, intr->src[0].ssa, .align_mul = 4, .align_offset = 0);
+         /* cast back to 64bit */
+         nir_ssa_def *casted = nir_pack_64_2x32(b, load);
+         nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted);
+         nir_instr_remove(instr);
+         return true;
+      }
+      break;
    case nir_intrinsic_store_ssbo:
    default:
       break;
@@ -658,9 +696,9 @@ rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
 }
 
 static bool
-rewrite_bo_access(nir_shader *shader)
+rewrite_bo_access(nir_shader *shader, struct zink_screen *screen)
 {
-   return nir_shader_instructions_pass(shader, rewrite_bo_access_instr, nir_metadata_dominance, NULL);
+   return nir_shader_instructions_pass(shader, rewrite_bo_access_instr, nir_metadata_dominance, screen);
 }
 
 static void
@@ -900,7 +938,7 @@ zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, nir_shad
    }
    if (screen->driconf.inline_uniforms) {
       NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
-      NIR_PASS_V(nir, rewrite_bo_access);
+      NIR_PASS_V(nir, rewrite_bo_access, screen);
    }
    if (inlined_uniforms) {
       optimize_nir(nir);
@@ -1417,7 +1455,7 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
    /* run in compile if there could be inlined uniforms */
    if (!screen->driconf.inline_uniforms) {
       NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
-      NIR_PASS_V(nir, rewrite_bo_access);
+      NIR_PASS_V(nir, rewrite_bo_access, screen);
    }
 
    if (zink_debug & ZINK_DEBUG_NIR) {