ac/nir/ngg,ac/llvm,aco: save nogs ngg culling one lds dword

TES rel patch id is <256, so we can use an existing unused LDS
byte instead of extra dword.

To ease the programing, change the index of repacked_arg_vars
for these variables.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18832>
This commit is contained in:
Qiang Yu 2022-10-11 16:29:44 +08:00 committed by Marge Bot
parent 66d1fa9666
commit 13fb7f8f2c
3 changed files with 28 additions and 11 deletions

View file

@ -212,6 +212,8 @@ enum {
lds_es_exporter_tid = 17,
/* bit i is set when the i'th clip distance of a vertex is negative */
lds_es_clipdist_neg_mask = 18,
/* TES only, relative patch ID, less than max workgroup size */
lds_es_tes_rel_patch_id = 19,
/* Repacked arguments - also listed separately for VS and TES */
lds_es_arg_0 = 20,
@ -223,8 +225,7 @@ enum {
/* TES arguments which need to be repacked */
lds_es_tes_u = 20,
lds_es_tes_v = 24,
lds_es_tes_rel_patch_id = 28,
lds_es_tes_patch_id = 32,
lds_es_tes_patch_id = 28,
};
typedef struct {
@ -872,9 +873,9 @@ cleanup_culling_shader_after_dce(nir_shader *shader,
if (!uses_tes_v)
progress |= remove_compacted_arg(state, &b, 1);
if (!uses_tes_rel_patch_id)
progress |= remove_compacted_arg(state, &b, 2);
if (!uses_tes_patch_id)
progress |= remove_compacted_arg(state, &b, 3);
if (!uses_tes_patch_id)
progress |= remove_compacted_arg(state, &b, 2);
}
return progress;
@ -925,6 +926,16 @@ compact_vertices_after_culling(nir_builder *b,
nogs_state->compact_arg_stores[i] = &store->instr;
}
/* TES rel patch id does not cost extra dword */
if (b->shader->info.stage == MESA_SHADER_TESS_EVAL) {
nir_ssa_def *arg_val = nir_load_var(b, repacked_arg_vars[3]);
nir_intrinsic_instr *store =
nir_store_shared(b, nir_u2u8(b, arg_val), exporter_addr,
.base = lds_es_tes_rel_patch_id);
nogs_state->compact_arg_stores[3] = &store->instr;
}
}
nir_pop_if(b, if_es_accepted);
@ -947,6 +958,12 @@ compact_vertices_after_culling(nir_builder *b,
nir_ssa_def *arg_val = nir_load_shared(b, 1, 32, es_vertex_lds_addr, .base = lds_es_arg_0 + 4u * i);
nir_store_var(b, repacked_arg_vars[i], arg_val, 0x1u);
}
if (b->shader->info.stage == MESA_SHADER_TESS_EVAL) {
nir_ssa_def *arg_val = nir_load_shared(b, 1, 8, es_vertex_lds_addr,
.base = lds_es_tes_rel_patch_id);
nir_store_var(b, repacked_arg_vars[3], nir_u2u32(b, arg_val), 0x1u);
}
}
nir_push_else(b, if_packed_es_thread);
{
@ -1289,7 +1306,7 @@ ngg_nogs_get_culling_pervertex_lds_size(gl_shader_stage stage,
max_args = uses_instance_id ? 2 : 1;
} else {
assert(stage == MESA_SHADER_TESS_EVAL);
max_args = uses_primitive_id ? 4 : 3;
max_args = uses_primitive_id ? 3 : 2;
}
if (max_exported_args)
@ -1369,9 +1386,9 @@ add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_c
nir_ssa_def *tess_coord = nir_load_tess_coord(b);
nir_store_var(b, repacked_arg_vars[0], nir_channel(b, tess_coord, 0), 0x1u);
nir_store_var(b, repacked_arg_vars[1], nir_channel(b, tess_coord, 1), 0x1u);
nir_store_var(b, repacked_arg_vars[2], nir_load_tess_rel_patch_id_amd(b), 0x1u);
nir_store_var(b, repacked_arg_vars[3], nir_load_tess_rel_patch_id_amd(b), 0x1u);
if (uses_tess_primitive_id)
nir_store_var(b, repacked_arg_vars[3], nir_load_primitive_id(b), 0x1u);
nir_store_var(b, repacked_arg_vars[2], nir_load_primitive_id(b), 0x1u);
} else {
unreachable("Should be VS or TES.");
}

View file

@ -9176,8 +9176,8 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
ctx->arg_temps[ctx->args->ac.tes_u.arg_index] = get_ssa_temp(ctx, instr->src[0].ssa);
ctx->arg_temps[ctx->args->ac.tes_v.arg_index] = get_ssa_temp(ctx, instr->src[1].ssa);
ctx->arg_temps[ctx->args->ac.tes_rel_patch_id.arg_index] =
get_ssa_temp(ctx, instr->src[2].ssa);
ctx->arg_temps[ctx->args->ac.tes_patch_id.arg_index] = get_ssa_temp(ctx, instr->src[3].ssa);
get_ssa_temp(ctx, instr->src[3].ssa);
ctx->arg_temps[ctx->args->ac.tes_patch_id.arg_index] = get_ssa_temp(ctx, instr->src[2].ssa);
break;
}
case nir_intrinsic_load_force_vrs_rates_amd: {

View file

@ -4312,8 +4312,8 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
case nir_intrinsic_overwrite_tes_arguments_amd:
ctx->abi->tes_u_replaced = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0]));
ctx->abi->tes_v_replaced = ac_to_float(&ctx->ac, get_src(ctx, instr->src[1]));
ctx->abi->tes_rel_patch_id_replaced = get_src(ctx, instr->src[2]);
ctx->abi->tes_patch_id_replaced = get_src(ctx, instr->src[3]);
ctx->abi->tes_rel_patch_id_replaced = get_src(ctx, instr->src[3]);
ctx->abi->tes_patch_id_replaced = get_src(ctx, instr->src[2]);
break;
case nir_intrinsic_export_primitive_amd: {
struct ac_ngg_prim prim = {0};