ac/llvm: implement a bunch of NIR AMD intrinsics for NGG

Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12570>
This commit is contained in:
Marek Olšák 2021-08-20 22:09:15 -04:00 committed by Marge Bot
parent a33602b1f9
commit 2e95ad1433
4 changed files with 93 additions and 0 deletions

View file

@ -4597,6 +4597,22 @@ void ac_build_sendmsg_gs_alloc_req(struct ac_llvm_context *ctx, LLVMValueRef wav
ac_build_endif(ctx, 5020);
}
LLVMValueRef ac_pack_edgeflags_for_export(struct ac_llvm_context *ctx,
                                          const struct ac_shader_args *args)
{
   /* The edge flags sit in bits 8, 9 and 10 of gs_invocation_id and have to
    * end up in bits 9, 19 and 29. This is done with an and/mul/and sequence:
    *    extracted = v_and_b32 gs_invocation_id, 0x700
    *    shifted   = v_mul_u32_u24 extracted, 0x80402u
    *    result    = v_and_b32 shifted, 0x20080200
    *
    * The multiplier has bits 1, 10 and 19 set, so every input bit is copied
    * to three positions (shifted left by 1, 10 and 19). The copies never
    * overlap, so there are no carries, and the final mask keeps only
    * 8->9, 9->19 and 10->29.
    */
   LLVMValueRef invocation_id = ac_get_arg(ctx, args->gs_invocation_id);

   /* Isolate the three edge flag bits. */
   LLVMValueRef extract_mask = LLVMConstInt(ctx->i32, 0x700, 0);
   LLVMValueRef edgeflags = LLVMBuildAnd(ctx->builder, invocation_id, extract_mask, "");

   /* Replicate them to the shifted positions with a single multiply. */
   LLVMValueRef spread_factor = LLVMConstInt(ctx->i32, 0x80402u, 0);
   edgeflags = LLVMBuildMul(ctx->builder, edgeflags, spread_factor, "");

   /* Drop the unwanted copies. */
   LLVMValueRef result_mask = LLVMConstInt(ctx->i32, 0x20080200, 0);
   return LLVMBuildAnd(ctx->builder, edgeflags, result_mask, "");
}
LLVMValueRef ac_pack_prim_export(struct ac_llvm_context *ctx, const struct ac_ngg_prim *prim)
{
/* The prim export format is:

View file

@ -585,6 +585,8 @@ struct ac_ngg_prim {
LLVMValueRef passthrough;
};
/**
 * Extract the edge flags from bits 8, 9 and 10 of the gs_invocation_id
 * shader argument and return them moved to bits 9, 19 and 29 of an i32
 * value; all other bits of the result are zero.
 *
 * \param ctx   LLVM context whose builder receives the emitted instructions
 * \param args  shader arguments; only gs_invocation_id is read
 * \return the packed edge flag bits
 */
LLVMValueRef ac_pack_edgeflags_for_export(struct ac_llvm_context *ctx,
const struct ac_shader_args *args);
LLVMValueRef ac_pack_prim_export(struct ac_llvm_context *ctx, const struct ac_ngg_prim *prim);
void ac_build_export_prim(struct ac_llvm_context *ctx, const struct ac_ngg_prim *prim);

View file

@ -1275,6 +1275,12 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
break;
}
case nir_op_sad_u8x4:
result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.sad.u8", ctx->ac.i32,
(LLVMValueRef[]){src[0], src[1], src[2]}, 3,
AC_FUNC_ATTR_READNONE);
break;
default:
fprintf(stderr, "Unknown NIR alu instr: ");
nir_print_instr(&instr->instr, stderr);
@ -2948,6 +2954,8 @@ static LLVMValueRef visit_load_subgroup_id(struct ac_nir_context *ctx)
result = LLVMBuildAnd(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->tg_size),
LLVMConstInt(ctx->ac.i32, 0xfc0, false), "");
return LLVMBuildLShr(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 6, false), "");
} else if (ctx->args->merged_wave_info.used) {
return ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->merged_wave_info), 24, 4);
} else {
return LLVMConstInt(ctx->ac.i32, 0, false);
}
@ -4093,6 +4101,71 @@ static void visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
cache_policy);
break;
}
case nir_intrinsic_load_packed_passthrough_primitive_amd:
result = ac_get_arg(&ctx->ac, ctx->args->gs_vtx_offset[0]);
break;
case nir_intrinsic_load_initial_edgeflags_amd:
if (ctx->stage == MESA_SHADER_VERTEX && !ctx->info->vs.blit_sgprs_amd)
result = ac_pack_edgeflags_for_export(&ctx->ac, ctx->args);
else
result = ctx->ac.i32_0;
break;
case nir_intrinsic_has_input_vertex_amd: {
LLVMValueRef num =
ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->merged_wave_info), 0, 8);
result = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, ac_get_thread_id(&ctx->ac), num, "");
break;
}
case nir_intrinsic_has_input_primitive_amd: {
LLVMValueRef num =
ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->merged_wave_info), 8, 8);
result = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, ac_get_thread_id(&ctx->ac), num, "");
break;
}
case nir_intrinsic_load_workgroup_num_input_vertices_amd:
result = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->gs_tg_info), 12, 9);
break;
case nir_intrinsic_load_workgroup_num_input_primitives_amd:
result = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->gs_tg_info), 22, 9);
break;
case nir_intrinsic_alloc_vertices_and_primitives_amd:
/* The caller should only call this conditionally for wave 0, so assume that the current
* wave is always wave 0.
*/
ac_build_sendmsg_gs_alloc_req(&ctx->ac, ctx->ac.i32_0,
get_src(ctx, instr->src[0]),
get_src(ctx, instr->src[1]));
break;
case nir_intrinsic_export_primitive_amd: {
struct ac_ngg_prim prim = {0};
prim.passthrough = get_src(ctx, instr->src[0]);
ac_build_export_prim(&ctx->ac, &prim);
break;
}
case nir_intrinsic_export_vertex_amd:
ctx->abi->export_vertex(ctx->abi);
break;
case nir_intrinsic_byte_permute_amd:
   /* Lowered to llvm.amdgcn.perm, which is only used with LLVM 13 and
    * newer; older versions have no implementation here.
    */
   if (LLVM_VERSION_MAJOR < 13) {
      /* A string literal is a non-null pointer, so asserting on it directly
       * can never fail. Negate it so that debug builds actually stop on this
       * unimplemented path instead of silently continuing without a result.
       */
      assert(!"unimplemented byte_permute, LLVM 12 doesn't have amdgcn.perm");
      break;
   }
   /* The three NIR sources are forwarded unchanged as the intrinsic operands. */
   result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.perm", ctx->ac.i32,
                               (LLVMValueRef[]){get_src(ctx, instr->src[0]),
                                                get_src(ctx, instr->src[1]),
                                                get_src(ctx, instr->src[2])},
                               3, AC_FUNC_ATTR_READNONE);
   break;
case nir_intrinsic_lane_permute_16_amd:
result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.permlane16", ctx->ac.i32,
(LLVMValueRef[]){get_src(ctx, instr->src[0]),
get_src(ctx, instr->src[0]),
get_src(ctx, instr->src[1]),
get_src(ctx, instr->src[2]),
ctx->ac.i1false,
ctx->ac.i1false},
6, AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
break;
default:
fprintf(stderr, "Unknown intrinsic: ");
nir_print_instr(&instr->instr, stderr);

View file

@ -68,6 +68,8 @@ struct ac_shader_abi {
/* Varying -> attribute number mapping. Also NIR-only */
unsigned fs_input_attr_indices[MAX_VARYING];
void (*export_vertex)(struct ac_shader_abi *abi);
void (*emit_outputs)(struct ac_shader_abi *abi, unsigned max_outputs, LLVMValueRef *addrs);
void (*emit_vertex)(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addrs);