mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 05:08:08 +02:00
ac/llvm: implement a bunch of NIR AMD intrinsics for NGG
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12570>
This commit is contained in:
parent
a33602b1f9
commit
2e95ad1433
4 changed files with 93 additions and 0 deletions
|
|
@ -4597,6 +4597,22 @@ void ac_build_sendmsg_gs_alloc_req(struct ac_llvm_context *ctx, LLVMValueRef wav
|
|||
ac_build_endif(ctx, 5020);
|
||||
}
|
||||
|
||||
|
||||
LLVMValueRef ac_pack_edgeflags_for_export(struct ac_llvm_context *ctx,
|
||||
const struct ac_shader_args *args)
|
||||
{
|
||||
/* Use the following trick to extract the edge flags:
|
||||
* extracted = v_and_b32 gs_invocation_id, 0x700 ; get edge flags at bits 8, 9, 10
|
||||
* shifted = v_mul_u32_u24 extracted, 0x80402u ; shift the bits: 8->9, 9->19, 10->29
|
||||
* result = v_and_b32 shifted, 0x20080200 ; remove garbage
|
||||
*/
|
||||
LLVMValueRef tmp = LLVMBuildAnd(ctx->builder,
|
||||
ac_get_arg(ctx, args->gs_invocation_id),
|
||||
LLVMConstInt(ctx->i32, 0x700, 0), "");
|
||||
tmp = LLVMBuildMul(ctx->builder, tmp, LLVMConstInt(ctx->i32, 0x80402u, 0), "");
|
||||
return LLVMBuildAnd(ctx->builder, tmp, LLVMConstInt(ctx->i32, 0x20080200, 0), "");
|
||||
}
|
||||
|
||||
LLVMValueRef ac_pack_prim_export(struct ac_llvm_context *ctx, const struct ac_ngg_prim *prim)
|
||||
{
|
||||
/* The prim export format is:
|
||||
|
|
|
|||
|
|
@ -585,6 +585,8 @@ struct ac_ngg_prim {
|
|||
LLVMValueRef passthrough;
|
||||
};
|
||||
|
||||
LLVMValueRef ac_pack_edgeflags_for_export(struct ac_llvm_context *ctx,
|
||||
const struct ac_shader_args *args);
|
||||
LLVMValueRef ac_pack_prim_export(struct ac_llvm_context *ctx, const struct ac_ngg_prim *prim);
|
||||
void ac_build_export_prim(struct ac_llvm_context *ctx, const struct ac_ngg_prim *prim);
|
||||
|
||||
|
|
|
|||
|
|
@ -1275,6 +1275,12 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
|
|||
break;
|
||||
}
|
||||
|
||||
case nir_op_sad_u8x4:
|
||||
result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.sad.u8", ctx->ac.i32,
|
||||
(LLVMValueRef[]){src[0], src[1], src[2]}, 3,
|
||||
AC_FUNC_ATTR_READNONE);
|
||||
break;
|
||||
|
||||
default:
|
||||
fprintf(stderr, "Unknown NIR alu instr: ");
|
||||
nir_print_instr(&instr->instr, stderr);
|
||||
|
|
@ -2948,6 +2954,8 @@ static LLVMValueRef visit_load_subgroup_id(struct ac_nir_context *ctx)
|
|||
result = LLVMBuildAnd(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->tg_size),
|
||||
LLVMConstInt(ctx->ac.i32, 0xfc0, false), "");
|
||||
return LLVMBuildLShr(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 6, false), "");
|
||||
} else if (ctx->args->merged_wave_info.used) {
|
||||
return ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->merged_wave_info), 24, 4);
|
||||
} else {
|
||||
return LLVMConstInt(ctx->ac.i32, 0, false);
|
||||
}
|
||||
|
|
@ -4093,6 +4101,71 @@ static void visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
|
|||
cache_policy);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_packed_passthrough_primitive_amd:
|
||||
result = ac_get_arg(&ctx->ac, ctx->args->gs_vtx_offset[0]);
|
||||
break;
|
||||
case nir_intrinsic_load_initial_edgeflags_amd:
|
||||
if (ctx->stage == MESA_SHADER_VERTEX && !ctx->info->vs.blit_sgprs_amd)
|
||||
result = ac_pack_edgeflags_for_export(&ctx->ac, ctx->args);
|
||||
else
|
||||
result = ctx->ac.i32_0;
|
||||
break;
|
||||
case nir_intrinsic_has_input_vertex_amd: {
|
||||
LLVMValueRef num =
|
||||
ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->merged_wave_info), 0, 8);
|
||||
result = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, ac_get_thread_id(&ctx->ac), num, "");
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_has_input_primitive_amd: {
|
||||
LLVMValueRef num =
|
||||
ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->merged_wave_info), 8, 8);
|
||||
result = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, ac_get_thread_id(&ctx->ac), num, "");
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_workgroup_num_input_vertices_amd:
|
||||
result = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->gs_tg_info), 12, 9);
|
||||
break;
|
||||
case nir_intrinsic_load_workgroup_num_input_primitives_amd:
|
||||
result = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->gs_tg_info), 22, 9);
|
||||
break;
|
||||
case nir_intrinsic_alloc_vertices_and_primitives_amd:
|
||||
/* The caller should only call this conditionally for wave 0, so assume that the current
|
||||
* wave is always wave 0.
|
||||
*/
|
||||
ac_build_sendmsg_gs_alloc_req(&ctx->ac, ctx->ac.i32_0,
|
||||
get_src(ctx, instr->src[0]),
|
||||
get_src(ctx, instr->src[1]));
|
||||
break;
|
||||
case nir_intrinsic_export_primitive_amd: {
|
||||
struct ac_ngg_prim prim = {0};
|
||||
prim.passthrough = get_src(ctx, instr->src[0]);
|
||||
ac_build_export_prim(&ctx->ac, &prim);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_export_vertex_amd:
|
||||
ctx->abi->export_vertex(ctx->abi);
|
||||
break;
|
||||
case nir_intrinsic_byte_permute_amd:
|
||||
if (LLVM_VERSION_MAJOR < 13) {
|
||||
assert("unimplemented byte_permute, LLVM 12 doesn't have amdgcn.perm");
|
||||
break;
|
||||
}
|
||||
result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.perm", ctx->ac.i32,
|
||||
(LLVMValueRef[]){get_src(ctx, instr->src[0]),
|
||||
get_src(ctx, instr->src[1]),
|
||||
get_src(ctx, instr->src[2])},
|
||||
3, AC_FUNC_ATTR_READNONE);
|
||||
break;
|
||||
case nir_intrinsic_lane_permute_16_amd:
|
||||
result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.permlane16", ctx->ac.i32,
|
||||
(LLVMValueRef[]){get_src(ctx, instr->src[0]),
|
||||
get_src(ctx, instr->src[0]),
|
||||
get_src(ctx, instr->src[1]),
|
||||
get_src(ctx, instr->src[2]),
|
||||
ctx->ac.i1false,
|
||||
ctx->ac.i1false},
|
||||
6, AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "Unknown intrinsic: ");
|
||||
nir_print_instr(&instr->instr, stderr);
|
||||
|
|
|
|||
|
|
@ -68,6 +68,8 @@ struct ac_shader_abi {
|
|||
/* Varying -> attribute number mapping. Also NIR-only */
|
||||
unsigned fs_input_attr_indices[MAX_VARYING];
|
||||
|
||||
void (*export_vertex)(struct ac_shader_abi *abi);
|
||||
|
||||
void (*emit_outputs)(struct ac_shader_abi *abi, unsigned max_outputs, LLVMValueRef *addrs);
|
||||
|
||||
void (*emit_vertex)(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addrs);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue