diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c index c605772d647..0f07d90327a 100644 --- a/src/amd/llvm/ac_llvm_build.c +++ b/src/amd/llvm/ac_llvm_build.c @@ -4597,6 +4597,22 @@ void ac_build_sendmsg_gs_alloc_req(struct ac_llvm_context *ctx, LLVMValueRef wav ac_build_endif(ctx, 5020); } + +LLVMValueRef ac_pack_edgeflags_for_export(struct ac_llvm_context *ctx, + const struct ac_shader_args *args) +{ + /* Use the following trick to extract the edge flags (one multiply instead of three shifts): + * extracted = v_and_b32 gs_invocation_id, 0x700 ; get edge flags at bits 8, 9, 10 + * shifted = v_mul_u32_u24 extracted, 0x80402u ; shift by 1, 10 and 19 at once: 8->9, 9->19, 10->29 + * result = v_and_b32 shifted, 0x20080200 ; keep bits 9, 19, 29, drop the other products + */ + LLVMValueRef tmp = LLVMBuildAnd(ctx->builder, + ac_get_arg(ctx, args->gs_invocation_id), + LLVMConstInt(ctx->i32, 0x700, 0), ""); + tmp = LLVMBuildMul(ctx->builder, tmp, LLVMConstInt(ctx->i32, 0x80402u, 0), ""); + return LLVMBuildAnd(ctx->builder, tmp, LLVMConstInt(ctx->i32, 0x20080200, 0), ""); +} + LLVMValueRef ac_pack_prim_export(struct ac_llvm_context *ctx, const struct ac_ngg_prim *prim) { /* The prim export format is: diff --git a/src/amd/llvm/ac_llvm_build.h b/src/amd/llvm/ac_llvm_build.h index 965ba28b7ef..0bdab2872fa 100644 --- a/src/amd/llvm/ac_llvm_build.h +++ b/src/amd/llvm/ac_llvm_build.h @@ -585,6 +585,8 @@ struct ac_ngg_prim { LLVMValueRef passthrough; }; +LLVMValueRef ac_pack_edgeflags_for_export(struct ac_llvm_context *ctx, + const struct ac_shader_args *args); LLVMValueRef ac_pack_prim_export(struct ac_llvm_context *ctx, const struct ac_ngg_prim *prim); void ac_build_export_prim(struct ac_llvm_context *ctx, const struct ac_ngg_prim *prim); diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index 1b63a9234bd..9c09673993f 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -1275,6 +1275,12 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) break; } + case 
nir_op_sad_u8x4: + result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.sad.u8", ctx->ac.i32, + (LLVMValueRef[]){src[0], src[1], src[2]}, 3, + AC_FUNC_ATTR_READNONE); + break; + default: fprintf(stderr, "Unknown NIR alu instr: "); nir_print_instr(&instr->instr, stderr); @@ -2948,6 +2954,8 @@ static LLVMValueRef visit_load_subgroup_id(struct ac_nir_context *ctx) result = LLVMBuildAnd(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->tg_size), LLVMConstInt(ctx->ac.i32, 0xfc0, false), ""); return LLVMBuildLShr(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 6, false), ""); + } else if (ctx->args->merged_wave_info.used) { + return ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->merged_wave_info), 24, 4); } else { return LLVMConstInt(ctx->ac.i32, 0, false); } @@ -4093,6 +4101,71 @@ static void visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins cache_policy); break; } + case nir_intrinsic_load_packed_passthrough_primitive_amd: + result = ac_get_arg(&ctx->ac, ctx->args->gs_vtx_offset[0]); + break; + case nir_intrinsic_load_initial_edgeflags_amd: + if (ctx->stage == MESA_SHADER_VERTEX && !ctx->info->vs.blit_sgprs_amd) + result = ac_pack_edgeflags_for_export(&ctx->ac, ctx->args); + else + result = ctx->ac.i32_0; + break; + case nir_intrinsic_has_input_vertex_amd: { + LLVMValueRef num = + ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->merged_wave_info), 0, 8); + result = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, ac_get_thread_id(&ctx->ac), num, ""); + break; + } + case nir_intrinsic_has_input_primitive_amd: { + LLVMValueRef num = + ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->merged_wave_info), 8, 8); + result = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, ac_get_thread_id(&ctx->ac), num, ""); + break; + } + case nir_intrinsic_load_workgroup_num_input_vertices_amd: + result = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->gs_tg_info), 12, 9); + break; + case nir_intrinsic_load_workgroup_num_input_primitives_amd: + 
result = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->gs_tg_info), 22, 9); + break; + case nir_intrinsic_alloc_vertices_and_primitives_amd: + /* The caller should only call this conditionally for wave 0, so assume that the current + * wave is always wave 0. + */ + ac_build_sendmsg_gs_alloc_req(&ctx->ac, ctx->ac.i32_0, + get_src(ctx, instr->src[0]), + get_src(ctx, instr->src[1])); + break; + case nir_intrinsic_export_primitive_amd: { + struct ac_ngg_prim prim = {0}; + prim.passthrough = get_src(ctx, instr->src[0]); + ac_build_export_prim(&ctx->ac, &prim); + break; + } + case nir_intrinsic_export_vertex_amd: + ctx->abi->export_vertex(ctx->abi); + break; + case nir_intrinsic_byte_permute_amd: + if (LLVM_VERSION_MAJOR < 13) { + assert(!"unimplemented byte_permute, LLVM 12 doesn't have amdgcn.perm"); + break; + } + result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.perm", ctx->ac.i32, + (LLVMValueRef[]){get_src(ctx, instr->src[0]), + get_src(ctx, instr->src[1]), + get_src(ctx, instr->src[2])}, + 3, AC_FUNC_ATTR_READNONE); + break; + case nir_intrinsic_lane_permute_16_amd: + result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.permlane16", ctx->ac.i32, + (LLVMValueRef[]){get_src(ctx, instr->src[0]), + get_src(ctx, instr->src[0]), + get_src(ctx, instr->src[1]), + get_src(ctx, instr->src[2]), + ctx->ac.i1false, + ctx->ac.i1false}, + 6, AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT); + break; default: fprintf(stderr, "Unknown intrinsic: "); nir_print_instr(&instr->instr, stderr); diff --git a/src/amd/llvm/ac_shader_abi.h b/src/amd/llvm/ac_shader_abi.h index 8309ac6fff4..62b04b970ac 100644 --- a/src/amd/llvm/ac_shader_abi.h +++ b/src/amd/llvm/ac_shader_abi.h @@ -68,6 +68,8 @@ struct ac_shader_abi { /* Varying -> attribute number mapping. 
Also NIR-only */ unsigned fs_input_attr_indices[MAX_VARYING]; + void (*export_vertex)(struct ac_shader_abi *abi); + void (*emit_outputs)(struct ac_shader_abi *abi, unsigned max_outputs, LLVMValueRef *addrs); void (*emit_vertex)(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addrs);