mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 13:58:04 +02:00
ac/llvm: add a callback to ac_cull_triangle to generate code in inner-most block
This will reduce jumps in culling code.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11486>
This commit is contained in:
parent
1805572694
commit
12d2df15f1
4 changed files with 19 additions and 6 deletions
|
|
@ -120,7 +120,8 @@ static LLVMValueRef cull_bbox(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4
|
||||||
LLVMValueRef vp_scale[2], LLVMValueRef vp_translate[2],
|
LLVMValueRef vp_scale[2], LLVMValueRef vp_translate[2],
|
||||||
LLVMValueRef small_prim_precision, bool cull_view_xy,
|
LLVMValueRef small_prim_precision, bool cull_view_xy,
|
||||||
bool cull_view_near_z, bool cull_view_far_z, bool cull_small_prims,
|
bool cull_view_near_z, bool cull_view_far_z, bool cull_small_prims,
|
||||||
bool use_halfz_clip_space)
|
bool use_halfz_clip_space, ac_cull_accept_func accept_func,
|
||||||
|
void *userdata)
|
||||||
{
|
{
|
||||||
LLVMBuilderRef builder = ctx->builder;
|
LLVMBuilderRef builder = ctx->builder;
|
||||||
|
|
||||||
|
|
@ -200,6 +201,9 @@ static LLVMValueRef cull_bbox(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4
|
||||||
accepted = LLVMBuildAnd(builder, accepted, visible, "");
|
accepted = LLVMBuildAnd(builder, accepted, visible, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (accept_func)
|
||||||
|
accept_func(ctx, accepted, userdata);
|
||||||
|
|
||||||
LLVMBuildStore(builder, accepted, accepted_var);
|
LLVMBuildStore(builder, accepted, accepted_var);
|
||||||
}
|
}
|
||||||
ac_build_endif(ctx, 10000000);
|
ac_build_endif(ctx, 10000000);
|
||||||
|
|
@ -222,11 +226,13 @@ static LLVMValueRef cull_bbox(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4
|
||||||
* the rasterizer. Set to num_samples / 2^subpixel_bits.
|
* the rasterizer. Set to num_samples / 2^subpixel_bits.
|
||||||
* subpixel_bits are defined by the quantization mode.
|
* subpixel_bits are defined by the quantization mode.
|
||||||
* \param options See ac_cull_options.
|
* \param options See ac_cull_options.
|
||||||
|
* \param accept_func Callback invoked in the inner-most branch where the primitive is accepted.
|
||||||
*/
|
*/
|
||||||
LLVMValueRef ac_cull_triangle(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],
|
LLVMValueRef ac_cull_triangle(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],
|
||||||
LLVMValueRef initially_accepted, LLVMValueRef vp_scale[2],
|
LLVMValueRef initially_accepted, LLVMValueRef vp_scale[2],
|
||||||
LLVMValueRef vp_translate[2], LLVMValueRef small_prim_precision,
|
LLVMValueRef vp_translate[2], LLVMValueRef small_prim_precision,
|
||||||
struct ac_cull_options *options)
|
struct ac_cull_options *options, ac_cull_accept_func accept_func,
|
||||||
|
void *userdata)
|
||||||
{
|
{
|
||||||
struct ac_position_w_info w;
|
struct ac_position_w_info w;
|
||||||
ac_analyze_position_w(ctx, pos, &w);
|
ac_analyze_position_w(ctx, pos, &w);
|
||||||
|
|
@ -244,6 +250,7 @@ LLVMValueRef ac_cull_triangle(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4
|
||||||
/* View culling and small primitive elimination. */
|
/* View culling and small primitive elimination. */
|
||||||
accepted = cull_bbox(ctx, pos, accepted, &w, vp_scale, vp_translate, small_prim_precision,
|
accepted = cull_bbox(ctx, pos, accepted, &w, vp_scale, vp_translate, small_prim_precision,
|
||||||
options->cull_view_xy, options->cull_view_near_z, options->cull_view_far_z,
|
options->cull_view_xy, options->cull_view_near_z, options->cull_view_far_z,
|
||||||
options->cull_small_prims, options->use_halfz_clip_space);
|
options->cull_small_prims, options->use_halfz_clip_space, accept_func,
|
||||||
|
userdata);
|
||||||
return accepted;
|
return accepted;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -48,9 +48,14 @@ struct ac_cull_options {
|
||||||
bool use_halfz_clip_space;
|
bool use_halfz_clip_space;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* Callback invoked in the inner-most branch where the primitive is accepted. */
|
||||||
|
typedef void (*ac_cull_accept_func)(struct ac_llvm_context *ctx, LLVMValueRef accepted,
|
||||||
|
void *userdata);
|
||||||
|
|
||||||
LLVMValueRef ac_cull_triangle(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],
|
LLVMValueRef ac_cull_triangle(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],
|
||||||
LLVMValueRef initially_accepted, LLVMValueRef vp_scale[2],
|
LLVMValueRef initially_accepted, LLVMValueRef vp_scale[2],
|
||||||
LLVMValueRef vp_translate[2], LLVMValueRef small_prim_precision,
|
LLVMValueRef vp_translate[2], LLVMValueRef small_prim_precision,
|
||||||
struct ac_cull_options *options);
|
struct ac_cull_options *options, ac_cull_accept_func accept_func,
|
||||||
|
void *userdata);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -985,7 +985,7 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi, unsigned max_out
|
||||||
/* Tell ES threads whether their vertex survived. */
|
/* Tell ES threads whether their vertex survived. */
|
||||||
ac_build_ifcc(&ctx->ac,
|
ac_build_ifcc(&ctx->ac,
|
||||||
ac_cull_triangle(&ctx->ac, pos, ctx->ac.i1true, vp_scale, vp_translate,
|
ac_cull_triangle(&ctx->ac, pos, ctx->ac.i1true, vp_scale, vp_translate,
|
||||||
small_prim_precision, &options),
|
small_prim_precision, &options, NULL, NULL),
|
||||||
16003);
|
16003);
|
||||||
{
|
{
|
||||||
LLVMBuildStore(builder, ctx->ac.i32_1, gs_accepted);
|
LLVMBuildStore(builder, ctx->ac.i32_1, gs_accepted);
|
||||||
|
|
|
||||||
|
|
@ -673,7 +673,8 @@ void si_build_prim_discard_compute_shader(struct si_shader_context *ctx)
|
||||||
|
|
||||||
LLVMValueRef accepted =
|
LLVMValueRef accepted =
|
||||||
ac_cull_triangle(&ctx->ac, pos, prim_restart_accepted, vp_scale, vp_translate,
|
ac_cull_triangle(&ctx->ac, pos, prim_restart_accepted, vp_scale, vp_translate,
|
||||||
ac_get_arg(&ctx->ac, param_smallprim_precision), &options);
|
ac_get_arg(&ctx->ac, param_smallprim_precision), &options,
|
||||||
|
NULL, NULL);
|
||||||
|
|
||||||
ac_build_optimization_barrier(&ctx->ac, &accepted, false);
|
ac_build_optimization_barrier(&ctx->ac, &accepted, false);
|
||||||
LLVMValueRef accepted_threadmask = ac_get_i1_sgpr_mask(&ctx->ac, accepted);
|
LLVMValueRef accepted_threadmask = ac_get_i1_sgpr_mask(&ctx->ac, accepted);
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue