diff --git a/src/amd/llvm/ac_llvm_cull.c b/src/amd/llvm/ac_llvm_cull.c index 681c186cd35..87d201f0781 100644 --- a/src/amd/llvm/ac_llvm_cull.c +++ b/src/amd/llvm/ac_llvm_cull.c @@ -125,7 +125,9 @@ static LLVMValueRef ac_cull_face(struct ac_llvm_context *ctx, LLVMValueRef pos[3 static void cull_bbox(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4], LLVMValueRef initially_accepted, struct ac_position_w_info *w, LLVMValueRef vp_scale[2], LLVMValueRef vp_translate[2], - LLVMValueRef small_prim_precision, struct ac_cull_options *options, + LLVMValueRef small_prim_precision, + LLVMValueRef clip_half_line_width[2], + struct ac_cull_options *options, ac_cull_accept_func accept_func, void *userdata) { LLVMBuilderRef builder = ctx->builder; @@ -153,6 +155,11 @@ static void cull_bbox(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4], bbox_min[chan] = ac_build_fmin(ctx, bbox_min[chan], pos[2][chan]); bbox_max[chan] = ac_build_fmax(ctx, bbox_max[chan], pos[2][chan]); } + + if (clip_half_line_width[chan]) { + bbox_min[chan] = LLVMBuildFSub(builder, bbox_min[chan], clip_half_line_width[chan], ""); + bbox_max[chan] = LLVMBuildFAdd(builder, bbox_max[chan], clip_half_line_width[chan], ""); + } } /* View culling. */ @@ -238,8 +245,8 @@ static void cull_bbox(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4], void ac_cull_primitive(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4], LLVMValueRef initially_accepted, LLVMValueRef vp_scale[2], LLVMValueRef vp_translate[2], LLVMValueRef small_prim_precision, - struct ac_cull_options *options, ac_cull_accept_func accept_func, - void *userdata) + LLVMValueRef clip_half_line_width[2], struct ac_cull_options *options, + ac_cull_accept_func accept_func, void *userdata) { struct ac_position_w_info w; ac_analyze_position_w(ctx, pos, &w, options->num_vertices); @@ -255,6 +262,6 @@ void ac_cull_primitive(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4], ""); /* View culling and small primitive elimination. */ - cull_bbox(ctx, pos, accepted, &w, vp_scale, vp_translate, small_prim_precision, options, - accept_func, userdata); + cull_bbox(ctx, pos, accepted, &w, vp_scale, vp_translate, small_prim_precision, + clip_half_line_width, options, accept_func, userdata); } diff --git a/src/amd/llvm/ac_llvm_cull.h b/src/amd/llvm/ac_llvm_cull.h index db1dcdde9f7..dc978d3fe04 100644 --- a/src/amd/llvm/ac_llvm_cull.h +++ b/src/amd/llvm/ac_llvm_cull.h @@ -57,7 +57,7 @@ typedef void (*ac_cull_accept_func)(struct ac_llvm_context *ctx, LLVMValueRef ac void ac_cull_primitive(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4], LLVMValueRef initially_accepted, LLVMValueRef vp_scale[2], LLVMValueRef vp_translate[2], LLVMValueRef small_prim_precision, - struct ac_cull_options *options, ac_cull_accept_func accept_func, - void *userdata); + LLVMValueRef clip_half_line_width[2], struct ac_cull_options *options, + ac_cull_accept_func accept_func, void *userdata); #endif diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c index 745dd90eb21..46ad2520f7a 100644 --- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c +++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c @@ -970,9 +970,11 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi) } } + LLVMValueRef clip_half_line_width[2] = {}; + /* Load the viewport state for small prim culling. */ - LLVMValueRef vp = ac_build_load_invariant( - &ctx->ac, ac_get_arg(&ctx->ac, ctx->small_prim_cull_info), ctx->ac.i32_0); + LLVMValueRef ptr = ac_get_arg(&ctx->ac, ctx->small_prim_cull_info); + LLVMValueRef vp = ac_build_load_invariant(&ctx->ac, ptr, ctx->ac.i32_0); vp = LLVMBuildBitCast(builder, vp, ctx->ac.v4f32, ""); LLVMValueRef vp_scale[2], vp_translate[2]; vp_scale[0] = ac_llvm_extract_elem(&ctx->ac, vp, 0); @@ -994,6 +996,13 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi) options.cull_w = true; if (shader->key.ge.opt.ngg_culling & SI_NGG_CULL_LINES) { + ptr = LLVMBuildPointerCast(builder, ptr, + LLVMPointerType(ctx->ac.v2i32, AC_ADDR_SPACE_CONST_32BIT), ""); + LLVMValueRef terms = ac_build_load_to_sgpr(&ctx->ac, ptr, LLVMConstInt(ctx->ac.i32, 2, 0)); + terms = LLVMBuildBitCast(builder, terms, ctx->ac.v2f32, ""); + clip_half_line_width[0] = ac_llvm_extract_elem(&ctx->ac, terms, 0); + clip_half_line_width[1] = ac_llvm_extract_elem(&ctx->ac, terms, 1); + options.num_vertices = 2; assert(!(shader->key.ge.opt.ngg_culling & SI_NGG_CULL_BACK_FACE)); @@ -1012,8 +1021,8 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi) (void*)gs_vtxptr, }; ac_cull_primitive(&ctx->ac, pos, ctx->ac.i1true, vp_scale, vp_translate, - small_prim_precision, &options, - gfx10_build_primitive_accepted, params); + small_prim_precision, clip_half_line_width, + &options, gfx10_build_primitive_accepted, params); } ac_build_endif(&ctx->ac, 16002); ac_build_s_barrier(&ctx->ac); diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 08103a8a6d2..38e40f1405f 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -899,6 +899,8 @@ struct si_saved_cs { struct si_small_prim_cull_info { float scale[2], translate[2]; + float clip_half_line_width[2]; /* line_width * 0.5 in clip space in X and Y directions */ + /* The above fields are uploaded to memory. The below fields are passed via user SGPRs. */ float small_prim_precision; }; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 828b3467e63..f9092482b0a 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -1109,7 +1109,8 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state) } if (sctx->screen->use_ngg_culling && - old_rs->half_pixel_center != rs->half_pixel_center) + (old_rs->half_pixel_center != rs->half_pixel_center || + old_rs->line_width != rs->line_width)) si_mark_atom_dirty(sctx, &sctx->atoms.s.ngg_cull_state); sctx->current_vs_state &= C_VS_STATE_CLAMP_VERTEX_COLOR; diff --git a/src/gallium/drivers/radeonsi/si_state_viewport.c b/src/gallium/drivers/radeonsi/si_state_viewport.c index f5d7a59a52e..81020db405a 100644 --- a/src/gallium/drivers/radeonsi/si_state_viewport.c +++ b/src/gallium/drivers/radeonsi/si_state_viewport.c @@ -45,6 +45,15 @@ void si_get_small_prim_cull_info(struct si_context *sctx, struct si_small_prim_c /* The viewport shouldn't flip the X axis for the small prim culling to work. */ assert(-info.scale[0] + info.translate[0] <= info.scale[0] + info.translate[0]); + /* Compute the line width used by the rasterizer. */ + float line_width = sctx->queued.named.rasterizer->line_width; + if (num_samples == 1) + line_width = roundf(line_width); + line_width = MAX2(line_width, 1); + + info.clip_half_line_width[0] = line_width * 0.5 / fabs(info.scale[0]); + info.clip_half_line_width[1] = line_width * 0.5 / fabs(info.scale[1]); + /* If the Y axis is inverted (OpenGL default framebuffer), reverse it. * This is because the viewport transformation inverts the clip space * bounding box, so min becomes max, which breaks small primitive