mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 10:50:10 +01:00
radeonsi: cull against clip planes, clipvertex, clip/cull distances in shader
The downside is that this duplicates shader code for clip/cull distances in both the position and parameter portions of the shader. Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13811>
This commit is contained in:
parent
881c459191
commit
513bd6acca
6 changed files with 109 additions and 14 deletions
|
|
@ -554,12 +554,14 @@ enum
|
|||
/* Byte 0: Boolean ES thread accepted (unculled) flag.
|
||||
* Byte 1: New ES thread ID, loaded by GS to prepare the prim export value.
|
||||
* Byte 2: TES rel patch ID
|
||||
* Byte 3: Unused
|
||||
* Byte 3: 8-bit clip distance mask: 1 means the clip distance is negative.
|
||||
* The mask from all vertices is AND'ed. If the result is non-zero,
|
||||
* the primitive is culled.
|
||||
*/
|
||||
lds_byte0_accept_flag = 0,
|
||||
lds_byte1_new_thread_id,
|
||||
lds_byte2_tes_rel_patch_id,
|
||||
lds_byte3_unused,
|
||||
lds_byte3_clipdist_neg_mask,
|
||||
|
||||
lds_packed_data = 0, /* lds_byteN_... */
|
||||
lds_pos_cull_x_div_w,
|
||||
|
|
@ -804,6 +806,37 @@ static void gfx10_build_primitive_accepted(struct ac_llvm_context *ac, LLVMValue
|
|||
ac_build_endif(&ctx->ac, 0);
|
||||
}
|
||||
|
||||
/**
 * Emit IR that records whether one clip/cull distance is negative.
 *
 * The distance is compared against 0.0 with an ordered less-than
 * (LLVMRealOLT), so a NaN distance does not set the flag. The 1-bit result
 * is zero-extended to i32, shifted to bit (24 + i) and OR'ed into
 * *packed_data.
 *
 * \param ctx          shader context providing the LLVM builder and constants
 * \param distance     the distance value to test (compared against f32 zero)
 * \param i            index of the distance; selects bit (24 + i). The callers
 *                     visible here pass 0..7, which keeps the bit inside the
 *                     top byte of the 32-bit value.
 * \param packed_data  in/out: the i32 value the flag is OR'ed into
 */
static void add_clipdist_bit(struct si_shader_context *ctx, LLVMValueRef distance, unsigned i,
|
||||
LLVMValueRef *packed_data)
|
||||
{
|
||||
LLVMValueRef neg = LLVMBuildFCmp(ctx->ac.builder, LLVMRealOLT, distance, ctx->ac.f32_0, "");
|
||||
neg = LLVMBuildZExt(ctx->ac.builder, neg, ctx->ac.i32, "");
|
||||
/* Put the negative distance flag into lds_byte3_clipdist_neg_mask,
 * i.e. bits 24..31 of the packed 32-bit value.
 */
|
||||
neg = LLVMBuildShl(ctx->ac.builder, neg, LLVMConstInt(ctx->ac.i32, 24 + i, 0), "");
|
||||
*packed_data = LLVMBuildOr(ctx->ac.builder, *packed_data, neg, "");
|
||||
}
|
||||
|
||||
/**
 * Derive clip distances from a 4-component vertex and record their
 * "negative" flags in *packed_data.
 *
 * The distances are produced by si_llvm_clipvertex_to_clipdist() into two
 * export-argument structs of four channels each (presumably one distance per
 * user clip plane; confirm against that helper). Only the distances whose bit
 * is set in clipdist_enable are tested.
 *
 * \param ctx              shader context
 * \param clipdist_enable  bitmask over distances 0..7 selecting which to test
 * \param clipvertex       the 4 components of the vertex to convert
 * \param packed_data      in/out: i32 value receiving the flags via
 *                         add_clipdist_bit()
 * \return true if at least one distance was enabled and a flag was emitted
 */
static bool add_clipdist_bits_for_clipvertex(struct si_shader_context *ctx,
|
||||
unsigned clipdist_enable,
|
||||
LLVMValueRef clipvertex[4],
|
||||
LLVMValueRef *packed_data)
|
||||
{
|
||||
struct ac_export_args clipdist[2];
|
||||
bool added = false;
|
||||
|
||||
si_llvm_clipvertex_to_clipdist(ctx, clipdist, clipvertex);
|
||||
|
||||
for (unsigned j = 0; j < 8; j++) {
|
||||
if (!(clipdist_enable & BITFIELD_BIT(j)))
|
||||
continue;
|
||||
|
||||
/* Distance j lives in channel (j % 4) of export struct (j / 4). */
LLVMValueRef distance = clipdist[j / 4].out[j % 4];
|
||||
add_clipdist_bit(ctx, distance, j, packed_data);
|
||||
added = true;
|
||||
}
|
||||
return added;
|
||||
}
|
||||
|
||||
/**
|
||||
* Cull primitives for NGG VS or TES, then compact vertices, which happens
|
||||
* before the VS or TES main function. Return values for the main function.
|
||||
|
|
@ -826,10 +859,16 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi)
|
|||
(sel->info.stage == MESA_SHADER_TESS_EVAL && !shader->key.ge.as_es));
|
||||
|
||||
LLVMValueRef es_vtxptr = ngg_nogs_vertex_ptr(ctx, get_thread_id_in_tg(ctx));
|
||||
LLVMValueRef packed_data = ctx->ac.i32_0;
|
||||
LLVMValueRef position[4] = {};
|
||||
unsigned pos_index = 0;
|
||||
unsigned clip_plane_enable = SI_NGG_CULL_GET_CLIP_PLANE_ENABLE(shader->key.ge.opt.ngg_culling);
|
||||
unsigned clipdist_enable = (sel->clipdist_mask & clip_plane_enable) | sel->culldist_mask;
|
||||
bool has_clipdist_mask = false;
|
||||
|
||||
for (unsigned i = 0; i < info->num_outputs; i++) {
|
||||
LLVMValueRef position[4];
|
||||
LLVMValueRef clipvertex[4];
|
||||
unsigned base;
|
||||
|
||||
switch (info->output_semantic[i]) {
|
||||
case VARYING_SLOT_POS:
|
||||
|
|
@ -862,12 +901,45 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi)
|
|||
ac_build_gep0(&ctx->ac, es_vtxptr, LLVMConstInt(ctx->ac.i32, lds_pos_cull_x_div_w + chan, 0)));
|
||||
}
|
||||
break;
|
||||
|
||||
case VARYING_SLOT_CLIP_DIST0:
|
||||
case VARYING_SLOT_CLIP_DIST1:
|
||||
base = info->output_semantic[i] == VARYING_SLOT_CLIP_DIST1 ? 4 : 0;
|
||||
|
||||
for (unsigned j = 0; j < 4; j++) {
|
||||
unsigned index = base + j;
|
||||
|
||||
if (!(clipdist_enable & BITFIELD_BIT(index)))
|
||||
continue;
|
||||
|
||||
LLVMValueRef distance = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + j], "");
|
||||
add_clipdist_bit(ctx, distance, index, &packed_data);
|
||||
has_clipdist_mask = true;
|
||||
}
|
||||
break;
|
||||
|
||||
case VARYING_SLOT_CLIP_VERTEX:
|
||||
for (unsigned j = 0; j < 4; j++)
|
||||
clipvertex[j] = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + j], "");
|
||||
|
||||
if (add_clipdist_bits_for_clipvertex(ctx, clipdist_enable, clipvertex, &packed_data))
|
||||
has_clipdist_mask = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (clip_plane_enable && !sel->clipdist_mask) {
|
||||
/* When clip planes are enabled and there are no clip distance outputs,
|
||||
* we should use user clip planes and cull against the position.
|
||||
*/
|
||||
assert(!has_clipdist_mask);
|
||||
if (add_clipdist_bits_for_clipvertex(ctx, clipdist_enable, position, &packed_data))
|
||||
has_clipdist_mask = true;
|
||||
}
|
||||
|
||||
/* Initialize the packed data. */
|
||||
LLVMBuildStore(
|
||||
builder, ctx->ac.i32_0,
|
||||
builder, packed_data,
|
||||
ac_build_gep0(&ctx->ac, es_vtxptr, LLVMConstInt(ctx->ac.i32, lds_packed_data, 0)));
|
||||
ac_build_endif(&ctx->ac, ctx->merged_wrap_if_label);
|
||||
ac_build_s_barrier(&ctx->ac);
|
||||
|
|
@ -950,6 +1022,8 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi)
|
|||
{
|
||||
/* Load positions. */
|
||||
LLVMValueRef pos[3][4] = {};
|
||||
LLVMValueRef clipdist_neg_mask = NULL;
|
||||
|
||||
for (unsigned vtx = 0; vtx < num_vertices; vtx++) {
|
||||
for (unsigned chan = 0; chan < 4; chan++) {
|
||||
unsigned index;
|
||||
|
|
@ -965,8 +1039,25 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi)
|
|||
pos[vtx][chan] = LLVMBuildLoad(builder, addr, "");
|
||||
pos[vtx][chan] = ac_to_float(&ctx->ac, pos[vtx][chan]);
|
||||
}
|
||||
|
||||
if (has_clipdist_mask) {
|
||||
/* Load and AND clip distance masks. Each bit means whether that clip distance is
|
||||
* negative. If all masks are AND'ed and the result is 0, the primitive isn't culled
|
||||
* by clip distances.
|
||||
*/
|
||||
LLVMValueRef addr = si_build_gep_i8(ctx, gs_vtxptr[vtx], lds_byte3_clipdist_neg_mask);
|
||||
LLVMValueRef mask = LLVMBuildLoad(builder, addr, "");
|
||||
if (!clipdist_neg_mask)
|
||||
clipdist_neg_mask = mask;
|
||||
else
|
||||
clipdist_neg_mask = LLVMBuildAnd(builder, clipdist_neg_mask, mask, "");
|
||||
}
|
||||
}
|
||||
|
||||
LLVMValueRef clipdist_accepted =
|
||||
has_clipdist_mask ? LLVMBuildICmp(builder, LLVMIntEQ, clipdist_neg_mask, ctx->ac.i8_0, "")
|
||||
: ctx->ac.i1true;
|
||||
|
||||
LLVMValueRef vp_scale[2] = {}, vp_translate[2] = {}, small_prim_precision = NULL;
|
||||
LLVMValueRef clip_half_line_width[2] = {};
|
||||
|
||||
|
|
@ -1020,7 +1111,7 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi)
|
|||
gs_accepted,
|
||||
(void*)gs_vtxptr,
|
||||
};
|
||||
ac_cull_primitive(&ctx->ac, pos, ctx->ac.i1true, vp_scale, vp_translate,
|
||||
ac_cull_primitive(&ctx->ac, pos, clipdist_accepted, vp_scale, vp_translate,
|
||||
small_prim_precision, clip_half_line_width,
|
||||
&options, gfx10_build_primitive_accepted, params);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1134,7 +1134,7 @@ struct si_context {
|
|||
|
||||
/* Emitted draw state. */
|
||||
bool ngg : 1;
|
||||
uint8_t ngg_culling;
|
||||
uint16_t ngg_culling;
|
||||
unsigned last_index_size;
|
||||
int last_base_vertex;
|
||||
unsigned last_start_instance;
|
||||
|
|
|
|||
|
|
@ -283,6 +283,8 @@ enum
|
|||
#define SI_NGG_CULL_FRONT_FACE (1 << 2) /* front faces */
|
||||
#define SI_NGG_CULL_LINES (1 << 3) /* the primitive type is lines */
|
||||
#define SI_NGG_CULL_SMALL_LINES_DIAMOND_EXIT (1 << 4) /* cull small lines according to the diamond exit rule */
|
||||
/* Pack an 8-bit clip plane enable mask into bits 5..12 of the NGG culling
 * flags; inputs wider than 8 bits are truncated by the 0xff mask.
 */
#define SI_NGG_CULL_CLIP_PLANE_ENABLE(enable) (((enable) & 0xff) << 5)
|
||||
/* Extract the 8-bit clip plane enable mask from bits 5..12 of the flags. */
#define SI_NGG_CULL_GET_CLIP_PLANE_ENABLE(x) (((x) >> 5) & 0xff)
|
||||
|
||||
/**
|
||||
* For VS shader keys, describe any fixups required for vertex fetch.
|
||||
|
|
@ -660,7 +662,7 @@ struct si_shader_key_ge {
|
|||
unsigned kill_pointsize : 1;
|
||||
|
||||
/* For NGG VS and TES. */
|
||||
unsigned ngg_culling : 5; /* SI_NGG_CULL_* */
|
||||
unsigned ngg_culling : 13; /* SI_NGG_CULL_* */
|
||||
|
||||
/* For shaders where monolithic variants have better code.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -960,11 +960,13 @@ static void *si_create_rs_state(struct pipe_context *ctx, const struct pipe_rast
|
|||
S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) |
|
||||
S_028810_DX_LINEAR_ATTR_CLIP_ENA(1);
|
||||
|
||||
rs->ngg_cull_flags_tris = SI_NGG_CULL_TRIANGLES;
|
||||
rs->ngg_cull_flags_tris = SI_NGG_CULL_TRIANGLES |
|
||||
SI_NGG_CULL_CLIP_PLANE_ENABLE(state->clip_plane_enable);
|
||||
rs->ngg_cull_flags_tris_y_inverted = rs->ngg_cull_flags_tris;
|
||||
|
||||
rs->ngg_cull_flags_lines = SI_NGG_CULL_LINES |
|
||||
(!rs->perpendicular_end_caps ? SI_NGG_CULL_SMALL_LINES_DIAMOND_EXIT : 0);
|
||||
(!rs->perpendicular_end_caps ? SI_NGG_CULL_SMALL_LINES_DIAMOND_EXIT : 0) |
|
||||
SI_NGG_CULL_CLIP_PLANE_ENABLE(state->clip_plane_enable);
|
||||
|
||||
if (rs->rasterizer_discard) {
|
||||
rs->ngg_cull_flags_tris |= SI_NGG_CULL_FRONT_FACE |
|
||||
|
|
|
|||
|
|
@ -76,9 +76,9 @@ struct si_state_rasterizer {
|
|||
unsigned pa_cl_clip_cntl;
|
||||
float line_width;
|
||||
float max_point_size;
|
||||
unsigned ngg_cull_flags_tris : 8;
|
||||
unsigned ngg_cull_flags_tris_y_inverted : 8;
|
||||
unsigned ngg_cull_flags_lines : 8;
|
||||
unsigned ngg_cull_flags_tris : 16;
|
||||
unsigned ngg_cull_flags_tris_y_inverted : 16;
|
||||
unsigned ngg_cull_flags_lines : 16;
|
||||
unsigned sprite_coord_enable : 8;
|
||||
unsigned clip_plane_enable : 8;
|
||||
unsigned half_pixel_center : 1;
|
||||
|
|
|
|||
|
|
@ -2263,7 +2263,7 @@ static void si_draw(struct pipe_context *ctx,
|
|||
}
|
||||
|
||||
/* Update NGG culling settings. */
|
||||
uint8_t old_ngg_culling = sctx->ngg_culling;
|
||||
uint16_t old_ngg_culling = sctx->ngg_culling;
|
||||
if (GFX_VERSION >= GFX10) {
|
||||
struct si_shader_selector *hw_vs = si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->cso;
|
||||
|
||||
|
|
@ -2278,7 +2278,7 @@ static void si_draw(struct pipe_context *ctx,
|
|||
/* Check that the current shader allows culling. */
|
||||
assert(hw_vs->ngg_cull_vert_threshold != UINT_MAX);
|
||||
|
||||
uint8_t ngg_culling;
|
||||
uint16_t ngg_culling;
|
||||
|
||||
if (util_prim_is_lines(sctx->current_rast_prim)) {
|
||||
/* Overwrite it to mask out face cull flags. */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue