diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 165322276ea..15d0f20c58e 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1123,6 +1123,7 @@ static void si_lower_ngg(struct si_shader *shader, nir_shader *nir, .has_param_exports = shader->info.nr_param_exports, .export_clipdist_mask = shader->info.clipdist_mask | shader->info.culldist_mask, .write_pos_to_clipvertex = shader->key.ge.mono.write_pos_to_clipvertex, + .pack_clip_cull_distances = true, .force_vrs = sel->screen->options.vrs2x2, .use_gfx12_xfb_intrinsic = !nir->info.use_aco_amd, .skip_viewport_state_culling = sel->info.writes_viewport_index, @@ -1536,7 +1537,7 @@ static void run_late_optimization_and_lowering_passes(struct si_nir_shader_ctx * NIR_PASS_V(nir, ac_nir_lower_legacy_vs, sel->screen->info.gfx_level, shader->info.clipdist_mask | shader->info.culldist_mask, - shader->key.ge.mono.write_pos_to_clipvertex, false, + shader->key.ge.mono.write_pos_to_clipvertex, true, ctx->temp_info.vs_output_param_offset, shader->info.nr_param_exports, shader->key.ge.mono.u.vs_export_prim_id, @@ -1569,6 +1570,7 @@ static void run_late_optimization_and_lowering_passes(struct si_nir_shader_ctx * .gfx_level = sel->screen->info.gfx_level, .export_clipdist_mask = shader->info.clipdist_mask | shader->info.culldist_mask, .write_pos_to_clipvertex = shader->key.ge.mono.write_pos_to_clipvertex, + .pack_clip_cull_distances = true, .param_offsets = ctx->temp_info.vs_output_param_offset, .has_param_exports = shader->info.nr_param_exports, .disable_streamout = !shader->info.num_streamout_vec4s, diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index e2581d4bae0..990935cfc42 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -902,11 +902,22 @@ static void si_emit_clip_regs(struct si_context *sctx, unsigned index) assert(!vs->info.culldist_mask); ucp_mask = SI_USER_CLIP_PLANE_MASK & rs->clip_plane_enable; } else { - clipdist_mask = vs->info.clipdist_mask & rs->clip_plane_enable; + unsigned num_bits = 0; + + /* Pack clipdist_mask and culldist_mask (remove holes) because that's how exports are packed. */ + u_foreach_bit(i, vs->info.clipdist_mask) { + if (rs->clip_plane_enable & BITFIELD_BIT(i)) + clipdist_mask |= BITFIELD_BIT(num_bits); + num_bits++; + } + + unsigned num_culldist_bits = util_bitcount(vs->info.culldist_mask); + culldist_mask = BITFIELD_RANGE(num_bits, num_culldist_bits); + /* For points, we need to set the cull distance bits too because the clip distance bits have * no effect on them. */ - culldist_mask = vs->info.culldist_mask | clipdist_mask; + culldist_mask |= clipdist_mask; } unsigned pa_cl_cntl = S_02881C_BYPASS_VTX_RATE_COMBINER(sctx->gfx_level >= GFX10_3 && diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 3d53c7bd28e..a3ed656913f 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -1349,16 +1349,15 @@ unsigned si_get_num_vertices_per_output_prim(struct si_shader *shader) static unsigned si_get_vs_out_cntl(const struct si_shader_selector *sel, const struct si_shader *shader, bool ngg) { - /* Clip distances can be killed, but cull distances can't. */ - unsigned clipcull_mask = shader->info.clipdist_mask | shader->info.culldist_mask; + unsigned num_clip_distances = util_bitcount(shader->info.clipdist_mask | shader->info.culldist_mask); bool writes_psize = sel->info.writes_psize && !shader->key.ge.opt.kill_pointsize; bool writes_layer = sel->info.writes_layer && !shader->key.ge.opt.kill_layer; bool misc_vec_ena = writes_psize || (sel->info.writes_edgeflag && !ngg) || writes_layer || sel->info.writes_viewport_index || sel->screen->options.vrs2x2; - return S_02881C_VS_OUT_CCDIST0_VEC_ENA((clipcull_mask & 0x0F) != 0) | - S_02881C_VS_OUT_CCDIST1_VEC_ENA((clipcull_mask & 0xF0) != 0) | + return S_02881C_VS_OUT_CCDIST0_VEC_ENA(num_clip_distances > 0) | + S_02881C_VS_OUT_CCDIST1_VEC_ENA(num_clip_distances > 4) | S_02881C_USE_VTX_POINT_SIZE(writes_psize) | S_02881C_USE_VTX_EDGE_FLAG(sel->info.writes_edgeflag && !ngg) | S_02881C_USE_VTX_VRS_RATE(sel->screen->options.vrs2x2) |