r600: fix pop-free clipping

This update fixes pop-free clipping and follows the advice
of Vitaliy Kuzmin: https://gitlab.freedesktop.org/mesa/mesa/-/issues/12440

This functionality requires calculating the value of the following two
registers: PA_CL_GB_HORZ_DISC_ADJ and PA_CL_GB_VERT_DISC_ADJ. Both
registers are available on all GPUs of the r600 family.

This code is built on the backport of radeonsi updates which are relevant
to this very functionality:
57e658d041 "radeonsi: rework how guardband registers are updated to decrease overhead"
146c2b7c28 "radeonsi: adjust clip discard based on line width / point size"
4d74432dd3 "radeonsi: don't discard points and lines"
63680471f9 "radeonsi: remove si_context::{scissor_enabled,clip_halfz}"

This change was tested on rv770, barts and cayman:
deqp-gles[2-3]/functional/clipping/line/wide_line_clip_viewport_center: fail pass
deqp-gles[2-3]/functional/clipping/line/wide_line_clip_viewport_corner: fail pass
deqp-gles[2-3]/functional/clipping/point/wide_point_clip: fail pass
deqp-gles[2-3]/functional/clipping/point/wide_point_clip_viewport_center: fail pass
deqp-gles[2-3]/functional/clipping/point/wide_point_clip_viewport_corner: fail pass

Cc: mesa-stable
Signed-off-by: Patrick Lerda <patrick9876@free.fr>
Reviewed-by: Filip Gawin <filip@gawin.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35052>
This commit is contained in:
Patrick Lerda 2025-05-16 17:04:48 +02:00 committed by Marge Bot
parent 1730001351
commit df2c774a83
8 changed files with 101 additions and 51 deletions

View file

@ -8,24 +8,12 @@ KHR-GL33.texture_swizzle.functional,Fail
KHR-GLES3.core.nearest_edge.offset_right,Fail
dEQP-GLES2.functional.clipping.line.wide_line_clip_viewport_center,Fail
dEQP-GLES2.functional.clipping.line.wide_line_clip_viewport_corner,Fail
dEQP-GLES2.functional.clipping.point.wide_point_clip,Fail
dEQP-GLES2.functional.clipping.point.wide_point_clip_viewport_center,Fail
dEQP-GLES2.functional.clipping.point.wide_point_clip_viewport_corner,Fail
dEQP-GLES2.functional.texture.size.cube.15x15_l8,Fail
dEQP-GLES2.functional.texture.size.cube.15x15_rgb888,Fail
dEQP-GLES2.functional.texture.size.cube.15x15_rgba4444,Fail
dEQP-GLES2.functional.texture.size.cube.15x15_rgba8888,Fail
dEQP-GLES2.functional.texture.wrap.clamp_clamp_nearest_npot_etc1,Fail
dEQP-GLES3.functional.clipping.line.wide_line_clip_viewport_center,Fail
dEQP-GLES3.functional.clipping.line.wide_line_clip_viewport_corner,Fail
dEQP-GLES3.functional.clipping.point.wide_point_clip,Fail
dEQP-GLES3.functional.clipping.point.wide_point_clip_viewport_center,Fail
dEQP-GLES3.functional.clipping.point.wide_point_clip_viewport_corner,Fail
dEQP-GLES3.functional.shaders.derivate.dfdx.fastest.fbo_msaa4.float_highp,Fail
dEQP-GLES3.functional.shaders.derivate.dfdx.fastest.fbo_msaa4.vec2_highp,Fail
dEQP-GLES3.functional.shaders.derivate.dfdx.fastest.fbo_msaa4.vec3_highp,Fail

View file

@ -50,14 +50,6 @@ KHR-GLES31.core.texture_cube_map_array.texture_size_tesselation_ev_sh,Fail
KHR-GLES31.core.vertex_attrib_binding.basic-input-case5,Fail
KHR-GLES31.core.vertex_attrib_binding.basic-input-case6,Fail
# Wide line near the edge is fully missing instead of partially clipped. Do we
# need guardband?
dEQP-GLES2.functional.clipping.line.wide_line_clip_viewport_center,Fail
dEQP-GLES2.functional.clipping.line.wide_line_clip_viewport_corner,Fail
dEQP-GLES2.functional.clipping.point.wide_point_clip,Fail
dEQP-GLES2.functional.clipping.point.wide_point_clip_viewport_center,Fail
dEQP-GLES2.functional.clipping.point.wide_point_clip_viewport_corner,Fail
# Looks like sub-pixel sampling errors or something.
dEQP-GLES2.functional.texture.filtering.2d.linear_nearest_clamp_etc1,Fail
dEQP-GLES2.functional.texture.filtering.2d.linear_nearest_clamp_l8_npot,Fail
@ -147,14 +139,6 @@ dEQP-GLES2.functional.texture.vertex.cube.filtering.nearest_linear_repeat,Fail
dEQP-GLES2.functional.vertex_arrays.single_attribute.strides.buffer_0_32_short3_vec4_dynamic_draw_quads_1,Fail
dEQP-GLES2.functional.vertex_arrays.single_attribute.strides.buffer_0_32_short3_vec4_dynamic_draw_quads_256,Fail
# Wide line near the edge is fully missing instead of partially clipped. Do we
# need guardband?
dEQP-GLES3.functional.clipping.line.wide_line_clip_viewport_center,Fail
dEQP-GLES3.functional.clipping.line.wide_line_clip_viewport_corner,Fail
dEQP-GLES3.functional.clipping.point.wide_point_clip,Fail
dEQP-GLES3.functional.clipping.point.wide_point_clip_viewport_center,Fail
dEQP-GLES3.functional.clipping.point.wide_point_clip_viewport_corner,Fail
# "Attribute 0: position Storage in buffer stride 16 input datatype unsigned_short input component count 3 normalized used as vec3 instance divisor 0 drawElementsInstanced() index type short index storage in buffer index offset 0 instance count 1 1 lines (loop) "
dEQP-GLES3.functional.draw.random.117,Fail

View file

@ -509,6 +509,7 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx,
S_028810_DX_LINEAR_ATTR_CLIP_ENA(1) |
S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard);
rs->multisample_enable = state->multisample;
rs->line_width = state->line_width;
/* offset */
rs->offset_units = state->offset_units;
@ -524,6 +525,7 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx,
psize_min = state->point_size;
psize_max = state->point_size;
}
rs->max_point_size = psize_max;
spi_interp = S_0286D4_FLAT_SHADE_ENA(1);
spi_interp |= S_0286D4_PNT_SPRITE_ENA(1) |

View file

@ -276,6 +276,8 @@ struct r600_rasterizer_state {
unsigned pa_su_sc_mode_cntl;
float offset_units;
float offset_scale;
float line_width;
float max_point_size;
bool offset_enable;
bool offset_units_unscaled;
bool scissor_enable;

View file

@ -509,6 +509,23 @@ struct r600_common_context {
bool vs_writes_viewport_index;
bool vs_disables_clipping_viewport;
/* The number of pixels outside the viewport that are not culled by the clipper.
* Normally, the clipper clips everything outside the viewport, however, points and lines
* can have vertices outside the viewport, but their edges can be inside the viewport. Those
* shouldn't be culled. The problem is that the register setting (PA_CL_GB_*_DISC_ADJ) that
* controls the discard distance, which depends on the point size and line width, applies to
* all primitive types, and we would have to set 0 distance for triangles and non-zero for
* points and lines whenever the primitive type changes, which would add overhead and cause
* context rolls.
*
* To reduce that, whenever the discard distance changes for points and lines, we keep it
* at that higher value up to a certain small number for all primitive types including all
* points and lines within a specific size. This is slightly inefficient, but it eliminates
* a lot of guardband state updates and context register changes.
*/
float min_clip_discard_distance_watermark;
float current_clip_discard_distance;
/* Additional context states. */
unsigned flags; /* flush flags */
@ -605,6 +622,39 @@ struct r600_common_context {
enum amd_ip_type ring);
};
/* Bitmask with one bit set for each line-type primitive mode; the bit index
 * is the corresponding MESA_PRIM_* value.
 */
#define R600_ALL_PRIM_LINE_MODES \
	((1 << MESA_PRIM_LINES) | \
	 (1 << MESA_PRIM_LINE_LOOP) | \
	 (1 << MESA_PRIM_LINE_STRIP) | \
	 (1 << MESA_PRIM_LINES_ADJACENCY) | \
	 (1 << MESA_PRIM_LINE_STRIP_ADJACENCY))
/**
 * Tell whether a primitive mode is one of the line modes.
 *
 * \param prim  primitive mode, used as a bit index into
 *              R600_ALL_PRIM_LINE_MODES
 * \return true when the bit for \p prim is set in R600_ALL_PRIM_LINE_MODES
 */
static inline bool r600_prim_is_lines(unsigned prim)
{
	const int prim_bit = 1 << prim;

	return (prim_bit & R600_ALL_PRIM_LINE_MODES) ? true : false;
}
/**
 * Request a new clip discard distance (point size or line width).
 *
 * Nothing happens unless \p distance exceeds
 * min_clip_discard_distance_watermark. When it does, the watermark is raised
 * to \p distance, capped at 6, and current_clip_discard_distance is updated.
 * If the stored distance actually changes, every scissor is marked dirty and
 * the scissors atom is flagged.
 *
 * \param rctx      common context holding the watermark and current distance
 * \param distance  requested discard distance
 */
static inline void r600_set_clip_discard_distance(struct r600_common_context *rctx,
						  float distance)
{
	/* Requests at or below the watermark never touch the guardband state;
	 * that is what eliminates repeated state changes once the watermark
	 * has been raised.
	 */
	if (!(distance > rctx->min_clip_discard_distance_watermark))
		return;

	/* The cap of 6 was determined from Viewperf. The number is in units
	 * of half-pixels.
	 */
	rctx->min_clip_discard_distance_watermark = MIN2(distance, 6);

	float previous = rctx->current_clip_discard_distance;
	float updated = MAX2(distance, rctx->min_clip_discard_distance_watermark);

	if (previous == updated)
		return;

	rctx->current_clip_discard_distance = updated;

	/* Flag all scissors and the scissors atom so the state is re-emitted. */
	rctx->scissors.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
	rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
}
/* r600_buffer_common.c */
bool r600_rings_is_buffer_referenced(struct r600_common_context *ctx,
struct pb_buffer_lean *buf,
@ -771,8 +821,6 @@ void r600_texture_transfer_unmap(struct pipe_context *ctx,
/* r600_viewport.c */
void evergreen_apply_scissor_bug_workaround(struct r600_common_context *rctx,
struct pipe_scissor_state *scissor);
void r600_viewport_set_rast_deps(struct r600_common_context *rctx,
bool scissor_enable, bool clip_halfz);
void r600_update_vs_writes_viewport_index(struct r600_common_context *rctx,
struct tgsi_shader_info *info);
void r600_init_viewport_functions(struct r600_common_context *rctx);

View file

@ -482,6 +482,7 @@ static void *r600_create_rs_state(struct pipe_context *ctx,
S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard);
}
rs->multisample_enable = state->multisample;
rs->line_width = state->line_width;
/* offset */
rs->offset_units = state->offset_units;
@ -497,6 +498,7 @@ static void *r600_create_rs_state(struct pipe_context *ctx,
psize_min = state->point_size;
psize_max = state->point_size;
}
rs->max_point_size = psize_max;
sc_mode_cntl = S_028A4C_MSAA_ENABLE(state->multisample) |
S_028A4C_LINE_STIPPLE_ENABLE(state->line_stipple_enable) |

View file

@ -388,7 +388,21 @@ static void r600_bind_rs_state(struct pipe_context *ctx, void *state)
r600_mark_atom_dirty(rctx, &rctx->clip_misc_state.atom);
}
r600_viewport_set_rast_deps(&rctx->b, rs->scissor_enable, rs->clip_halfz);
if (r600_prim_is_lines(rctx->current_rast_prim))
r600_set_clip_discard_distance(&rctx->b, rs->line_width);
else if (rctx->current_rast_prim == MESA_PRIM_POINTS)
r600_set_clip_discard_distance(&rctx->b, rs->max_point_size);
if (rctx->b.scissor_enabled != rs->scissor_enable) {
rctx->b.scissor_enabled = rs->scissor_enable;
rctx->b.scissors.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
rctx->b.set_atom_dirty(&rctx->b, &rctx->b.scissors.atom, true);
}
if (rctx->b.clip_halfz != rs->clip_halfz) {
rctx->b.clip_halfz = rs->clip_halfz;
rctx->b.viewports.depth_range_dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
rctx->b.set_atom_dirty(&rctx->b, &rctx->b.viewports.atom, true);
}
/* Re-emit PA_SC_LINE_STIPPLE. */
rctx->last_primitive_type = -1;
@ -2196,10 +2210,24 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
return;
}
rctx->current_rast_prim = (rctx->gs_shader)? rctx->gs_shader->gs_output_prim
: (rctx->tes_shader)? rctx->tes_shader->info.properties[TGSI_PROPERTY_TES_PRIM_MODE]
const enum mesa_prim rast_prim = rctx->current_rast_prim;
rctx->current_rast_prim = rctx->gs_shader ? rctx->gs_shader->gs_output_prim
: rctx->tes_shader ? rctx->tes_shader->info.properties[TGSI_PROPERTY_TES_PRIM_MODE]
: info->mode;
if (rast_prim != rctx->current_rast_prim) {
if (rctx->current_rast_prim == MESA_PRIM_POINTS) {
r600_set_clip_discard_distance(&rctx->b, rctx->rasterizer->max_point_size);
} else if (r600_prim_is_lines(rctx->current_rast_prim)) {
r600_set_clip_discard_distance(&rctx->b, rctx->rasterizer->line_width);
} else if (rctx->current_rast_prim == R600_PRIM_RECTANGLE_LIST) {
/* Don't change the clip discard distance for rectangles. */
} else {
r600_set_clip_discard_distance(&rctx->b, 0);
}
}
if (rctx->b.gfx_level >= EVERGREEN) {
evergreen_emit_atomic_buffer_setup_count(rctx, NULL, combined_atomics, &atomic_used_mask);
}

View file

@ -204,6 +204,18 @@ static void r600_emit_guardband(struct r600_common_context *rctx,
guardband_x = MIN2(-left, right);
guardband_y = MIN2(-top, bottom);
float discard_x = 1.0;
float discard_y = 1.0;
float distance = rctx->current_clip_discard_distance;
/* Add half the point size / line width */
discard_x += distance / (2.0 * vp.scale[0]);
discard_y += distance / (2.0 * vp.scale[1]);
/* Discard primitives that would lie entirely outside the viewport area. */
discard_x = MIN2(discard_x, guardband_x);
discard_y = MIN2(discard_y, guardband_y);
/* If any of the GB registers is updated, all of them must be updated. */
if (rctx->gfx_level >= CAYMAN)
radeon_set_context_reg_seq(cs, CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 4);
@ -211,9 +223,9 @@ static void r600_emit_guardband(struct r600_common_context *rctx,
radeon_set_context_reg_seq(cs, R600_R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 4);
radeon_emit(cs, fui(guardband_y)); /* R_028BE8_PA_CL_GB_VERT_CLIP_ADJ */
radeon_emit(cs, fui(1.0)); /* R_028BEC_PA_CL_GB_VERT_DISC_ADJ */
radeon_emit(cs, fui(discard_y)); /* R_028BEC_PA_CL_GB_VERT_DISC_ADJ */
radeon_emit(cs, fui(guardband_x)); /* R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ */
radeon_emit(cs, fui(1.0)); /* R_028BF4_PA_CL_GB_HORZ_DISC_ADJ */
radeon_emit(cs, fui(discard_x)); /* R_028BF4_PA_CL_GB_HORZ_DISC_ADJ */
}
static void r600_emit_scissors(struct r600_common_context *rctx, struct r600_atom *atom)
@ -373,22 +385,6 @@ static void r600_emit_viewport_states(struct r600_common_context *rctx,
r600_emit_depth_ranges(rctx);
}
/**
 * Set viewport dependencies on pipe_rasterizer_state.
 *
 * Stores the two rasterizer flags in the common context. For each flag whose
 * value differs from the stored one, the matching per-viewport dirty mask is
 * set for all viewports and the owning atom is flagged dirty.
 *
 * \param rctx            common context
 * \param scissor_enable  new scissor enable flag; a change dirties the scissors
 * \param clip_halfz      new clip_halfz flag; a change dirties the depth ranges
 */
void r600_viewport_set_rast_deps(struct r600_common_context *rctx,
				 bool scissor_enable, bool clip_halfz)
{
	const unsigned all_viewports = (1 << R600_MAX_VIEWPORTS) - 1;

	if (scissor_enable != rctx->scissor_enabled) {
		rctx->scissor_enabled = scissor_enable;
		rctx->scissors.dirty_mask = all_viewports;
		rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
	}

	if (clip_halfz != rctx->clip_halfz) {
		rctx->clip_halfz = clip_halfz;
		rctx->viewports.depth_range_dirty_mask = all_viewports;
		rctx->set_atom_dirty(rctx, &rctx->viewports.atom, true);
	}
}
/**
* Normally, we only emit 1 viewport and 1 scissor if no shader is using
* the VIEWPORT_INDEX output, and emitting the other viewports and scissors