freedreno/a6xx: ARB_sample_locations

Note, gl_SamplePosition (rgetpos), and therefore interpolateAtSample(),
doesn't work with sample location_enable=true.  For vulkan, "If the
current pipeline uses custom sample locations the value of any variable
decorated with the SamplePosition built-in decoration is undefined."
But ARB_sample_locations doesn't mention this.  Possibly the vk text
should be backported to the gl extension.  (If the app is specifying the
sample locations, it shouldn't need gl_SamplePosition.)

The upshot of this is that 2 out of 3 of the piglit arb_sample_locations
tests fail, even though sample locations itself is working.
Possibly the test should be updated.  Or we could use driconf to hide
ARB_gpu_shader5 and ARB_sample_shading from this test.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24999>
This commit is contained in:
Rob Clark 2023-08-30 18:22:31 -07:00 committed by Marge Bot
parent fd00e99444
commit 286b7723f0
8 changed files with 102 additions and 6 deletions

View file

@ -304,7 +304,7 @@ Khronos, ARB, and OES extensions that are not part of any OpenGL or OpenGL ES ve
GL_ARB_parallel_shader_compile DONE (freedreno, radeonsi, etnaviv, zink, iris, crocus/gen6+)
GL_ARB_post_depth_coverage DONE (freedreno/a6xx, nvc0, radeonsi, llvmpipe, zink, iris/gen9+)
GL_ARB_robustness_isolation not started
GL_ARB_sample_locations DONE (nvc0, zink)
GL_ARB_sample_locations DONE (freedreno/a6xx, nvc0, zink)
GL_ARB_seamless_cubemap_per_texture DONE (etnaviv/SEAMLESS_CUBE_MAP, freedreno, nvc0, r600, radeonsi, softpipe, virgl, zink, asahi, iris, crocus)
GL_ARB_shader_ballot DONE (nvc0, radeonsi, zink, iris, crocus/gen8)
GL_ARB_shader_clock DONE (nv50, nvc0, r600, radeonsi, llvmpipe, virgl, zink, iris, crocus/gen7+)

View file

@ -39,6 +39,7 @@
#include "fd6_emit.h"
#include "fd6_gmem.h"
#include "fd6_image.h"
#include "fd6_pack.h"
#include "fd6_program.h"
#include "fd6_query.h"
#include "fd6_rasterizer.h"
@ -58,6 +59,9 @@ fd6_context_destroy(struct pipe_context *pctx) in_dt
if (fd6_ctx->streamout_disable_stateobj)
fd_ringbuffer_del(fd6_ctx->streamout_disable_stateobj);
if (fd6_ctx->sample_locations_disable_stateobj)
fd_ringbuffer_del(fd6_ctx->sample_locations_disable_stateobj);
fd_context_destroy(pctx);
if (fd6_ctx->vsc_draw_strm)
@ -173,6 +177,7 @@ setup_state_map(struct fd_context *ctx)
BIT(FD6_GROUP_PROG_FB_RAST));
fd_context_add_map(ctx, FD_DIRTY_BLEND | FD_DIRTY_SAMPLE_MASK,
BIT(FD6_GROUP_BLEND));
fd_context_add_map(ctx, FD_DIRTY_SAMPLE_LOCATIONS, BIT(FD6_GROUP_SAMPLE_LOCATIONS));
fd_context_add_map(ctx, FD_DIRTY_BLEND_COLOR, BIT(FD6_GROUP_BLEND_COLOR));
fd_context_add_map(ctx, FD_DIRTY_PROG | FD_DIRTY_CONST,
BIT(FD6_GROUP_CONST));
@ -309,6 +314,15 @@ fd6_context_create(struct pipe_screen *pscreen, void *priv,
fd6_blitter_init<CHIP>(pctx);
struct fd_ringbuffer *ring =
fd_ringbuffer_new_object(fd6_ctx->base.pipe, 6 * 4);
OUT_REG(ring, A6XX_GRAS_SAMPLE_CONFIG());
OUT_REG(ring, A6XX_RB_SAMPLE_CONFIG());
OUT_REG(ring, A6XX_SP_TP_SAMPLE_CONFIG());
fd6_ctx->sample_locations_disable_stateobj = ring;
return fd_context_init_tc(pctx, flags);
}

View file

@ -103,9 +103,12 @@ struct fd6_context {
struct fd_bo *control_mem;
uint32_t seqno;
/* pre-backed stateobj for stream-out disable: */
/* pre-baked stateobj for stream-out disable: */
struct fd_ringbuffer *streamout_disable_stateobj;
/* pre-baked stateobj for sample-locations disable: */
struct fd_ringbuffer *sample_locations_disable_stateobj;
/* storage for ctx->last.key: */
struct ir3_shader_key last_key;

View file

@ -352,6 +352,45 @@ build_blend_color(struct fd6_emit *emit) assert_dt
return ring;
}
/**
 * Build the state group that programs custom sample locations.
 *
 * If custom sample locations are not enabled on the context, a new
 * reference to the context's pre-baked "disable" stateobj is returned.
 * Otherwise a streaming ringbuffer is allocated from the current batch's
 * submit and filled with the GRAS / RB / SP_TP sample-config and
 * sample-location registers, all carrying the same packed location word.
 */
static struct fd_ringbuffer *
build_sample_locations(struct fd6_emit *emit)
   assert_dt
{
   struct fd_context *ctx = emit->ctx;

   if (!ctx->sample_locations_enabled)
      return fd_ringbuffer_ref(
         fd6_context(ctx)->sample_locations_disable_stateobj);

   /* 9 dwords; presumably 3 x (packet header + 2 regs) -- TODO confirm */
   struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
      ctx->batch->submit, 9 * 4, FD_RINGBUFFER_STREAMING);

   /* Pack the four per-sample (x, y) pairs, one byte per sample: */
   uint32_t packed = 0;
   for (int idx = 0; idx < 4; idx++) {
      const uint8_t loc = ctx->sample_locations[idx];

      /* Low nibble is x, high nibble is y; y is flipped (16 - y).  Both
       * are scaled to 1/16th units and clamped to at most 15/16:
       */
      float x = (loc & 0xf) / 16.0f;
      float y = (16 - (loc >> 4)) / 16.0f;

      x = CLAMP(x, 0.0f, 0.9375f);
      y = CLAMP(y, 0.0f, 0.9375f);

      const uint32_t xy = A6XX_RB_SAMPLE_LOCATION_0_SAMPLE_0_X(x) |
                          A6XX_RB_SAMPLE_LOCATION_0_SAMPLE_0_Y(y);

      packed |= xy << (idx * 8);
   }

   OUT_REG(ring, A6XX_GRAS_SAMPLE_CONFIG(.location_enable = true),
           A6XX_GRAS_SAMPLE_LOCATION_0(.dword = packed));

   OUT_REG(ring, A6XX_RB_SAMPLE_CONFIG(.location_enable = true),
           A6XX_RB_SAMPLE_LOCATION_0(.dword = packed));

   OUT_REG(ring, A6XX_SP_TP_SAMPLE_CONFIG(.location_enable = true),
           A6XX_SP_TP_SAMPLE_LOCATION_0(.dword = packed));

   return ring;
}
static void
fd6_emit_streamout(struct fd_ringbuffer *ring, struct fd6_emit *emit) assert_dt
{
@ -603,6 +642,10 @@ fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
state = build_blend_color(emit);
fd6_state_take_group(&emit->state, state, FD6_GROUP_BLEND_COLOR);
break;
case FD6_GROUP_SAMPLE_LOCATIONS:
state = build_sample_locations(emit);
fd6_state_take_group(&emit->state, state, FD6_GROUP_SAMPLE_LOCATIONS);
break;
case FD6_GROUP_VS_BINDLESS:
state = fd6_build_bindless_state<CHIP>(ctx, PIPE_SHADER_VERTEX, false);
fd6_state_take_group(&emit->state, state, FD6_GROUP_VS_BINDLESS);
@ -846,14 +889,11 @@ fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring)
WRITE(REG_A6XX_VPC_UNKNOWN_9211, 0);
WRITE(REG_A6XX_VPC_UNKNOWN_9602, 0);
WRITE(REG_A6XX_PC_UNKNOWN_9E72, 0);
WRITE(REG_A6XX_SP_TP_SAMPLE_CONFIG, 0);
/* NOTE blob seems to (mostly?) use 0xb2 for SP_TP_MODE_CNTL
* but this seems to kill texture gather offsets.
*/
WRITE(REG_A6XX_SP_TP_MODE_CNTL, 0xa0 |
A6XX_SP_TP_MODE_CNTL_ISAMMODE(ISAMMODE_GL));
WRITE(REG_A6XX_RB_SAMPLE_CONFIG, 0);
WRITE(REG_A6XX_GRAS_SAMPLE_CONFIG, 0);
WRITE(REG_A6XX_RB_Z_BOUNDS_MIN, 0);
WRITE(REG_A6XX_RB_Z_BOUNDS_MAX, 0);
OUT_REG(ring, HLSQ_CONTROL_5_REG(

View file

@ -64,6 +64,7 @@ enum fd6_state_id {
FD6_GROUP_BLEND,
FD6_GROUP_SCISSOR,
FD6_GROUP_BLEND_COLOR,
FD6_GROUP_SAMPLE_LOCATIONS,
FD6_GROUP_SO,
FD6_GROUP_VS_BINDLESS,
FD6_GROUP_HS_BINDLESS,

View file

@ -166,9 +166,10 @@ enum fd_dirty_3d_state {
FD_DIRTY_IMAGE = BIT(18),
FD_DIRTY_SSBO = BIT(19),
FD_DIRTY_QUERY = BIT(20),
FD_DIRTY_SAMPLE_LOCATIONS = BIT(21),
/* only used by a2xx.. possibly can be removed.. */
FD_DIRTY_TEXSTATE = BIT(21),
FD_DIRTY_TEXSTATE = BIT(22),
/* fine grained state changes, for cases where state is not orthogonal
* from hw perspective:
@ -487,6 +488,10 @@ struct fd_context {
unsigned sample_mask dt;
unsigned min_samples dt;
/* 1x1 grid, max 4x MSAA: */
uint8_t sample_locations[4] dt;
bool sample_locations_enabled dt;
/* local context fb state, for when ctx->batch is null: */
struct pipe_framebuffer_state framebuffer dt;
uint32_t all_mrt_channel_mask dt;

View file

@ -125,6 +125,14 @@ fd_screen_get_device_vendor(struct pipe_screen *pscreen)
return "Qualcomm";
}
/**
 * pipe_screen::get_sample_pixel_grid hook.
 *
 * Reports a 1x1 pixel grid for every sample count; neither pscreen nor
 * sample_count is consulted.
 *
 * @param pscreen       screen being queried (unused)
 * @param sample_count  sample count being queried (unused)
 * @param out_width     receives the grid width, always 1
 * @param out_height    receives the grid height, always 1
 */
static void
fd_get_sample_pixel_grid(struct pipe_screen *pscreen, unsigned sample_count,
                         unsigned *out_width, unsigned *out_height)
{
   *out_width = *out_height = 1;
}
static uint64_t
fd_screen_get_timestamp(struct pipe_screen *pscreen)
{
@ -314,6 +322,9 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_DEPTH_CLIP_DISABLE_SEPARATE:
return is_a6xx(screen);
case PIPE_CAP_PROGRAMMABLE_SAMPLE_LOCATIONS:
return is_a6xx(screen) && screen->info->a6xx.has_sample_locations;
case PIPE_CAP_POLYGON_OFFSET_CLAMP:
return is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen);
@ -1250,6 +1261,8 @@ fd_screen_create(int fd,
pscreen->get_vendor = fd_screen_get_vendor;
pscreen->get_device_vendor = fd_screen_get_device_vendor;
pscreen->get_sample_pixel_grid = fd_get_sample_pixel_grid;
pscreen->get_timestamp = fd_screen_get_timestamp;
pscreen->fence_reference = _fd_fence_ref;

View file

@ -98,6 +98,25 @@ fd_set_sample_mask(struct pipe_context *pctx, unsigned sample_mask) in_dt
fd_context_dirty(ctx, FD_DIRTY_SAMPLE_MASK);
}
/**
 * pipe_context::set_sample_locations hook.
 *
 * Records the application-provided sample locations in the context and
 * flags FD_DIRTY_SAMPLE_LOCATIONS so the state is re-emitted.
 *
 * @param pctx       the context
 * @param size       number of bytes available at locations; anything
 *                   beyond sizeof(ctx->sample_locations) is ignored
 * @param locations  packed sample locations, or NULL to go back to the
 *                   default locations
 *
 * A NULL pointer or a zero size both disable custom sample locations.
 */
static void
fd_set_sample_locations(struct pipe_context *pctx, size_t size,
                        const uint8_t *locations)
   in_dt
{
   struct fd_context *ctx = fd_context(pctx);

   if (!locations || !size) {
      /* Nothing to copy; the previously stored locations must not be
       * treated as freshly enabled state.
       */
      ctx->sample_locations_enabled = false;
   } else {
      size = MIN2(size, sizeof(ctx->sample_locations));
      memcpy(ctx->sample_locations, locations, size);
      ctx->sample_locations_enabled = true;
   }

   /* Dirty on disable as well, otherwise the enabled state that was last
    * emitted stays in effect until something else dirties this state.
    */
   fd_context_dirty(ctx, FD_DIRTY_SAMPLE_LOCATIONS);
}
static void
fd_set_min_samples(struct pipe_context *pctx, unsigned min_samples) in_dt
{
@ -805,6 +824,7 @@ fd_state_init(struct pipe_context *pctx)
pctx->set_shader_buffers = fd_set_shader_buffers;
pctx->set_shader_images = fd_set_shader_images;
pctx->set_framebuffer_state = fd_set_framebuffer_state;
pctx->set_sample_locations = fd_set_sample_locations;
pctx->set_polygon_stipple = fd_set_polygon_stipple;
pctx->set_scissor_states = fd_set_scissor_states;
pctx->set_viewport_states = fd_set_viewport_states;