freedreno/a6xx: Switch to global bcolor buffer

Since we expect a limited # of unique border-color entry states, we can
use a global table of border-color entries, rather than constructing the
state at draw time.  This shifts all the border-color overhead from draw
time to sampler state CSO creation time.  And it's less code!

A hashtable is used to map each unique border-color table value to its
entry, so that multiple uses of what maps to the same table entry all
re-use a single slot in the table.  This puts an upper bound on the # of
unique border-color plus format values.  In practice this shouldn't be a
problem; we'll just size the table to be large enough to not run into
problems with CTS.  Note that the border-color table entry is not
completely format dependent (mostly just integer vs float dependent), so
for example a single color with different float formats can map to a
single table entry.

This also fixes the problem that we completely ignored border-color for
GS/tess stages.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/7518
Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19561>
This commit is contained in:
Rob Clark 2022-11-06 10:32:43 -08:00 committed by Marge Bot
parent 27b2496bae
commit c0fc8d5046
7 changed files with 97 additions and 164 deletions

View file

@ -112,7 +112,6 @@ spec@arb_arrays_of_arrays@execution@sampler@fs-nested-struct-arrays-nonconst-nes
# Skips prior to exposing gl45, now fails for same reason as above test
spec@arb_gl_spirv@execution@uniform@sampler2d-nonconst-nested-array,Fail
spec@arb_compute_shader@execution@border-color,Fail
spec@arb_depth_buffer_float@fbo-clear-formats stencil,Fail
spec@arb_depth_buffer_float@fbo-clear-formats stencil@GL_DEPTH32F_STENCIL8,Fail
spec@arb_depth_buffer_float@fbo-generatemipmap-formats,Fail
@ -186,9 +185,6 @@ spec@arb_tessellation_shader@execution@tcs-primitiveid,Fail
# error: Too many tessellation control shader atomic counters"
spec@arb_tessellation_shader@execution@tes-primitiveid,Fail
# https://gitlab.freedesktop.org/mesa/mesa/-/issues/7518
spec@arb_tessellation_shader@execution@tes-read-texture,Fail
# ir3_nir_lower_tess.c:251: lower_block_to_explicit_output: Assertion `util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1)' failed.
spec@arb_tessellation_shader@execution@tcs-input-read-mat,Crash

View file

@ -50,9 +50,6 @@ fd6_context_destroy(struct pipe_context *pctx) in_dt
{
struct fd6_context *fd6_ctx = fd6_context(fd_context(pctx));
u_upload_destroy(fd6_ctx->border_color_uploader);
pipe_resource_reference(&fd6_ctx->border_color_buf, NULL);
if (fd6_ctx->streamout_disable_stateobj)
fd_ringbuffer_del(fd6_ctx->streamout_disable_stateobj);
@ -269,8 +266,5 @@ fd6_context_create(struct pipe_screen *pscreen, void *priv,
fd6_blitter_init(pctx);
fd6_ctx->border_color_uploader =
u_upload_create(pctx, 4096, 0, PIPE_USAGE_STREAM, 0);
return fd_context_init_tc(pctx, flags);
}

View file

@ -67,9 +67,6 @@ struct fd6_context {
struct fd_bo *control_mem;
uint32_t seqno;
struct u_upload_mgr *border_color_uploader;
struct pipe_resource *border_color_buf;
/* pre-baked stateobj for stream-out disable: */
struct fd_ringbuffer *streamout_disable_stateobj;
@ -82,6 +79,15 @@ struct fd6_context {
/* cached stateobjs to avoid hashtable lookup when not dirty: */
const struct fd6_program_state *prog;
/* We expect to see a finite # of unique border-color entry values,
* which are a function of the color value and (to a limited degree)
* the border color format. These unique border-color entry values
* get populated into a global border-color buffer, and a hash-table
* is used to map to the matching entry in the table.
*/
struct hash_table *bcolor_cache;
struct fd_bo *bcolor_mem;
uint16_t tex_seqno;
struct hash_table *tex_cache;

View file

@ -50,50 +50,6 @@
#include "fd6_texture.h"
#include "fd6_zsa.h"
/* Fill one border-color entry per sampler slot of the given texture state.
 * Slots with no sampler bound are skipped, so the matching entries[] slot
 * is left untouched.
 */
static void
setup_border_colors(struct fd_texture_stateobj *tex,
                    struct fd6_bcolor_entry *entries,
                    struct fd_screen *screen)
{
   for (unsigned slot = 0; slot < tex->num_samplers; slot++) {
      struct pipe_sampler_state *samp = tex->samplers[slot];

      if (samp)
         fd6_setup_border_color(screen, samp, &entries[slot]);
   }
}
/* Allocate a fresh border-color table from the upload manager, fill it
 * from the currently bound VS and FS samplers, and point
 * SP_TP_BORDER_COLOR_BASE_ADDR at it.  The VS entries come first and the
 * FS entries follow directly after them.
 */
static void
emit_border_color(struct fd_context *ctx, struct fd_ringbuffer *ring) assert_dt
{
   struct fd6_context *fd6_ctx = fd6_context(ctx);
   struct fd_texture_stateobj *vs_tex = &ctx->tex[PIPE_SHADER_VERTEX];
   struct fd_texture_stateobj *fs_tex = &ctx->tex[PIPE_SHADER_FRAGMENT];
   unsigned offset;
   void *map;

   STATIC_ASSERT(sizeof(struct fd6_bcolor_entry) == FD6_BORDER_COLOR_SIZE);

   u_upload_alloc(fd6_ctx->border_color_uploader, 0,
                  FD6_BORDER_COLOR_UPLOAD_SIZE, FD6_BORDER_COLOR_UPLOAD_SIZE,
                  &offset, &fd6_ctx->border_color_buf, &map);

   struct fd6_bcolor_entry *table = map;

   setup_border_colors(vs_tex, &table[0], ctx->screen);
   /* FS entries start right after the last VS sampler slot: */
   setup_border_colors(fs_tex, &table[vs_tex->num_samplers], ctx->screen);

   OUT_PKT4(ring, REG_A6XX_SP_TP_BORDER_COLOR_BASE_ADDR, 2);
   OUT_RELOC(ring, fd_resource(fd6_ctx->border_color_buf)->bo, offset, 0, 0);

   u_upload_unmap(fd6_ctx->border_color_uploader);
}
static void
fd6_emit_fb_tex(struct fd_ringbuffer *state, struct fd_context *ctx) assert_dt
{
@ -120,14 +76,12 @@ fd6_emit_fb_tex(struct fd_ringbuffer *state, struct fd_context *ctx) assert_dt
OUT_RING(state, 0);
}
bool
void
fd6_emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
enum pipe_shader_type type, struct fd_texture_stateobj *tex,
unsigned bcolor_offset,
/* can be NULL if no image/SSBO/fb state to merge in: */
const struct ir3_shader_variant *v)
{
bool needs_border = false;
unsigned opcode, tex_samp_reg, tex_const_reg, tex_count_reg;
enum a6xx_state_block sb;
@ -188,10 +142,8 @@ fd6_emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
: &dummy_sampler;
OUT_RING(state, sampler->texsamp0);
OUT_RING(state, sampler->texsamp1);
OUT_RING(state, sampler->texsamp2 |
A6XX_TEX_SAMP_2_BCOLOR(i + bcolor_offset));
OUT_RING(state, sampler->texsamp2);
OUT_RING(state, sampler->texsamp3);
needs_border |= sampler->needs_border;
}
/* output sampler state: */
@ -311,8 +263,6 @@ fd6_emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_PKT4(ring, tex_count_reg, 1);
OUT_RING(ring, num_merged_textures);
return needs_border;
}
/* Emits combined texture state, which also includes any Image/SSBO
@ -324,16 +274,13 @@ fd6_emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
*
* TODO Is there some sane way we can still use cached texture stateobj
* with image/ssbo in use?
*
* returns whether border_color is required:
*/
static bool
static void
fd6_emit_combined_textures(struct fd6_emit *emit,
enum pipe_shader_type type,
const struct ir3_shader_variant *v) assert_dt
{
struct fd_context *ctx = emit->ctx;
bool needs_border = false;
static const struct {
enum fd6_state_id state_id;
@ -355,21 +302,12 @@ fd6_emit_combined_textures(struct fd6_emit *emit,
*
* Also, framebuffer-read is a slow-path because an extra
* texture needs to be inserted.
*
* TODO we can probably simplify things if we also treated
* border_color as a slow-path.. this way the tex state key
* wouldn't depend on bcolor_offset.. but fb_read might rather
* be *somehow* a fast-path if we eventually used it for PLS.
* I suppose there would be no harm in just *always* inserting
* an fb_read texture?
*/
if ((ctx->dirty_shader[type] & FD_DIRTY_SHADER_TEX) &&
ctx->tex[type].num_textures > 0) {
struct fd6_texture_state *tex =
fd6_texture_state(ctx, type, &ctx->tex[type]);
needs_border |= tex->needs_border;
fd6_emit_add_group(emit, tex->stateobj, s[type].state_id,
s[type].enable_mask);
@ -386,17 +324,13 @@ fd6_emit_combined_textures(struct fd6_emit *emit,
struct fd_texture_stateobj *tex = &ctx->tex[type];
struct fd_ringbuffer *stateobj = fd_submit_new_ringbuffer(
ctx->batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);
unsigned bcolor_offset = fd6_border_color_offset(ctx, type, tex);
needs_border |=
fd6_emit_textures(ctx, stateobj, type, tex, bcolor_offset, v);
fd6_emit_textures(ctx, stateobj, type, tex, v);
fd6_emit_take_group(emit, stateobj, s[type].state_id,
s[type].enable_mask);
}
}
return needs_border;
}
static struct fd_ringbuffer *
@ -876,7 +810,6 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
const struct ir3_shader_variant *ds = emit->ds;
const struct ir3_shader_variant *gs = emit->gs;
const struct ir3_shader_variant *fs = emit->fs;
bool needs_border = false;
emit_marker6(ring, 5);
@ -963,30 +896,25 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
state = fd6_build_tess_consts(emit);
break;
case FD6_GROUP_VS_TEX:
needs_border |=
fd6_emit_combined_textures(emit, PIPE_SHADER_VERTEX, vs);
fd6_emit_combined_textures(emit, PIPE_SHADER_VERTEX, vs);
continue;
case FD6_GROUP_HS_TEX:
if (hs) {
needs_border |=
fd6_emit_combined_textures(emit, PIPE_SHADER_TESS_CTRL, hs);
fd6_emit_combined_textures(emit, PIPE_SHADER_TESS_CTRL, hs);
}
continue;
case FD6_GROUP_DS_TEX:
if (ds) {
needs_border |=
fd6_emit_combined_textures(emit, PIPE_SHADER_TESS_EVAL, ds);
fd6_emit_combined_textures(emit, PIPE_SHADER_TESS_EVAL, ds);
}
continue;
case FD6_GROUP_GS_TEX:
if (gs) {
needs_border |=
fd6_emit_combined_textures(emit, PIPE_SHADER_GEOMETRY, gs);
fd6_emit_combined_textures(emit, PIPE_SHADER_GEOMETRY, gs);
}
continue;
case FD6_GROUP_FS_TEX:
needs_border |=
fd6_emit_combined_textures(emit, PIPE_SHADER_FRAGMENT, fs);
fd6_emit_combined_textures(emit, PIPE_SHADER_FRAGMENT, fs);
continue;
case FD6_GROUP_SO:
fd6_emit_streamout(ring, emit);
@ -1001,9 +929,6 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
fd6_emit_take_group(emit, state, group, enable_mask);
}
if (needs_border)
emit_border_color(ctx, ring);
if (emit->num_groups > 0) {
OUT_PKT7(ring, CP_SET_DRAW_STATE, 3 * emit->num_groups);
for (unsigned i = 0; i < emit->num_groups; i++) {
@ -1040,14 +965,8 @@ fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
if (dirty & (FD_DIRTY_SHADER_TEX | FD_DIRTY_SHADER_PROG |
FD_DIRTY_SHADER_IMAGE | FD_DIRTY_SHADER_SSBO)) {
struct fd_texture_stateobj *tex = &ctx->tex[PIPE_SHADER_COMPUTE];
unsigned bcolor_offset =
fd6_border_color_offset(ctx, PIPE_SHADER_COMPUTE, tex);
bool needs_border = fd6_emit_textures(ctx, ring, PIPE_SHADER_COMPUTE, tex,
bcolor_offset, cp);
if (needs_border)
emit_border_color(ctx, ring);
fd6_emit_textures(ctx, ring, PIPE_SHADER_COMPUTE, tex, cp);
OUT_PKT4(ring, REG_A6XX_SP_VS_TEX_COUNT, 1);
OUT_RING(ring, 0);
@ -1228,6 +1147,12 @@ fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring)
OUT_WFI5(ring);
}
OUT_PKT4(ring, REG_A6XX_SP_TP_BORDER_COLOR_BASE_ADDR, 2);
OUT_RELOC(ring, fd6_context(batch->ctx)->bcolor_mem, 0, 0, 0);
OUT_PKT4(ring, REG_A6XX_SP_PS_TP_BORDER_COLOR_BASE_ADDR, 2);
OUT_RELOC(ring, fd6_context(batch->ctx)->bcolor_mem, 0, 0, 0);
if (!batch->nondraw) {
trace_end_state_restore(&batch->trace, ring);
}

View file

@ -277,9 +277,9 @@ fd6_gl2spacing(enum gl_tess_spacing spacing)
}
}
bool fd6_emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
void fd6_emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
enum pipe_shader_type type,
struct fd_texture_stateobj *tex, unsigned bcolor_offset,
struct fd_texture_stateobj *tex,
const struct ir3_shader_variant *v) assert_dt;
void fd6_emit_state(struct fd_ringbuffer *ring,

View file

@ -89,10 +89,10 @@ tex_filter(unsigned filter, bool aniso)
}
}
void
fd6_setup_border_color(struct fd_screen *screen,
const struct pipe_sampler_state *sampler,
struct fd6_bcolor_entry *e)
static void
setup_border_color(struct fd_screen *screen,
const struct pipe_sampler_state *sampler,
struct fd6_bcolor_entry *e)
{
STATIC_ASSERT(sizeof(struct fd6_bcolor_entry) == FD6_BORDER_COLOR_SIZE);
const bool has_z24uint_s8uint = screen->info->a6xx.has_z24uint_s8uint;
@ -199,11 +199,54 @@ fd6_setup_border_color(struct fd_screen *screen,
e->z24 = f_u * 0xffffff;
}
}
}
#ifdef DEBUG
memset(&e->__pad0, 0, sizeof(e->__pad0));
memset(&e->__pad1, 0, sizeof(e->__pad1));
#endif
/* Hash callback for the border-color cache: hashes the raw bytes of the
 * fd6_bcolor_entry key with XXH32, using a seed of zero.
 */
static uint32_t
bcolor_key_hash(const void *_key)
{
   const struct fd6_bcolor_entry *bcolor = _key;
   const uint32_t seed = 0;

   return XXH32(bcolor, sizeof(*bcolor), seed);
}
/* Equality callback for the border-color cache: two keys match when the
 * fd6_bcolor_entry structs they point at are byte-for-byte identical.
 */
static bool
bcolor_key_equals(const void *_a, const void *_b)
{
   const struct fd6_bcolor_entry *lhs = _a;
   const struct fd6_bcolor_entry *rhs = _b;

   return !memcmp(lhs, rhs, sizeof(struct fd6_bcolor_entry));
}
static unsigned
get_bcolor_offset(struct fd_context *ctx, const struct pipe_sampler_state *sampler)
{
struct fd6_context *fd6_ctx = fd6_context(ctx);
struct fd6_bcolor_entry *entries = fd_bo_map(fd6_ctx->bcolor_mem);
struct fd6_bcolor_entry key = {};
setup_border_color(ctx->screen, sampler, &key);
uint32_t hash = bcolor_key_hash(&key);
struct hash_entry *entry =
_mesa_hash_table_search_pre_hashed(fd6_ctx->bcolor_cache, hash, &key);
if (entry) {
return (unsigned)(uintptr_t)entry->data;
}
unsigned idx = fd6_ctx->bcolor_cache->entries;
assert(idx < FD6_MAX_BORDER_COLORS);
if (idx >= FD6_MAX_BORDER_COLORS)
return 0;
entries[idx] = key;
_mesa_hash_table_insert_pre_hashed(fd6_ctx->bcolor_cache, hash,
&entries[idx], (void *)(uintptr_t)idx);
return idx;
}
static void *
@ -211,6 +254,7 @@ fd6_sampler_state_create(struct pipe_context *pctx,
const struct pipe_sampler_state *cso)
{
struct fd6_sampler_stateobj *so = CALLOC_STRUCT(fd6_sampler_stateobj);
struct fd_context *ctx = fd_context(pctx);
unsigned aniso = util_last_bit(MIN2(cso->max_anisotropy >> 1, 8));
bool miplinear = false;
@ -218,20 +262,20 @@ fd6_sampler_state_create(struct pipe_context *pctx,
return NULL;
so->base = *cso;
so->seqno = ++fd6_context(fd_context(pctx))->tex_seqno;
so->seqno = ++fd6_context(ctx)->tex_seqno;
if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
miplinear = true;
so->needs_border = false;
bool needs_border = false;
so->texsamp0 =
COND(miplinear, A6XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR) |
A6XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter, aniso)) |
A6XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter, aniso)) |
A6XX_TEX_SAMP_0_ANISO(aniso) |
A6XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s, &so->needs_border)) |
A6XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t, &so->needs_border)) |
A6XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r, &so->needs_border));
A6XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s, &needs_border)) |
A6XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t, &needs_border)) |
A6XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r, &needs_border));
so->texsamp1 =
COND(cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE,
@ -247,6 +291,9 @@ fd6_sampler_state_create(struct pipe_context *pctx,
so->texsamp1 |=
A6XX_TEX_SAMP_1_COMPARE_FUNC(cso->compare_func); /* maps 1:1 */
if (needs_border)
so->texsamp2 = A6XX_TEX_SAMP_2_BCOLOR(get_bcolor_offset(ctx, cso));
return so;
}
@ -455,7 +502,6 @@ fd6_texture_state(struct fd_context *ctx, enum pipe_shader_type type,
struct fd6_context *fd6_ctx = fd6_context(ctx);
struct fd6_texture_state *state = NULL;
struct fd6_texture_key key;
bool needs_border = false;
memset(&key, 0, sizeof(key));
@ -483,12 +529,9 @@ fd6_texture_state(struct fd_context *ctx, enum pipe_shader_type type,
fd6_sampler_stateobj(tex->samplers[i]);
key.samp[i].seqno = sampler->seqno;
needs_border |= sampler->needs_border;
}
key.type = type;
key.bcolor_offset = fd6_border_color_offset(ctx, type, tex);
uint32_t hash = tex_key_hash(&key);
fd_screen_lock(ctx->screen);
@ -506,9 +549,8 @@ fd6_texture_state(struct fd_context *ctx, enum pipe_shader_type type,
pipe_reference_init(&state->reference, 2);
state->key = key;
state->stateobj = fd_ringbuffer_new_object(ctx->pipe, 32 * 4);
state->needs_border = needs_border;
fd6_emit_textures(ctx, state->stateobj, type, tex, key.bcolor_offset, NULL);
fd6_emit_textures(ctx, state->stateobj, type, tex, NULL);
/* NOTE: uses copy of key in state obj, because pointer passed by caller
* is probably on the stack
@ -572,6 +614,12 @@ fd6_texture_init(struct pipe_context *pctx) disable_thread_safety_analysis
ctx->rebind_resource = fd6_rebind_resource;
fd6_ctx->bcolor_cache =
_mesa_hash_table_create(NULL, bcolor_key_hash, bcolor_key_equals);
fd6_ctx->bcolor_mem = fd_bo_new(ctx->screen->dev,
FD6_MAX_BORDER_COLORS * FD6_BORDER_COLOR_SIZE,
0, "bcolor");
fd6_ctx->tex_cache = _mesa_hash_table_create(NULL, tex_key_hash, tex_key_equals);
}
@ -590,4 +638,6 @@ fd6_texture_fini(struct pipe_context *pctx)
fd_screen_unlock(ctx->screen);
ralloc_free(fd6_ctx->tex_cache);
fd_bo_del(fd6_ctx->bcolor_mem);
ralloc_free(fd6_ctx->bcolor_cache);
}

View file

@ -60,17 +60,11 @@ struct PACKED fd6_bcolor_entry {
};
#define FD6_BORDER_COLOR_SIZE sizeof(struct fd6_bcolor_entry)
#define FD6_BORDER_COLOR_UPLOAD_SIZE \
(2 * PIPE_MAX_SAMPLERS * FD6_BORDER_COLOR_SIZE)
void fd6_setup_border_color(struct fd_screen *screen,
const struct pipe_sampler_state *sampler,
struct fd6_bcolor_entry *e);
#define FD6_MAX_BORDER_COLORS 128
/* Driver-side sampler state object: the gallium sampler state plus the
 * values derived from it in fd6_sampler_state_create().
 */
struct fd6_sampler_stateobj {
/* copy of the pipe_sampler_state this object was created from */
struct pipe_sampler_state base;
/* sampler state dwords, written to the ring in this order by
 * fd6_emit_textures()
 */
uint32_t texsamp0, texsamp1, texsamp2, texsamp3;
/* passed to tex_clamp() for each wrap mode at create time; presumably set
 * when any wrap mode samples the border color -- confirm in tex_clamp()
 */
bool needs_border;
/* taken from the context's tex_seqno counter at create time; identifies
 * this sampler in fd6_texture_key
 */
uint16_t seqno;
};
@ -108,36 +102,6 @@ void fd6_sampler_view_update(struct fd_context *ctx,
void fd6_texture_init(struct pipe_context *pctx);
void fd6_texture_fini(struct pipe_context *pctx);
/* Return the index of the first border-color entry belonging to the given
 * shader stage's samplers.
 *
 * The FS border-color entries are placed after the VS ones (possibly the
 * order could be swapped), so the FS offset is the number of VS samplers.
 * Every other stage, and an FS with no sampler that needs a border color,
 * gets offset zero.
 *
 * This will need update for HS/DS/GS
 */
static inline unsigned
fd6_border_color_offset(struct fd_context *ctx, enum pipe_shader_type type,
                        struct fd_texture_stateobj *tex) assert_dt
{
   if (type != PIPE_SHADER_FRAGMENT)
      return 0;

   for (unsigned slot = 0; slot < tex->num_samplers; slot++) {
      if (!tex->samplers[slot])
         continue;

      /* One border-using sampler is enough to need the FS offset: */
      if (fd6_sampler_stateobj(tex->samplers[slot])->needs_border)
         return ctx->tex[PIPE_SHADER_VERTEX].num_samplers;
   }

   return 0;
}
/*
* Texture stateobj:
*
@ -161,14 +125,12 @@ struct fd6_texture_key {
uint16_t seqno;
} samp[16];
uint8_t type;
uint8_t bcolor_offset;
};
/* Cached texture/sampler state for one shader stage, created and looked up
 * by fd6_texture_state() using an fd6_texture_key.
 */
struct fd6_texture_state {
/* refcount; initialised to 2 when the state is created */
struct pipe_reference reference;
/* copy of the lookup key, kept here because the caller's key is probably
 * on the stack
 */
struct fd6_texture_key key;
/* ringbuffer object that fd6_emit_textures() fills for this state */
struct fd_ringbuffer *stateobj;
/* OR of needs_border over the samplers this state was built from */
bool needs_border;
};
struct fd6_texture_state *