diff --git a/src/freedreno/ci/freedreno-a630-fails.txt b/src/freedreno/ci/freedreno-a630-fails.txt index 392d92ffcf6..4f3c520155b 100644 --- a/src/freedreno/ci/freedreno-a630-fails.txt +++ b/src/freedreno/ci/freedreno-a630-fails.txt @@ -112,7 +112,6 @@ spec@arb_arrays_of_arrays@execution@sampler@fs-nested-struct-arrays-nonconst-nes # Skips prior to exposing gl45, now fails for same reason as above test spec@arb_gl_spirv@execution@uniform@sampler2d-nonconst-nested-array,Fail -spec@arb_compute_shader@execution@border-color,Fail spec@arb_depth_buffer_float@fbo-clear-formats stencil,Fail spec@arb_depth_buffer_float@fbo-clear-formats stencil@GL_DEPTH32F_STENCIL8,Fail spec@arb_depth_buffer_float@fbo-generatemipmap-formats,Fail @@ -186,9 +185,6 @@ spec@arb_tessellation_shader@execution@tcs-primitiveid,Fail # error: Too many tessellation control shader atomic counters" spec@arb_tessellation_shader@execution@tes-primitiveid,Fail -# https://gitlab.freedesktop.org/mesa/mesa/-/issues/7518 -spec@arb_tessellation_shader@execution@tes-read-texture,Fail - # ir3_nir_lower_tess.c:251: lower_block_to_explicit_output: Assertion `util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1)' failed. spec@arb_tessellation_shader@execution@tcs-input-read-mat,Crash diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_context.c b/src/gallium/drivers/freedreno/a6xx/fd6_context.c index cc736aac8e8..f847c4d535f 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_context.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_context.c @@ -50,9 +50,6 @@ fd6_context_destroy(struct pipe_context *pctx) in_dt { struct fd6_context *fd6_ctx = fd6_context(fd_context(pctx)); - u_upload_destroy(fd6_ctx->border_color_uploader); - pipe_resource_reference(&fd6_ctx->border_color_buf, NULL); - if (fd6_ctx->streamout_disable_stateobj) fd_ringbuffer_del(fd6_ctx->streamout_disable_stateobj); @@ -269,8 +266,5 @@ fd6_context_create(struct pipe_screen *pscreen, void *priv, fd6_blitter_init(pctx); - fd6_ctx->border_color_uploader = - u_upload_create(pctx, 4096, 0, PIPE_USAGE_STREAM, 0); - return fd_context_init_tc(pctx, flags); } diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_context.h b/src/gallium/drivers/freedreno/a6xx/fd6_context.h index 963afbc5b43..be0b00b7911 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_context.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_context.h @@ -67,9 +67,6 @@ struct fd6_context { struct fd_bo *control_mem; uint32_t seqno; - struct u_upload_mgr *border_color_uploader; - struct pipe_resource *border_color_buf; - /* pre-backed stateobj for stream-out disable: */ struct fd_ringbuffer *streamout_disable_stateobj; @@ -82,6 +79,15 @@ struct fd6_context { /* cached stateobjs to avoid hashtable lookup when not dirty: */ const struct fd6_program_state *prog; + /* We expect to see a finite # of unique border-color entry values, + * which are a function of the color value and (to a limited degree) + * the border color format. These unique border-color entry values + * get populated into a global border-color buffer, and a hash-table + * is used to map to the matching entry in the table. + */ + struct hash_table *bcolor_cache; + struct fd_bo *bcolor_mem; + uint16_t tex_seqno; struct hash_table *tex_cache; diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c index 450e91bd4b7..77fddf20539 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c @@ -50,50 +50,6 @@ #include "fd6_texture.h" #include "fd6_zsa.h" -static void -setup_border_colors(struct fd_texture_stateobj *tex, - struct fd6_bcolor_entry *entries, - struct fd_screen *screen) -{ - unsigned i; - - for (i = 0; i < tex->num_samplers; i++) { - struct pipe_sampler_state *sampler = tex->samplers[i]; - - if (!sampler) - continue; - - fd6_setup_border_color(screen, sampler, &entries[i]); - } -} - -static void -emit_border_color(struct fd_context *ctx, struct fd_ringbuffer *ring) assert_dt -{ - struct fd6_context *fd6_ctx = fd6_context(ctx); - struct fd6_bcolor_entry *entries; - unsigned off; - void *ptr; - - STATIC_ASSERT(sizeof(struct fd6_bcolor_entry) == FD6_BORDER_COLOR_SIZE); - - u_upload_alloc(fd6_ctx->border_color_uploader, 0, - FD6_BORDER_COLOR_UPLOAD_SIZE, FD6_BORDER_COLOR_UPLOAD_SIZE, - &off, &fd6_ctx->border_color_buf, &ptr); - - entries = ptr; - - setup_border_colors(&ctx->tex[PIPE_SHADER_VERTEX], &entries[0], ctx->screen); - setup_border_colors(&ctx->tex[PIPE_SHADER_FRAGMENT], - &entries[ctx->tex[PIPE_SHADER_VERTEX].num_samplers], - ctx->screen); - - OUT_PKT4(ring, REG_A6XX_SP_TP_BORDER_COLOR_BASE_ADDR, 2); - OUT_RELOC(ring, fd_resource(fd6_ctx->border_color_buf)->bo, off, 0, 0); - - u_upload_unmap(fd6_ctx->border_color_uploader); -} - static void fd6_emit_fb_tex(struct fd_ringbuffer *state, struct fd_context *ctx) assert_dt { @@ -120,14 +76,12 @@ fd6_emit_fb_tex(struct fd_ringbuffer *state, struct fd_context *ctx) assert_dt OUT_RING(state, 0); } -bool +void fd6_emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, enum pipe_shader_type type, struct fd_texture_stateobj *tex, - unsigned bcolor_offset, /* can be NULL if no image/SSBO/fb state to merge in: */ const struct ir3_shader_variant *v) { - bool needs_border = false; unsigned opcode, tex_samp_reg, tex_const_reg, tex_count_reg; enum a6xx_state_block sb; @@ -188,10 +142,8 @@ fd6_emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, : &dummy_sampler; OUT_RING(state, sampler->texsamp0); OUT_RING(state, sampler->texsamp1); - OUT_RING(state, sampler->texsamp2 | - A6XX_TEX_SAMP_2_BCOLOR(i + bcolor_offset)); + OUT_RING(state, sampler->texsamp2); OUT_RING(state, sampler->texsamp3); - needs_border |= sampler->needs_border; } /* output sampler state: */ @@ -311,8 +263,6 @@ fd6_emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_PKT4(ring, tex_count_reg, 1); OUT_RING(ring, num_merged_textures); - - return needs_border; } /* Emits combined texture state, which also includes any Image/SSBO @@ -324,16 +274,13 @@ fd6_emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, * * TODO Is there some sane way we can still use cached texture stateobj * with image/ssbo in use? - * - * returns whether border_color is required: */ -static bool +static void fd6_emit_combined_textures(struct fd6_emit *emit, enum pipe_shader_type type, const struct ir3_shader_variant *v) assert_dt { struct fd_context *ctx = emit->ctx; - bool needs_border = false; static const struct { enum fd6_state_id state_id; @@ -355,21 +302,12 @@ fd6_emit_combined_textures(struct fd6_emit *emit, * * Also, framebuffer-read is a slow-path because an extra * texture needs to be inserted. - * - * TODO we can probably simmplify things if we also treated - * border_color as a slow-path.. this way the tex state key - * wouldn't depend on bcolor_offset.. but fb_read might rather - * be *somehow* a fast-path if we eventually used it for PLS. - * I suppose there would be no harm in just *always* inserting - * an fb_read texture? */ if ((ctx->dirty_shader[type] & FD_DIRTY_SHADER_TEX) && ctx->tex[type].num_textures > 0) { struct fd6_texture_state *tex = fd6_texture_state(ctx, type, &ctx->tex[type]); - needs_border |= tex->needs_border; - fd6_emit_add_group(emit, tex->stateobj, s[type].state_id, s[type].enable_mask); @@ -386,17 +324,13 @@ fd6_emit_combined_textures(struct fd6_emit *emit, struct fd_texture_stateobj *tex = &ctx->tex[type]; struct fd_ringbuffer *stateobj = fd_submit_new_ringbuffer( ctx->batch->submit, 0x1000, FD_RINGBUFFER_STREAMING); - unsigned bcolor_offset = fd6_border_color_offset(ctx, type, tex); - needs_border |= - fd6_emit_textures(ctx, stateobj, type, tex, bcolor_offset, v); + fd6_emit_textures(ctx, stateobj, type, tex, v); fd6_emit_take_group(emit, stateobj, s[type].state_id, s[type].enable_mask); } } - - return needs_border; } static struct fd_ringbuffer * @@ -876,7 +810,6 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) const struct ir3_shader_variant *ds = emit->ds; const struct ir3_shader_variant *gs = emit->gs; const struct ir3_shader_variant *fs = emit->fs; - bool needs_border = false; emit_marker6(ring, 5); @@ -963,30 +896,25 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) state = fd6_build_tess_consts(emit); break; case FD6_GROUP_VS_TEX: - needs_border |= - fd6_emit_combined_textures(emit, PIPE_SHADER_VERTEX, vs); + fd6_emit_combined_textures(emit, PIPE_SHADER_VERTEX, vs); continue; case FD6_GROUP_HS_TEX: if (hs) { - needs_border |= - fd6_emit_combined_textures(emit, PIPE_SHADER_TESS_CTRL, hs); + fd6_emit_combined_textures(emit, PIPE_SHADER_TESS_CTRL, hs); } continue; case FD6_GROUP_DS_TEX: if (ds) { - needs_border |= - fd6_emit_combined_textures(emit, PIPE_SHADER_TESS_EVAL, ds); + fd6_emit_combined_textures(emit, PIPE_SHADER_TESS_EVAL, ds); } continue; case FD6_GROUP_GS_TEX: if (gs) { - needs_border |= - fd6_emit_combined_textures(emit, PIPE_SHADER_GEOMETRY, gs); + fd6_emit_combined_textures(emit, PIPE_SHADER_GEOMETRY, gs); } continue; case FD6_GROUP_FS_TEX: - needs_border |= - fd6_emit_combined_textures(emit, PIPE_SHADER_FRAGMENT, fs); + fd6_emit_combined_textures(emit, PIPE_SHADER_FRAGMENT, fs); continue; case FD6_GROUP_SO: fd6_emit_streamout(ring, emit); @@ -1001,9 +929,6 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) fd6_emit_take_group(emit, state, group, enable_mask); } - if (needs_border) - emit_border_color(ctx, ring); - if (emit->num_groups > 0) { OUT_PKT7(ring, CP_SET_DRAW_STATE, 3 * emit->num_groups); for (unsigned i = 0; i < emit->num_groups; i++) { @@ -1040,14 +965,8 @@ fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring, if (dirty & (FD_DIRTY_SHADER_TEX | FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_IMAGE | FD_DIRTY_SHADER_SSBO)) { struct fd_texture_stateobj *tex = &ctx->tex[PIPE_SHADER_COMPUTE]; - unsigned bcolor_offset = - fd6_border_color_offset(ctx, PIPE_SHADER_COMPUTE, tex); - bool needs_border = fd6_emit_textures(ctx, ring, PIPE_SHADER_COMPUTE, tex, - bcolor_offset, cp); - - if (needs_border) - emit_border_color(ctx, ring); + fd6_emit_textures(ctx, ring, PIPE_SHADER_COMPUTE, tex, cp); OUT_PKT4(ring, REG_A6XX_SP_VS_TEX_COUNT, 1); OUT_RING(ring, 0); @@ -1228,6 +1147,12 @@ fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring) OUT_WFI5(ring); } + OUT_PKT4(ring, REG_A6XX_SP_TP_BORDER_COLOR_BASE_ADDR, 2); + OUT_RELOC(ring, fd6_context(batch->ctx)->bcolor_mem, 0, 0, 0); + + OUT_PKT4(ring, REG_A6XX_SP_PS_TP_BORDER_COLOR_BASE_ADDR, 2); + OUT_RELOC(ring, fd6_context(batch->ctx)->bcolor_mem, 0, 0, 0); + if (!batch->nondraw) { trace_end_state_restore(&batch->trace, ring); } diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h index 93d879bc4de..f921d751646 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h @@ -277,9 +277,9 @@ fd6_gl2spacing(enum gl_tess_spacing spacing) } } -bool fd6_emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, +void fd6_emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, enum pipe_shader_type type, - struct fd_texture_stateobj *tex, unsigned bcolor_offset, + struct fd_texture_stateobj *tex, const struct ir3_shader_variant *v) assert_dt; void fd6_emit_state(struct fd_ringbuffer *ring, diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_texture.c b/src/gallium/drivers/freedreno/a6xx/fd6_texture.c index 3292f35d6c2..647cf425cbb 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_texture.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_texture.c @@ -89,10 +89,10 @@ tex_filter(unsigned filter, bool aniso) } } -void -fd6_setup_border_color(struct fd_screen *screen, - const struct pipe_sampler_state *sampler, - struct fd6_bcolor_entry *e) +static void +setup_border_color(struct fd_screen *screen, + const struct pipe_sampler_state *sampler, + struct fd6_bcolor_entry *e) { STATIC_ASSERT(sizeof(struct fd6_bcolor_entry) == FD6_BORDER_COLOR_SIZE); const bool has_z24uint_s8uint = screen->info->a6xx.has_z24uint_s8uint; @@ -199,11 +199,54 @@ fd6_setup_border_color(struct fd_screen *screen, e->z24 = f_u * 0xffffff; } } +} -#ifdef DEBUG - memset(&e->__pad0, 0, sizeof(e->__pad0)); - memset(&e->__pad1, 0, sizeof(e->__pad1)); -#endif +static uint32_t +bcolor_key_hash(const void *_key) +{ + const struct fd6_bcolor_entry *key = _key; + return XXH32(key, sizeof(*key), 0); +} + +static bool +bcolor_key_equals(const void *_a, const void *_b) +{ + const struct fd6_bcolor_entry *a = _a; + const struct fd6_bcolor_entry *b = _b; + return memcmp(a, b, sizeof(struct fd6_bcolor_entry)) == 0; +} + +static unsigned +get_bcolor_offset(struct fd_context *ctx, const struct pipe_sampler_state *sampler) +{ + struct fd6_context *fd6_ctx = fd6_context(ctx); + struct fd6_bcolor_entry *entries = fd_bo_map(fd6_ctx->bcolor_mem); + struct fd6_bcolor_entry key = {}; + + setup_border_color(ctx->screen, sampler, &key); + + uint32_t hash = bcolor_key_hash(&key); + + struct hash_entry *entry = + _mesa_hash_table_search_pre_hashed(fd6_ctx->bcolor_cache, hash, &key); + + if (entry) { + return (unsigned)(uintptr_t)entry->data; + } + + unsigned idx = fd6_ctx->bcolor_cache->entries; + + assert(idx < FD6_MAX_BORDER_COLORS); + + if (idx >= FD6_MAX_BORDER_COLORS) + return 0; + + entries[idx] = key; + + _mesa_hash_table_insert_pre_hashed(fd6_ctx->bcolor_cache, hash, + &entries[idx], (void *)(uintptr_t)idx); + + return idx; } static void * @@ -211,6 +254,7 @@ fd6_sampler_state_create(struct pipe_context *pctx, const struct pipe_sampler_state *cso) { struct fd6_sampler_stateobj *so = CALLOC_STRUCT(fd6_sampler_stateobj); + struct fd_context *ctx = fd_context(pctx); unsigned aniso = util_last_bit(MIN2(cso->max_anisotropy >> 1, 8)); bool miplinear = false; @@ -218,20 +262,20 @@ fd6_sampler_state_create(struct pipe_context *pctx, return NULL; so->base = *cso; - so->seqno = ++fd6_context(fd_context(pctx))->tex_seqno; + so->seqno = ++fd6_context(ctx)->tex_seqno; if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR) miplinear = true; - so->needs_border = false; + bool needs_border = false; so->texsamp0 = COND(miplinear, A6XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR) | A6XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter, aniso)) | A6XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter, aniso)) | A6XX_TEX_SAMP_0_ANISO(aniso) | - A6XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s, &so->needs_border)) | - A6XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t, &so->needs_border)) | - A6XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r, &so->needs_border)); + A6XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s, &needs_border)) | + A6XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t, &needs_border)) | + A6XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r, &needs_border)); so->texsamp1 = COND(cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE, @@ -247,6 +291,9 @@ fd6_sampler_state_create(struct pipe_context *pctx, so->texsamp1 |= A6XX_TEX_SAMP_1_COMPARE_FUNC(cso->compare_func); /* maps 1:1 */ + if (needs_border) + so->texsamp2 = A6XX_TEX_SAMP_2_BCOLOR(get_bcolor_offset(ctx, cso)); + return so; } @@ -455,7 +502,6 @@ fd6_texture_state(struct fd_context *ctx, enum pipe_shader_type type, struct fd6_context *fd6_ctx = fd6_context(ctx); struct fd6_texture_state *state = NULL; struct fd6_texture_key key; - bool needs_border = false; memset(&key, 0, sizeof(key)); @@ -483,12 +529,9 @@ fd6_texture_state(struct fd_context *ctx, enum pipe_shader_type type, fd6_sampler_stateobj(tex->samplers[i]); key.samp[i].seqno = sampler->seqno; - - needs_border |= sampler->needs_border; } key.type = type; - key.bcolor_offset = fd6_border_color_offset(ctx, type, tex); uint32_t hash = tex_key_hash(&key); fd_screen_lock(ctx->screen); @@ -506,9 +549,8 @@ fd6_texture_state(struct fd_context *ctx, enum pipe_shader_type type, pipe_reference_init(&state->reference, 2); state->key = key; state->stateobj = fd_ringbuffer_new_object(ctx->pipe, 32 * 4); - state->needs_border = needs_border; - fd6_emit_textures(ctx, state->stateobj, type, tex, key.bcolor_offset, NULL); + fd6_emit_textures(ctx, state->stateobj, type, tex, NULL); /* NOTE: uses copy of key in state obj, because pointer passed by caller * is probably on the stack @@ -572,6 +614,12 @@ fd6_texture_init(struct pipe_context *pctx) disable_thread_safety_analysis ctx->rebind_resource = fd6_rebind_resource; + fd6_ctx->bcolor_cache = + _mesa_hash_table_create(NULL, bcolor_key_hash, bcolor_key_equals); + fd6_ctx->bcolor_mem = fd_bo_new(ctx->screen->dev, + FD6_MAX_BORDER_COLORS * FD6_BORDER_COLOR_SIZE, + 0, "bcolor"); + fd6_ctx->tex_cache = _mesa_hash_table_create(NULL, tex_key_hash, tex_key_equals); } @@ -590,4 +638,6 @@ fd6_texture_fini(struct pipe_context *pctx) fd_screen_unlock(ctx->screen); ralloc_free(fd6_ctx->tex_cache); + fd_bo_del(fd6_ctx->bcolor_mem); + ralloc_free(fd6_ctx->bcolor_cache); } diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_texture.h b/src/gallium/drivers/freedreno/a6xx/fd6_texture.h index 00c53c8b4a5..d1b43a8164a 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_texture.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_texture.h @@ -60,17 +60,11 @@ struct PACKED fd6_bcolor_entry { }; #define FD6_BORDER_COLOR_SIZE sizeof(struct fd6_bcolor_entry) -#define FD6_BORDER_COLOR_UPLOAD_SIZE \ - (2 * PIPE_MAX_SAMPLERS * FD6_BORDER_COLOR_SIZE) - -void fd6_setup_border_color(struct fd_screen *screen, - const struct pipe_sampler_state *sampler, - struct fd6_bcolor_entry *e); +#define FD6_MAX_BORDER_COLORS 128 struct fd6_sampler_stateobj { struct pipe_sampler_state base; uint32_t texsamp0, texsamp1, texsamp2, texsamp3; - bool needs_border; uint16_t seqno; }; @@ -108,36 +102,6 @@ void fd6_sampler_view_update(struct fd_context *ctx, void fd6_texture_init(struct pipe_context *pctx); void fd6_texture_fini(struct pipe_context *pctx); -static inline unsigned -fd6_border_color_offset(struct fd_context *ctx, enum pipe_shader_type type, - struct fd_texture_stateobj *tex) assert_dt -{ - /* Currently we put the FS border-color state after VS. Possibly - * we could swap the order. - * - * This will need update for HS/DS/GS - */ - if (type != PIPE_SHADER_FRAGMENT) - return 0; - - unsigned needs_border = false; - - for (unsigned i = 0; i < tex->num_samplers; i++) { - if (!tex->samplers[i]) - continue; - - struct fd6_sampler_stateobj *sampler = - fd6_sampler_stateobj(tex->samplers[i]); - - needs_border |= sampler->needs_border; - } - - if (!needs_border) - return 0; - - return ctx->tex[PIPE_SHADER_VERTEX].num_samplers; -} - /* * Texture stateobj: * @@ -161,14 +125,12 @@ struct fd6_texture_key { uint16_t seqno; } samp[16]; uint8_t type; - uint8_t bcolor_offset; }; struct fd6_texture_state { struct pipe_reference reference; struct fd6_texture_key key; struct fd_ringbuffer *stateobj; - bool needs_border; }; struct fd6_texture_state *