diff --git a/src/asahi/lib/agx_border.c b/src/asahi/lib/agx_border.c
new file mode 100644
index 00000000000..0cbf474d2ca
--- /dev/null
+++ b/src/asahi/lib/agx_border.c
@@ -0,0 +1,177 @@
+/*
+ * Copyright 2022 Alyssa Rosenzweig
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "util/format/format_utils.h"
+#include "util/format/u_format.h"
+#include "util/half_float.h"
+#include "agx_formats.h"
+#include "agx_pack.h"
+
+/*
+ * AGX allows the sampler descriptor to specify a custom border colour. The
+ * packing depends on the texture format (i.e. no
+ * customBorderColorWithoutFormat).
+ *
+ * Each channel is packed separately into 32-bit words. Pure integers are stored
+ * as-is. Pure floats are extended to 16-bit/32-bit as appropriate. Normalized
+ * formats are encoded as usual, except sRGB gets 4 extra bits.
+ *
+ * The texture descriptor swizzle is applied to the border colour. That swizzle
+ * includes the format swizzle. In effect, we want to encode the border colour
+ * like it would be encoded in memory, and then the swizzles work out
+ * for Vulkan.
+ */
+
+struct channel {
+   enum util_format_type type; /* VOID, UNSIGNED, SIGNED, FLOAT or FIXED */
+   bool normalized;            /* unorm/snorm, as opposed to pure integer */
+   unsigned size;              /* width in bits used for packing */
+};
+
+static struct channel
+get_channel_info(enum pipe_format format, unsigned channel)
+{
+   /* Returns how one channel of `format` is packed in a border colour.
+    * Compressed formats have no PIPE channel layout, so special case them.
+    */
+   switch (format) {
+   case PIPE_FORMAT_ETC2_R11_UNORM:
+   case PIPE_FORMAT_ETC2_RG11_UNORM:
+      return (struct channel){UTIL_FORMAT_TYPE_UNSIGNED, true, 11};
+
+   case PIPE_FORMAT_ETC2_R11_SNORM:
+   case PIPE_FORMAT_ETC2_RG11_SNORM:
+      return (struct channel){UTIL_FORMAT_TYPE_SIGNED, true, 11};
+
+   case PIPE_FORMAT_RGTC1_UNORM:
+   case PIPE_FORMAT_RGTC2_UNORM:
+      return (struct channel){UTIL_FORMAT_TYPE_UNSIGNED, true, 14};
+   case PIPE_FORMAT_RGTC1_SNORM:
+   case PIPE_FORMAT_RGTC2_SNORM:
+      return (struct channel){UTIL_FORMAT_TYPE_SIGNED, true, 14};
+
+   case PIPE_FORMAT_ETC1_RGB8:
+   case PIPE_FORMAT_ETC2_RGB8:
+   case PIPE_FORMAT_ETC2_RGBA8:
+   case PIPE_FORMAT_ETC2_RGB8A1:
+   case PIPE_FORMAT_BPTC_RGBA_UNORM:
+   case PIPE_FORMAT_DXT1_RGB:
+   case PIPE_FORMAT_DXT1_RGBA:
+   case PIPE_FORMAT_DXT3_RGBA:
+   case PIPE_FORMAT_DXT5_RGBA:
+      return (struct channel){UTIL_FORMAT_TYPE_UNSIGNED, true, 8};
+
+   case PIPE_FORMAT_ETC2_SRGB8:
+   case PIPE_FORMAT_ETC2_SRGBA8:
+   case PIPE_FORMAT_ETC2_SRGB8A1:
+   case PIPE_FORMAT_BPTC_SRGBA:
+   case PIPE_FORMAT_DXT1_SRGB:
+   case PIPE_FORMAT_DXT1_SRGBA:
+   case PIPE_FORMAT_DXT3_SRGBA:
+   case PIPE_FORMAT_DXT5_SRGBA:
+      return (struct channel){
+         UTIL_FORMAT_TYPE_UNSIGNED,
+         true,
+         channel == 3 ? 8 : 12, /* channel 3 (presumably alpha) stays 8-bit */
+      };
+
+   case PIPE_FORMAT_BPTC_RGB_FLOAT:
+   case PIPE_FORMAT_BPTC_RGB_UFLOAT:
+      return (struct channel){UTIL_FORMAT_TYPE_FLOAT, false, 16};
+
+   default:
+      assert(
+         !util_format_is_compressed(format) &&
+         "Other compressed formats must be special cased for border colours."
+         "Add more cases if we have a use case");
+
+      break;
+   }
+
+   const struct util_format_description *desc = util_format_description(format);
+   struct util_format_channel_description chan_desc = desc->channel[channel];
+   bool srgb = (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) &&
+               (desc->swizzle[channel] <= PIPE_SWIZZLE_Z);
+
+   if (chan_desc.type == UTIL_FORMAT_TYPE_UNSIGNED ||
+       chan_desc.type == UTIL_FORMAT_TYPE_SIGNED) {
+
+      assert((chan_desc.normalized ^ chan_desc.pure_integer) &&
+             "no SCALED formats supported for texturing");
+   }
+
+   if (srgb && chan_desc.type != UTIL_FORMAT_TYPE_VOID) {
+      assert(chan_desc.normalized && chan_desc.size == 8 &&
+             chan_desc.type == UTIL_FORMAT_TYPE_UNSIGNED &&
+             "only 8-bit unorm supported with sRGB");
+   }
+
+   return (struct channel){
+      .type = chan_desc.type,
+      .normalized = chan_desc.normalized,
+      .size = srgb ? 12 : chan_desc.size, /* sRGB: 8 bits + 4 extra bits */
+   };
+}
+
+static uint32_t
+pack_channel(uint32_t value, enum pipe_format format, unsigned channel)
+{
+   struct channel chan = get_channel_info(format, channel);
+
+   switch (chan.type) {
+   case UTIL_FORMAT_TYPE_VOID: /* no data to encode: emit 0 */
+      return 0;
+
+   case UTIL_FORMAT_TYPE_UNSIGNED: /* unorm from float bits, or pure uint */
+      if (chan.normalized)
+         return _mesa_float_to_unorm(uif(value), chan.size);
+      else
+         return _mesa_unsigned_to_unsigned(value, chan.size);
+
+   case UTIL_FORMAT_TYPE_SIGNED: /* snorm from float bits, or pure sint */
+      if (chan.normalized)
+         return _mesa_float_to_snorm(uif(value), chan.size);
+      else
+         return _mesa_signed_to_signed(value, chan.size);
+
+   case UTIL_FORMAT_TYPE_FLOAT: /* 32-bit as-is, otherwise half float */
+      assert(chan.size == 32 || chan.size <= 16);
+      return chan.size == 32 ?
value : _mesa_float_to_half(uif(value));
+
+   case UTIL_FORMAT_TYPE_FIXED:
+      unreachable("no FIXED textures");
+   }
+
+   unreachable("invalid format type");
+}
+
+void
+agx_pack_border(struct agx_border_packed *out, const uint32_t in[4],
+                enum pipe_format format)
+{
+   assert(format != PIPE_FORMAT_NONE);
+
+   const struct util_format_description *desc = util_format_description(format);
+   uint8_t channel_map[4] = {0}; /* memory channel -> index into in[] */
+
+   /* Invert the format swizzle: channel_map[m] is the index into in[] of the
+    * logical component stored in in-memory channel m. Walking backwards makes
+    * the first of any replicated components win by overwriting; channels that
+    * no component reads keep the initial 0. */
+   for (int i = 3; i >= 0; --i) {
+      static_assert(PIPE_SWIZZLE_X == 0, "known ordering");
+      static_assert(PIPE_SWIZZLE_W == 3, "known ordering");
+
+      if (desc->swizzle[i] <= PIPE_SWIZZLE_W)
+         channel_map[desc->swizzle[i]] = i;
+   }
+
+   agx_pack(out, BORDER, cfg) {
+      cfg.channel_0 = pack_channel(in[channel_map[0]], format, 0);
+      cfg.channel_1 = pack_channel(in[channel_map[1]], format, 1);
+      cfg.channel_2 = pack_channel(in[channel_map[2]], format, 2);
+      cfg.channel_3 = pack_channel(in[channel_map[3]], format, 3);
+   }
+}
diff --git a/src/asahi/lib/agx_formats.h b/src/asahi/lib/agx_formats.h
index b463418b0e6..4944d8ccb73 100644
--- a/src/asahi/lib/agx_formats.h
+++ b/src/asahi/lib/agx_formats.h
@@ -46,4 +46,9 @@ agx_is_valid_pixel_format(enum pipe_format format)
    return ((entry.channels | entry.type) != 0) || entry.renderable;
 }
 
+struct agx_border_packed;
+
+void agx_pack_border(struct agx_border_packed *out, const uint32_t in[4],
+                     enum pipe_format format);
+
 #endif
diff --git a/src/asahi/lib/meson.build b/src/asahi/lib/meson.build
index 4c9e106bc00..054ddd7b3e3 100644
--- a/src/asahi/lib/meson.build
+++ b/src/asahi/lib/meson.build
@@ -29,6 +29,7 @@ endif
 
 libasahi_lib_files = files(
   'agx_bo.c',
+  'agx_border.c',
   agx_device,
   'agx_formats.c',
   'agx_meta.c',
diff --git a/src/gallium/drivers/asahi/agx_pipe.c b/src/gallium/drivers/asahi/agx_pipe.c
index 
2a84b84e565..3efeed61b25 100644
--- a/src/gallium/drivers/asahi/agx_pipe.c
+++ b/src/gallium/drivers/asahi/agx_pipe.c
@@ -1375,6 +1375,9 @@ agx_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_NIR_IMAGES_AS_DEREF:
       return 0;
 
+   case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
+      return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_FREEDRENO;
+
    case PIPE_CAP_SUPPORTED_PRIM_MODES:
    case PIPE_CAP_SUPPORTED_PRIM_MODES_WITH_RESTART:
       return BITFIELD_BIT(PIPE_PRIM_POINTS) | BITFIELD_BIT(PIPE_PRIM_LINES) |
diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c
index 964ded44010..e321875acf7 100644
--- a/src/gallium/drivers/asahi/agx_state.c
+++ b/src/gallium/drivers/asahi/agx_state.c
@@ -44,6 +44,8 @@
 #include "pipe/p_defines.h"
 #include "pipe/p_screen.h"
 #include "pipe/p_state.h"
+#include "util/format_srgb.h"
+#include "util/half_float.h"
 #include "util/u_inlines.h"
 #include "util/u_memory.h"
 #include "util/u_prim.h"
@@ -414,6 +416,29 @@ static const enum agx_compare_func agx_compare_funcs[PIPE_FUNC_ALWAYS + 1] = {
    [PIPE_FUNC_ALWAYS] = AGX_COMPARE_FUNC_ALWAYS,
 };
 
+static enum pipe_format
+fixup_border_zs(enum pipe_format orig, union pipe_color_union *c)
+{
+   switch (orig) { /* returns the format the border colour is packed for */
+   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+   case PIPE_FORMAT_Z24X8_UNORM:
+      /* Z24 is internally promoted to Z32F via transfer_helper. These formats
+       * are normalized so should get clamped, but Z32F does not get clamped, so
+       * we clamp the first component of *c in place here.
+       */
+      c->f[0] = SATURATE(c->f[0]);
+      return PIPE_FORMAT_Z32_FLOAT;
+
+   case PIPE_FORMAT_X24S8_UINT:
+   case PIPE_FORMAT_X32_S8X24_UINT:
+      /* Separate stencil is internally promoted to S8 */
+      return PIPE_FORMAT_S8_UINT;
+
+   default: /* no promotion: keep the caller's format and colour */
+      return orig;
+   }
+}
+
 static void *
 agx_create_sampler_state(struct pipe_context *pctx,
                          const struct pipe_sampler_state *state)
@@ -445,6 +470,20 @@ agx_create_sampler_state(struct pipe_context *pctx,
       cfg.seamful_cube_maps =
          !(agx_device(pctx->screen)->debug & AGX_DBG_DEQP) ||
          !state->seamless_cube_map;
+
+      if (state->border_color_format != PIPE_FORMAT_NONE) {
+         /* TODO: Optimize to use compact descriptors for black/white borders */
+         so->uses_custom_border = true;
+         cfg.border_colour = AGX_BORDER_COLOUR_CUSTOM;
+      }
+   }
+
+   if (so->uses_custom_border) {
+      union pipe_color_union border = state->border_color;
+      enum pipe_format format = /* fixup may clamp the local copy `border` */
+         fixup_border_zs(state->border_color_format, &border);
+
+      agx_pack_border(&so->border, border.ui, format);
    }
 
    return so;
@@ -476,6 +515,14 @@ agx_bind_sampler_states(struct pipe_context *pctx, enum pipe_shader_type shader,
 
    ctx->stage[shader].sampler_count =
       util_last_bit(ctx->stage[shader].valid_samplers);
+
+   /* Recalculate whether we need custom borders */
+   ctx->stage[shader].custom_borders = false;
+
+   u_foreach_bit(i, ctx->stage[shader].valid_samplers) {
+      if (ctx->stage[shader].samplers[i]->uses_custom_border)
+         ctx->stage[shader].custom_borders = true;
+   }
 }
 
 /* Channels agree for RGBA but are weird for force 0/1 */
@@ -1527,15 +1574,18 @@ agx_build_pipeline(struct agx_batch *batch, struct agx_compiled_shader *cs,
    struct agx_context *ctx = batch->ctx;
    unsigned nr_textures = ctx->stage[stage].texture_count;
    unsigned nr_samplers = ctx->stage[stage].sampler_count;
+   bool custom_borders = ctx->stage[stage].custom_borders;
 
    struct agx_ptr T_tex = agx_pool_alloc_aligned(
       &batch->pool, AGX_TEXTURE_LENGTH * nr_textures, 64);
 
-   struct agx_ptr T_samp = agx_pool_alloc_aligned(
-      &batch->pool, AGX_SAMPLER_LENGTH *
nr_samplers, 64); + size_t sampler_length = + AGX_SAMPLER_LENGTH + (custom_borders ? AGX_BORDER_LENGTH : 0); + + struct agx_ptr T_samp = + agx_pool_alloc_aligned(&batch->pool, sampler_length * nr_samplers, 64); struct agx_texture_packed *textures = T_tex.cpu; - struct agx_sampler_packed *samplers = T_samp.cpu; /* TODO: Dirty track me to save some CPU cycles and maybe improve caching */ for (unsigned i = 0; i < nr_textures; ++i) { @@ -1569,13 +1619,25 @@ agx_build_pipeline(struct agx_batch *batch, struct agx_compiled_shader *cs, } /* TODO: Dirty track me to save some CPU cycles and maybe improve caching */ + uint8_t *out_sampler = T_samp.cpu; for (unsigned i = 0; i < nr_samplers; ++i) { struct agx_sampler_state *sampler = ctx->stage[stage].samplers[i]; + struct agx_sampler_packed *out = (struct agx_sampler_packed *)out_sampler; - if (sampler) - samplers[i] = sampler->desc; - else - memset(&samplers[i], 0, sizeof(samplers[i])); + if (sampler) { + *out = sampler->desc; + + if (custom_borders) { + memcpy(out_sampler + AGX_SAMPLER_LENGTH, &sampler->border, + AGX_BORDER_LENGTH); + } else { + assert(!sampler->uses_custom_border && "invalid combination"); + } + } else { + memset(out, 0, sampler_length); + } + + out_sampler += sampler_length; } struct agx_usc_builder b = @@ -1891,8 +1953,8 @@ agx_encode_state(struct agx_batch *batch, uint8_t *out, bool is_lines, cfg.uniform_register_count = ctx->vs->info.push_count; cfg.preshader_register_count = ctx->vs->info.nr_preamble_gprs; cfg.texture_state_register_count = tex_count; - cfg.sampler_state_register_count = - agx_translate_sampler_state_count(tex_count, false); + cfg.sampler_state_register_count = agx_translate_sampler_state_count( + tex_count, ctx->stage[PIPE_SHADER_VERTEX].custom_borders); } out += AGX_VDM_STATE_VERTEX_SHADER_WORD_0_LENGTH; @@ -2067,14 +2129,15 @@ agx_encode_state(struct agx_batch *batch, uint8_t *out, bool is_lines, if (dirty.fragment_shader) { unsigned frag_tex_count = 
ctx->stage[PIPE_SHADER_FRAGMENT].texture_count; + agx_ppp_push(&ppp, FRAGMENT_SHADER, cfg) { cfg.pipeline = agx_build_pipeline(batch, ctx->fs, PIPE_SHADER_FRAGMENT), cfg.uniform_register_count = ctx->fs->info.push_count; cfg.preshader_register_count = ctx->fs->info.nr_preamble_gprs; cfg.texture_state_register_count = frag_tex_count; - cfg.sampler_state_register_count = - agx_translate_sampler_state_count(frag_tex_count, false); + cfg.sampler_state_register_count = agx_translate_sampler_state_count( + frag_tex_count, ctx->stage[PIPE_SHADER_FRAGMENT].custom_borders); cfg.cf_binding_count = ctx->fs->info.varyings.fs.nr_bindings; cfg.cf_bindings = batch->varyings; diff --git a/src/gallium/drivers/asahi/agx_state.h b/src/gallium/drivers/asahi/agx_state.h index 380f9c21a1f..8c6cddcfd72 100644 --- a/src/gallium/drivers/asahi/agx_state.h +++ b/src/gallium/drivers/asahi/agx_state.h @@ -135,6 +135,9 @@ struct agx_stage { struct agx_sampler_state *samplers[PIPE_MAX_SAMPLERS]; struct agx_sampler_view *textures[PIPE_MAX_SHADER_SAMPLER_VIEWS]; + /* Does any bound sampler require custom border colours? */ + bool custom_borders; + unsigned sampler_count, texture_count; uint32_t valid_samplers; }; @@ -341,6 +344,12 @@ struct agx_sampler_state { /* Prepared descriptor */ struct agx_sampler_packed desc; + + /* Whether a custom border colour is required */ + bool uses_custom_border; + + /* Packed custom border colour, or zero if none is required */ + struct agx_border_packed border; }; struct agx_sampler_view {