diff --git a/src/asahi/lib/agx_device.h b/src/asahi/lib/agx_device.h index f64719d8e8f..79f9291c178 100644 --- a/src/asahi/lib/agx_device.h +++ b/src/asahi/lib/agx_device.h @@ -40,6 +40,7 @@ enum agx_dbg { AGX_DBG_DIRTY = BITFIELD_BIT(3), AGX_DBG_PRECOMPILE = BITFIELD_BIT(4), AGX_DBG_PERF = BITFIELD_BIT(5), + AGX_DBG_NOCOMPRESS = BITFIELD_BIT(6), }; /* How many power-of-two levels in the BO cache do we want? 2^14 minimum chosen diff --git a/src/gallium/drivers/asahi/agx_pipe.c b/src/gallium/drivers/asahi/agx_pipe.c index bef97e67031..54e478f1f20 100644 --- a/src/gallium/drivers/asahi/agx_pipe.c +++ b/src/gallium/drivers/asahi/agx_pipe.c @@ -67,6 +67,10 @@ #define DRM_FORMAT_MOD_APPLE_TWIDDLED (2) #endif +#ifndef DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED +#define DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED (3) +#endif + static const struct debug_named_value agx_debug_options[] = { {"trace", AGX_DBG_TRACE, "Trace the command stream"}, {"deqp", AGX_DBG_DEQP, "Hacks for dEQP"}, @@ -76,10 +80,12 @@ static const struct debug_named_value agx_debug_options[] = { {"dirty", AGX_DBG_DIRTY, "Disable dirty tracking"}, #endif {"precompile",AGX_DBG_PRECOMPILE,"Precompile shaders for shader-db"}, + {"nocompress",AGX_DBG_NOCOMPRESS,"Disable lossless compression"}, DEBUG_NAMED_VALUE_END }; uint64_t agx_best_modifiers[] = { + DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED, DRM_FORMAT_MOD_APPLE_TWIDDLED, DRM_FORMAT_MOD_LINEAR, }; @@ -134,6 +140,21 @@ agx_set_active_query_state(struct pipe_context *pipe, bool enable) * resource */ +static enum ail_tiling +ail_modifier_to_tiling(uint64_t modifier) +{ + switch (modifier) { + case DRM_FORMAT_MOD_LINEAR: + return AIL_TILING_LINEAR; + case DRM_FORMAT_MOD_APPLE_TWIDDLED: + return AIL_TILING_TWIDDLED; + case DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED: + return AIL_TILING_TWIDDLED_COMPRESSED; + default: + unreachable("Unsupported modifier"); + } +} + static void agx_resource_setup(struct agx_device *dev, struct agx_resource *nresource) @@ -141,8 +162,7 @@ agx_resource_setup(struct agx_device *dev, struct pipe_resource *templ = &nresource->base; nresource->layout = (struct ail_layout) { - .tiling = (nresource->modifier == DRM_FORMAT_MOD_LINEAR) ? - AIL_TILING_LINEAR : AIL_TILING_TWIDDLED, + .tiling = ail_modifier_to_tiling(nresource->modifier), .format = templ->format, .width_px = templ->width0, .height_px = templ->height0, @@ -347,13 +367,24 @@ agx_twiddled_allowed(const struct agx_resource *pres) return true; } +static bool +agx_compression_allowed(const struct agx_resource *pres) +{ + /* At this point in the series, compression isn't fully plumbed in */ + return false; +} + static uint64_t agx_select_modifier_from_list(const struct agx_resource *pres, const uint64_t *modifiers, int count) { + if (agx_twiddled_allowed(pres) && agx_compression_allowed(pres) && + drm_find_modifier(DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED, modifiers, count)) + return DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED; + if (agx_twiddled_allowed(pres) && drm_find_modifier(DRM_FORMAT_MOD_APPLE_TWIDDLED, modifiers, count)) - return DRM_FORMAT_MOD_APPLE_TWIDDLED; + return DRM_FORMAT_MOD_APPLE_TWIDDLED; if (agx_linear_allowed(pres) && drm_find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, count)) @@ -366,8 +397,12 @@ agx_select_modifier_from_list(const struct agx_resource *pres, static uint64_t agx_select_best_modifier(const struct agx_resource *pres) { - if (agx_twiddled_allowed(pres)) - return DRM_FORMAT_MOD_APPLE_TWIDDLED; + if (agx_twiddled_allowed(pres)) { + if (agx_compression_allowed(pres)) + return DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED; + else + return DRM_FORMAT_MOD_APPLE_TWIDDLED; + } assert(agx_linear_allowed(pres)); return DRM_FORMAT_MOD_LINEAR; @@ -572,6 +607,90 @@ agx_prepare_for_map(struct agx_context *ctx, agx_flush_readers(ctx, rsrc, "Unsynchronized read"); } + +/* Most of the time we can do CPU-side transfers, but sometimes we need to use + * the 3D pipe for this. Let's wrap u_blitter to blit to/from staging textures. + * Code adapted from panfrost */ + +static struct agx_resource * +agx_alloc_staging(struct agx_context *ctx, struct agx_resource *rsc, + unsigned level, const struct pipe_box *box) +{ + struct pipe_context *pctx = &ctx->base; + struct pipe_resource tmpl = rsc->base; + + tmpl.width0 = box->width; + tmpl.height0 = box->height; + + /* for array textures, box->depth is the array_size, otherwise for 3d + * textures, it is the depth. + */ + if (tmpl.array_size > 1) { + if (tmpl.target == PIPE_TEXTURE_CUBE) + tmpl.target = PIPE_TEXTURE_2D_ARRAY; + tmpl.array_size = box->depth; + tmpl.depth0 = 1; + } else { + tmpl.array_size = 1; + tmpl.depth0 = box->depth; + } + tmpl.last_level = 0; + tmpl.bind |= PIPE_BIND_LINEAR; + + struct pipe_resource *pstaging = + pctx->screen->resource_create(pctx->screen, &tmpl); + if (!pstaging) + return NULL; + + return agx_resource(pstaging); +} + +static enum pipe_format +agx_blit_format(enum pipe_format fmt) +{ + return fmt; +} + +static void +agx_blit_from_staging(struct pipe_context *pctx, struct agx_transfer *trans) +{ + struct pipe_resource *dst = trans->base.resource; + struct pipe_blit_info blit = {0}; + + blit.dst.resource = dst; + blit.dst.format = agx_blit_format(dst->format); + blit.dst.level = trans->base.level; + blit.dst.box = trans->base.box; + blit.src.resource = trans->staging.rsrc; + blit.src.format = agx_blit_format(trans->staging.rsrc->format); + blit.src.level = 0; + blit.src.box = trans->staging.box; + blit.mask = util_format_get_mask(blit.src.format); + blit.filter = PIPE_TEX_FILTER_NEAREST; + + agx_blit(pctx, &blit); +} + +static void +agx_blit_to_staging(struct pipe_context *pctx, struct agx_transfer *trans) +{ + struct pipe_resource *src = trans->base.resource; + struct pipe_blit_info blit = {0}; + + blit.src.resource = src; + blit.src.format = agx_blit_format(src->format); + blit.src.level = trans->base.level; + blit.src.box = trans->base.box; + blit.dst.resource = trans->staging.rsrc; + blit.dst.format = agx_blit_format(trans->staging.rsrc->format); + blit.dst.level = 0; + blit.dst.box = trans->staging.box; + blit.mask = util_format_get_mask(blit.dst.format); + blit.filter = PIPE_TEX_FILTER_NEAREST; + + agx_blit(pctx, &blit); +} + static void * agx_transfer_map(struct pipe_context *pctx, struct pipe_resource *resource, @@ -597,6 +716,36 @@ agx_transfer_map(struct pipe_context *pctx, pipe_resource_reference(&transfer->base.resource, resource); *out_transfer = &transfer->base; + /* For compression, we use a staging blit as we do not implement AGX + * compression in software. In some cases, we could use this path for + * twiddled too, but we don't have a use case for that yet. + */ + if (rsrc->modifier == DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED) { + struct agx_resource *staging = agx_alloc_staging(ctx, rsrc, level, box); + assert(staging); + + /* Staging resources have one LOD: level 0. Query the strides + * on this LOD. + */ + transfer->base.stride = ail_get_linear_stride_B(&staging->layout, 0); + transfer->base.layer_stride = staging->layout.layer_stride_B; + transfer->staging.rsrc = &staging->base; + + transfer->staging.box = *box; + transfer->staging.box.x = 0; + transfer->staging.box.y = 0; + transfer->staging.box.z = 0; + + assert(transfer->staging.rsrc != NULL); + + if ((usage & PIPE_MAP_READ) && BITSET_TEST(rsrc->data_valid, level)) { + agx_blit_to_staging(pctx, transfer); + agx_flush_writer(ctx, staging, "GPU read staging blit"); + } + + return staging->bo->ptr.cpu; + } + if (rsrc->modifier == DRM_FORMAT_MOD_APPLE_TWIDDLED) { transfer->base.stride = util_format_get_stride(rsrc->layout.format, box->width); @@ -652,10 +801,13 @@ agx_transfer_unmap(struct pipe_context *pctx, if (transfer->usage & PIPE_MAP_WRITE) BITSET_SET(rsrc->data_valid, transfer->level); - /* Tiling will occur in software from a staging cpu buffer */ - if ((transfer->usage & PIPE_MAP_WRITE) && - rsrc->modifier == DRM_FORMAT_MOD_APPLE_TWIDDLED) { - assert(trans->map != NULL); + if (trans->staging.rsrc && (transfer->usage & PIPE_MAP_WRITE)) { + agx_blit_from_staging(pctx, trans); + agx_flush_readers(agx_context(pctx), agx_resource(trans->staging.rsrc), + "GPU write staging blit"); + pipe_resource_reference(&trans->staging.rsrc, NULL); + } else if (trans->map && (transfer->usage & PIPE_MAP_WRITE)) { + assert(rsrc->modifier == DRM_FORMAT_MOD_APPLE_TWIDDLED); for (unsigned z = 0; z < transfer->box.depth; ++z) { uint8_t *map = agx_map_texture_cpu(rsrc, transfer->level, @@ -961,6 +1113,7 @@ agx_flush_frontbuffer(struct pipe_screen *_screen, ail_detile(rsrc->bo->ptr.cpu, map, &rsrc->layout, 0, rsrc->dt_stride, 0, 0, rsrc->base.width0, rsrc->base.height0); } else { + assert(rsrc->modifier == DRM_FORMAT_MOD_LINEAR); memcpy(map, rsrc->bo->ptr.cpu, rsrc->dt_stride * rsrc->base.height0); } diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c index a4f803f95e6..df52b971d23 100644 --- a/src/gallium/drivers/asahi/agx_state.c +++ b/src/gallium/drivers/asahi/agx_state.c @@ -418,6 +418,7 @@ agx_translate_layout(enum ail_tiling tiling) { switch (tiling) { case AIL_TILING_TWIDDLED: + case AIL_TILING_TWIDDLED_COMPRESSED: return AGX_LAYOUT_TWIDDLED; case AIL_TILING_LINEAR: return AGX_LAYOUT_LINEAR; @@ -522,6 +523,12 @@ agx_pack_texture(void *out, struct agx_resource *rsrc, cfg.unk_mipmapped = rsrc->mipmapped; cfg.srgb_2_channel = cfg.srgb && util_format_colormask(desc) == 0x3; + if (ail_is_compressed(&rsrc->layout)) { + cfg.compressed_1 = true; + cfg.compressed_2 = true; + cfg.acceleration_buffer = cfg.address + rsrc->layout.metadata_offset_B; + } + if (state->target == PIPE_TEXTURE_3D) { cfg.depth = rsrc->base.depth0; } else { @@ -539,7 +546,9 @@ agx_pack_texture(void *out, struct agx_resource *rsrc, if (rsrc->layout.tiling == AIL_TILING_LINEAR) { cfg.stride = ail_get_linear_stride_B(&rsrc->layout, 0) - 16; } else { - assert(rsrc->layout.tiling == AIL_TILING_TWIDDLED); + assert(rsrc->layout.tiling == AIL_TILING_TWIDDLED || + rsrc->layout.tiling == AIL_TILING_TWIDDLED_COMPRESSED); + cfg.unk_tiled = true; } } @@ -859,6 +868,12 @@ agx_batch_upload_pbe(struct agx_batch *batch, unsigned rt) cfg.buffer = agx_map_texture_gpu(tex, layer); cfg.unk_mipmapped = tex->mipmapped; + if (ail_is_compressed(&tex->layout)) { + cfg.compressed_1 = true; + cfg.compressed_2 = true; + cfg.acceleration_buffer = cfg.buffer + tex->layout.metadata_offset_B; + } + if (tex->base.nr_samples > 1) cfg.samples = agx_translate_sample_count(tex->base.nr_samples);