diff --git a/src/asahi/genxml/cmdbuf.xml b/src/asahi/genxml/cmdbuf.xml
index d4101182be1..3394e098cad 100644
--- a/src/asahi/genxml/cmdbuf.xml
+++ b/src/asahi/genxml/cmdbuf.xml
@@ -204,36 +204,6 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
@@ -279,8 +249,16 @@
-
-
+
+
+
+
+
+
+
diff --git a/src/asahi/lib/agx_nir_lower_texture.c b/src/asahi/lib/agx_nir_lower_texture.c
index 514d712a8ff..8eef8406d1b 100644
--- a/src/asahi/lib/agx_nir_lower_texture.c
+++ b/src/asahi/lib/agx_nir_lower_texture.c
@@ -490,19 +490,16 @@ image_texel_address(nir_builder *b, nir_intrinsic_instr *intr,
(dim == GLSL_SAMPLER_DIM_CUBE) ||
(dim == GLSL_SAMPLER_DIM_3D);
- /* The last 8 bytes of the 24-byte PBE descriptor points to the
- * software-defined atomic descriptor. Grab the address.
- */
- nir_def *meta_meta_ptr = nir_iadd_imm(b, desc_address, 16);
- nir_def *meta_ptr = nir_load_global_constant(b, meta_meta_ptr, 8, 1, 64);
-
+ /* The libagx helpers unpack the PBE descriptor themselves, so hand them
+ * its address directly.
+ */
if (dim == GLSL_SAMPLER_DIM_BUF && return_index) {
return nir_channel(b, coord, 0);
} else if (dim == GLSL_SAMPLER_DIM_BUF) {
- return libagx_buffer_texel_address(b, meta_ptr, coord, blocksize_B);
+ return libagx_buffer_texel_address(b, desc_address, coord, blocksize_B);
} else {
return libagx_image_texel_address(
- b, meta_ptr, coord, nir_u2u32(b, intr->src[2].ssa), blocksize_B,
+ b, desc_address, coord, nir_u2u32(b, intr->src[2].ssa), blocksize_B,
nir_imm_bool(b, dim == GLSL_SAMPLER_DIM_1D),
nir_imm_bool(b, dim == GLSL_SAMPLER_DIM_MS), nir_imm_bool(b, layered),
nir_imm_bool(b, return_index));
diff --git a/src/asahi/lib/shaders/geometry.cl b/src/asahi/lib/shaders/geometry.cl
index 0e79db6a91a..c66708b04e1 100644
--- a/src/asahi/lib/shaders/geometry.cl
+++ b/src/asahi/lib/shaders/geometry.cl
@@ -6,12 +6,6 @@
#include "geometry.h"
-static uint
-align(uint x, uint y)
-{
- return (x + y - 1) & ~(y - 1);
-}
-
/* Compatible with util/u_math.h */
static inline uint
util_logbase2_ceil(uint n)
diff --git a/src/asahi/lib/shaders/libagx.h b/src/asahi/lib/shaders/libagx.h
index 040d6ad57d4..1b25b8f0dae 100644
--- a/src/asahi/lib/shaders/libagx.h
+++ b/src/asahi/lib/shaders/libagx.h
@@ -44,6 +44,15 @@ uint ballot(bool cond);
#define AGX_STATIC_ASSERT(_COND) \
typedef char static_assertion_##__line__[(_COND) ? 1 : -1]
+/* Round x up to the next multiple of y. The mask arithmetic is only correct
+ * when y is a power of two.
+ */
+static inline uint
+align(uint x, uint y)
+{
+ return (x + y - 1) & ~(y - 1);
+}
+
#endif
#endif
diff --git a/src/asahi/lib/shaders/texture.cl b/src/asahi/lib/shaders/texture.cl
index ec32e3c2eeb..2940c5e04e6 100644
--- a/src/asahi/lib/shaders/texture.cl
+++ b/src/asahi/lib/shaders/texture.cl
@@ -113,7 +113,7 @@ libagx_lower_txf_robustness(constant struct agx_texture_packed *ptr,
static uint32_t
calculate_twiddled_coordinates(ushort2 coord, uint16_t tile_w_px,
- uint16_t tile_h_px, uint32_t width_tl)
+ uint16_t tile_h_px, uint32_t aligned_width_px)
{
/* Modulo by the tile width/height to get the offsets within the tile */
ushort2 tile_mask_vec = (ushort2)(tile_w_px - 1, tile_h_px - 1);
@@ -131,7 +131,7 @@ calculate_twiddled_coordinates(ushort2 coord, uint16_t tile_w_px,
* tile height =
- * align_down(y, tile height) * width_tl * tile width
+ * align_down(y, tile height) * aligned_width_px (= width_tl * tile width)
*/
- uint32_t tile_row_start_px = tile_px.y * width_tl * tile_w_px;
+ uint32_t tile_row_start_px = tile_px.y * aligned_width_px;
/* tile column start (px) =
* (x // tile width) * (# of pix/tile) =
@@ -145,12 +145,12 @@ calculate_twiddled_coordinates(ushort2 coord, uint16_t tile_w_px,
}
uint64_t
-libagx_image_texel_address(constant const struct agx_atomic_software_packed *ptr,
+libagx_image_texel_address(constant const struct agx_pbe_packed *ptr,
uint4 coord, uint sample_idx,
uint bytes_per_sample_B, bool is_1d, bool is_msaa,
bool is_layered, bool return_index)
{
- agx_unpack(NULL, ptr, ATOMIC_SOFTWARE, d);
+ agx_unpack(NULL, ptr, PBE, d);
/* We do not allow atomics on linear 2D or linear 2D arrays, as there are no
* known use cases. So we're twiddled in this path, unless we're handling a
@@ -162,30 +162,51 @@ libagx_image_texel_address(constant const struct agx_atomic_software_packed *ptr
if (is_1d) {
total_px = coord.x;
} else {
- total_px =
- calculate_twiddled_coordinates(convert_ushort2(coord.xy), d.tile_width,
- d.tile_height, d.tiles_per_row);
+ uint aligned_width_px;
+ if (is_msaa) {
+ aligned_width_px = d.aligned_width_msaa_sw;
+ } else {
+ uint width_px = max(d.width >> d.level, 1u);
+ aligned_width_px = align(width_px, d.tile_width_sw);
+ }
+
+ total_px = calculate_twiddled_coordinates(
+ convert_ushort2(coord.xy), d.tile_width_sw, d.tile_height_sw,
+ aligned_width_px);
}
- if (is_layered)
- total_px += coord[is_1d ? 1 : 2] * d.layer_stride_pixels;
+ uint samples_log2 = is_msaa ? d.sample_count_log2_sw : 0;
- uint sample_count = is_msaa ? d.sample_count : 1;
- uint total_sa = (total_px * d.sample_count) + sample_idx;
+ if (is_layered) {
+ /* layer_stride_sw is a byte stride: divide by the sample size and the
+ * sample count to get a stride in pixels.
+ */
+ total_px += coord[is_1d ? 1 : 2] *
+ ((d.layer_stride_sw / bytes_per_sample_B) >> samples_log2);
+ }
+
+ uint total_sa = (total_px << samples_log2) + sample_idx;
if (return_index)
return total_sa;
else
- return d.base + (uint64_t)(total_sa * bytes_per_sample_B);
+ /* level_offset_sw is only consulted for non-MSAA images. Widen before
+ * multiplying: uint * uint would wrap at 32 bits before the cast.
+ */
+ return (d.buffer + (is_msaa ? 0 : d.level_offset_sw)) +
+ ((uint64_t)total_sa * bytes_per_sample_B);
}
+/* Return the address of texel coord.x in the buffer described by the PBE
+ * descriptor at ptr, with bytes_per_pixel_B bytes per texel.
+ */
uint64_t
-libagx_buffer_texel_address(
- constant const struct agx_pbe_buffer_software_packed *ptr, uint4 coord,
- uint bytes_per_pixel_B)
+libagx_buffer_texel_address(constant const struct agx_pbe_packed *ptr,
+ uint4 coord, uint bytes_per_pixel_B)
{
- agx_unpack(NULL, ptr, PBE_BUFFER_SOFTWARE, d);
- return d.base + (uint64_t)(coord.x * bytes_per_pixel_B);
+ agx_unpack(NULL, ptr, PBE, d);
+ /* Widen before multiplying so the byte offset cannot wrap at 32 bits. */
+ return d.buffer + ((uint64_t)coord.x * bytes_per_pixel_B);
}
/* Buffer texture lowerings */
diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c
index 1783ed84f5f..c0de9d7dbb8 100644
--- a/src/gallium/drivers/asahi/agx_state.c
+++ b/src/gallium/drivers/asahi/agx_state.c
@@ -1174,41 +1174,6 @@ sampler_view_for_surface(struct pipe_surface *surf)
};
}
-static void
-agx_pack_image_atomic_data(void *packed, struct pipe_image_view *view)
-{
- struct agx_resource *tex = agx_resource(view->resource);
-
- if (tex->base.target == PIPE_BUFFER) {
- agx_pack(packed, PBE_BUFFER_SOFTWARE, cfg) {
- cfg.base = tex->bo->ptr.gpu + view->u.buf.offset;
- }
- } else if (tex->layout.writeable_image) {
- unsigned level = view->u.tex.level;
- unsigned blocksize_B = util_format_get_blocksize(tex->layout.format);
-
- agx_pack(packed, ATOMIC_SOFTWARE, cfg) {
- cfg.base =
- tex->bo->ptr.gpu +
- ail_get_layer_level_B(&tex->layout, view->u.tex.first_layer, level);
-
- cfg.sample_count = MAX2(util_res_sample_count(view->resource), 1);
-
- if (tex->layout.tiling == AIL_TILING_TWIDDLED) {
- struct ail_tile tile_size = tex->layout.tilesize_el[level];
- cfg.tile_width = tile_size.width_el;
- cfg.tile_height = tile_size.height_el;
-
- unsigned width_el = u_minify(tex->base.width0, level);
- cfg.tiles_per_row = DIV_ROUND_UP(width_el, tile_size.width_el);
-
- cfg.layer_stride_pixels = DIV_ROUND_UP(
- tex->layout.layer_stride_B, blocksize_B * cfg.sample_count);
- }
- }
- }
-}
-
static bool
target_is_array(enum pipe_texture_target target)
{
@@ -1355,12 +1320,30 @@ agx_batch_upload_pbe(struct agx_batch *batch, struct agx_pbe_packed *out,
/* When the descriptor isn't extended architecturally, we can use the last
* 8 bytes as a sideband. We use it to provide metadata for image atomics.
*/
- if (!cfg.extended) {
- struct agx_ptr desc =
- agx_pool_alloc_aligned(&batch->pool, AGX_ATOMIC_SOFTWARE_LENGTH, 8);
+ if (!cfg.extended && tex->layout.writeable_image &&
+ tex->base.target != PIPE_BUFFER) {
- agx_pack_image_atomic_data(desc.cpu, view);
- cfg.software_defined = desc.gpu;
+ /* libagx_image_texel_address reads aligned_width_msaa_sw only on its
+ * MSAA path and level_offset_sw only otherwise, so fill in just one.
+ */
+ if (util_res_sample_count(&tex->base) > 1) {
+ cfg.aligned_width_msaa_sw =
+ align(u_minify(view->resource->width0, level),
+ tex->layout.tilesize_el[level].width_el);
+ } else {
+ cfg.level_offset_sw =
+ ail_get_level_offset_B(&tex->layout, cfg.level);
+ }
+
+ cfg.sample_count_log2_sw = util_logbase2(tex->base.nr_samples);
+
+ if (tex->layout.tiling == AIL_TILING_TWIDDLED) {
+ struct ail_tile tile_size = tex->layout.tilesize_el[level];
+ cfg.tile_width_sw = tile_size.width_el;
+ cfg.tile_height_sw = tile_size.height_el;
+
+ cfg.layer_stride_sw = tex->layout.layer_stride_B;
+ }
}
};
}