mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 15:38:09 +02:00
asahi: switch to block based compute blitter
This fixes the compute blitter with compression in the general case, and then flips the switch since the compute blitter is faster / less buggy than the traditional path. Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30633>
This commit is contained in:
parent
94cf7de88f
commit
6405153a07
7 changed files with 218 additions and 65 deletions
|
|
@ -50,7 +50,6 @@ static const struct debug_named_value agx_debug_options[] = {
|
|||
#ifndef NDEBUG
|
||||
{"dirty", AGX_DBG_DIRTY, "Disable dirty tracking"},
|
||||
#endif
|
||||
{"compblit", AGX_DBG_COMPBLIT, "Enable compute blitter"},
|
||||
{"precompile",AGX_DBG_PRECOMPILE,"Precompile shaders for shader-db"},
|
||||
{"nocompress",AGX_DBG_NOCOMPRESS,"Disable lossless compression"},
|
||||
{"nocluster", AGX_DBG_NOCLUSTER,"Disable vertex clustering"},
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ enum agx_dbg {
|
|||
AGX_DBG_NOSHADOW = BITFIELD_BIT(16),
|
||||
/* bit 17 unused */
|
||||
AGX_DBG_SCRATCH = BITFIELD_BIT(18),
|
||||
AGX_DBG_COMPBLIT = BITFIELD_BIT(19),
|
||||
/* bit 19 unused */
|
||||
AGX_DBG_FEEDBACK = BITFIELD_BIT(20),
|
||||
AGX_DBG_1QUEUE = BITFIELD_BIT(21),
|
||||
};
|
||||
|
|
|
|||
|
|
@ -20,18 +20,43 @@
|
|||
#include "pipe/p_state.h"
|
||||
#include "util/format/u_format.h"
|
||||
#include "util/format/u_formats.h"
|
||||
#include "util/hash_table.h"
|
||||
#include "util/macros.h"
|
||||
#include "util/ralloc.h"
|
||||
#include "util/u_sampler.h"
|
||||
#include "util/u_surface.h"
|
||||
#include "agx_formats.h"
|
||||
#include "agx_internal_formats.h"
|
||||
#include "agx_state.h"
|
||||
#include "glsl_types.h"
|
||||
#include "nir.h"
|
||||
#include "nir_builder_opcodes.h"
|
||||
#include "shader_enums.h"
|
||||
|
||||
#define BLIT_WG_SIZE 32
|
||||
/* For block based blit kernels, we hardcode the maximum tile size which we can
|
||||
* always achieve. This simplifies our life.
|
||||
*/
|
||||
#define TILE_WIDTH 32
|
||||
#define TILE_HEIGHT 32
|
||||
|
||||
static enum pipe_format
|
||||
effective_format(enum pipe_format format)
|
||||
{
|
||||
switch (format) {
|
||||
case PIPE_FORMAT_Z32_FLOAT:
|
||||
case PIPE_FORMAT_Z24X8_UNORM:
|
||||
return PIPE_FORMAT_R32_FLOAT;
|
||||
case PIPE_FORMAT_Z16_UNORM:
|
||||
return PIPE_FORMAT_R16_UNORM;
|
||||
case PIPE_FORMAT_S8_UINT:
|
||||
return PIPE_FORMAT_R8_UINT;
|
||||
default:
|
||||
return format;
|
||||
}
|
||||
}
|
||||
|
||||
static void *
|
||||
asahi_blit_compute_shader(struct pipe_context *ctx, enum asahi_blit_clamp clamp,
|
||||
bool array)
|
||||
asahi_blit_compute_shader(struct pipe_context *ctx, struct asahi_blit_key *key)
|
||||
{
|
||||
const nir_shader_compiler_options *options =
|
||||
ctx->screen->get_compiler_options(ctx->screen, PIPE_SHADER_IR_NIR,
|
||||
|
|
@ -40,8 +65,8 @@ asahi_blit_compute_shader(struct pipe_context *ctx, enum asahi_blit_clamp clamp,
|
|||
nir_builder b_ =
|
||||
nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options, "blit_cs");
|
||||
nir_builder *b = &b_;
|
||||
b->shader->info.workgroup_size[0] = BLIT_WG_SIZE;
|
||||
b->shader->info.workgroup_size[1] = BLIT_WG_SIZE;
|
||||
b->shader->info.workgroup_size[0] = TILE_WIDTH;
|
||||
b->shader->info.workgroup_size[1] = TILE_HEIGHT;
|
||||
b->shader->info.num_ubos = 1;
|
||||
|
||||
BITSET_SET(b->shader->info.textures_used, 0);
|
||||
|
|
@ -50,51 +75,153 @@ asahi_blit_compute_shader(struct pipe_context *ctx, enum asahi_blit_clamp clamp,
|
|||
|
||||
nir_def *zero = nir_imm_int(b, 0);
|
||||
|
||||
nir_def *params[3];
|
||||
nir_def *params[4];
|
||||
b->shader->num_uniforms = ARRAY_SIZE(params);
|
||||
for (unsigned i = 0; i < b->shader->num_uniforms; ++i) {
|
||||
params[i] = nir_load_ubo(b, 2, 32, zero, nir_imm_int(b, i * 8),
|
||||
.align_mul = 4, .range = ~0);
|
||||
}
|
||||
|
||||
nir_def *ids =
|
||||
nir_trim_vector(b, nir_load_global_invocation_id(b, 32), array ? 3 : 2);
|
||||
nir_def *trans_offs = params[0];
|
||||
nir_def *trans_scale = params[1];
|
||||
nir_def *dst_offs_2d = params[2];
|
||||
nir_def *dimensions_el_2d = params[3];
|
||||
|
||||
nir_def *tex_pos = nir_u2f32(b, ids);
|
||||
nir_def *pos2 =
|
||||
nir_ffma(b, nir_trim_vector(b, tex_pos, 2), params[1], params[0]);
|
||||
if (array) {
|
||||
tex_pos = nir_vector_insert_imm(b, nir_pad_vector(b, pos2, 3),
|
||||
nir_channel(b, tex_pos, 2), 2);
|
||||
} else {
|
||||
tex_pos = pos2;
|
||||
nir_def *phys_id_el_nd = nir_trim_vector(
|
||||
b, nir_load_global_invocation_id(b, 32), key->array ? 3 : 2);
|
||||
nir_def *phys_id_el_2d = nir_trim_vector(b, phys_id_el_nd, 2);
|
||||
nir_def *layer = key->array ? nir_channel(b, phys_id_el_nd, 2) : NULL;
|
||||
|
||||
/* Offset within the tile. We're dispatched for the entire tile but the
|
||||
* beginning might be out-of-bounds, so fix up.
|
||||
*/
|
||||
nir_def *offs_in_tile_el_2d = nir_iand_imm(b, dst_offs_2d, 31);
|
||||
nir_def *logical_id_el_2d = nir_isub(b, phys_id_el_2d, offs_in_tile_el_2d);
|
||||
|
||||
nir_def *image_pos_2d = nir_iadd(b, logical_id_el_2d, dst_offs_2d);
|
||||
nir_def *image_pos_nd = image_pos_2d;
|
||||
if (layer) {
|
||||
image_pos_nd =
|
||||
nir_vector_insert_imm(b, nir_pad_vector(b, image_pos_nd, 3), layer, 2);
|
||||
}
|
||||
|
||||
nir_tex_instr *tex = nir_tex_instr_create(b->shader, 1);
|
||||
tex->dest_type = nir_type_uint32; /* irrelevant */
|
||||
tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
|
||||
tex->is_array = array;
|
||||
tex->op = nir_texop_tex;
|
||||
tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord, tex_pos);
|
||||
tex->backend_flags = AGX_TEXTURE_FLAG_NO_CLAMP;
|
||||
tex->coord_components = array ? 3 : 2;
|
||||
tex->texture_index = 0;
|
||||
tex->sampler_index = 0;
|
||||
nir_def_init(&tex->instr, &tex->def, 4, 32);
|
||||
nir_builder_instr_insert(b, &tex->instr);
|
||||
nir_def *color = &tex->def;
|
||||
nir_def *in_bounds = nir_ige(b, logical_id_el_2d, nir_imm_ivec2(b, 0, 0));
|
||||
in_bounds =
|
||||
nir_iand(b, in_bounds, nir_ilt(b, logical_id_el_2d, dimensions_el_2d));
|
||||
|
||||
nir_def *colour0, *colour1;
|
||||
nir_push_if(b, nir_ball(b, in_bounds));
|
||||
{
|
||||
/* For pixels within the copy area, texture from the source */
|
||||
nir_def *coords_el_2d =
|
||||
nir_ffma(b, nir_u2f32(b, logical_id_el_2d), trans_scale, trans_offs);
|
||||
|
||||
nir_def *coords_el_nd = coords_el_2d;
|
||||
if (layer) {
|
||||
coords_el_nd = nir_vector_insert_imm(
|
||||
b, nir_pad_vector(b, coords_el_nd, 3), nir_u2f32(b, layer), 2);
|
||||
}
|
||||
|
||||
nir_tex_instr *tex = nir_tex_instr_create(b->shader, 1);
|
||||
tex->dest_type = nir_type_uint32; /* irrelevant */
|
||||
tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
|
||||
tex->is_array = key->array;
|
||||
tex->op = nir_texop_tex;
|
||||
tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord, coords_el_nd);
|
||||
tex->backend_flags = AGX_TEXTURE_FLAG_NO_CLAMP;
|
||||
tex->coord_components = coords_el_nd->num_components;
|
||||
tex->texture_index = 0;
|
||||
tex->sampler_index = 0;
|
||||
nir_def_init(&tex->instr, &tex->def, 4, 32);
|
||||
nir_builder_instr_insert(b, &tex->instr);
|
||||
colour0 = &tex->def;
|
||||
}
|
||||
nir_push_else(b, NULL);
|
||||
{
|
||||
/* For out-of-bounds pixels, copy in the destination */
|
||||
colour1 = nir_image_load(
|
||||
b, 4, 32, nir_imm_int(b, 0), nir_pad_vec4(b, image_pos_nd), zero, zero,
|
||||
.image_array = key->array, .image_dim = GLSL_SAMPLER_DIM_2D,
|
||||
.access = ACCESS_IN_BOUNDS_AGX, .dest_type = nir_type_uint32);
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
nir_def *color = nir_if_phi(b, colour0, colour1);
|
||||
|
||||
enum asahi_blit_clamp clamp = ASAHI_BLIT_CLAMP_NONE;
|
||||
bool src_sint = util_format_is_pure_sint(key->src_format);
|
||||
bool dst_sint = util_format_is_pure_sint(key->dst_format);
|
||||
if (util_format_is_pure_integer(key->src_format) &&
|
||||
util_format_is_pure_integer(key->dst_format)) {
|
||||
|
||||
if (src_sint && !dst_sint)
|
||||
clamp = ASAHI_BLIT_CLAMP_SINT_TO_UINT;
|
||||
else if (!src_sint && dst_sint)
|
||||
clamp = ASAHI_BLIT_CLAMP_UINT_TO_SINT;
|
||||
}
|
||||
|
||||
if (clamp == ASAHI_BLIT_CLAMP_SINT_TO_UINT)
|
||||
color = nir_imax(b, color, nir_imm_int(b, 0));
|
||||
else if (clamp == ASAHI_BLIT_CLAMP_UINT_TO_SINT)
|
||||
color = nir_umin(b, color, nir_imm_int(b, INT32_MAX));
|
||||
|
||||
nir_def *image_pos =
|
||||
nir_iadd(b, ids, nir_pad_vector_imm_int(b, params[2], 0, array ? 3 : 2));
|
||||
nir_def *local_offset = nir_imm_intN_t(b, 0, 16);
|
||||
nir_def *lid = nir_trim_vector(b, nir_load_local_invocation_id(b), 2);
|
||||
lid = nir_u2u16(b, lid);
|
||||
|
||||
nir_image_store(b, nir_imm_int(b, 0), nir_pad_vec4(b, image_pos), zero,
|
||||
color, zero, .image_dim = GLSL_SAMPLER_DIM_2D,
|
||||
.access = ACCESS_NON_READABLE, .image_array = array);
|
||||
/* Pure integer formatss need to be clamped in software, at least in some
|
||||
* cases. We do so on store. Piglit gl-3.0-render-integer checks this, as
|
||||
* does KHR-GL33.packed_pixels.*.
|
||||
*
|
||||
* TODO: Make this common code somehow.
|
||||
*/
|
||||
const struct util_format_description *desc =
|
||||
util_format_description(key->dst_format);
|
||||
unsigned c = util_format_get_first_non_void_channel(key->dst_format);
|
||||
|
||||
if (desc->channel[c].size <= 16 &&
|
||||
util_format_is_pure_integer(key->dst_format)) {
|
||||
|
||||
unsigned bits[4] = {
|
||||
desc->channel[0].size ?: desc->channel[0].size,
|
||||
desc->channel[1].size ?: desc->channel[0].size,
|
||||
desc->channel[2].size ?: desc->channel[0].size,
|
||||
desc->channel[3].size ?: desc->channel[0].size,
|
||||
};
|
||||
|
||||
if (util_format_is_pure_sint(key->dst_format))
|
||||
color = nir_format_clamp_sint(b, color, bits);
|
||||
else
|
||||
color = nir_format_clamp_uint(b, color, bits);
|
||||
|
||||
color = nir_u2u16(b, color);
|
||||
}
|
||||
|
||||
/* The source texel has been converted into a 32-bit value. We need to
|
||||
* convert it to a tilebuffer format that can then be converted to the
|
||||
* destination format in the PBE hardware. That's the renderable format for
|
||||
* the destination format, which must exist along this path. This mirrors the
|
||||
* flow of fragment and end-of-tile shaders.
|
||||
*/
|
||||
enum pipe_format tib_format =
|
||||
agx_pixel_format[effective_format(key->dst_format)].renderable;
|
||||
|
||||
nir_store_local_pixel_agx(b, color, nir_imm_int(b, 1), lid, .base = 0,
|
||||
.write_mask = 0xf, .format = tib_format,
|
||||
.explicit_coord = true);
|
||||
|
||||
nir_barrier(b, .execution_scope = SCOPE_WORKGROUP);
|
||||
|
||||
nir_push_if(b, nir_ball(b, nir_ieq_imm(b, lid, 0)));
|
||||
{
|
||||
nir_def *pbe_index = nir_imm_int(b, 2);
|
||||
nir_block_image_store_agx(
|
||||
b, pbe_index, local_offset, image_pos_nd, .format = tib_format,
|
||||
.image_dim = GLSL_SAMPLER_DIM_2D, .image_array = key->array,
|
||||
.explicit_coord = true);
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
b->shader->info.cs.image_block_size_per_thread_agx =
|
||||
util_format_get_blocksize(key->dst_format);
|
||||
|
||||
return pipe_shader_from_nir(ctx, b->shader);
|
||||
}
|
||||
|
|
@ -219,6 +346,8 @@ asahi_compute_blit(struct pipe_context *ctx, const struct pipe_blit_info *info,
|
|||
fui(y_scale),
|
||||
info->dst.box.x,
|
||||
info->dst.box.y,
|
||||
info->dst.box.width,
|
||||
info->dst.box.height,
|
||||
};
|
||||
|
||||
struct pipe_constant_buffer cb = {
|
||||
|
|
@ -273,32 +402,51 @@ asahi_compute_blit(struct pipe_context *ctx, const struct pipe_blit_info *info,
|
|||
src_view = ctx->create_sampler_view(ctx, src, &src_templ);
|
||||
ctx->set_sampler_views(ctx, PIPE_SHADER_COMPUTE, 0, 1, 0, true, &src_view);
|
||||
|
||||
enum asahi_blit_clamp clamp = ASAHI_BLIT_CLAMP_NONE;
|
||||
bool src_sint = util_format_is_pure_sint(info->src.format);
|
||||
bool dst_sint = util_format_is_pure_sint(info->dst.format);
|
||||
if (util_format_is_pure_integer(info->src.format) &&
|
||||
util_format_is_pure_integer(info->dst.format)) {
|
||||
struct asahi_blit_key key = {
|
||||
.src_format = info->src.format,
|
||||
.dst_format = info->dst.format,
|
||||
.array = array,
|
||||
};
|
||||
struct hash_entry *ent = _mesa_hash_table_search(blitter->blit_cs, &key);
|
||||
void *cs = NULL;
|
||||
|
||||
if (src_sint && !dst_sint)
|
||||
clamp = ASAHI_BLIT_CLAMP_SINT_TO_UINT;
|
||||
else if (!src_sint && dst_sint)
|
||||
clamp = ASAHI_BLIT_CLAMP_UINT_TO_SINT;
|
||||
if (ent) {
|
||||
cs = ent->data;
|
||||
} else {
|
||||
cs = asahi_blit_compute_shader(ctx, &key);
|
||||
_mesa_hash_table_insert(
|
||||
blitter->blit_cs, ralloc_memdup(blitter->blit_cs, &key, sizeof(key)),
|
||||
cs);
|
||||
}
|
||||
|
||||
if (!blitter->blit_cs[clamp][array]) {
|
||||
blitter->blit_cs[clamp][array] =
|
||||
asahi_blit_compute_shader(ctx, clamp, array);
|
||||
}
|
||||
assert(cs != NULL);
|
||||
ctx->bind_compute_state(ctx, cs);
|
||||
|
||||
ctx->bind_compute_state(ctx, blitter->blit_cs[clamp][array]);
|
||||
/* Expand the grid so destinations are in tiles */
|
||||
unsigned expanded_x0 = info->dst.box.x & ~(TILE_WIDTH - 1);
|
||||
unsigned expanded_y0 = info->dst.box.y & ~(TILE_HEIGHT - 1);
|
||||
unsigned expanded_x1 =
|
||||
align(info->dst.box.x + info->dst.box.width, TILE_WIDTH);
|
||||
unsigned expanded_y1 =
|
||||
align(info->dst.box.y + info->dst.box.height, TILE_HEIGHT);
|
||||
|
||||
/* But clamp to the destination size to save some redundant threads */
|
||||
expanded_x1 =
|
||||
MIN2(expanded_x1, u_minify(info->dst.resource->width0, info->dst.level));
|
||||
expanded_y1 =
|
||||
MIN2(expanded_y1, u_minify(info->dst.resource->height0, info->dst.level));
|
||||
|
||||
/* Recalculate the width/height based on the expanded grid */
|
||||
width = expanded_x1 - expanded_x0;
|
||||
height = expanded_y1 - expanded_y0;
|
||||
|
||||
struct pipe_grid_info grid_info = {
|
||||
.block = {BLIT_WG_SIZE, BLIT_WG_SIZE, 1},
|
||||
.last_block = {width % BLIT_WG_SIZE, height % BLIT_WG_SIZE, 1},
|
||||
.block = {TILE_WIDTH, TILE_HEIGHT, 1},
|
||||
.last_block = {width % TILE_WIDTH, height % TILE_HEIGHT, 1},
|
||||
.grid =
|
||||
{
|
||||
DIV_ROUND_UP(width, BLIT_WG_SIZE),
|
||||
DIV_ROUND_UP(height, BLIT_WG_SIZE),
|
||||
DIV_ROUND_UP(width, TILE_WIDTH),
|
||||
DIV_ROUND_UP(height, TILE_HEIGHT),
|
||||
depth,
|
||||
},
|
||||
};
|
||||
|
|
@ -378,11 +526,7 @@ agx_blit(struct pipe_context *pipe, const struct pipe_blit_info *info)
|
|||
agx_legalize_compression(ctx, agx_resource(info->src.resource),
|
||||
info->src.format);
|
||||
|
||||
if (asahi_compute_blit_supported(info) &&
|
||||
(agx_device(pipe->screen)->debug & AGX_DBG_COMPBLIT) &&
|
||||
!(ail_is_compressed(&agx_resource(info->dst.resource)->layout) &&
|
||||
util_format_get_blocksize(info->dst.format) == 16)) {
|
||||
|
||||
if (asahi_compute_blit_supported(info)) {
|
||||
asahi_compute_blit(pipe, info, &ctx->compute_blitter);
|
||||
return;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@
|
|||
#include "agx_state.h"
|
||||
|
||||
/* Flags that are allowed and do not disable the disk cache */
|
||||
#define ALLOWED_FLAGS (AGX_DBG_NO16 | AGX_DBG_COMPBLIT)
|
||||
#define ALLOWED_FLAGS (AGX_DBG_NO16)
|
||||
|
||||
/**
|
||||
* Compute a disk cache key for the given uncompiled shader and shader key.
|
||||
|
|
|
|||
|
|
@ -1919,6 +1919,7 @@ agx_create_context(struct pipe_screen *screen, void *priv, unsigned flags)
|
|||
agx_init_meta_shaders(ctx);
|
||||
|
||||
ctx->blitter = util_blitter_create(pctx);
|
||||
ctx->compute_blitter.blit_cs = asahi_blit_key_table_create(ctx);
|
||||
|
||||
ctx->result_buf =
|
||||
agx_bo_create(agx_device(screen),
|
||||
|
|
|
|||
|
|
@ -2767,9 +2767,13 @@ agx_upload_textures(struct agx_batch *batch, struct agx_compiled_shader *cs,
|
|||
struct pipe_sampler_view sampler_view = util_image_to_sampler_view(view);
|
||||
|
||||
/* For the texture descriptor, lower cubes to 2D arrays. This matches the
|
||||
* transform done in the compiler.
|
||||
* transform done in the compiler. Also, force 2D arrays for internal
|
||||
* blitter images, this helps reduce shader variants.
|
||||
*/
|
||||
if (target_is_cube(sampler_view.target))
|
||||
bool internal = (view->access & PIPE_IMAGE_ACCESS_DRIVER_INTERNAL);
|
||||
|
||||
if (target_is_cube(sampler_view.target) ||
|
||||
(sampler_view.target == PIPE_TEXTURE_3D && internal))
|
||||
sampler_view.target = PIPE_TEXTURE_2D_ARRAY;
|
||||
|
||||
agx_pack_texture(texture, agx_resource(view->resource), view->format,
|
||||
|
|
|
|||
|
|
@ -575,11 +575,16 @@ enum asahi_blit_clamp {
|
|||
ASAHI_BLIT_CLAMP_COUNT,
|
||||
};
|
||||
|
||||
struct asahi_blit_key {
|
||||
enum pipe_format src_format, dst_format;
|
||||
bool array;
|
||||
};
|
||||
|
||||
DERIVE_HASH_TABLE(asahi_blit_key);
|
||||
|
||||
struct asahi_blitter {
|
||||
bool active;
|
||||
|
||||
/* [clamp_type][is_array] */
|
||||
void *blit_cs[ASAHI_BLIT_CLAMP_COUNT][2];
|
||||
struct hash_table *blit_cs;
|
||||
|
||||
/* [filter] */
|
||||
void *sampler[2];
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue