hk: rework meta for compression
Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30981>
This commit is contained in:
parent 50527475be
commit a76a2edfde

1 changed file with 169 additions and 71 deletions
@@ -17,12 +17,19 @@
 #include "layout.h"
 #include "nir_builder.h"
 #include "nir_builder_opcodes.h"
 #include "nir_format_convert.h"
 #include "shader_enums.h"
 #include "vk_format.h"
 #include "vk_meta.h"
 #include "vk_pipeline.h"
+
+/* For block based blit kernels, we hardcode the maximum tile size which we can
+ * always achieve. This simplifies our life.
+ */
+#define TILE_WIDTH 32
+#define TILE_HEIGHT 32
 
 static VkResult
 hk_cmd_bind_map_buffer(struct vk_command_buffer *vk_cmd,
                        struct vk_meta_device *meta, VkBuffer _buffer,
@@ -250,9 +257,9 @@ aspect_format(VkFormat fmt, VkImageAspectFlags aspect)
  * the unfortunate exception).
  */
 static enum pipe_format
-canonical_format_pipe(enum pipe_format fmt)
+canonical_format_pipe(enum pipe_format fmt, bool canonicalize_zs)
 {
-   if (util_format_is_depth_or_stencil(fmt))
+   if (!canonicalize_zs && util_format_is_depth_or_stencil(fmt))
       return fmt;
 
    assert(ail_is_valid_pixel_format(fmt));
@@ -269,7 +276,7 @@ canonical_format_pipe(enum pipe_format fmt)
    /* clang-format off */
    static enum pipe_format map[] = {
       CASE(R8, R8_UINT),
-      CASE(R16, R16_UINT),
+      CASE(R16, R16_UNORM /* XXX: Hack for Z16 copies */),
       CASE(R8G8, R8G8_UINT),
       CASE(R5G6B5, R5G6B5_UNORM),
       CASE(R4G4B4A4, R4G4B4A4_UNORM),
@@ -298,7 +305,7 @@ static VkFormat
 canonical_format(VkFormat fmt)
 {
    return vk_format_from_pipe_format(
-      canonical_format_pipe(vk_format_to_pipe_format(fmt)));
+      canonical_format_pipe(vk_format_to_pipe_format(fmt), false));
 }
 
 enum copy_type {
@@ -328,6 +335,7 @@ struct vk_meta_image_copy_key {
    enum pipe_format src_format, dst_format;
    unsigned block_size;
    unsigned nr_samples;
+   bool block_based;
 };
 
 static nir_def *
@@ -358,8 +366,8 @@ build_image_copy_shader(const struct vk_meta_image_copy_key *key)
       nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "vk-meta-copy");
 
    nir_builder *b = &build;
-   b->shader->info.workgroup_size[0] = 32;
-   b->shader->info.workgroup_size[1] = 32;
+   b->shader->info.workgroup_size[0] = TILE_WIDTH;
+   b->shader->info.workgroup_size[1] = TILE_HEIGHT;
 
    bool src_is_buf = key->type == BUF2IMG;
    bool dst_is_buf = key->type == IMG2BUF;
@@ -397,46 +405,119 @@ build_image_copy_shader(const struct vk_meta_image_copy_key *key)
       b, 3, 32,
       nir_imm_int(b, offsetof(struct vk_meta_push_data, dst_offset_el)));
 
-   nir_def *grid_el = nir_load_push_constant(
-      b, 3, 32, nir_imm_int(b, offsetof(struct vk_meta_push_data, grid_el)));
+   nir_def *grid_2d_el = nir_load_push_constant(
+      b, 2, 32, nir_imm_int(b, offsetof(struct vk_meta_push_data, grid_el)));
 
    /* We're done setting up variables, do the copy */
    nir_def *coord = nir_load_global_invocation_id(b, 32);
 
-   nir_push_if(b,
-               nir_ball(b, nir_trim_vector(b, nir_ult(b, coord, grid_el), 2)));
-   {
-      nir_def *src_coord = nir_iadd(b, coord, src_offset_el);
-      nir_def *dst_coord = nir_iadd(b, coord, dst_offset_el);
+   /* The destination format is already canonical, convert to an ISA format */
+   enum pipe_format isa_format;
+   if (key->block_based) {
+      isa_format =
+         ail_pixel_format[canonical_format_pipe(key->dst_format, true)]
+            .renderable;
+      assert(isa_format != PIPE_FORMAT_NONE);
+   }
 
-      /* Special case handle buffer indexing */
-      if (dst_is_buf) {
-         dst_coord = linearize_coords(b, coord, key);
-      } else if (src_is_buf) {
-         src_coord = linearize_coords(b, coord, key);
-      }
+   nir_def *local_offset = nir_imm_intN_t(b, 0, 16);
+   nir_def *lid = nir_trim_vector(b, nir_load_local_invocation_id(b), 2);
+   lid = nir_u2u16(b, lid);
 
-      /* Copy formatted texel from texture to storage image */
-      for (unsigned s = 0; s < key->nr_samples; ++s) {
+   nir_def *src_coord = src_is_buf ? coord : nir_iadd(b, coord, src_offset_el);
+   nir_def *dst_coord = dst_is_buf ? coord : nir_iadd(b, coord, dst_offset_el);
+
+   nir_def *image_deref = &nir_build_deref_var(b, image)->def;
+
+   nir_def *coord_2d_el = nir_trim_vector(b, coord, 2);
+   nir_def *in_bounds;
+   if (key->block_based) {
+      nir_def *offset_in_block_el =
+         nir_umod_imm(b, nir_trim_vector(b, dst_offset_el, 2), TILE_WIDTH);
+
+      dst_coord =
+         nir_vector_insert_imm(b, nir_isub(b, dst_coord, offset_in_block_el),
+                               nir_channel(b, dst_coord, 2), 2);
+
+      src_coord =
+         nir_vector_insert_imm(b, nir_isub(b, src_coord, offset_in_block_el),
+                               nir_channel(b, src_coord, 2), 2);
+
+      in_bounds = nir_uge(b, coord_2d_el, offset_in_block_el);
+      in_bounds = nir_iand(
+         b, in_bounds,
+         nir_ult(b, coord_2d_el, nir_iadd(b, offset_in_block_el, grid_2d_el)));
+   } else {
+      in_bounds = nir_ult(b, coord_2d_el, grid_2d_el);
+   }
+
+   /* Special case handle buffer indexing */
+   if (dst_is_buf) {
+      assert(!key->block_based);
+      dst_coord = linearize_coords(b, dst_coord, key);
+   } else if (src_is_buf) {
+      src_coord = linearize_coords(b, src_coord, key);
+   }
+
+   for (unsigned s = 0; s < key->nr_samples; ++s) {
+      nir_def *ms_index = nir_imm_int(b, s);
+      nir_def *value1, *value2;
+
+      nir_push_if(b, nir_ball(b, in_bounds));
+      {
+         /* Copy formatted texel from texture to storage image */
          nir_deref_instr *deref = nir_build_deref_var(b, texture);
-         nir_def *ms_index = nir_imm_int(b, s);
 
-         nir_def *value = msaa ? nir_txf_ms_deref(b, deref, src_coord, ms_index)
-                               : nir_txf_deref(b, deref, src_coord, NULL);
+         value1 = msaa ? nir_txf_ms_deref(b, deref, src_coord, ms_index)
+                       : nir_txf_deref(b, deref, src_coord, NULL);
 
          /* Munge according to the implicit conversions so we get a bit copy */
         if (key->src_format != key->dst_format) {
-            nir_def *packed = nir_format_pack_rgba(b, key->src_format, value);
-            value = nir_format_unpack_rgba(b, packed, key->dst_format);
+            nir_def *packed = nir_format_pack_rgba(b, key->src_format, value1);
+            value1 = nir_format_unpack_rgba(b, packed, key->dst_format);
          }
 
-         nir_image_deref_store(b, &nir_build_deref_var(b, image)->def,
-                               nir_pad_vec4(b, dst_coord), ms_index, value,
-                               nir_imm_int(b, 0), .image_dim = dim_dst,
-                               .image_array = !dst_is_buf);
+         if (!key->block_based) {
+            nir_image_deref_store(b, image_deref, nir_pad_vec4(b, dst_coord),
+                                  ms_index, value1, nir_imm_int(b, 0),
+                                  .image_dim = dim_dst,
+                                  .image_array = !dst_is_buf);
+         }
       }
+      nir_push_else(b, NULL);
+      if (key->block_based) {
+         /* Copy back the existing destination content */
+         value2 = nir_image_deref_load(b, 4, 32, image_deref,
+                                       nir_pad_vec4(b, dst_coord), ms_index,
+                                       nir_imm_int(b, 0), .image_dim = dim_dst,
+                                       .image_array = !dst_is_buf);
+      }
+      nir_pop_if(b, NULL);
+
+      if (key->block_based) {
+         nir_store_local_pixel_agx(b, nir_if_phi(b, value1, value2),
+                                   nir_imm_int(b, 1 << s), lid, .base = 0,
+                                   .write_mask = 0xf, .format = isa_format,
+                                   .explicit_coord = true);
+      }
    }
-   nir_pop_if(b, NULL);
 
+   if (key->block_based) {
+      assert(!dst_is_buf);
+
+      nir_barrier(b, .execution_scope = SCOPE_WORKGROUP);
+
+      nir_push_if(b, nir_ball(b, nir_ieq_imm(b, lid, 0)));
+      {
+         nir_image_deref_store_block_agx(
+            b, image_deref, local_offset, dst_coord, .format = isa_format,
+            .image_dim = dim_2d, .image_array = true, .explicit_coord = true);
+      }
+      nir_pop_if(b, NULL);
+      b->shader->info.cs.image_block_size_per_thread_agx =
+         util_format_get_blocksize(key->dst_format);
+   }
+
    return b->shader;
 }
 
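Annotation (not part of the commit): in the block-based path above, each thread of the 32x32 workgroup either fetches a fresh source texel (when its pixel is inside the copy region) or reloads the texel already present in the destination (when it is not), and the two values are merged with nir_if_phi before nir_store_local_pixel_agx writes the tile, so partially covered tiles are not clobbered. A minimal, hypothetical sketch of that nir_builder if/else/phi idiom, with immediates standing in for the real texel loads:

/* Sketch only: illustrates the nir_push_if / nir_push_else / nir_if_phi
 * pattern used above. The immediates stand in for the fetched source texel
 * and the reloaded destination texel. */
#include "nir_builder.h"

static nir_def *
select_with_phi(nir_builder *b, nir_def *in_bounds)
{
   nir_def *value1, *value2;

   nir_push_if(b, in_bounds);
   {
      /* then-block: e.g. the freshly copied source texel */
      value1 = nir_imm_int(b, 1);
   }
   nir_push_else(b, NULL);
   {
      /* else-block: e.g. the existing destination texel */
      value2 = nir_imm_int(b, 2);
   }
   nir_pop_if(b, NULL);

   /* Merge the per-branch values into a single SSA def after the if */
   return nir_if_phi(b, value1, value2);
}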
@@ -765,6 +846,42 @@ hk_meta_copy_image_to_buffer2(struct vk_command_buffer *cmd,
    }
 }
 
+static void
+hk_meta_dispatch_to_image(struct vk_command_buffer *cmd,
+                          const struct vk_device_dispatch_table *disp,
+                          VkPipelineLayout pipeline_layout,
+                          struct vk_meta_push_data *push, VkOffset3D offset,
+                          VkExtent3D extent, bool per_layer, unsigned layers,
+                          enum pipe_format p_dst_fmt, enum pipe_format p_format)
+{
+   push->dst_offset_el[0] = util_format_get_nblocksx(p_dst_fmt, offset.x);
+   push->dst_offset_el[1] = util_format_get_nblocksy(p_dst_fmt, offset.y);
+   push->dst_offset_el[2] = 0;
+
+   push->grid_el[0] = util_format_get_nblocksx(p_format, extent.width);
+   push->grid_el[1] = util_format_get_nblocksy(p_format, extent.height);
+   push->grid_el[2] = per_layer ? 1 : layers;
+
+   unsigned w_el = util_format_get_nblocksx(p_format, extent.width);
+   unsigned h_el = util_format_get_nblocksy(p_format, extent.height);
+
+   /* Expand the grid so destinations are in tiles */
+   unsigned expanded_x0 = push->dst_offset_el[0] & ~(TILE_WIDTH - 1);
+   unsigned expanded_y0 = push->dst_offset_el[1] & ~(TILE_HEIGHT - 1);
+   unsigned expanded_x1 = align(push->dst_offset_el[0] + w_el, TILE_WIDTH);
+   unsigned expanded_y1 = align(push->dst_offset_el[1] + h_el, TILE_HEIGHT);
+
+   /* TODO: clamp to the destination size to save some redundant threads? */
+
+   disp->CmdPushConstants(vk_command_buffer_to_handle(cmd), pipeline_layout,
+                          VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(*push), push);
+
+   disp->CmdDispatch(vk_command_buffer_to_handle(cmd),
+                     (expanded_x1 - expanded_x0) / TILE_WIDTH,
+                     (expanded_y1 - expanded_y0) / TILE_HEIGHT,
+                     push->grid_el[2]);
+}
+
 static void
 hk_meta_copy_buffer_to_image2(struct vk_command_buffer *cmd,
                               struct vk_meta_device *meta,
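Annotation (not part of the commit): hk_meta_dispatch_to_image rounds the destination rectangle out to whole 32x32 tiles so each workgroup covers exactly one tile. A standalone sketch of the same arithmetic with made-up values (the offset, extent, and ALIGN_POT macro here are illustrative, not taken from the diff):

/* Illustration only: mirrors the tile expansion in hk_meta_dispatch_to_image.
 * A destination rectangle at (40, 10) of size 100x20 texels expands to
 * x in [32, 160), y in [0, 32), i.e. 4x1 workgroups of 32x32 threads. */
#include <stdio.h>

#define TILE_WIDTH  32
#define TILE_HEIGHT 32
#define ALIGN_POT(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
   unsigned dst_x = 40, dst_y = 10, w_el = 100, h_el = 20;

   unsigned x0 = dst_x & ~(TILE_WIDTH - 1);            /* 32  */
   unsigned y0 = dst_y & ~(TILE_HEIGHT - 1);           /* 0   */
   unsigned x1 = ALIGN_POT(dst_x + w_el, TILE_WIDTH);  /* 160 */
   unsigned y1 = ALIGN_POT(dst_y + h_el, TILE_HEIGHT); /* 32  */

   printf("dispatch %u x %u workgroups\n", (x1 - x0) / TILE_WIDTH,
          (y1 - y0) / TILE_HEIGHT);
   return 0;
}

The shader's per-pixel in_bounds test (see the earlier hunk) then masks off the threads that fall outside the actual copy region.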
@@ -808,7 +925,13 @@ hk_meta_copy_buffer_to_image2(struct vk_command_buffer *cmd,
       .block_size = blocksize_B,
       .nr_samples = image->samples,
       .src_format = vk_format_to_pipe_format(canonical),
-      .dst_format = vk_format_to_pipe_format(canonical),
+      .dst_format = canonical_format_pipe(
+         vk_format_to_pipe_format(aspect_format(image->format, aspect)),
+         false),
+
+      /* TODO: MSAA path */
+      .block_based =
+         (image->image_type != VK_IMAGE_TYPE_1D) && image->samples == 1,
    };
 
    VkPipelineLayout pipeline_layout;
@@ -933,28 +1056,13 @@ hk_meta_copy_buffer_to_image2(struct vk_command_buffer *cmd,
          .buffer_offset = region->bufferOffset,
          .row_extent = row_extent,
          .slice_or_layer_extent = is_3d ? slice_extent : layer_extent,
-
-         .dst_offset_el[0] =
-            util_format_get_nblocksx(p_format, region->imageOffset.x),
-         .dst_offset_el[1] =
-            util_format_get_nblocksy(p_format, region->imageOffset.y),
-
-         .grid_el[0] =
-            util_format_get_nblocksx(p_format, region->imageExtent.width),
-         .grid_el[1] =
-            util_format_get_nblocksy(p_format, region->imageExtent.height),
-         .grid_el[2] = per_layer ? 1 : layers,
       };
 
       push.buffer_offset += push.slice_or_layer_extent * layer_offs;
 
-      disp->CmdPushConstants(vk_command_buffer_to_handle(cmd),
-                             pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
-                             sizeof(push), &push);
-
-      disp->CmdDispatch(vk_command_buffer_to_handle(cmd),
-                        DIV_ROUND_UP(push.grid_el[0], 32),
-                        DIV_ROUND_UP(push.grid_el[1], 32), push.grid_el[2]);
+      hk_meta_dispatch_to_image(cmd, disp, pipeline_layout, &push,
+                                region->imageOffset, region->imageExtent,
+                                per_layer, layers, p_format, p_format);
       }
    }
 }
@@ -1012,8 +1120,14 @@ hk_meta_copy_image2(struct vk_command_buffer *cmd, struct vk_meta_device *meta,
       .block_size = blocksize_B,
       .nr_samples = dst_image->samples,
       .src_format = vk_format_to_pipe_format(canonical),
-      .dst_format = canonical_format_pipe(vk_format_to_pipe_format(
-         aspect_format(dst_image->format, dst_aspect_mask))),
+      .dst_format =
+         canonical_format_pipe(vk_format_to_pipe_format(aspect_format(
+                                  dst_image->format, dst_aspect_mask)),
+                               false),
+
+      /* TODO: MSAA path */
+      .block_based = (dst_image->image_type != VK_IMAGE_TYPE_1D) &&
+                     dst_image->samples == 1,
    };
 
    assert(key.nr_samples == src_image->samples);
@@ -1147,27 +1261,11 @@ hk_meta_copy_image2(struct vk_command_buffer *cmd, struct vk_meta_device *meta,
             util_format_get_nblocksx(p_src_fmt, region->srcOffset.x),
          .src_offset_el[1] =
            util_format_get_nblocksy(p_src_fmt, region->srcOffset.y),
-
-         .dst_offset_el[0] =
-            util_format_get_nblocksx(p_dst_fmt, region->dstOffset.x),
-         .dst_offset_el[1] =
-            util_format_get_nblocksy(p_dst_fmt, region->dstOffset.y),
-
-         .grid_el[0] =
-            util_format_get_nblocksx(p_format, region->extent.width),
-         .grid_el[1] =
-            util_format_get_nblocksy(p_format, region->extent.height),
-         .grid_el[2] = per_layer ? 1 : layers,
       };
 
-      disp->CmdPushConstants(vk_command_buffer_to_handle(cmd),
-                             pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT,
-                             0, sizeof(push), &push);
-
-      disp->CmdDispatch(vk_command_buffer_to_handle(cmd),
-                        DIV_ROUND_UP(push.grid_el[0], 32),
-                        DIV_ROUND_UP(push.grid_el[1], 32),
-                        push.grid_el[2]);
+      hk_meta_dispatch_to_image(cmd, disp, pipeline_layout, &push,
+                                region->dstOffset, region->extent,
+                                per_layer, layers, p_dst_fmt, p_format);
       }
    }
 }