mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 02:38:04 +02:00
anv: implement VK_KHR_copy_memory_indirect
This implements the extension on the Graphics and Compute queues using Blorp OpenCL compute shaders. Support for the Transfer queue will come in a later patch. We also don't support 24/48/96 bpp formats yet. Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
This commit is contained in:
parent
373eabcdbf
commit
0bd9aa85eb
11 changed files with 1082 additions and 3 deletions
|
|
@ -542,7 +542,7 @@ Khronos extensions that are not part of any Vulkan version:
|
|||
VK_KHR_calibrated_timestamps DONE (anv, hk, kk, nvk, panvk/v10+, radv, tu/a750+, vn)
|
||||
VK_KHR_compute_shader_derivatives DONE (anv, lvp, nvk, radv, tu, vn)
|
||||
VK_KHR_cooperative_matrix DONE (anv, nvk/Turing+, radv/gfx11+, vn)
|
||||
VK_KHR_copy_memory_indirect DONE (nvk, radv/gfx8+)
|
||||
VK_KHR_copy_memory_indirect DONE (anv, nvk, radv/gfx8+)
|
||||
VK_KHR_deferred_host_operations DONE (anv, hasvk, lvp, radv, tu, vn)
|
||||
VK_KHR_depth_clamp_zero_one DONE (anv, nvk, panvk, radv, tu, vn)
|
||||
VK_KHR_device_address_commands DONE (radv)
|
||||
|
|
|
|||
|
|
@ -41,6 +41,8 @@ typedef struct nir_shader nir_shader;
|
|||
enum blorp_op {
|
||||
BLORP_OP_BLIT,
|
||||
BLORP_OP_COPY,
|
||||
BLORP_OP_COPY_INDIRECT,
|
||||
BLORP_OP_COPY_IMAGE_INDIRECT,
|
||||
BLORP_OP_CCS_AMBIGUATE,
|
||||
BLORP_OP_CCS_COLOR_CLEAR,
|
||||
BLORP_OP_CCS_PARTIAL_RESOLVE,
|
||||
|
|
@ -299,6 +301,22 @@ blorp_buffer_copy(struct blorp_batch *batch,
|
|||
struct blorp_address dst,
|
||||
uint64_t size);
|
||||
|
||||
void
|
||||
blorp_copy_memory_indirect(struct blorp_batch *batch,
|
||||
uint64_t indirect_buf_addr,
|
||||
uint32_t copy_count,
|
||||
uint64_t stride);
|
||||
|
||||
void
|
||||
blorp_copy_memory_to_image_indirect(struct blorp_batch *batch,
|
||||
const struct blorp_surf *img_blorp_surf,
|
||||
uint64_t indirect_buf_addr,
|
||||
uint64_t indirect_buf_stride,
|
||||
uint32_t first_copy_idx,
|
||||
uint32_t img_mip_level,
|
||||
int layer_count,
|
||||
int forced_layer_or_z);
|
||||
|
||||
void
|
||||
blorp_fast_clear(struct blorp_batch *batch,
|
||||
const struct blorp_surf *surf,
|
||||
|
|
|
|||
|
|
@ -1380,6 +1380,10 @@ blorp_setup_binding_table(struct blorp_batch *batch,
|
|||
uint32_t surface_offsets[BLORP_NUM_BT_ENTRIES], bind_offset = 0;
|
||||
void *surface_maps[BLORP_NUM_BT_ENTRIES];
|
||||
|
||||
/* There's nothing to bind here. */
|
||||
if (params->op == BLORP_OP_COPY_INDIRECT)
|
||||
return 0;
|
||||
|
||||
if (params->use_pre_baked_binding_table) {
|
||||
bind_offset = params->pre_baked_binding_table_offset;
|
||||
} else {
|
||||
|
|
@ -1784,6 +1788,170 @@ blorp_get_compute_push_const(struct blorp_batch *batch,
|
|||
*state_size = push_const_size;
|
||||
}
|
||||
|
||||
static void
|
||||
blorp_indirect_buffer_get_dispatch_size(struct blorp_batch *batch,
|
||||
const struct blorp_params *params,
|
||||
struct mi_builder *b,
|
||||
struct mi_value *size_x,
|
||||
struct mi_value *size_y,
|
||||
struct mi_value *size_z)
|
||||
{
|
||||
struct blorp_context *blorp = batch->blorp;
|
||||
const struct brw_cs_prog_data *cs_prog_data = params->cs_prog_data;
|
||||
|
||||
uint64_t indirect_buf_addr = params->wm_inputs.indirect.indirect_buf_addr;
|
||||
uint64_t stride = params->wm_inputs.indirect.indirect_buf_stride;
|
||||
uint32_t copy_count = params->wm_inputs.indirect.copy_count;
|
||||
|
||||
size_t size_offset = 16; /* offsetof(VkCopyMemoryIndirectCommandKHR, size) */
|
||||
|
||||
struct mi_value biggest_copy_size;
|
||||
for (int c = 0; c < copy_count; c++) {
|
||||
struct blorp_address copy_size_addr = {
|
||||
.buffer = NULL,
|
||||
.offset = indirect_buf_addr + c * stride + size_offset,
|
||||
.reloc_flags = 0,
|
||||
.mocs = isl_mocs(blorp->isl_dev, ISL_SURF_USAGE_STORAGE_BIT, false),
|
||||
.local_hint = false, /* We don't have a way to know this. */
|
||||
};
|
||||
|
||||
struct mi_value this_copy_size = mi_mem32(copy_size_addr);
|
||||
if (c == 0)
|
||||
biggest_copy_size = this_copy_size;
|
||||
else
|
||||
biggest_copy_size = mi_umax2(b, this_copy_size, biggest_copy_size);
|
||||
}
|
||||
|
||||
/* Each shader invocation writes an uint32_t. */
|
||||
int divisor = cs_prog_data->local_size[0] * sizeof(uint32_t);
|
||||
*size_x = mi_udiv32_imm(b, mi_iadd_imm(b, biggest_copy_size, divisor - 1),
|
||||
divisor);
|
||||
|
||||
assert(cs_prog_data->local_size[1] == 1);
|
||||
assert(cs_prog_data->local_size[2] == 1);
|
||||
*size_y = mi_imm(1);
|
||||
*size_z = mi_imm(1);
|
||||
}
|
||||
|
||||
static void
|
||||
blorp_indirect_buf2img_get_dispatch_size(struct blorp_batch *batch,
|
||||
const struct blorp_params *params,
|
||||
struct mi_builder *b,
|
||||
struct mi_value *size_x,
|
||||
struct mi_value *size_y,
|
||||
struct mi_value *size_z)
|
||||
{
|
||||
struct blorp_context *blorp = batch->blorp;
|
||||
const struct brw_cs_prog_data *cs_prog_data = params->cs_prog_data;
|
||||
|
||||
uint64_t indirect_buf_addr = params->wm_inputs.indirect.indirect_buf_addr;
|
||||
uint64_t stride = params->wm_inputs.indirect.indirect_buf_stride;
|
||||
uint32_t copy_idx = params->wm_inputs.indirect.copy_idx;
|
||||
bool is_forced_layer = params->wm_inputs.indirect.forced_layer_or_z != -1;
|
||||
bool is_3d = params->wm_inputs.indirect.dimensions == 3;
|
||||
|
||||
/* These are all offsetof(VkCopyMemoryToImageIndirectCommandKHR, x). */
|
||||
const size_t img_extent_x_offset = 44;
|
||||
const size_t img_extent_y_offset = 48;
|
||||
const size_t img_extent_z_offset = 52;
|
||||
|
||||
struct blorp_address x_extent_addr = {
|
||||
.buffer = NULL,
|
||||
.offset = indirect_buf_addr + copy_idx * stride + img_extent_x_offset,
|
||||
.reloc_flags = 0,
|
||||
.mocs = isl_mocs(blorp->isl_dev, ISL_SURF_USAGE_STORAGE_BIT, false),
|
||||
.local_hint = false, /* We don't have a way to know this. */
|
||||
};
|
||||
struct blorp_address y_extent_addr = {
|
||||
.buffer = NULL,
|
||||
.offset = indirect_buf_addr + copy_idx * stride + img_extent_y_offset,
|
||||
.reloc_flags = 0,
|
||||
.mocs = isl_mocs(blorp->isl_dev, ISL_SURF_USAGE_STORAGE_BIT, false),
|
||||
.local_hint = false, /* We don't have a way to know this. */
|
||||
};
|
||||
struct blorp_address z_extent_addr = {
|
||||
.buffer = NULL,
|
||||
.offset = indirect_buf_addr + copy_idx * stride + img_extent_z_offset,
|
||||
.reloc_flags = 0,
|
||||
.mocs = isl_mocs(blorp->isl_dev, ISL_SURF_USAGE_STORAGE_BIT, false),
|
||||
.local_hint = false, /* We don't have a way to know this. */
|
||||
};
|
||||
|
||||
/* Notes on the 'params->num_layers' usage below:
|
||||
*
|
||||
* - Please see how we handle this in function
|
||||
* blorp_copy_memory_to_image_indirect().
|
||||
*
|
||||
* - If we're using forced layers (see
|
||||
* params.wm_inputs.indirect.forced_layer_or_z), then we can just set
|
||||
* size_z to 1 and don't even look at the indirect buffer: each shader
|
||||
* call operates on a single layer.
|
||||
*
|
||||
* - The information on the number of layers for each copy is not truly
|
||||
* indirect: it has to be passed to during command creation, so we
|
||||
* have already processed it. See 'max_layer_count' in
|
||||
* blorp_copy_memory_to_image_indirect(). We don't want to be looking
|
||||
* at the indirect buffer if we don't need to.
|
||||
*
|
||||
* - For 3D images, the applications can use the Z axis as either an
|
||||
* actual axis (offset.z + extent.z) or pretend the Z axis is layers
|
||||
* (base_layer + layer_count), so would have to check both places.
|
||||
* Fortunately, base_layer + layer_count was already checked (see
|
||||
* above) and recorded in params->num_layers. If params->num_layers is
|
||||
* bigger than 1, then we don't even bother looking at extent.z.
|
||||
*/
|
||||
struct mi_value x_extent = mi_mem32(x_extent_addr);
|
||||
struct mi_value y_extent = mi_mem32(y_extent_addr);
|
||||
struct mi_value z_extent;
|
||||
if (is_forced_layer) {
|
||||
z_extent = mi_imm(1);
|
||||
} else {
|
||||
if (is_3d && params->num_layers == 1)
|
||||
z_extent = mi_mem32(z_extent_addr);
|
||||
else
|
||||
z_extent = mi_imm(params->num_layers);
|
||||
}
|
||||
|
||||
int divisor_x = cs_prog_data->local_size[0];
|
||||
int divisor_y = cs_prog_data->local_size[1];
|
||||
int divisor_z = cs_prog_data->local_size[2];
|
||||
|
||||
assert(divisor_x != 1 && divisor_y != 1 && divisor_z == 1);
|
||||
*size_x = mi_udiv32_imm(b, mi_iadd_imm(b, x_extent, divisor_x - 1),
|
||||
divisor_x);
|
||||
*size_y = mi_udiv32_imm(b, mi_iadd_imm(b, y_extent, divisor_y - 1),
|
||||
divisor_y);
|
||||
*size_z = z_extent;
|
||||
}
|
||||
|
||||
/* Program GPGPU_DISPATCHDIM{X,Y,Z} from the command streamer so that a
 * walker emitted with IndirectParameterEnable set (see blorp_exec_compute())
 * picks up workgroup counts that are only known at execution time, because
 * they live in the application's indirect buffer.
 */
static void
blorp_indirect_write_gpgpu_dispatch_regs(struct blorp_batch *batch,
                                         const struct blorp_params *params)
{
   struct blorp_context *blorp = batch->blorp;
   const struct intel_device_info *devinfo = blorp->compiler->brw->devinfo;

   struct mi_builder b;
   mi_builder_init(&b, devinfo, batch);
   mi_builder_set_mocs(&b, isl_mocs(blorp->isl_dev, 0, false));

   struct mi_value size_x, size_y, size_z;

   /* Each indirect op computes its dispatch size differently: plain memory
    * copies scan all copy sizes, mem2img copies read one copy's extent.
    */
   if (params->op == BLORP_OP_COPY_INDIRECT) {
      blorp_indirect_buffer_get_dispatch_size(batch, params, &b, &size_x,
                                              &size_y, &size_z);
   } else {
      assert(params->op == BLORP_OP_COPY_IMAGE_INDIRECT);

      blorp_indirect_buf2img_get_dispatch_size(batch, params, &b, &size_x,
                                               &size_y, &size_z);
   }

   mi_store(&b, mi_reg32(GENX(GPGPU_DISPATCHDIMX_num)), size_x);
   mi_store(&b, mi_reg32(GENX(GPGPU_DISPATCHDIMY_num)), size_y);
   mi_store(&b, mi_reg32(GENX(GPGPU_DISPATCHDIMZ_num)), size_z);
}
|
||||
|
||||
static void
|
||||
blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
|
||||
{
|
||||
|
|
@ -1798,6 +1966,11 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
|
|||
const struct intel_cs_dispatch_info dispatch =
|
||||
brw_cs_get_dispatch_info(devinfo, cs_prog_data, NULL);
|
||||
|
||||
bool use_indirect = blorp_op_type_is_indirect(params->op);
|
||||
|
||||
if (use_indirect)
|
||||
blorp_indirect_write_gpgpu_dispatch_regs(batch, params);
|
||||
|
||||
uint32_t group_x0 = params->x0 / cs_prog_data->local_size[0];
|
||||
uint32_t group_y0 = params->y0 / cs_prog_data->local_size[1];
|
||||
uint32_t group_z0 = params->dst.z_offset;
|
||||
|
|
@ -1911,6 +2084,7 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
|
|||
|
||||
assert(cs_prog_data->push.per_thread.regs == 0);
|
||||
blorp_emit(batch, GENX(COMPUTE_WALKER), cw) {
|
||||
cw.IndirectParameterEnable = use_indirect,
|
||||
cw.body = body;
|
||||
}
|
||||
#else /* GFX_VERx10 >= 125 */
|
||||
|
|
@ -1980,6 +2154,7 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
|
|||
}
|
||||
|
||||
blorp_emit(batch, GENX(GPGPU_WALKER), ggw) {
|
||||
ggw.IndirectParameterEnable = use_indirect,
|
||||
ggw.SIMDSize = dispatch.simd_size / 16;
|
||||
ggw.ThreadDepthCounterMaximum = 0;
|
||||
ggw.ThreadHeightCounterMaximum = 0;
|
||||
|
|
|
|||
355
src/intel/blorp/blorp_indirect_copy.c
Normal file
355
src/intel/blorp/blorp_indirect_copy.c
Normal file
|
|
@ -0,0 +1,355 @@
|
|||
/* Copyright © 2025 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "blorp_nir_builder.h"
|
||||
#include "compiler/nir/nir_format_convert.h"
|
||||
|
||||
#include "blorp_priv.h"
|
||||
#include "dev/intel_debug.h"
|
||||
#include "dev/intel_device_info.h"
|
||||
|
||||
#include "blorp_shaders.h"
|
||||
|
||||
/* Refer to struct blorp_wm_inputs_indirect. */
|
||||
struct blorp_indirect_vars {
|
||||
nir_variable *indirect_buf_addr;
|
||||
nir_variable *stride;
|
||||
nir_variable *copy_count;
|
||||
nir_variable *copy_idx;
|
||||
nir_variable *max_layer;
|
||||
nir_variable *x_offset;
|
||||
nir_variable *y_offset;
|
||||
};
|
||||
|
||||
static enum isl_format
|
||||
get_format_for_copy(int format_bpb)
|
||||
{
|
||||
switch (format_bpb) {
|
||||
case 8: return ISL_FORMAT_R8_UINT;
|
||||
case 16: return ISL_FORMAT_R16_UINT;
|
||||
case 24: return ISL_FORMAT_R8G8B8_UINT;
|
||||
case 32: return ISL_FORMAT_R32_UINT;
|
||||
case 48: return ISL_FORMAT_R16G16B16_UINT;
|
||||
case 64: return ISL_FORMAT_R32G32_UINT;
|
||||
case 96: return ISL_FORMAT_R32G32B32_UINT;
|
||||
case 128: return ISL_FORMAT_R32G32B32A32_UINT;
|
||||
default:
|
||||
mesa_loge("unexpected format bpb: %d", format_bpb);
|
||||
assert(false);
|
||||
return ISL_FORMAT_UNSUPPORTED;
|
||||
}
|
||||
}
|
||||
|
||||
/* Create the NIR input variables mirroring struct blorp_wm_inputs_indirect
 * (see blorp_priv.h). Both indirect copy shaders use the same variable set;
 * each shader simply ignores the fields it does not need.
 */
static void
blorp_indirect_vars_init(nir_builder *b, struct blorp_indirect_vars *v)
{
   v->copy_count =
      BLORP_CREATE_NIR_INPUT(b->shader, indirect.copy_count,
                             glsl_uint_type());
   v->indirect_buf_addr =
      BLORP_CREATE_NIR_INPUT(b->shader, indirect.indirect_buf_addr,
                             glsl_uint64_t_type());
   v->stride =
      BLORP_CREATE_NIR_INPUT(b->shader, indirect.indirect_buf_stride,
                             glsl_uint64_t_type());
   v->copy_idx =
      BLORP_CREATE_NIR_INPUT(b->shader, indirect.copy_idx,
                             glsl_uint_type());
   v->max_layer =
      BLORP_CREATE_NIR_INPUT(b->shader, indirect.max_layer,
                             glsl_uint_type());
   v->x_offset =
      BLORP_CREATE_NIR_INPUT(b->shader, indirect.x_offset,
                             glsl_uint_type());
   v->y_offset =
      BLORP_CREATE_NIR_INPUT(b->shader, indirect.y_offset,
                             glsl_uint_type());
}
|
||||
|
||||
/* Build the compute shader used by BLORP_OP_COPY_INDIRECT
 * (vkCmdCopyMemoryIndirectKHR). The copy logic itself lives in the OpenCL
 * library function blorp_copy_memory_indirect_shader().
 */
static nir_shader *
blorp_build_copy_mem_indirect_shader(struct blorp_batch *batch,
                                     void *mem_ctx)
{
   struct blorp_context *blorp = batch->blorp;

   mesa_shader_stage stage = MESA_SHADER_COMPUTE;
   const nir_shader_compiler_options *nir_options =
      blorp->compiler->nir_options(blorp, stage);
   nir_builder b = nir_builder_init_simple_shader(stage, nir_options,
                                                  "copy mem indirect");
   ralloc_steal(mem_ctx, b.shader);

   /* One-dimensional workgroup; blorp_indirect_buffer_get_dispatch_size()
    * asserts local_size[1] and local_size[2] are 1 and divides by
    * local_size[0].
    */
   b.shader->info.workgroup_size[0] = 32;
   b.shader->info.workgroup_size[1] = 1;
   b.shader->info.workgroup_size[2] = 1;

   struct blorp_indirect_vars v;
   blorp_indirect_vars_init(&b, &v);

   /* The indirect buffer is an array containing 'copy_count'
    * VkCopyMemoryIndirectCommandKHR structures, separated by 'stride' bytes.
    */
   nir_def *copy_count = nir_load_var(&b, v.copy_count);
   nir_def *indirect_buf_addr = nir_load_var(&b, v.indirect_buf_addr);
   nir_def *stride = nir_load_var(&b, v.stride);
   /* Only the X component of the invocation ID matters: the dispatch is
    * one-dimensional.
    */
   nir_def *global_id =
      nir_channel(&b, nir_load_global_invocation_id(&b, 32), 0);

   blorp_copy_memory_indirect_shader(&b, indirect_buf_addr, copy_count,
                                     stride, global_id);

   return b.shader;
}
|
||||
|
||||
/* Build the compute shader used by BLORP_OP_COPY_IMAGE_INDIRECT
 * (vkCmdCopyMemoryToImageIndirectKHR). Values that are constant per shader
 * variant come from 'key' and are baked in as NIR immediates; the rest is
 * read from the blorp_wm_inputs_indirect variables at run time. The copy
 * logic itself lives in the OpenCL library function
 * blorp_copy_memory_to_image_indirect_shader().
 */
static nir_shader *
blorp_build_copy_mem2img_indirect_shader(struct blorp_batch *batch,
                                         void *mem_ctx,
                                         struct blorp_indirect_copy_mem2img_key *key)
{
   struct blorp_context *blorp = batch->blorp;

   mesa_shader_stage stage = MESA_SHADER_COMPUTE;
   const nir_shader_compiler_options *nir_options =
      blorp->compiler->nir_options(blorp, stage);
   nir_builder b = nir_builder_init_simple_shader(stage, nir_options,
                                                  "copy mem2img indirect");
   ralloc_steal(mem_ctx, b.shader);

   /* 2D workgroup; blorp_indirect_buf2img_get_dispatch_size() asserts
    * local_size[0] != 1, local_size[1] != 1 and local_size[2] == 1.
    */
   b.shader->info.workgroup_size[0] = 4;
   b.shader->info.workgroup_size[1] = 8;
   b.shader->info.workgroup_size[2] = 1;

   struct blorp_indirect_vars v;
   blorp_indirect_vars_init(&b, &v);

   /* The indirect buffer is an array containing 'copy_count'
    * VkCopyMemoryToImageIndirectCommandKHR structures, separated by 'stride'
    * bytes.
    */
   nir_def *indirect_buf_addr = nir_load_var(&b, v.indirect_buf_addr);
   nir_def *stride = nir_load_var(&b, v.stride);
   nir_def *copy_idx = nir_load_var(&b, v.copy_idx);
   nir_def *max_layer = nir_load_var(&b, v.max_layer);
   /* x/y adjustments used when a compressed surface was retargeted to an
    * uncompressed view (see blorp_copy_memory_to_image_indirect()).
    */
   nir_def *dest_coord_offsets_arr[2] = {
      nir_load_var(&b, v.x_offset),
      nir_load_var(&b, v.y_offset),
   };
   nir_def *dest_coord_offsets = nir_vec(&b, dest_coord_offsets_arr, 2);

   nir_def *global_id = nir_load_global_invocation_id(&b, 32);

   /* Shader keys (constants). */
   nir_def *dimensions = nir_imm_int(&b, key->dimensions);
   nir_def *forced_layer_or_z = nir_imm_int(&b, key->forced_layer_or_z);
   nir_def *format_Bpb = nir_imm_intN_t(&b, key->format_Bpb, 16);
   nir_def *format_block_size_arr[3] = {
      nir_imm_int(&b, key->format_bw),
      nir_imm_int(&b, key->format_bh),
      nir_imm_int(&b, key->format_bd),
   };
   nir_def *format_block_size = nir_vec(&b, format_block_size_arr, 3);

   /* Constants derived from the shader keys. */
   nir_def *is_block_compressed =
      nir_imm_bool(&b, key->format_bw > 1 || key->format_bh > 1 ||
                       key->format_bd > 1);

   blorp_copy_memory_to_image_indirect_shader(&b, indirect_buf_addr,
                                              stride,
                                              copy_idx,
                                              max_layer,
                                              dest_coord_offsets,
                                              global_id,
                                              dimensions,
                                              forced_layer_or_z,
                                              format_Bpb,
                                              format_block_size,
                                              is_block_compressed);

   return b.shader;
}
|
||||
|
||||
static bool
|
||||
blorp_get_copy_mem_indirect_kernel_cs(struct blorp_batch *batch,
|
||||
struct blorp_params *params)
|
||||
{
|
||||
struct blorp_context *blorp = batch->blorp;
|
||||
const char *key = "copy_mem_indirect_kernel_cs";
|
||||
uint32_t key_size = strlen(key);
|
||||
|
||||
if (blorp->lookup_shader(batch, key, key_size, ¶ms->cs_prog_kernel,
|
||||
¶ms->cs_prog_data))
|
||||
return true;
|
||||
|
||||
void *mem_ctx = ralloc_context(NULL);
|
||||
|
||||
nir_shader *nir =
|
||||
blorp_build_copy_mem_indirect_shader(batch, mem_ctx);
|
||||
|
||||
const struct blorp_program prog =
|
||||
blorp_compile_cs(blorp, mem_ctx, nir, key, key_size);
|
||||
|
||||
bool result = blorp->upload_shader(batch, MESA_SHADER_COMPUTE,
|
||||
key, key_size,
|
||||
prog.kernel, prog.kernel_size,
|
||||
prog.prog_data, prog.prog_data_size,
|
||||
¶ms->cs_prog_kernel,
|
||||
¶ms->cs_prog_data);
|
||||
|
||||
ralloc_free(mem_ctx);
|
||||
return result;
|
||||
|
||||
}
|
||||
|
||||
static bool
|
||||
blorp_get_copy_mem2img_indirect_kernel_cs(struct blorp_batch *batch,
|
||||
struct blorp_params *params,
|
||||
struct blorp_indirect_copy_mem2img_key *key)
|
||||
{
|
||||
struct blorp_context *blorp = batch->blorp;
|
||||
|
||||
if (blorp->lookup_shader(batch, key, sizeof(*key), ¶ms->cs_prog_kernel,
|
||||
¶ms->cs_prog_data))
|
||||
return true;
|
||||
|
||||
void *mem_ctx = ralloc_context(NULL);
|
||||
|
||||
nir_shader *nir =
|
||||
blorp_build_copy_mem2img_indirect_shader(batch, mem_ctx, key);
|
||||
|
||||
const struct blorp_program prog =
|
||||
blorp_compile_cs(blorp, mem_ctx, nir, key, sizeof(*key));
|
||||
|
||||
bool result = blorp->upload_shader(batch, MESA_SHADER_COMPUTE,
|
||||
key, sizeof(*key),
|
||||
prog.kernel, prog.kernel_size,
|
||||
prog.prog_data, prog.prog_data_size,
|
||||
¶ms->cs_prog_kernel,
|
||||
¶ms->cs_prog_data);
|
||||
|
||||
ralloc_free(mem_ctx);
|
||||
return result;
|
||||
}
|
||||
|
||||
void
|
||||
blorp_copy_memory_indirect(struct blorp_batch *batch,
|
||||
uint64_t indirect_buf_addr,
|
||||
uint32_t copy_count,
|
||||
uint64_t stride)
|
||||
{
|
||||
struct blorp_params params;
|
||||
blorp_params_init(¶ms);
|
||||
|
||||
params.op = BLORP_OP_COPY_INDIRECT;
|
||||
params.shader_type = BLORP_SHADER_TYPE_COPY_INDIRECT;
|
||||
params.shader_pipeline = BLORP_SHADER_PIPELINE_COMPUTE;
|
||||
|
||||
params.x0 = 0;
|
||||
params.y0 = 0;
|
||||
params.x1 = 1;
|
||||
params.y1 = 1;
|
||||
|
||||
params.wm_inputs.indirect.indirect_buf_addr = indirect_buf_addr;
|
||||
params.wm_inputs.indirect.indirect_buf_stride = stride;
|
||||
params.wm_inputs.indirect.copy_count = copy_count;
|
||||
|
||||
if (!blorp_get_copy_mem_indirect_kernel_cs(batch, ¶ms)) {
|
||||
mesa_loge("failed to get copy_memory_indirect CS kernel");
|
||||
assert(false);
|
||||
return;
|
||||
}
|
||||
|
||||
batch->blorp->exec(batch, ¶ms);
|
||||
}
|
||||
|
||||
/* Implement one copy of vkCmdCopyMemoryToImageIndirectKHR on compute.
 *
 * Copies the region described by entry 'copy_idx' of the indirect buffer
 * into mip 'img_mip_level' of the image behind 'img_blorp_surf'. When
 * 'forced_layer_or_z' is not -1, a single layer (or Z slice) is written;
 * otherwise one dispatch covers up to 'layer_count' layers.
 *
 * NOTE(review): the prototype in the header names the 'copy_idx' parameter
 * 'first_copy_idx' — the names should be made consistent.
 */
void
blorp_copy_memory_to_image_indirect(struct blorp_batch *batch,
                                    const struct blorp_surf *img_blorp_surf,
                                    uint64_t indirect_buf_addr,
                                    uint64_t indirect_buf_stride,
                                    uint32_t copy_idx,
                                    uint32_t img_mip_level,
                                    int layer_count,
                                    int forced_layer_or_z)
{
   enum isl_format original_format = img_blorp_surf->surf->format;
   const struct isl_format_layout *fmtl =
      isl_format_get_layout(original_format);
   /* Copies move raw bits, so write through a same-bpb UINT format. */
   enum isl_format copy_format = get_format_for_copy(fmtl->bpb);
   /* +1 converts the isl_surf_dim enum into a 1/2/3 dimension count. */
   int dimensions = img_blorp_surf->surf->dim + 1;

   struct blorp_indirect_copy_mem2img_key key = {
      .dimensions = dimensions,
      .forced_layer_or_z = forced_layer_or_z,
      .format_Bpb = fmtl->bpb / 8,
      .format_bw = fmtl->bw,
      .format_bh = fmtl->bh,
      .format_bd = fmtl->bd,
   };

   struct blorp_params params;
   blorp_params_init(&params);

   params.op = BLORP_OP_COPY_IMAGE_INDIRECT;
   params.shader_type = BLORP_SHADER_TYPE_COPY_INDIRECT;
   params.shader_pipeline = BLORP_SHADER_PIPELINE_COMPUTE;

   params.wm_inputs.indirect.indirect_buf_addr = indirect_buf_addr;
   params.wm_inputs.indirect.indirect_buf_stride = indirect_buf_stride;
   params.wm_inputs.indirect.copy_idx = copy_idx;
   params.wm_inputs.indirect.dimensions = dimensions;
   params.wm_inputs.indirect.max_layer =
      img_blorp_surf->surf->logical_level0_px.array_len - 1;
   params.wm_inputs.indirect.forced_layer_or_z = forced_layer_or_z;

   /* params.dst is our image. When a layer is forced, bind that layer as
    * the base so the shader can store with z == 0.
    */
   blorp_surface_info_init(batch, &params.dst, img_blorp_surf,
                           img_mip_level,
                           forced_layer_or_z == -1 ? 0 : forced_layer_or_z,
                           copy_format,
                           true /* is_dest */);

   struct isl_extent3d mip_dimensions = {
      .width = MAX2(params.dst.surf.logical_level0_px.w >> img_mip_level, 1),
      .height = MAX2(params.dst.surf.logical_level0_px.h >> img_mip_level, 1),
      .depth = MAX2(params.dst.surf.logical_level0_px.d >> img_mip_level, 1),
   };

   if (fmtl->bw > 1 || fmtl->bh > 1 || fmtl->bd > 1) {
      /* Block-compressed format: retarget the surface to an uncompressed
       * view and pass the resulting intra-tile x/y starting offsets to the
       * shader (see blorp_wm_inputs_indirect).
       */
      blorp_surf_convert_to_uncompressed(batch->blorp->isl_dev,
                                         &params.dst, NULL, NULL, NULL, NULL);
      params.wm_inputs.indirect.x_offset = params.dst.tile_x_sa;
      params.wm_inputs.indirect.y_offset = params.dst.tile_y_sa;

      /* NOTE(review): no '>> img_mip_level' here — presumably the converted
       * surface's level 0 already corresponds to the selected mip; confirm
       * against blorp_surf_convert_to_uncompressed().
       */
      mip_dimensions.width = params.dst.surf.logical_level0_px.w;
      mip_dimensions.height = params.dst.surf.logical_level0_px.h;
      mip_dimensions.depth = params.dst.surf.logical_level0_px.d;
   }

   /* These settings control the number of workgroups in the shader, see
    * blorp_exec_compute(). We don't need to divide by the local sizes here,
    * this will be done later.
    */
   params.x0 = 0;
   params.y0 = 0;
   params.x1 = mip_dimensions.width;
   params.y1 = mip_dimensions.height;
   if (forced_layer_or_z == -1) {
      /* We set this here so blorp_indirect_buf2img_get_dispatch_size() can
       * read it while figuring out how many shader instances we'll need.
       */
      params.num_layers = layer_count;
   } else {
      params.num_layers = 1;
   }

   if (!blorp_get_copy_mem2img_indirect_kernel_cs(batch, &params, &key)) {
      mesa_loge("failed to get copy_memory_to_image_indirect CS kernel");
      assert(false);
      return;
   }

   batch->blorp->exec(batch, &params);
}
|
||||
|
|
@ -205,17 +205,55 @@ struct blorp_wm_inputs_clear {
|
|||
struct blorp_bounds_rect bounds_rect;
|
||||
};
|
||||
|
||||
/* Parameters used in blorp_indirect_copy.c */
struct blorp_wm_inputs_indirect {
   /* The address of the indirect buffer containing the information about the
    * indirect copy.
    */
   uint64_t indirect_buf_addr;

   /* How far apart the information about each copy is inside the indirect
    * buffer.
    */
   uint64_t indirect_buf_stride;

   /* How many copies we have to do. Set by blorp_copy_memory_indirect()
    * for memory-to-memory copies.
    */
   uint32_t copy_count;

   /* For memory to image copies, we do a single copy per shader. This
    * represents the index of the copy to be done.
    */
   uint32_t copy_idx;

   /* How many dimensions does our image have? 1, 2 or 3. */
   uint32_t dimensions;

   /* The maximum array layer of the image (array_len - 1). */
   uint32_t max_layer;

   /* When compressed formats are used, we pretend they are a non-compressed
    * format, of the same bpb. Since we can't maintain the exact same layout
    * of mipmap and layer offsets, we're forced to make adjustments to where X
    * and Y actually start, and are also forced to copy only one layer (or Z
    * axis position) per shader invocation.
    */
   uint32_t x_offset;
   uint32_t y_offset;
   int forced_layer_or_z;
};
|
||||
|
||||
struct blorp_wm_inputs
|
||||
{
|
||||
union {
|
||||
struct blorp_wm_inputs_blit blit;
|
||||
struct blorp_wm_inputs_clear clear;
|
||||
struct blorp_wm_inputs_indirect indirect;
|
||||
};
|
||||
|
||||
/* Note: Pad out to an integral number of registers when extending, but
|
||||
* make sure subgroup_id is the last 32-bit item.
|
||||
*/
|
||||
uint32_t pad[2];
|
||||
uint32_t pad[1];
|
||||
uint32_t subgroup_id;
|
||||
};
|
||||
|
||||
|
|
@ -257,6 +295,7 @@ enum blorp_shader_type {
|
|||
BLORP_SHADER_TYPE_MCS_PARTIAL_RESOLVE,
|
||||
BLORP_SHADER_TYPE_LAYER_OFFSET_VS,
|
||||
BLORP_SHADER_TYPE_GFX4_SF,
|
||||
BLORP_SHADER_TYPE_COPY_INDIRECT,
|
||||
};
|
||||
|
||||
enum blorp_shader_pipeline {
|
||||
|
|
@ -467,6 +506,25 @@ struct blorp_blit_prog_key
|
|||
uint8_t local_y;
|
||||
};
|
||||
|
||||
struct blorp_indirect_copy_mem2img_key {
|
||||
/* How many dimensions does our image have? 1, 2 or 3. */
|
||||
uint32_t dimensions;
|
||||
|
||||
/* When compressed formats are used, we pretend they are a non-compressed
|
||||
* format, of the same bpb. Since we can't maintain the exact same layout
|
||||
* of mipmap and layer offsets, we're forced to make adjustments to where X
|
||||
* and Y actually start, and are also forced to copy only one layer (or Z
|
||||
* axis position) per shader invocation.
|
||||
*/
|
||||
int forced_layer_or_z;
|
||||
|
||||
/* Info taken from isl_format_layout. */
|
||||
uint16_t format_Bpb;
|
||||
uint16_t format_bw;
|
||||
uint16_t format_bh;
|
||||
uint16_t format_bd;
|
||||
};
|
||||
|
||||
/**
|
||||
* \name BLORP internals
|
||||
* \{
|
||||
|
|
@ -597,6 +655,19 @@ blorp_op_type_is_clear(enum blorp_op op)
|
|||
}
|
||||
}
|
||||
|
||||
/* This means: blorp->wm_inputs.indirect should be used. */
|
||||
static inline bool
|
||||
blorp_op_type_is_indirect(enum blorp_op op)
|
||||
{
|
||||
switch (op) {
|
||||
case BLORP_OP_COPY_INDIRECT:
|
||||
case BLORP_OP_COPY_IMAGE_INDIRECT:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/* Asserts unless the surface is a buffer to image copy */
|
||||
#define blorp_assert_is_buffer(surf, view) \
|
||||
do { \
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
*/
|
||||
|
||||
#include "compiler/libcl/libcl.h"
|
||||
#include "compiler/libcl/libcl_vk.h"
|
||||
#include "compiler/nir/nir_defines.h"
|
||||
#include "compiler/shader_enums.h"
|
||||
|
||||
|
|
@ -15,3 +16,279 @@ blorp_check_in_bounds(uint4 bounds_rect, uint2 pos)
|
|||
return pos.x >= x0 && pos.x < x1 &&
|
||||
pos.y >= y0 && pos.y < y1;
|
||||
}
|
||||
|
||||
void nir_image_store(uint handle, int4 coords, uint sample_index,
|
||||
uint4 colors, uint lod, uint image_dim,
|
||||
uint image_array, uint format, uint access,
|
||||
uint range_base, uint src_type);
|
||||
|
||||
/* Used by vkCmdCopyMemoryIndirectKHR.
 *
 * Each invocation walks every copy command and moves the single 4-byte word
 * at index 'global_id'. The dispatch is sized on the host for the largest
 * copy (see blorp_indirect_buffer_get_dispatch_size()); smaller copies are
 * protected by the bounds check at the bottom of the loop.
 */
void
blorp_copy_memory_indirect_shader(
   global uint *indirect_buf,
   uint copy_count,
   ulong stride_bytes,
   uint global_id)
{
   for (uint c = 0; c < copy_count; c++) {
      /* Index, in uints, of this copy's command structure. */
      uint idx = (uint)(c * (stride_bytes / 4));

      /* The spec says the minimum alignment is 4 instead of 8, so we have to
       * do these tricks.
       */
      VkCopyMemoryIndirectCommandKHR cmd =
         (*(global VkCopyMemoryIndirectCommandKHR*)&indirect_buf[idx]);

      /* NOTE(review): assumes cmd.size is a multiple of 4 — confirm the
       * extension's valid-usage rules make trailing bytes impossible.
       */
      uint copy_size_ints = cmd.size / 4;

      global int *src = (global int*)cmd.srcAddress;
      global int *dst = (global int*)cmd.dstAddress;

      if (global_id < copy_size_ints)
         dst[global_id] = src[global_id];
   }
}
|
||||
|
||||
/* One VkCopyMemoryToImageIndirectCommandKHR after normalization by
 * read_img_params(): tight packing resolved, pixel units converted to
 * blocks for compressed formats, and 3D-as-layers folded into
 * offset.z/extent.z.
 */
struct img_copy_params {
   ulong src_address;   /* cmd.srcAddress */
   uint2 row_size_px;   /* bufferRowLength/bufferImageHeight, or the extent */
   uint base_layer;     /* imageSubresource.baseArrayLayer */
   uint layer_count;    /* imageSubresource.layerCount, clamped to the image */
   uint3 offset;        /* imageOffset */
   uint3 extent;        /* imageExtent */
};
||||
|
||||
/* Read and normalize the VkCopyMemoryToImageIndirectCommandKHR at index
 * 'copy_idx' of the indirect buffer into '*p'.
 *
 * Normalizations applied:
 *  - zero bufferRowLength/bufferImageHeight mean "tightly packed" and are
 *    replaced with the copy extent;
 *  - for block-compressed formats, offset/extent are converted from pixels
 *    to blocks;
 *  - 3D images addressed through baseArrayLayer/layerCount are converted to
 *    offset.z/extent.z;
 *  - layer_count is clamped so base_layer + layer_count never exceeds
 *    max_layer + 1 (this also handles VK_REMAINING_ARRAY_LAYERS).
 */
void
read_img_params(
   global uint *indirect_buf,
   uint copy_idx,
   ulong stride_bytes,
   uint dimensions,
   uint max_layer,
   uint3 format_block_size,
   bool is_block_compressed,
   struct img_copy_params *p)
{
   uint idx = copy_idx * (stride_bytes / 4);

   VkCopyMemoryToImageIndirectCommandKHR cmd =
      (*(global VkCopyMemoryToImageIndirectCommandKHR*)&indirect_buf[idx]);

   p->src_address = cmd.srcAddress;
   p->row_size_px = (uint2)(cmd.bufferRowLength, cmd.bufferImageHeight);

   /* We don't use imageSubresource.aspectMask and imageSubresource.mipLevel,
    * those are dealt with when the application calls
    * vkCmdCopyMemoryToImageIndirectKHR().
    */

   p->base_layer = cmd.imageSubresource.baseArrayLayer;
   p->layer_count = cmd.imageSubresource.layerCount;

   p->offset = (uint3)(cmd.imageOffset.x, cmd.imageOffset.y,
                       cmd.imageOffset.z);
   p->extent = (uint3)(cmd.imageExtent.width, cmd.imageExtent.height,
                       cmd.imageExtent.depth);

   if (p->row_size_px.x == 0)
      p->row_size_px.x = p->extent.x;
   if (p->row_size_px.y == 0)
      p->row_size_px.y = p->extent.y;

   /* Our code deals with blocks, not pixels. */
   if (is_block_compressed) {
      p->offset /= format_block_size;
      p->extent = DIV_ROUND_UP(p->extent, format_block_size);
   }

   /* Users can pass 3D images with the Z axis as an array layer. */
   if (dimensions == 3 && (p->base_layer != 0 || p->layer_count != 1)) {
      p->offset.z = p->base_layer;
      p->extent.z = p->layer_count;
      p->base_layer = 0;
      p->layer_count = 1;
   }

   /* This handles VK_REMAINING_ARRAY_LAYERS and bugs.
    *
    * Compare layer_count on its own instead of testing
    * 'base_layer + layer_count > max_layer + 1': for layer_count values
    * like VK_REMAINING_ARRAY_LAYERS (0xFFFFFFFF) that addition wraps
    * around and the clamp would be skipped whenever base_layer > 0.
    */
   if (p->base_layer <= max_layer &&
       p->layer_count > max_layer + 1 - p->base_layer)
      p->layer_count = max_layer + 1 - p->base_layer;
}
|
||||
|
||||
/* Load one texel of 'format_Bpb' bytes from 'src_address' into the low
 * components of a uint4; unused components are zero. The data is raw bits —
 * no format conversion happens here.
 */
uint4
get_pixel(
   ulong src_address,
   uint format_Bpb)
{
   switch (format_Bpb) {
   case 1: {
      global uchar *src_buf = (global uchar *)src_address;
      return (uint4)(src_buf[0], 0, 0, 0);
   }
   case 2: {
      global ushort *src_buf = (global ushort *)src_address;
      return (uint4)(src_buf[0], 0, 0, 0);
   }
   case 4: {
      global uint *src_buf = (global uint *)src_address;
      return (uint4)(src_buf[0], 0, 0, 0);
   }
   case 8: {
      /* NOTE(review): vload2 on a uint pointer presumably keeps the access
       * legal for sources that are only 4-byte aligned — confirm.
       */
      global uint *src_buf = (global uint *)src_address;
      return (uint4)(vload2(0, src_buf), 0, 0);
   }
   case 16: {
      global uint *src_buf = (global uint *)src_address;
      return (uint4)(vload4(0, src_buf));
   }
   default:
      /* TODO: support 3, 6, 12. */
      /* Deliberately loud placeholder color for unsupported sizes. */
      return (uint4)(0xFF, 0, 0, 0xFF);
   }
}
|
||||
|
||||
/* Translate a position inside the copy region into the coordinate vector
 * expected by nir_image_store() (see write_pixel()).
 *
 * For 1D/2D images the array layer goes into the component after the
 * spatial ones; for 3D images pos.z is used directly. When a layer/Z slice
 * is forced, the host bound that layer as the image's base, so z is forced
 * to 0 (see blorp_copy_memory_to_image_indirect()).
 */
int4
get_coords(
   uint3 pos,
   uint layer,
   uint2 dest_coord_offsets,
   int dimensions,
   int forced_layer_or_z)
{
   int4 ret;

   /* x/y start adjustments for compressed formats retargeted to an
    * uncompressed view (see blorp_wm_inputs_indirect).
    */
   pos.xy += dest_coord_offsets;

   switch (dimensions) {
   case 1:
      ret = (int4)(pos.x, layer, 0, 0);
      break;
   case 2:
      ret = (int4)(pos.x, pos.y, layer, 0);
      break;
   case 3:
   default:
      ret = (int4)(pos.x, pos.y, pos.z, 0);
      break;
   }

   if (forced_layer_or_z != -1) {
      ret.z = 0;
      ret.w = 0;
   }

   return ret;
}
|
||||
|
||||
/* Store one texel into the bound destination image via the nir_image_store
 * intrinsic.  'coords' comes from get_coords() and 'colors' from
 * get_pixel(); the image binding itself (including the mip level) is set up
 * by the driver outside this shader.
 */
void
write_pixel(
   const int4 coords,
   const uint4 colors)
{
   /* We don't seem to need to set image_array to true if we set sampler_dim
    * to 3D.
    * Setting mip_level does not do anything on Intel, we set mip levels
    * through the bindings.
    */
   nir_image_store(0 /* The image handle. */,
                   coords, /* See get_coords(). */
                   0, /* Sample index for multi-sampling. */
                   colors, /* The RGBA pixels, in src_type. */
                   0 /* mip_level */,
                   GLSL_SAMPLER_DIM_3D,
                   false, /* image_array */
                   0, /* format */
                   ACCESS_NON_READABLE, /* access */
                   0, /* range_base */
                   nir_type_int32 /* src_type */);
}
|
||||
|
||||
/* Used by vkCmdCopyMemoryToImageIndirectKHR.
 *
 * One invocation per texel (or compressed block) of the destination mip
 * level: each invocation reads its copy command from 'indirect_buf', loads
 * the matching bytes from the source buffer and stores them into the bound
 * destination image.
 */
void
blorp_copy_memory_to_image_indirect_shader(
   /* Actual parameters. */
   global uint *indirect_buf,
   ulong stride_bytes,
   uint copy_idx,
   uint max_layer,
   uint2 dest_coord_offsets,
   uint3 global_id,

   /* These are shader keys, they are NIR immediates. */
   uint dimensions,
   int forced_layer_or_z,
   ushort format_Bpb,
   uint3 format_block_size,
   bool is_block_compressed)
{
   /* We have one invocation per texel of a given a mip level. This means
    * that for the pixels outside the copy area, we'll take the early
    * 'return' below.
    */
   uint3 src_pos = global_id;
   uint layer;

   /* 'forced_layer_or_z' means that whatever slice or layer we're
    * trying to work with is set as part of our binding (view) as depth
    * 0 or layer 0, depending on the dimensionality. This can happen
    * when we're trying to pretend a format is something that it's not
    * (e.g., we're treating a block compressed 4x4 64bpp format as an
    * r32g32 format).
    */
   if (forced_layer_or_z != -1) {
      if (dimensions == 3) {
         src_pos.z = forced_layer_or_z;
         layer = 0;
      } else {
         src_pos.z = 0;
         layer = forced_layer_or_z;
      }
   } else {
      if (dimensions == 3) {
         src_pos.z = global_id.z;
         layer = 0;
      } else {
         src_pos.z = 0;
         layer = global_id.z;
      }
   }

   /* Decode this invocation's copy command (offsets, extent, buffer layout)
    * from the indirect buffer.
    */
   struct img_copy_params p;
   read_img_params(indirect_buf, copy_idx, stride_bytes, dimensions,
                   max_layer, format_block_size, is_block_compressed, &p);

   /* Invocations outside the copy extent have nothing to do. */
   if (any(src_pos >= p.extent))
      return;

   uint3 dst_pos = src_pos + p.offset;
   if (forced_layer_or_z == -1 && dimensions != 3)
      layer = global_id.z + p.base_layer;

   /* Invocations past the requested layer range have nothing to do. */
   if (layer > p.base_layer + p.layer_count - 1)
      return;

   /* Source buffer addressing works in blocks, not pixels: for block
    * compressed formats one "texel" here is one compressed block.
    */
   uint2 row_size_blocks = p.row_size_px;
   if (is_block_compressed) {
      row_size_blocks = DIV_ROUND_UP(row_size_blocks,
                                     format_block_size.xy);
   }
   uint row_length_bytes = row_size_blocks.x * format_Bpb;
   uint row_height_bytes = row_size_blocks.y * row_length_bytes;

   /* Byte offset of this texel within its layer's slice of the buffer. */
   uint buf_z_offset_bytes = src_pos.z * row_height_bytes;
   uint buf_y_offset_bytes = src_pos.y * row_length_bytes;
   uint buf_x_offset_bytes = src_pos.x * format_Bpb;

   /* Each array layer occupies one full 2D slice worth of bytes. */
   uint layer_offset_bytes = (layer - p.base_layer) * row_height_bytes;
   uint buf_offset_bytes = layer_offset_bytes +
                           buf_z_offset_bytes +
                           buf_y_offset_bytes +
                           buf_x_offset_bytes;

   uint4 colors = get_pixel(p.src_address + buf_offset_bytes,
                            format_Bpb);

   int4 coords = get_coords(dst_pos, layer, dest_coord_offsets, dimensions,
                            forced_layer_or_z);

   write_pixel(coords, colors);
}
|
||||
|
|
|
|||
|
|
@ -37,6 +37,7 @@ files_libblorp = files(
|
|||
'blorp.h',
|
||||
'blorp_blit.c',
|
||||
'blorp_clear.c',
|
||||
'blorp_indirect_copy.c',
|
||||
'blorp_nir_builder.h',
|
||||
'blorp_priv.h',
|
||||
)
|
||||
|
|
|
|||
|
|
@ -2811,3 +2811,135 @@ anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
|
|||
|
||||
anv_blorp_batch_finish(&batch);
|
||||
}
|
||||
|
||||
void
|
||||
anv_CmdCopyMemoryIndirectKHR(
|
||||
VkCommandBuffer commandBuffer,
|
||||
const VkCopyMemoryIndirectInfoKHR* pCopyMemoryIndirectInfo)
|
||||
{
|
||||
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
|
||||
if (pCopyMemoryIndirectInfo->copyCount == 0)
|
||||
return;
|
||||
|
||||
const uint64_t indirect_buf_addr =
|
||||
pCopyMemoryIndirectInfo->copyAddressRange.address;
|
||||
const uint32_t copy_count = pCopyMemoryIndirectInfo->copyCount;
|
||||
const uint64_t stride = pCopyMemoryIndirectInfo->copyAddressRange.stride;
|
||||
|
||||
/* These are all restrictions by the spec. */
|
||||
assert((pCopyMemoryIndirectInfo->srcCopyFlags &
|
||||
VK_ADDRESS_COPY_PROTECTED_BIT_KHR) == 0);
|
||||
assert((pCopyMemoryIndirectInfo->dstCopyFlags &
|
||||
VK_ADDRESS_COPY_PROTECTED_BIT_KHR) == 0);
|
||||
assert(pCopyMemoryIndirectInfo->copyAddressRange.size >=
|
||||
pCopyMemoryIndirectInfo->copyCount *
|
||||
pCopyMemoryIndirectInfo->copyAddressRange.stride);
|
||||
|
||||
assert(!anv_cmd_buffer_is_blitter_queue(cmd_buffer));
|
||||
|
||||
enum blorp_batch_flags blorp_flags = BLORP_BATCH_USE_COMPUTE;
|
||||
|
||||
struct blorp_batch batch;
|
||||
anv_blorp_batch_init(cmd_buffer, &batch, blorp_flags);
|
||||
|
||||
blorp_copy_memory_indirect(&batch, indirect_buf_addr, copy_count, stride);
|
||||
|
||||
anv_blorp_batch_finish(&batch);
|
||||
}
|
||||
|
||||
void
|
||||
anv_CmdCopyMemoryToImageIndirectKHR(
|
||||
VkCommandBuffer commandBuffer,
|
||||
const VkCopyMemoryToImageIndirectInfoKHR* pCopyMemoryToImageIndirectInfo)
|
||||
{
|
||||
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
ANV_FROM_HANDLE(anv_image, anv_image,
|
||||
pCopyMemoryToImageIndirectInfo->dstImage);
|
||||
|
||||
if (pCopyMemoryToImageIndirectInfo->copyCount == 0)
|
||||
return;
|
||||
|
||||
const uint64_t indirect_buf_addr =
|
||||
pCopyMemoryToImageIndirectInfo->copyAddressRange.address;
|
||||
const uint32_t copy_count = pCopyMemoryToImageIndirectInfo->copyCount;
|
||||
const uint64_t stride =
|
||||
pCopyMemoryToImageIndirectInfo->copyAddressRange.stride;
|
||||
const VkImageLayout img_layout =
|
||||
pCopyMemoryToImageIndirectInfo->dstImageLayout;
|
||||
|
||||
assert((pCopyMemoryToImageIndirectInfo->srcCopyFlags &
|
||||
VK_ADDRESS_COPY_PROTECTED_BIT_KHR) == 0);
|
||||
assert(pCopyMemoryToImageIndirectInfo->copyAddressRange.size >=
|
||||
pCopyMemoryToImageIndirectInfo->copyCount *
|
||||
pCopyMemoryToImageIndirectInfo->copyAddressRange.stride);
|
||||
|
||||
assert(!anv_cmd_buffer_is_blitter_queue(cmd_buffer));
|
||||
|
||||
enum blorp_batch_flags blorp_flags = BLORP_BATCH_USE_COMPUTE;
|
||||
struct blorp_batch batch;
|
||||
anv_blorp_batch_init(cmd_buffer, &batch, blorp_flags);
|
||||
|
||||
for (int c = 0; c < copy_count; c++) {
|
||||
const VkImageSubresourceLayers *img_subresource =
|
||||
&pCopyMemoryToImageIndirectInfo->pImageSubresources[c];
|
||||
VkImageAspectFlags aspect_mask = img_subresource->aspectMask;
|
||||
uint32_t mip_level = img_subresource->mipLevel;
|
||||
uint32_t base_layer = img_subresource->baseArrayLayer;
|
||||
uint32_t layer_count = img_subresource->layerCount;
|
||||
|
||||
assert(mip_level != VK_REMAINING_MIP_LEVELS);
|
||||
if (layer_count == VK_REMAINING_ARRAY_LAYERS)
|
||||
layer_count = anv_image->vk.array_layers - base_layer;
|
||||
|
||||
const unsigned plane =
|
||||
anv_image_aspect_to_plane(anv_image, aspect_mask);
|
||||
struct isl_surf *img_isl_surf =
|
||||
&anv_image->planes[plane].primary_surface.isl;
|
||||
enum isl_format format = img_isl_surf->format;
|
||||
bool format_is_compressed = isl_format_is_compressed(format);
|
||||
|
||||
struct blorp_surf img_blorp_surf;
|
||||
get_blorp_surf_for_anv_image(cmd_buffer, anv_image,
|
||||
aspect_mask, VK_IMAGE_USAGE_STORAGE_BIT,
|
||||
img_layout,
|
||||
anv_image->planes[plane].aux_usage,
|
||||
format, false /* cross_aspect */,
|
||||
&img_blorp_surf);
|
||||
|
||||
anv_cmd_buffer_mark_image_written(cmd_buffer, anv_image,
|
||||
img_subresource->aspectMask,
|
||||
img_blorp_surf.aux_usage, mip_level,
|
||||
base_layer, layer_count);
|
||||
|
||||
/* If the format is compressed, we will pretend the format is one where
|
||||
* each pixel is the size of the compressed block, so image stores can
|
||||
* work. Unfortunately, that translation only works if we do it for one
|
||||
* mip level of a specific layer, as the complete layout of, say, BC4
|
||||
* and R32G32 formats are not compatible: so we loop through all layers
|
||||
* here.
|
||||
*/
|
||||
if (format_is_compressed) {
|
||||
int min_l_or_z, top_l_or_z;
|
||||
if (img_blorp_surf.surf->dim == ISL_SURF_DIM_3D) {
|
||||
min_l_or_z = 0;
|
||||
top_l_or_z = img_isl_surf->logical_level0_px.depth >> mip_level;
|
||||
} else {
|
||||
min_l_or_z = base_layer;
|
||||
top_l_or_z = base_layer + layer_count;
|
||||
}
|
||||
for (int l_or_z = min_l_or_z; l_or_z < top_l_or_z; l_or_z++) {
|
||||
blorp_copy_memory_to_image_indirect(&batch, &img_blorp_surf,
|
||||
indirect_buf_addr,
|
||||
stride, c, mip_level,
|
||||
layer_count, l_or_z);
|
||||
}
|
||||
} else {
|
||||
blorp_copy_memory_to_image_indirect(&batch, &img_blorp_surf,
|
||||
indirect_buf_addr, stride, c,
|
||||
mip_level, layer_count, -1);
|
||||
}
|
||||
}
|
||||
|
||||
anv_blorp_batch_finish(&batch);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -629,6 +629,39 @@ anv_get_format_aspect(const struct anv_physical_device *device,
|
|||
return anv_get_format_plane(device, vk_format, plane, tiling);
|
||||
}
|
||||
|
||||
static bool
|
||||
anv_format_supports_indirect_copies(const struct anv_physical_device *pdevice,
|
||||
const struct anv_format *anv_format)
|
||||
{
|
||||
const struct isl_format_layout *fmtl =
|
||||
isl_format_get_layout(anv_format->planes[0].isl_format);
|
||||
|
||||
/* CTS insists on it even when we say we don't support it. */
|
||||
if (!pdevice->vk.supported_features.indirectMemoryToImageCopy)
|
||||
return false;
|
||||
|
||||
/* TODO: implement support for this in the copy shader. */
|
||||
if (!util_is_power_of_two_or_zero(fmtl->bpb))
|
||||
return false;
|
||||
|
||||
/* TODO: we use compute for indirect copies, and compute cannot write HiZ,
|
||||
* we could try to support that if we see that applications want it.
|
||||
*/
|
||||
if (vk_format_is_depth_or_stencil(anv_format->vk_format))
|
||||
return false;
|
||||
|
||||
/* Let's leave YCbCr and multi-planar formats out until we have proper
|
||||
* tests to verify they work.
|
||||
*/
|
||||
if (isl_format_is_yuv(anv_format->planes[0].isl_format))
|
||||
return false;
|
||||
|
||||
if (anv_format->n_planes > 1)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Format capabilities
|
||||
|
||||
static bool
|
||||
|
|
@ -852,7 +885,8 @@ anv_get_color_format_features(const struct anv_physical_device *physical_device,
|
|||
if (vk_tiling == VK_IMAGE_TILING_LINEAR &&
|
||||
isl_format_get_layout(plane_format.isl_format)->txc == ISL_TXC_ASTC)
|
||||
return VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT |
|
||||
VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT;
|
||||
VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT |
|
||||
VK_FORMAT_FEATURE_2_COPY_IMAGE_INDIRECT_DST_BIT_KHR;
|
||||
|
||||
flags |= VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT |
|
||||
VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_FILTER_MINMAX_BIT |
|
||||
|
|
@ -935,6 +969,10 @@ anv_get_color_format_features(const struct anv_physical_device *physical_device,
|
|||
VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT;
|
||||
}
|
||||
|
||||
if ((flags & VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT) &&
|
||||
anv_format_supports_indirect_copies(physical_device, anv_format))
|
||||
flags |= VK_FORMAT_FEATURE_2_COPY_IMAGE_INDIRECT_DST_BIT_KHR;
|
||||
|
||||
/* XXX: We handle 3-channel formats by switching them out for RGBX or
|
||||
* RGBA formats behind-the-scenes. This works fine for textures
|
||||
* because the upload process will fill in the extra channel.
|
||||
|
|
|
|||
|
|
@ -152,6 +152,7 @@ get_device_extensions(const struct anv_physical_device *device,
|
|||
.KHR_cooperative_matrix = device->has_cooperative_matrix,
|
||||
.NV_cooperative_matrix2 = device->has_cooperative_matrix,
|
||||
.KHR_copy_commands2 = true,
|
||||
.KHR_copy_memory_indirect = true,
|
||||
.KHR_create_renderpass2 = true,
|
||||
.KHR_dedicated_allocation = true,
|
||||
.KHR_deferred_host_operations = true,
|
||||
|
|
@ -1023,6 +1024,10 @@ get_features(const struct anv_physical_device *pdevice,
|
|||
/* VK_KHR_pipeline_binary */
|
||||
.pipelineBinaries = true,
|
||||
|
||||
/* VK_KHR_copy_memory_indirect */
|
||||
.indirectMemoryCopy = true,
|
||||
.indirectMemoryToImageCopy = pdevice->info.ver >= 12,
|
||||
|
||||
#ifdef ANV_USE_WSI_PLATFORM
|
||||
/* VK_EXT_present_timing */
|
||||
.presentTiming = true,
|
||||
|
|
@ -2205,6 +2210,11 @@ get_properties(const struct anv_physical_device *pdevice,
|
|||
*/
|
||||
props->shaderBinaryVersion = 0;
|
||||
}
|
||||
|
||||
/* VK_KHR_copy_memory_indirect */
|
||||
{
|
||||
props->supportedQueues = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT;
|
||||
}
|
||||
}
|
||||
|
||||
/* This function restricts the maximum size of system memory heap. The
|
||||
|
|
|
|||
|
|
@ -534,6 +534,8 @@ get_color_aux_op(const struct blorp_params *params)
|
|||
case BLORP_OP_SLOW_COLOR_CLEAR:
|
||||
case BLORP_OP_BLIT:
|
||||
case BLORP_OP_COPY:
|
||||
case BLORP_OP_COPY_INDIRECT:
|
||||
case BLORP_OP_COPY_IMAGE_INDIRECT:
|
||||
assert(params->fast_clear_op == ISL_AUX_OP_NONE);
|
||||
return ANV_COLOR_AUX_OP_CLASS_NONE;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue