From 0bd9aa85eb695c01a5ad0cf3c4f0d8f1345f8196 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Thu, 18 Dec 2025 15:34:36 -0800 Subject: [PATCH] anv: implement VK_KHR_copy_memory_indirect This implements the extension on the Graphics and Compute queues using Blorp OpenCL compute shaders. Support for the Transfer queue will come in a later patch. We also don't support 24/48/96 bpp formats yet. Signed-off-by: Paulo Zanoni --- docs/features.txt | 2 +- src/intel/blorp/blorp.h | 18 ++ src/intel/blorp/blorp_genX_exec_brw.h | 175 ++++++++++++ src/intel/blorp/blorp_indirect_copy.c | 355 +++++++++++++++++++++++++ src/intel/blorp/blorp_priv.h | 73 ++++- src/intel/blorp/blorp_shaders.cl | 277 +++++++++++++++++++ src/intel/blorp/meson.build | 1 + src/intel/vulkan/anv_blorp.c | 132 +++++++++ src/intel/vulkan/anv_formats.c | 40 ++- src/intel/vulkan/anv_physical_device.c | 10 + src/intel/vulkan/genX_blorp_exec.c | 2 + 11 files changed, 1082 insertions(+), 3 deletions(-) create mode 100644 src/intel/blorp/blorp_indirect_copy.c diff --git a/docs/features.txt b/docs/features.txt index 29197179949..0634d7cbe28 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -542,7 +542,7 @@ Khronos extensions that are not part of any Vulkan version: VK_KHR_calibrated_timestamps DONE (anv, hk, kk, nvk, panvk/v10+, radv, tu/a750+, vn) VK_KHR_compute_shader_derivatives DONE (anv, lvp, nvk, radv, tu, vn) VK_KHR_cooperative_matrix DONE (anv, nvk/Turing+, radv/gfx11+, vn) - VK_KHR_copy_memory_indirect DONE (nvk, radv/gfx8+) + VK_KHR_copy_memory_indirect DONE (anv, nvk, radv/gfx8+) VK_KHR_deferred_host_operations DONE (anv, hasvk, lvp, radv, tu, vn) VK_KHR_depth_clamp_zero_one DONE (anv, nvk, panvk, radv, tu, vn) VK_KHR_device_address_commands DONE (radv) diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h index b07006777f5..31c1e14805b 100644 --- a/src/intel/blorp/blorp.h +++ b/src/intel/blorp/blorp.h @@ -41,6 +41,8 @@ typedef struct nir_shader nir_shader; enum blorp_op { 
BLORP_OP_BLIT, BLORP_OP_COPY, + BLORP_OP_COPY_INDIRECT, + BLORP_OP_COPY_IMAGE_INDIRECT, BLORP_OP_CCS_AMBIGUATE, BLORP_OP_CCS_COLOR_CLEAR, BLORP_OP_CCS_PARTIAL_RESOLVE, @@ -299,6 +301,22 @@ blorp_buffer_copy(struct blorp_batch *batch, struct blorp_address dst, uint64_t size); +void +blorp_copy_memory_indirect(struct blorp_batch *batch, + uint64_t indirect_buf_addr, + uint32_t copy_count, + uint64_t stride); + +void +blorp_copy_memory_to_image_indirect(struct blorp_batch *batch, + const struct blorp_surf *img_blorp_surf, + uint64_t indirect_buf_addr, + uint64_t indirect_buf_stride, + uint32_t first_copy_idx, + uint32_t img_mip_level, + int layer_count, + int forced_layer_or_z); + void blorp_fast_clear(struct blorp_batch *batch, const struct blorp_surf *surf, diff --git a/src/intel/blorp/blorp_genX_exec_brw.h b/src/intel/blorp/blorp_genX_exec_brw.h index f72f439d3f6..44f420af8fd 100644 --- a/src/intel/blorp/blorp_genX_exec_brw.h +++ b/src/intel/blorp/blorp_genX_exec_brw.h @@ -1380,6 +1380,10 @@ blorp_setup_binding_table(struct blorp_batch *batch, uint32_t surface_offsets[BLORP_NUM_BT_ENTRIES], bind_offset = 0; void *surface_maps[BLORP_NUM_BT_ENTRIES]; + /* There's nothing to bind here. 
*/ + if (params->op == BLORP_OP_COPY_INDIRECT) + return 0; + if (params->use_pre_baked_binding_table) { bind_offset = params->pre_baked_binding_table_offset; } else { @@ -1784,6 +1788,170 @@ blorp_get_compute_push_const(struct blorp_batch *batch, *state_size = push_const_size; } +static void +blorp_indirect_buffer_get_dispatch_size(struct blorp_batch *batch, + const struct blorp_params *params, + struct mi_builder *b, + struct mi_value *size_x, + struct mi_value *size_y, + struct mi_value *size_z) +{ + struct blorp_context *blorp = batch->blorp; + const struct brw_cs_prog_data *cs_prog_data = params->cs_prog_data; + + uint64_t indirect_buf_addr = params->wm_inputs.indirect.indirect_buf_addr; + uint64_t stride = params->wm_inputs.indirect.indirect_buf_stride; + uint32_t copy_count = params->wm_inputs.indirect.copy_count; + + size_t size_offset = 16; /* offsetof(VkCopyMemoryIndirectCommandKHR, size) */ + + struct mi_value biggest_copy_size; + for (int c = 0; c < copy_count; c++) { + struct blorp_address copy_size_addr = { + .buffer = NULL, + .offset = indirect_buf_addr + c * stride + size_offset, + .reloc_flags = 0, + .mocs = isl_mocs(blorp->isl_dev, ISL_SURF_USAGE_STORAGE_BIT, false), + .local_hint = false, /* We don't have a way to know this. */ + }; + + struct mi_value this_copy_size = mi_mem32(copy_size_addr); + if (c == 0) + biggest_copy_size = this_copy_size; + else + biggest_copy_size = mi_umax2(b, this_copy_size, biggest_copy_size); + } + + /* Each shader invocation writes an uint32_t. 
*/ + int divisor = cs_prog_data->local_size[0] * sizeof(uint32_t); + *size_x = mi_udiv32_imm(b, mi_iadd_imm(b, biggest_copy_size, divisor - 1), + divisor); + + assert(cs_prog_data->local_size[1] == 1); + assert(cs_prog_data->local_size[2] == 1); + *size_y = mi_imm(1); + *size_z = mi_imm(1); +} + +static void +blorp_indirect_buf2img_get_dispatch_size(struct blorp_batch *batch, + const struct blorp_params *params, + struct mi_builder *b, + struct mi_value *size_x, + struct mi_value *size_y, + struct mi_value *size_z) +{ + struct blorp_context *blorp = batch->blorp; + const struct brw_cs_prog_data *cs_prog_data = params->cs_prog_data; + + uint64_t indirect_buf_addr = params->wm_inputs.indirect.indirect_buf_addr; + uint64_t stride = params->wm_inputs.indirect.indirect_buf_stride; + uint32_t copy_idx = params->wm_inputs.indirect.copy_idx; + bool is_forced_layer = params->wm_inputs.indirect.forced_layer_or_z != -1; + bool is_3d = params->wm_inputs.indirect.dimensions == 3; + + /* These are all offsetof(VkCopyMemoryToImageIndirectCommandKHR, x). */ + const size_t img_extent_x_offset = 44; + const size_t img_extent_y_offset = 48; + const size_t img_extent_z_offset = 52; + + struct blorp_address x_extent_addr = { + .buffer = NULL, + .offset = indirect_buf_addr + copy_idx * stride + img_extent_x_offset, + .reloc_flags = 0, + .mocs = isl_mocs(blorp->isl_dev, ISL_SURF_USAGE_STORAGE_BIT, false), + .local_hint = false, /* We don't have a way to know this. */ + }; + struct blorp_address y_extent_addr = { + .buffer = NULL, + .offset = indirect_buf_addr + copy_idx * stride + img_extent_y_offset, + .reloc_flags = 0, + .mocs = isl_mocs(blorp->isl_dev, ISL_SURF_USAGE_STORAGE_BIT, false), + .local_hint = false, /* We don't have a way to know this. 
*/ + }; + struct blorp_address z_extent_addr = { + .buffer = NULL, + .offset = indirect_buf_addr + copy_idx * stride + img_extent_z_offset, + .reloc_flags = 0, + .mocs = isl_mocs(blorp->isl_dev, ISL_SURF_USAGE_STORAGE_BIT, false), + .local_hint = false, /* We don't have a way to know this. */ + }; + + /* Notes on the 'params->num_layers' usage below: + * + * - Please see how we handle this in function + * blorp_copy_memory_to_image_indirect(). + * + * - If we're using forced layers (see + * params.wm_inputs.indirect.forced_layer_or_z), then we can just set + * size_z to 1 and don't even look at the indirect buffer: each shader + * call operates on a single layer. + * + * - The information on the number of layers for each copy is not truly + * indirect: it has to be passed to during command creation, so we + * have already processed it. See 'max_layer_count' in + * blorp_copy_memory_to_image_indirect(). We don't want to be looking + * at the indirect buffer if we don't need to. + * + * - For 3D images, the applications can use the Z axis as either an + * actual axis (offset.z + extent.z) or pretend the Z axis is layers + * (base_layer + layer_count), so would have to check both places. + * Fortunately, base_layer + layer_count was already checked (see + * above) and recorded in params->num_layers. If params->num_layers is + * bigger than 1, then we don't even bother looking at extent.z. 
+ */ + struct mi_value x_extent = mi_mem32(x_extent_addr); + struct mi_value y_extent = mi_mem32(y_extent_addr); + struct mi_value z_extent; + if (is_forced_layer) { + z_extent = mi_imm(1); + } else { + if (is_3d && params->num_layers == 1) + z_extent = mi_mem32(z_extent_addr); + else + z_extent = mi_imm(params->num_layers); + } + + int divisor_x = cs_prog_data->local_size[0]; + int divisor_y = cs_prog_data->local_size[1]; + int divisor_z = cs_prog_data->local_size[2]; + + assert(divisor_x != 1 && divisor_y != 1 && divisor_z == 1); + *size_x = mi_udiv32_imm(b, mi_iadd_imm(b, x_extent, divisor_x - 1), + divisor_x); + *size_y = mi_udiv32_imm(b, mi_iadd_imm(b, y_extent, divisor_y - 1), + divisor_y); + *size_z = z_extent; +} + +static void +blorp_indirect_write_gpgpu_dispatch_regs(struct blorp_batch *batch, + const struct blorp_params *params) +{ + struct blorp_context *blorp = batch->blorp; + const struct intel_device_info *devinfo = blorp->compiler->brw->devinfo; + + struct mi_builder b; + mi_builder_init(&b, devinfo, batch); + mi_builder_set_mocs(&b, isl_mocs(blorp->isl_dev, 0, false)); + + struct mi_value size_x, size_y, size_z; + + if (params->op == BLORP_OP_COPY_INDIRECT) { + blorp_indirect_buffer_get_dispatch_size(batch, params, &b, &size_x, + &size_y, &size_z); + } else { + assert(params->op == BLORP_OP_COPY_IMAGE_INDIRECT); + + blorp_indirect_buf2img_get_dispatch_size(batch, params, &b, &size_x, + &size_y, &size_z); + } + + mi_store(&b, mi_reg32(GENX(GPGPU_DISPATCHDIMX_num)), size_x); + mi_store(&b, mi_reg32(GENX(GPGPU_DISPATCHDIMY_num)), size_y); + mi_store(&b, mi_reg32(GENX(GPGPU_DISPATCHDIMZ_num)), size_z); +} + static void blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params) { @@ -1798,6 +1966,11 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params) const struct intel_cs_dispatch_info dispatch = brw_cs_get_dispatch_info(devinfo, cs_prog_data, NULL); + bool use_indirect = 
blorp_op_type_is_indirect(params->op); + + if (use_indirect) + blorp_indirect_write_gpgpu_dispatch_regs(batch, params); + uint32_t group_x0 = params->x0 / cs_prog_data->local_size[0]; uint32_t group_y0 = params->y0 / cs_prog_data->local_size[1]; uint32_t group_z0 = params->dst.z_offset; @@ -1911,6 +2084,7 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params) assert(cs_prog_data->push.per_thread.regs == 0); blorp_emit(batch, GENX(COMPUTE_WALKER), cw) { + cw.IndirectParameterEnable = use_indirect, cw.body = body; } #else /* GFX_VERx10 >= 125 */ @@ -1980,6 +2154,7 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params) } blorp_emit(batch, GENX(GPGPU_WALKER), ggw) { + ggw.IndirectParameterEnable = use_indirect, ggw.SIMDSize = dispatch.simd_size / 16; ggw.ThreadDepthCounterMaximum = 0; ggw.ThreadHeightCounterMaximum = 0; diff --git a/src/intel/blorp/blorp_indirect_copy.c b/src/intel/blorp/blorp_indirect_copy.c new file mode 100644 index 00000000000..4d46d3c8bd4 --- /dev/null +++ b/src/intel/blorp/blorp_indirect_copy.c @@ -0,0 +1,355 @@ +/* Copyright © 2025 Intel Corporation + * SPDX-License-Identifier: MIT + */ + +#include "blorp_nir_builder.h" +#include "compiler/nir/nir_format_convert.h" + +#include "blorp_priv.h" +#include "dev/intel_debug.h" +#include "dev/intel_device_info.h" + +#include "blorp_shaders.h" + +/* Refer to struct blorp_wm_inputs_indirect. 
*/ +struct blorp_indirect_vars { + nir_variable *indirect_buf_addr; + nir_variable *stride; + nir_variable *copy_count; + nir_variable *copy_idx; + nir_variable *max_layer; + nir_variable *x_offset; + nir_variable *y_offset; +}; + +static enum isl_format +get_format_for_copy(int format_bpb) +{ + switch (format_bpb) { + case 8: return ISL_FORMAT_R8_UINT; + case 16: return ISL_FORMAT_R16_UINT; + case 24: return ISL_FORMAT_R8G8B8_UINT; + case 32: return ISL_FORMAT_R32_UINT; + case 48: return ISL_FORMAT_R16G16B16_UINT; + case 64: return ISL_FORMAT_R32G32_UINT; + case 96: return ISL_FORMAT_R32G32B32_UINT; + case 128: return ISL_FORMAT_R32G32B32A32_UINT; + default: + mesa_loge("unexpected format bpb: %d", format_bpb); + assert(false); + return ISL_FORMAT_UNSUPPORTED; + } +} + +static void +blorp_indirect_vars_init(nir_builder *b, struct blorp_indirect_vars *v) +{ + v->copy_count = + BLORP_CREATE_NIR_INPUT(b->shader, indirect.copy_count, + glsl_uint_type()); + v->indirect_buf_addr = + BLORP_CREATE_NIR_INPUT(b->shader, indirect.indirect_buf_addr, + glsl_uint64_t_type()); + v->stride = + BLORP_CREATE_NIR_INPUT(b->shader, indirect.indirect_buf_stride, + glsl_uint64_t_type()); + v->copy_idx = + BLORP_CREATE_NIR_INPUT(b->shader, indirect.copy_idx, + glsl_uint_type()); + v->max_layer = + BLORP_CREATE_NIR_INPUT(b->shader, indirect.max_layer, + glsl_uint_type()); + v->x_offset = + BLORP_CREATE_NIR_INPUT(b->shader, indirect.x_offset, + glsl_uint_type()); + v->y_offset = + BLORP_CREATE_NIR_INPUT(b->shader, indirect.y_offset, + glsl_uint_type()); +} + +static nir_shader * +blorp_build_copy_mem_indirect_shader(struct blorp_batch *batch, + void *mem_ctx) +{ + + struct blorp_context *blorp = batch->blorp; + + mesa_shader_stage stage = MESA_SHADER_COMPUTE; + const nir_shader_compiler_options *nir_options = + blorp->compiler->nir_options(blorp, stage); + nir_builder b = nir_builder_init_simple_shader(stage, nir_options, + "copy mem indirect"); + ralloc_steal(mem_ctx, b.shader); + + 
b.shader->info.workgroup_size[0] = 32; + b.shader->info.workgroup_size[1] = 1; + b.shader->info.workgroup_size[2] = 1; + + struct blorp_indirect_vars v; + blorp_indirect_vars_init(&b, &v); + + /* The indirect buffer is an array containing 'copy_count' + * VkCopyMemoryIndirectCommandKHR structures, separated by 'stride' bytes. + */ + nir_def *copy_count = nir_load_var(&b, v.copy_count); + nir_def *indirect_buf_addr = nir_load_var(&b, v.indirect_buf_addr); + nir_def *stride = nir_load_var(&b, v.stride); + nir_def *global_id = + nir_channel(&b, nir_load_global_invocation_id(&b, 32), 0); + + blorp_copy_memory_indirect_shader(&b, indirect_buf_addr, copy_count, + stride, global_id); + + return b.shader; +} + +static nir_shader * +blorp_build_copy_mem2img_indirect_shader(struct blorp_batch *batch, + void *mem_ctx, + struct blorp_indirect_copy_mem2img_key *key) +{ + struct blorp_context *blorp = batch->blorp; + + mesa_shader_stage stage = MESA_SHADER_COMPUTE; + const nir_shader_compiler_options *nir_options = + blorp->compiler->nir_options(blorp, stage); + nir_builder b = nir_builder_init_simple_shader(stage, nir_options, + "copy mem2img indirect"); + ralloc_steal(mem_ctx, b.shader); + + b.shader->info.workgroup_size[0] = 4; + b.shader->info.workgroup_size[1] = 8; + b.shader->info.workgroup_size[2] = 1; + + struct blorp_indirect_vars v; + blorp_indirect_vars_init(&b, &v); + + /* The indirect buffer is an array containing 'copy_count' + * VkCopyMemoryToImageIndirectCommandKHR structures, separated by 'stride' + * bytes. 
+ */ + nir_def *indirect_buf_addr = nir_load_var(&b, v.indirect_buf_addr); + nir_def *stride = nir_load_var(&b, v.stride); + nir_def *copy_idx = nir_load_var(&b, v.copy_idx); + nir_def *max_layer = nir_load_var(&b, v.max_layer); + nir_def *dest_coord_offsets_arr[2] = { + nir_load_var(&b, v.x_offset), + nir_load_var(&b, v.y_offset), + }; + nir_def *dest_coord_offsets = nir_vec(&b, dest_coord_offsets_arr, 2); + + nir_def *global_id = nir_load_global_invocation_id(&b, 32); + + /* Shader keys (constants). */ + nir_def *dimensions = nir_imm_int(&b, key->dimensions); + nir_def *forced_layer_or_z = nir_imm_int(&b, key->forced_layer_or_z); + nir_def *format_Bpb = nir_imm_intN_t(&b, key->format_Bpb, 16); + nir_def *format_block_size_arr[3] = { + nir_imm_int(&b, key->format_bw), + nir_imm_int(&b, key->format_bh), + nir_imm_int(&b, key->format_bd), + }; + nir_def *format_block_size = nir_vec(&b, format_block_size_arr, 3); + + /* Constants derived from the shader keys. */ + nir_def *is_block_compressed = + nir_imm_bool(&b, key->format_bw > 1 || key->format_bh > 1 || + key->format_bd > 1); + + blorp_copy_memory_to_image_indirect_shader(&b, indirect_buf_addr, + stride, + copy_idx, + max_layer, + dest_coord_offsets, + global_id, + dimensions, + forced_layer_or_z, + format_Bpb, + format_block_size, + is_block_compressed); + + return b.shader; +} + +static bool +blorp_get_copy_mem_indirect_kernel_cs(struct blorp_batch *batch, + struct blorp_params *params) +{ + struct blorp_context *blorp = batch->blorp; + const char *key = "copy_mem_indirect_kernel_cs"; + uint32_t key_size = strlen(key); + + if (blorp->lookup_shader(batch, key, key_size, ¶ms->cs_prog_kernel, + ¶ms->cs_prog_data)) + return true; + + void *mem_ctx = ralloc_context(NULL); + + nir_shader *nir = + blorp_build_copy_mem_indirect_shader(batch, mem_ctx); + + const struct blorp_program prog = + blorp_compile_cs(blorp, mem_ctx, nir, key, key_size); + + bool result = blorp->upload_shader(batch, MESA_SHADER_COMPUTE, + key, 
key_size, + prog.kernel, prog.kernel_size, + prog.prog_data, prog.prog_data_size, + ¶ms->cs_prog_kernel, + ¶ms->cs_prog_data); + + ralloc_free(mem_ctx); + return result; + +} + +static bool +blorp_get_copy_mem2img_indirect_kernel_cs(struct blorp_batch *batch, + struct blorp_params *params, + struct blorp_indirect_copy_mem2img_key *key) +{ + struct blorp_context *blorp = batch->blorp; + + if (blorp->lookup_shader(batch, key, sizeof(*key), ¶ms->cs_prog_kernel, + ¶ms->cs_prog_data)) + return true; + + void *mem_ctx = ralloc_context(NULL); + + nir_shader *nir = + blorp_build_copy_mem2img_indirect_shader(batch, mem_ctx, key); + + const struct blorp_program prog = + blorp_compile_cs(blorp, mem_ctx, nir, key, sizeof(*key)); + + bool result = blorp->upload_shader(batch, MESA_SHADER_COMPUTE, + key, sizeof(*key), + prog.kernel, prog.kernel_size, + prog.prog_data, prog.prog_data_size, + ¶ms->cs_prog_kernel, + ¶ms->cs_prog_data); + + ralloc_free(mem_ctx); + return result; +} + +void +blorp_copy_memory_indirect(struct blorp_batch *batch, + uint64_t indirect_buf_addr, + uint32_t copy_count, + uint64_t stride) +{ + struct blorp_params params; + blorp_params_init(¶ms); + + params.op = BLORP_OP_COPY_INDIRECT; + params.shader_type = BLORP_SHADER_TYPE_COPY_INDIRECT; + params.shader_pipeline = BLORP_SHADER_PIPELINE_COMPUTE; + + params.x0 = 0; + params.y0 = 0; + params.x1 = 1; + params.y1 = 1; + + params.wm_inputs.indirect.indirect_buf_addr = indirect_buf_addr; + params.wm_inputs.indirect.indirect_buf_stride = stride; + params.wm_inputs.indirect.copy_count = copy_count; + + if (!blorp_get_copy_mem_indirect_kernel_cs(batch, ¶ms)) { + mesa_loge("failed to get copy_memory_indirect CS kernel"); + assert(false); + return; + } + + batch->blorp->exec(batch, ¶ms); +} + +void +blorp_copy_memory_to_image_indirect(struct blorp_batch *batch, + const struct blorp_surf *img_blorp_surf, + uint64_t indirect_buf_addr, + uint64_t indirect_buf_stride, + uint32_t copy_idx, + uint32_t img_mip_level, + int 
layer_count, + int forced_layer_or_z) +{ + enum isl_format original_format = img_blorp_surf->surf->format; + const struct isl_format_layout *fmtl = + isl_format_get_layout(original_format); + enum isl_format copy_format = get_format_for_copy(fmtl->bpb); + int dimensions = img_blorp_surf->surf->dim + 1; + + struct blorp_indirect_copy_mem2img_key key = { + .dimensions = dimensions, + .forced_layer_or_z = forced_layer_or_z, + .format_Bpb = fmtl->bpb / 8, + .format_bw = fmtl->bw, + .format_bh = fmtl->bh, + .format_bd = fmtl->bd, + }; + + struct blorp_params params; + blorp_params_init(¶ms); + + params.op = BLORP_OP_COPY_IMAGE_INDIRECT; + params.shader_type = BLORP_SHADER_TYPE_COPY_INDIRECT; + params.shader_pipeline = BLORP_SHADER_PIPELINE_COMPUTE; + + params.wm_inputs.indirect.indirect_buf_addr = indirect_buf_addr; + params.wm_inputs.indirect.indirect_buf_stride = indirect_buf_stride; + params.wm_inputs.indirect.copy_idx = copy_idx; + params.wm_inputs.indirect.dimensions = dimensions; + params.wm_inputs.indirect.max_layer = + img_blorp_surf->surf->logical_level0_px.array_len - 1; + params.wm_inputs.indirect.forced_layer_or_z = forced_layer_or_z; + + /* params.dst is our image. */ + blorp_surface_info_init(batch, ¶ms.dst, img_blorp_surf, + img_mip_level, + forced_layer_or_z == -1 ? 
0 : forced_layer_or_z, + copy_format, + true /* is_dest */); + + struct isl_extent3d mip_dimensions = { + .width = MAX2(params.dst.surf.logical_level0_px.w >> img_mip_level, 1), + .height = MAX2(params.dst.surf.logical_level0_px.h >> img_mip_level, 1), + .depth = MAX2(params.dst.surf.logical_level0_px.d >> img_mip_level, 1), + }; + + if (fmtl->bw > 1 || fmtl->bh > 1 || fmtl->bd > 1) { + blorp_surf_convert_to_uncompressed(batch->blorp->isl_dev, + ¶ms.dst, NULL, NULL, NULL, NULL); + params.wm_inputs.indirect.x_offset = params.dst.tile_x_sa; + params.wm_inputs.indirect.y_offset = params.dst.tile_y_sa; + + mip_dimensions.width = params.dst.surf.logical_level0_px.w; + mip_dimensions.height = params.dst.surf.logical_level0_px.h; + mip_dimensions.depth = params.dst.surf.logical_level0_px.d; + } + + /* These settings control the number of workgroups in the shader, see + * blorp_exec_compute(). We don't need to divide by the local sizes here, + * this will be done later. + */ + params.x0 = 0; + params.y0 = 0; + params.x1 = mip_dimensions.width; + params.y1 = mip_dimensions.height; + if (forced_layer_or_z == -1) { + /* We set this here so blorp_indirect_buf2img_get_dispatch_size() can + * read it while figuring out how many shader instances we'll need. 
+ */ + params.num_layers = layer_count; + } else { + params.num_layers = 1; + } + + if (!blorp_get_copy_mem2img_indirect_kernel_cs(batch, ¶ms, &key)) { + mesa_loge("failed to get copy_memory_to_image_indirect CS kernel"); + assert(false); + return; + } + + batch->blorp->exec(batch, ¶ms); +} diff --git a/src/intel/blorp/blorp_priv.h b/src/intel/blorp/blorp_priv.h index cc77cd770bd..a295084f9b7 100644 --- a/src/intel/blorp/blorp_priv.h +++ b/src/intel/blorp/blorp_priv.h @@ -205,17 +205,55 @@ struct blorp_wm_inputs_clear { struct blorp_bounds_rect bounds_rect; }; +/* Parameters using in blorp_indirect_copy.c */ +struct blorp_wm_inputs_indirect { + /* The address of the indirect buffer containing the information about the + * indirect copy. + */ + uint64_t indirect_buf_addr; + + /* How far apart the information about each copy is inside the indirect + * buffer. + */ + uint64_t indirect_buf_stride; + + /* How many copies we have to do. */ + uint32_t copy_count; + + /* For memory to image copies, we do a single copy per shader. This + * represents the index of the copy to be done. + */ + uint32_t copy_idx; + + /* How many dimensions does our image have? 1, 2 or 3. */ + uint32_t dimensions; + + /* The maximum array layer of the image. */ + uint32_t max_layer; + + /* When compressed formats are used, we pretend they are a non-compressed + * format, of the same bpb. Since we can't maintain the exact same layout + * of mipmap and layer offsets, we're forced to make adjustments to where X + * and Y actually start, and are also forced to copy only one layer (or Z + * axis position) per shader invocation. + */ + uint32_t x_offset; + uint32_t y_offset; + int forced_layer_or_z; +}; + struct blorp_wm_inputs { union { struct blorp_wm_inputs_blit blit; struct blorp_wm_inputs_clear clear; + struct blorp_wm_inputs_indirect indirect; }; /* Note: Pad out to an integral number of registers when extending, but * make sure subgroup_id is the last 32-bit item. 
*/ - uint32_t pad[2]; + uint32_t pad[1]; uint32_t subgroup_id; }; @@ -257,6 +295,7 @@ enum blorp_shader_type { BLORP_SHADER_TYPE_MCS_PARTIAL_RESOLVE, BLORP_SHADER_TYPE_LAYER_OFFSET_VS, BLORP_SHADER_TYPE_GFX4_SF, + BLORP_SHADER_TYPE_COPY_INDIRECT, }; enum blorp_shader_pipeline { @@ -467,6 +506,25 @@ struct blorp_blit_prog_key uint8_t local_y; }; +struct blorp_indirect_copy_mem2img_key { + /* How many dimensions does our image have? 1, 2 or 3. */ + uint32_t dimensions; + + /* When compressed formats are used, we pretend they are a non-compressed + * format, of the same bpb. Since we can't maintain the exact same layout + * of mipmap and layer offsets, we're forced to make adjustments to where X + * and Y actually start, and are also forced to copy only one layer (or Z + * axis position) per shader invocation. + */ + int forced_layer_or_z; + + /* Info taken from isl_format_layout. */ + uint16_t format_Bpb; + uint16_t format_bw; + uint16_t format_bh; + uint16_t format_bd; +}; + /** * \name BLORP internals * \{ @@ -597,6 +655,19 @@ blorp_op_type_is_clear(enum blorp_op op) } } +/* This means: blorp->wm_inputs.indirect should be used. 
*/ +static inline bool +blorp_op_type_is_indirect(enum blorp_op op) +{ + switch (op) { + case BLORP_OP_COPY_INDIRECT: + case BLORP_OP_COPY_IMAGE_INDIRECT: + return true; + default: + return false; + } +} + /* Asserts unless the surface is a buffer to image copy */ #define blorp_assert_is_buffer(surf, view) \ do { \ diff --git a/src/intel/blorp/blorp_shaders.cl b/src/intel/blorp/blorp_shaders.cl index 08dd30a37f9..27fce179cb9 100644 --- a/src/intel/blorp/blorp_shaders.cl +++ b/src/intel/blorp/blorp_shaders.cl @@ -3,6 +3,7 @@ */ #include "compiler/libcl/libcl.h" +#include "compiler/libcl/libcl_vk.h" #include "compiler/nir/nir_defines.h" #include "compiler/shader_enums.h" @@ -15,3 +16,279 @@ blorp_check_in_bounds(uint4 bounds_rect, uint2 pos) return pos.x >= x0 && pos.x < x1 && pos.y >= y0 && pos.y < y1; } + +void nir_image_store(uint handle, int4 coords, uint sample_index, + uint4 colors, uint lod, uint image_dim, + uint image_array, uint format, uint access, + uint range_base, uint src_type); + +/* Used by vkCmdCopyMemoryIndirectKHR. */ +void +blorp_copy_memory_indirect_shader( + global uint *indirect_buf, + uint copy_count, + ulong stride_bytes, + uint global_id) +{ + for (uint c = 0; c < copy_count; c++) { + uint idx = (uint)(c * (stride_bytes / 4)); + + /* The spec says the minimum alignment is 4 instead of 8, so we have to + * do these tricks. 
+ */ + VkCopyMemoryIndirectCommandKHR cmd = + (*(global VkCopyMemoryIndirectCommandKHR*)&indirect_buf[idx]); + + uint copy_size_ints = cmd.size / 4; + + global int *src = (global int*)cmd.srcAddress; + global int *dst = (global int*)cmd.dstAddress; + + if (global_id < copy_size_ints) + dst[global_id] = src[global_id]; + } +} + +struct img_copy_params { + ulong src_address; + uint2 row_size_px; + uint base_layer; + uint layer_count; + uint3 offset; + uint3 extent; +}; + +void +read_img_params( + global uint *indirect_buf, + uint copy_idx, + ulong stride_bytes, + uint dimensions, + uint max_layer, + uint3 format_block_size, + bool is_block_compressed, + struct img_copy_params *p) +{ + uint idx = copy_idx * (stride_bytes / 4); + + VkCopyMemoryToImageIndirectCommandKHR cmd = + (*(global VkCopyMemoryToImageIndirectCommandKHR*)&indirect_buf[idx]); + + p->src_address = cmd.srcAddress; + p->row_size_px = (uint2)(cmd.bufferRowLength, cmd.bufferImageHeight); + + /* We don't use imageSubresource.aspectMask and imageSubresource.mipLevel, + * those are dealt with when the application calls + * vkCmdCopyMemoryToImageIndirectKHR(). + */ + + p->base_layer = cmd.imageSubresource.baseArrayLayer; + p->layer_count = cmd.imageSubresource.layerCount; + + p->offset = (uint3)(cmd.imageOffset.x, cmd.imageOffset.y, + cmd.imageOffset.z); + p->extent = (uint3)(cmd.imageExtent.width, cmd.imageExtent.height, + cmd.imageExtent.depth); + + if (p->row_size_px.x == 0) + p->row_size_px.x = p->extent.x; + if (p->row_size_px.y == 0) + p->row_size_px.y = p->extent.y; + + /* Our code deals with blocks, not pixels. */ + if (is_block_compressed) { + p->offset /= format_block_size; + p->extent = DIV_ROUND_UP(p->extent, format_block_size); + } + + /* Users can pass 3D images with the Z axis as an array layer. 
*/ + if (dimensions == 3 && (p->base_layer != 0 || p->layer_count != 1)) { + p->offset.z = p->base_layer; + p->extent.z = p->layer_count; + p->base_layer = 0; + p->layer_count = 1; + } + + /* This handles VK_REMAINING_ARRAY_LAYERS and bugs. */ + if (p->base_layer + p->layer_count > max_layer + 1) + p->layer_count = max_layer - p->base_layer + 1; +} + +uint4 +get_pixel( + ulong src_address, + uint format_Bpb) +{ + switch (format_Bpb) { + case 1: { + global uchar *src_buf = (global uchar *)src_address; + return (uint4)(src_buf[0], 0, 0, 0); + } + case 2: { + global ushort *src_buf = (global ushort *)src_address; + return (uint4)(src_buf[0], 0, 0, 0); + } + case 4: { + global uint *src_buf = (global uint *)src_address; + return (uint4)(src_buf[0], 0, 0, 0); + } + case 8: { + global uint *src_buf = (global uint *)src_address; + return (uint4)(vload2(0, src_buf), 0, 0); + } + case 16: { + global uint *src_buf = (global uint *)src_address; + return (uint4)(vload4(0, src_buf)); + } + default: + /* TODO: support 3, 6, 12. */ + return (uint4)(0xFF, 0, 0, 0xFF); + } +} + +int4 +get_coords( + uint3 pos, + uint layer, + uint2 dest_coord_offsets, + int dimensions, + int forced_layer_or_z) +{ + int4 ret; + + pos.xy += dest_coord_offsets; + + switch (dimensions) { + case 1: + ret = (int4)(pos.x, layer, 0, 0); + break; + case 2: + ret = (int4)(pos.x, pos.y, layer, 0); + break; + case 3: + default: + ret = (int4)(pos.x, pos.y, pos.z, 0); + break; + } + + if (forced_layer_or_z != -1) { + ret.z = 0; + ret.w = 0; + } + + return ret; +} + +void +write_pixel( + const int4 coords, + const uint4 colors) +{ + /* We don't seem to need to set image_array to true if we set sampler_dim + * to 3D. + * Setting mip_level does not do anything on Intel, we set mip levels + * through the bindings. + */ + nir_image_store(0 /* The image handle. */, + coords, /* See get_coords(). */ + 0, /* Sample index for multi-sampling. */ + colors, /* The RGBA pixels, in src_type. 
*/ + 0 /* mip_level */, + GLSL_SAMPLER_DIM_3D, + false, /* image_array */ + 0, /* format */ + ACCESS_NON_READABLE, /* access */ + 0, /* range_base */ + nir_type_int32 /* src_type */); +} + +/* Used by vkCmdCopyMemoryToImageIndirectKHR. */ +void +blorp_copy_memory_to_image_indirect_shader( + /* Actual parameters. */ + global uint *indirect_buf, + ulong stride_bytes, + uint copy_idx, + uint max_layer, + uint2 dest_coord_offsets, + uint3 global_id, + + /* These are shader keys, they are NIR immediates. */ + uint dimensions, + int forced_layer_or_z, + ushort format_Bpb, + uint3 format_block_size, + bool is_block_compressed) +{ + /* We have one invocation per texel of a given a mip level. This means + * that for the pixels outside the copy area, we'll hit the 'continue' + * below. + */ + uint3 src_pos = global_id; + uint layer; + + /* 'forced_layer_or_z' means that whatever slice or layer we're + * trying to work with is set as part of our binding (view) as depth + * 0 or layer 0, depending on the dimensionality. This can happen + * when we're trying to pretend a format is something that it's not + * (e.g., we're treating a block compressed 4x4 64bpp format as an + * r32g32 format). 
+ */ + if (forced_layer_or_z != -1) { + if (dimensions == 3) { + src_pos.z = forced_layer_or_z; + layer = 0; + } else { + src_pos.z = 0; + layer = forced_layer_or_z; + } + } else { + if (dimensions == 3) { + src_pos.z = global_id.z; + layer = 0; + } else { + src_pos.z = 0; + layer = global_id.z; + } + } + + struct img_copy_params p; + read_img_params(indirect_buf, copy_idx, stride_bytes, dimensions, + max_layer, format_block_size, is_block_compressed, &p); + + if (any(src_pos >= p.extent)) + return; + + uint3 dst_pos = src_pos + p.offset; + if (forced_layer_or_z == -1 && dimensions != 3) + layer = global_id.z + p.base_layer; + + if (layer > p.base_layer + p.layer_count - 1) + return; + + uint2 row_size_blocks = p.row_size_px; + if (is_block_compressed) { + row_size_blocks = DIV_ROUND_UP(row_size_blocks, + format_block_size.xy); + } + uint row_length_bytes = row_size_blocks.x * format_Bpb; + uint row_height_bytes = row_size_blocks.y * row_length_bytes; + + uint buf_z_offset_bytes = src_pos.z * row_height_bytes; + uint buf_y_offset_bytes = src_pos.y * row_length_bytes; + uint buf_x_offset_bytes = src_pos.x * format_Bpb; + + uint layer_offset_bytes = (layer - p.base_layer) * row_height_bytes; + uint buf_offset_bytes = layer_offset_bytes + + buf_z_offset_bytes + + buf_y_offset_bytes + + buf_x_offset_bytes; + + uint4 colors = get_pixel(p.src_address + buf_offset_bytes, + format_Bpb); + + int4 coords = get_coords(dst_pos, layer, dest_coord_offsets, dimensions, + forced_layer_or_z); + + write_pixel(coords, colors); +} diff --git a/src/intel/blorp/meson.build b/src/intel/blorp/meson.build index 1b82cc5ce1f..9ccbf59275b 100644 --- a/src/intel/blorp/meson.build +++ b/src/intel/blorp/meson.build @@ -37,6 +37,7 @@ files_libblorp = files( 'blorp.h', 'blorp_blit.c', 'blorp_clear.c', + 'blorp_indirect_copy.c', 'blorp_nir_builder.h', 'blorp_priv.h', ) diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c index 3348d95e589..899c25e2df2 100644 --- 
a/src/intel/vulkan/anv_blorp.c +++ b/src/intel/vulkan/anv_blorp.c @@ -2811,3 +2811,135 @@ anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer, anv_blorp_batch_finish(&batch); } + +void +anv_CmdCopyMemoryIndirectKHR( + VkCommandBuffer commandBuffer, + const VkCopyMemoryIndirectInfoKHR* pCopyMemoryIndirectInfo) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + if (pCopyMemoryIndirectInfo->copyCount == 0) + return; + + const uint64_t indirect_buf_addr = + pCopyMemoryIndirectInfo->copyAddressRange.address; + const uint32_t copy_count = pCopyMemoryIndirectInfo->copyCount; + const uint64_t stride = pCopyMemoryIndirectInfo->copyAddressRange.stride; + + /* These are all restrictions by the spec. */ + assert((pCopyMemoryIndirectInfo->srcCopyFlags & + VK_ADDRESS_COPY_PROTECTED_BIT_KHR) == 0); + assert((pCopyMemoryIndirectInfo->dstCopyFlags & + VK_ADDRESS_COPY_PROTECTED_BIT_KHR) == 0); + assert(pCopyMemoryIndirectInfo->copyAddressRange.size >= + pCopyMemoryIndirectInfo->copyCount * + pCopyMemoryIndirectInfo->copyAddressRange.stride); + + assert(!anv_cmd_buffer_is_blitter_queue(cmd_buffer)); + + enum blorp_batch_flags blorp_flags = BLORP_BATCH_USE_COMPUTE; + + struct blorp_batch batch; + anv_blorp_batch_init(cmd_buffer, &batch, blorp_flags); + + blorp_copy_memory_indirect(&batch, indirect_buf_addr, copy_count, stride); + + anv_blorp_batch_finish(&batch); +} + +void +anv_CmdCopyMemoryToImageIndirectKHR( + VkCommandBuffer commandBuffer, + const VkCopyMemoryToImageIndirectInfoKHR* pCopyMemoryToImageIndirectInfo) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, anv_image, + pCopyMemoryToImageIndirectInfo->dstImage); + + if (pCopyMemoryToImageIndirectInfo->copyCount == 0) + return; + + const uint64_t indirect_buf_addr = + pCopyMemoryToImageIndirectInfo->copyAddressRange.address; + const uint32_t copy_count = pCopyMemoryToImageIndirectInfo->copyCount; + const uint64_t stride = + 
pCopyMemoryToImageIndirectInfo->copyAddressRange.stride; + const VkImageLayout img_layout = + pCopyMemoryToImageIndirectInfo->dstImageLayout; + + assert((pCopyMemoryToImageIndirectInfo->srcCopyFlags & + VK_ADDRESS_COPY_PROTECTED_BIT_KHR) == 0); + assert(pCopyMemoryToImageIndirectInfo->copyAddressRange.size >= + pCopyMemoryToImageIndirectInfo->copyCount * + pCopyMemoryToImageIndirectInfo->copyAddressRange.stride); + + assert(!anv_cmd_buffer_is_blitter_queue(cmd_buffer)); + + enum blorp_batch_flags blorp_flags = BLORP_BATCH_USE_COMPUTE; + struct blorp_batch batch; + anv_blorp_batch_init(cmd_buffer, &batch, blorp_flags); + + for (int c = 0; c < copy_count; c++) { + const VkImageSubresourceLayers *img_subresource = + &pCopyMemoryToImageIndirectInfo->pImageSubresources[c]; + VkImageAspectFlags aspect_mask = img_subresource->aspectMask; + uint32_t mip_level = img_subresource->mipLevel; + uint32_t base_layer = img_subresource->baseArrayLayer; + uint32_t layer_count = img_subresource->layerCount; + + assert(mip_level != VK_REMAINING_MIP_LEVELS); + if (layer_count == VK_REMAINING_ARRAY_LAYERS) + layer_count = anv_image->vk.array_layers - base_layer; + + const unsigned plane = + anv_image_aspect_to_plane(anv_image, aspect_mask); + struct isl_surf *img_isl_surf = + &anv_image->planes[plane].primary_surface.isl; + enum isl_format format = img_isl_surf->format; + bool format_is_compressed = isl_format_is_compressed(format); + + struct blorp_surf img_blorp_surf; + get_blorp_surf_for_anv_image(cmd_buffer, anv_image, + aspect_mask, VK_IMAGE_USAGE_STORAGE_BIT, + img_layout, + anv_image->planes[plane].aux_usage, + format, false /* cross_aspect */, + &img_blorp_surf); + + anv_cmd_buffer_mark_image_written(cmd_buffer, anv_image, + img_subresource->aspectMask, + img_blorp_surf.aux_usage, mip_level, + base_layer, layer_count); + + /* If the format is compressed, we will pretend the format is one where + * each pixel is the size of the compressed block, so image stores can + * work. 
Unfortunately, that translation only works if we do it for one + * mip level of a specific layer, as the complete layouts of, say, BC4 + * and R32G32 formats are not compatible: so we loop through all layers + * here. + */ + if (format_is_compressed) { + int min_l_or_z, top_l_or_z; + if (img_blorp_surf.surf->dim == ISL_SURF_DIM_3D) { + min_l_or_z = 0; + top_l_or_z = img_isl_surf->logical_level0_px.depth >> mip_level; + } else { + min_l_or_z = base_layer; + top_l_or_z = base_layer + layer_count; + } + for (int l_or_z = min_l_or_z; l_or_z < top_l_or_z; l_or_z++) { + blorp_copy_memory_to_image_indirect(&batch, &img_blorp_surf, + indirect_buf_addr, + stride, c, mip_level, + layer_count, l_or_z); + } + } else { + blorp_copy_memory_to_image_indirect(&batch, &img_blorp_surf, + indirect_buf_addr, stride, c, + mip_level, layer_count, -1); + } + } + + anv_blorp_batch_finish(&batch); +} diff --git a/src/intel/vulkan/anv_formats.c b/src/intel/vulkan/anv_formats.c index 22f177adf2c..cbbebbf6f34 100644 --- a/src/intel/vulkan/anv_formats.c +++ b/src/intel/vulkan/anv_formats.c @@ -629,6 +629,39 @@ anv_get_format_aspect(const struct anv_physical_device *device, return anv_get_format_plane(device, vk_format, plane, tiling); } +static bool +anv_format_supports_indirect_copies(const struct anv_physical_device *pdevice, + const struct anv_format *anv_format) +{ + const struct isl_format_layout *fmtl = + isl_format_get_layout(anv_format->planes[0].isl_format); + + /* CTS insists on it even when we say we don't support it. */ + if (!pdevice->vk.supported_features.indirectMemoryToImageCopy) + return false; + + /* TODO: implement support for this in the copy shader. */ + if (!util_is_power_of_two_or_zero(fmtl->bpb)) + return false; + + /* TODO: we use compute for indirect copies, and compute cannot write HiZ; + * we could try to support that if we see that applications want it. 
+ */ + if (vk_format_is_depth_or_stencil(anv_format->vk_format)) + return false; + + /* Let's leave YCbCr and multi-planar formats out until we have proper + * tests to verify they work. + */ + if (isl_format_is_yuv(anv_format->planes[0].isl_format)) + return false; + + if (anv_format->n_planes > 1) + return false; + + return true; +} + // Format capabilities static bool @@ -852,7 +885,8 @@ anv_get_color_format_features(const struct anv_physical_device *physical_device, if (vk_tiling == VK_IMAGE_TILING_LINEAR && isl_format_get_layout(plane_format.isl_format)->txc == ISL_TXC_ASTC) return VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT | - VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT; + VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT | + VK_FORMAT_FEATURE_2_COPY_IMAGE_INDIRECT_DST_BIT_KHR; flags |= VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_FILTER_MINMAX_BIT | @@ -935,6 +969,10 @@ anv_get_color_format_features(const struct anv_physical_device *physical_device, VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT; } + if ((flags & VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT) && + anv_format_supports_indirect_copies(physical_device, anv_format)) + flags |= VK_FORMAT_FEATURE_2_COPY_IMAGE_INDIRECT_DST_BIT_KHR; + /* XXX: We handle 3-channel formats by switching them out for RGBX or * RGBA formats behind-the-scenes. This works fine for textures * because the upload process will fill in the extra channel. 
diff --git a/src/intel/vulkan/anv_physical_device.c b/src/intel/vulkan/anv_physical_device.c index 5b723093e86..06b4502e12a 100644 --- a/src/intel/vulkan/anv_physical_device.c +++ b/src/intel/vulkan/anv_physical_device.c @@ -152,6 +152,7 @@ get_device_extensions(const struct anv_physical_device *device, .KHR_cooperative_matrix = device->has_cooperative_matrix, .NV_cooperative_matrix2 = device->has_cooperative_matrix, .KHR_copy_commands2 = true, + .KHR_copy_memory_indirect = true, .KHR_create_renderpass2 = true, .KHR_dedicated_allocation = true, .KHR_deferred_host_operations = true, @@ -1023,6 +1024,10 @@ get_features(const struct anv_physical_device *pdevice, /* VK_KHR_pipeline_binary */ .pipelineBinaries = true, + /* VK_KHR_copy_memory_indirect */ + .indirectMemoryCopy = true, + .indirectMemoryToImageCopy = pdevice->info.ver >= 12, + #ifdef ANV_USE_WSI_PLATFORM /* VK_EXT_present_timing */ .presentTiming = true, @@ -2205,6 +2210,11 @@ get_properties(const struct anv_physical_device *pdevice, */ props->shaderBinaryVersion = 0; } + + /* VK_KHR_copy_memory_indirect */ + { + props->supportedQueues = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT; + } } /* This function restricts the maximum size of system memory heap. The diff --git a/src/intel/vulkan/genX_blorp_exec.c b/src/intel/vulkan/genX_blorp_exec.c index 376d7dc06b5..a6a9362fff4 100644 --- a/src/intel/vulkan/genX_blorp_exec.c +++ b/src/intel/vulkan/genX_blorp_exec.c @@ -534,6 +534,8 @@ get_color_aux_op(const struct blorp_params *params) case BLORP_OP_SLOW_COLOR_CLEAR: case BLORP_OP_BLIT: case BLORP_OP_COPY: + case BLORP_OP_COPY_INDIRECT: + case BLORP_OP_COPY_IMAGE_INDIRECT: assert(params->fast_clear_op == ISL_AUX_OP_NONE); return ANV_COLOR_AUX_OP_CLASS_NONE; }