mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 06:58:05 +02:00
Merge branch 'copy-mem-indirect-blorp' into 'main'
anv: implement VK_KHR_copy_memory_indirect. See merge request mesa/mesa!39338
This commit is contained in:
commit
889ee7456d
15 changed files with 1270 additions and 18 deletions
|
|
@ -542,7 +542,7 @@ Khronos extensions that are not part of any Vulkan version:
|
|||
VK_KHR_calibrated_timestamps DONE (anv, hk, kk, nvk, panvk/v10+, radv, tu/a750+, vn)
|
||||
VK_KHR_compute_shader_derivatives DONE (anv, lvp, nvk, radv, tu, vn)
|
||||
VK_KHR_cooperative_matrix DONE (anv, nvk/Turing+, radv/gfx11+, vn)
|
||||
VK_KHR_copy_memory_indirect DONE (anv, nvk, radv/gfx8+)
|
||||
VK_KHR_deferred_host_operations DONE (anv, hasvk, lvp, radv, tu, vn)
|
||||
VK_KHR_depth_clamp_zero_one DONE (anv, nvk, panvk, radv, tu, vn)
|
||||
VK_KHR_device_address_commands DONE (radv)
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ typedef uint32_t VkBool32;
|
|||
typedef uint64_t VkDeviceAddress;
|
||||
typedef uint64_t VkDeviceSize;
|
||||
typedef uint32_t VkFlags;
|
||||
typedef VkFlags VkImageAspectFlags;
|
||||
|
||||
typedef enum VkQueryType {
|
||||
VK_QUERY_TYPE_OCCLUSION = 0,
|
||||
|
|
@ -126,6 +127,25 @@ typedef enum VkDepthBiasRepresentationEXT {
|
|||
VK_DEPTH_BIAS_REPRESENTATION_MAX_ENUM_EXT = 0x7FFFFFFF
|
||||
} VkDepthBiasRepresentationEXT;
|
||||
|
||||
typedef struct VkOffset3D {
|
||||
int32_t x;
|
||||
int32_t y;
|
||||
int32_t z;
|
||||
} VkOffset3D __attribute__((aligned(4)));
|
||||
|
||||
typedef struct VkExtent3D {
|
||||
uint32_t width;
|
||||
uint32_t height;
|
||||
uint32_t depth;
|
||||
} VkExtent3D __attribute__((aligned(4)));
|
||||
|
||||
typedef struct VkImageSubresourceLayers {
|
||||
VkImageAspectFlags aspectMask;
|
||||
uint32_t mipLevel;
|
||||
uint32_t baseArrayLayer;
|
||||
uint32_t layerCount;
|
||||
} VkImageSubresourceLayers __attribute__((aligned(4)));
|
||||
|
||||
typedef struct VkDispatchIndirectCommand {
|
||||
uint32_t x;
|
||||
uint32_t y;
|
||||
|
|
@ -186,4 +206,13 @@ typedef struct VkCopyMemoryIndirectCommandKHR {
|
|||
VkDeviceAddress srcAddress;
|
||||
VkDeviceAddress dstAddress;
|
||||
VkDeviceSize size;
|
||||
} VkCopyMemoryIndirectCommandKHR;
|
||||
} VkCopyMemoryIndirectCommandKHR __attribute__((aligned(4)));
|
||||
|
||||
typedef struct VkCopyMemoryToImageIndirectCommandKHR {
|
||||
VkDeviceAddress srcAddress;
|
||||
uint32_t bufferRowLength;
|
||||
uint32_t bufferImageHeight;
|
||||
VkImageSubresourceLayers imageSubresource;
|
||||
VkOffset3D imageOffset;
|
||||
VkExtent3D imageExtent;
|
||||
} VkCopyMemoryToImageIndirectCommandKHR __attribute__((aligned(4)));
|
||||
|
|
|
|||
|
|
@ -534,3 +534,10 @@ blorp_emit_post_draw(struct blorp_batch *blorp_batch, const struct blorp_params
|
|||
genX(maybe_emit_breakpoint)(batch, false);
|
||||
blorp_measure_end(blorp_batch, params);
|
||||
}
|
||||
|
||||
static bool *
|
||||
blorp_get_write_fencing_status(struct blorp_batch *blorp_batch)
|
||||
{
|
||||
struct iris_batch *batch = blorp_batch->driver_batch;
|
||||
return &batch->write_fence_status;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -41,6 +41,8 @@ typedef struct nir_shader nir_shader;
|
|||
enum blorp_op {
|
||||
BLORP_OP_BLIT,
|
||||
BLORP_OP_COPY,
|
||||
BLORP_OP_COPY_INDIRECT,
|
||||
BLORP_OP_COPY_IMAGE_INDIRECT,
|
||||
BLORP_OP_CCS_AMBIGUATE,
|
||||
BLORP_OP_CCS_COLOR_CLEAR,
|
||||
BLORP_OP_CCS_PARTIAL_RESOLVE,
|
||||
|
|
@ -299,6 +301,22 @@ blorp_buffer_copy(struct blorp_batch *batch,
|
|||
struct blorp_address dst,
|
||||
uint64_t size);
|
||||
|
||||
void
|
||||
blorp_copy_memory_indirect(struct blorp_batch *batch,
|
||||
uint64_t indirect_buf_addr,
|
||||
uint32_t copy_count,
|
||||
uint64_t stride);
|
||||
|
||||
void
|
||||
blorp_copy_memory_to_image_indirect(struct blorp_batch *batch,
|
||||
const struct blorp_surf *img_blorp_surf,
|
||||
uint64_t indirect_buf_addr,
|
||||
uint64_t indirect_buf_stride,
|
||||
uint32_t first_copy_idx,
|
||||
uint32_t img_mip_level,
|
||||
int layer_count,
|
||||
int forced_layer_or_z);
|
||||
|
||||
void
|
||||
blorp_fast_clear(struct blorp_batch *batch,
|
||||
const struct blorp_surf *surf,
|
||||
|
|
|
|||
|
|
@ -148,6 +148,9 @@ brw_blorp_get_urb_length(const struct brw_fs_prog_data *prog_data)
|
|||
return MAX2((prog_data->num_varying_inputs + 1) / 2, 1);
|
||||
}
|
||||
|
||||
static bool *
|
||||
blorp_get_write_fencing_status(struct blorp_batch *batch);
|
||||
|
||||
/***** BEGIN blorp_exec implementation ******/
|
||||
|
||||
static uint64_t
|
||||
|
|
@ -164,9 +167,25 @@ _blorp_combine_address(struct blorp_batch *batch, void *location,
|
|||
#define __gen_address_type struct blorp_address
|
||||
#define __gen_user_data struct blorp_batch
|
||||
#define __gen_combine_address _blorp_combine_address
|
||||
#define __gen_get_write_fencing_status(b) blorp_get_write_fencing_status(b)
|
||||
#define __gen_get_batch_dwords(b, d) blorp_emit_dwords((b), (d))
|
||||
|
||||
static inline struct blorp_address
|
||||
__gen_address_offset(struct blorp_address addr, uint64_t offset)
|
||||
{
|
||||
addr.offset += offset;
|
||||
return addr;
|
||||
}
|
||||
|
||||
static inline struct blorp_address
|
||||
__gen_get_batch_address(struct blorp_batch *batch, void *location)
|
||||
{
|
||||
UNREACHABLE("Not supported by blorp");
|
||||
}
|
||||
|
||||
#include "genxml/genX_pack.h"
|
||||
#include "common/intel_genX_state_brw.h"
|
||||
#include "common/mi_builder.h"
|
||||
|
||||
#define _blorp_cmd_length(cmd) cmd ## _length
|
||||
#define _blorp_cmd_length_bias(cmd) cmd ## _length_bias
|
||||
|
|
@ -1361,6 +1380,10 @@ blorp_setup_binding_table(struct blorp_batch *batch,
|
|||
uint32_t surface_offsets[BLORP_NUM_BT_ENTRIES], bind_offset = 0;
|
||||
void *surface_maps[BLORP_NUM_BT_ENTRIES];
|
||||
|
||||
/* There's nothing to bind here. */
|
||||
if (params->op == BLORP_OP_COPY_INDIRECT)
|
||||
return 0;
|
||||
|
||||
if (params->use_pre_baked_binding_table) {
|
||||
bind_offset = params->pre_baked_binding_table_offset;
|
||||
} else {
|
||||
|
|
@ -1765,6 +1788,170 @@ blorp_get_compute_push_const(struct blorp_batch *batch,
|
|||
*state_size = push_const_size;
|
||||
}
|
||||
|
||||
static void
|
||||
blorp_indirect_buffer_get_dispatch_size(struct blorp_batch *batch,
|
||||
const struct blorp_params *params,
|
||||
struct mi_builder *b,
|
||||
struct mi_value *size_x,
|
||||
struct mi_value *size_y,
|
||||
struct mi_value *size_z)
|
||||
{
|
||||
struct blorp_context *blorp = batch->blorp;
|
||||
const struct brw_cs_prog_data *cs_prog_data = params->cs_prog_data;
|
||||
|
||||
uint64_t indirect_buf_addr = params->wm_inputs.indirect.indirect_buf_addr;
|
||||
uint64_t stride = params->wm_inputs.indirect.indirect_buf_stride;
|
||||
uint32_t copy_count = params->wm_inputs.indirect.copy_count;
|
||||
|
||||
size_t size_offset = 16; /* offsetof(VkCopyMemoryIndirectCommandKHR, size) */
|
||||
|
||||
struct mi_value biggest_copy_size;
|
||||
for (int c = 0; c < copy_count; c++) {
|
||||
struct blorp_address copy_size_addr = {
|
||||
.buffer = NULL,
|
||||
.offset = indirect_buf_addr + c * stride + size_offset,
|
||||
.reloc_flags = 0,
|
||||
.mocs = isl_mocs(blorp->isl_dev, ISL_SURF_USAGE_STORAGE_BIT, false),
|
||||
.local_hint = false, /* We don't have a way to know this. */
|
||||
};
|
||||
|
||||
struct mi_value this_copy_size = mi_mem32(copy_size_addr);
|
||||
if (c == 0)
|
||||
biggest_copy_size = this_copy_size;
|
||||
else
|
||||
biggest_copy_size = mi_umax2(b, this_copy_size, biggest_copy_size);
|
||||
}
|
||||
|
||||
/* Each shader invocation writes an uint32_t. */
|
||||
int divisor = cs_prog_data->local_size[0] * sizeof(uint32_t);
|
||||
*size_x = mi_udiv32_imm(b, mi_iadd_imm(b, biggest_copy_size, divisor - 1),
|
||||
divisor);
|
||||
|
||||
assert(cs_prog_data->local_size[1] == 1);
|
||||
assert(cs_prog_data->local_size[2] == 1);
|
||||
*size_y = mi_imm(1);
|
||||
*size_z = mi_imm(1);
|
||||
}
|
||||
|
||||
static void
|
||||
blorp_indirect_buf2img_get_dispatch_size(struct blorp_batch *batch,
|
||||
const struct blorp_params *params,
|
||||
struct mi_builder *b,
|
||||
struct mi_value *size_x,
|
||||
struct mi_value *size_y,
|
||||
struct mi_value *size_z)
|
||||
{
|
||||
struct blorp_context *blorp = batch->blorp;
|
||||
const struct brw_cs_prog_data *cs_prog_data = params->cs_prog_data;
|
||||
|
||||
uint64_t indirect_buf_addr = params->wm_inputs.indirect.indirect_buf_addr;
|
||||
uint64_t stride = params->wm_inputs.indirect.indirect_buf_stride;
|
||||
uint32_t copy_idx = params->wm_inputs.indirect.copy_idx;
|
||||
bool is_forced_layer = params->wm_inputs.indirect.forced_layer_or_z != -1;
|
||||
bool is_3d = params->wm_inputs.indirect.dimensions == 3;
|
||||
|
||||
/* These are all offsetof(VkCopyMemoryToImageIndirectCommandKHR, x). */
|
||||
const size_t img_extent_x_offset = 44;
|
||||
const size_t img_extent_y_offset = 48;
|
||||
const size_t img_extent_z_offset = 52;
|
||||
|
||||
struct blorp_address x_extent_addr = {
|
||||
.buffer = NULL,
|
||||
.offset = indirect_buf_addr + copy_idx * stride + img_extent_x_offset,
|
||||
.reloc_flags = 0,
|
||||
.mocs = isl_mocs(blorp->isl_dev, ISL_SURF_USAGE_STORAGE_BIT, false),
|
||||
.local_hint = false, /* We don't have a way to know this. */
|
||||
};
|
||||
struct blorp_address y_extent_addr = {
|
||||
.buffer = NULL,
|
||||
.offset = indirect_buf_addr + copy_idx * stride + img_extent_y_offset,
|
||||
.reloc_flags = 0,
|
||||
.mocs = isl_mocs(blorp->isl_dev, ISL_SURF_USAGE_STORAGE_BIT, false),
|
||||
.local_hint = false, /* We don't have a way to know this. */
|
||||
};
|
||||
struct blorp_address z_extent_addr = {
|
||||
.buffer = NULL,
|
||||
.offset = indirect_buf_addr + copy_idx * stride + img_extent_z_offset,
|
||||
.reloc_flags = 0,
|
||||
.mocs = isl_mocs(blorp->isl_dev, ISL_SURF_USAGE_STORAGE_BIT, false),
|
||||
.local_hint = false, /* We don't have a way to know this. */
|
||||
};
|
||||
|
||||
/* Notes on the 'params->num_layers' usage below:
|
||||
*
|
||||
* - Please see how we handle this in function
|
||||
* blorp_copy_memory_to_image_indirect().
|
||||
*
|
||||
* - If we're using forced layers (see
|
||||
* params.wm_inputs.indirect.forced_layer_or_z), then we can just set
|
||||
* size_z to 1 and don't even look at the indirect buffer: each shader
|
||||
* call operates on a single layer.
|
||||
*
|
||||
* - The information on the number of layers for each copy is not truly
|
||||
* indirect: it has to be passed to during command creation, so we
|
||||
* have already processed it. See 'max_layer_count' in
|
||||
* blorp_copy_memory_to_image_indirect(). We don't want to be looking
|
||||
* at the indirect buffer if we don't need to.
|
||||
*
|
||||
* - For 3D images, the applications can use the Z axis as either an
|
||||
* actual axis (offset.z + extent.z) or pretend the Z axis is layers
|
||||
* (base_layer + layer_count), so would have to check both places.
|
||||
* Fortunately, base_layer + layer_count was already checked (see
|
||||
* above) and recorded in params->num_layers. If params->num_layers is
|
||||
* bigger than 1, then we don't even bother looking at extent.z.
|
||||
*/
|
||||
struct mi_value x_extent = mi_mem32(x_extent_addr);
|
||||
struct mi_value y_extent = mi_mem32(y_extent_addr);
|
||||
struct mi_value z_extent;
|
||||
if (is_forced_layer) {
|
||||
z_extent = mi_imm(1);
|
||||
} else {
|
||||
if (is_3d && params->num_layers == 1)
|
||||
z_extent = mi_mem32(z_extent_addr);
|
||||
else
|
||||
z_extent = mi_imm(params->num_layers);
|
||||
}
|
||||
|
||||
int divisor_x = cs_prog_data->local_size[0];
|
||||
int divisor_y = cs_prog_data->local_size[1];
|
||||
int divisor_z = cs_prog_data->local_size[2];
|
||||
|
||||
assert(divisor_x != 1 && divisor_y != 1 && divisor_z == 1);
|
||||
*size_x = mi_udiv32_imm(b, mi_iadd_imm(b, x_extent, divisor_x - 1),
|
||||
divisor_x);
|
||||
*size_y = mi_udiv32_imm(b, mi_iadd_imm(b, y_extent, divisor_y - 1),
|
||||
divisor_y);
|
||||
*size_z = z_extent;
|
||||
}
|
||||
|
||||
static void
|
||||
blorp_indirect_write_gpgpu_dispatch_regs(struct blorp_batch *batch,
|
||||
const struct blorp_params *params)
|
||||
{
|
||||
struct blorp_context *blorp = batch->blorp;
|
||||
const struct intel_device_info *devinfo = blorp->compiler->brw->devinfo;
|
||||
|
||||
struct mi_builder b;
|
||||
mi_builder_init(&b, devinfo, batch);
|
||||
mi_builder_set_mocs(&b, isl_mocs(blorp->isl_dev, 0, false));
|
||||
|
||||
struct mi_value size_x, size_y, size_z;
|
||||
|
||||
if (params->op == BLORP_OP_COPY_INDIRECT) {
|
||||
blorp_indirect_buffer_get_dispatch_size(batch, params, &b, &size_x,
|
||||
&size_y, &size_z);
|
||||
} else {
|
||||
assert(params->op == BLORP_OP_COPY_IMAGE_INDIRECT);
|
||||
|
||||
blorp_indirect_buf2img_get_dispatch_size(batch, params, &b, &size_x,
|
||||
&size_y, &size_z);
|
||||
}
|
||||
|
||||
mi_store(&b, mi_reg32(GENX(GPGPU_DISPATCHDIMX_num)), size_x);
|
||||
mi_store(&b, mi_reg32(GENX(GPGPU_DISPATCHDIMY_num)), size_y);
|
||||
mi_store(&b, mi_reg32(GENX(GPGPU_DISPATCHDIMZ_num)), size_z);
|
||||
}
|
||||
|
||||
static void
|
||||
blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
|
||||
{
|
||||
|
|
@ -1779,6 +1966,11 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
|
|||
const struct intel_cs_dispatch_info dispatch =
|
||||
brw_cs_get_dispatch_info(devinfo, cs_prog_data, NULL);
|
||||
|
||||
bool use_indirect = blorp_op_type_is_indirect(params->op);
|
||||
|
||||
if (use_indirect)
|
||||
blorp_indirect_write_gpgpu_dispatch_regs(batch, params);
|
||||
|
||||
uint32_t group_x0 = params->x0 / cs_prog_data->local_size[0];
|
||||
uint32_t group_y0 = params->y0 / cs_prog_data->local_size[1];
|
||||
uint32_t group_z0 = params->dst.z_offset;
|
||||
|
|
@ -1892,6 +2084,7 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
|
|||
|
||||
assert(cs_prog_data->push.per_thread.regs == 0);
|
||||
blorp_emit(batch, GENX(COMPUTE_WALKER), cw) {
|
||||
cw.IndirectParameterEnable = use_indirect,
|
||||
cw.body = body;
|
||||
}
|
||||
#else /* GFX_VERx10 >= 125 */
|
||||
|
|
@ -1961,6 +2154,7 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
|
|||
}
|
||||
|
||||
blorp_emit(batch, GENX(GPGPU_WALKER), ggw) {
|
||||
ggw.IndirectParameterEnable = use_indirect,
|
||||
ggw.SIMDSize = dispatch.simd_size / 16;
|
||||
ggw.ThreadDepthCounterMaximum = 0;
|
||||
ggw.ThreadHeightCounterMaximum = 0;
|
||||
|
|
|
|||
355
src/intel/blorp/blorp_indirect_copy.c
Normal file
355
src/intel/blorp/blorp_indirect_copy.c
Normal file
|
|
@ -0,0 +1,355 @@
|
|||
/* Copyright © 2025 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "blorp_nir_builder.h"
|
||||
#include "compiler/nir/nir_format_convert.h"
|
||||
|
||||
#include "blorp_priv.h"
|
||||
#include "dev/intel_debug.h"
|
||||
#include "dev/intel_device_info.h"
|
||||
|
||||
#include "blorp_shaders.h"
|
||||
|
||||
/* Refer to struct blorp_wm_inputs_indirect. */
|
||||
struct blorp_indirect_vars {
|
||||
nir_variable *indirect_buf_addr;
|
||||
nir_variable *stride;
|
||||
nir_variable *copy_count;
|
||||
nir_variable *copy_idx;
|
||||
nir_variable *max_layer;
|
||||
nir_variable *x_offset;
|
||||
nir_variable *y_offset;
|
||||
};
|
||||
|
||||
static enum isl_format
|
||||
get_format_for_copy(int format_bpb)
|
||||
{
|
||||
switch (format_bpb) {
|
||||
case 8: return ISL_FORMAT_R8_UINT;
|
||||
case 16: return ISL_FORMAT_R16_UINT;
|
||||
case 24: return ISL_FORMAT_R8G8B8_UINT;
|
||||
case 32: return ISL_FORMAT_R32_UINT;
|
||||
case 48: return ISL_FORMAT_R16G16B16_UINT;
|
||||
case 64: return ISL_FORMAT_R32G32_UINT;
|
||||
case 96: return ISL_FORMAT_R32G32B32_UINT;
|
||||
case 128: return ISL_FORMAT_R32G32B32A32_UINT;
|
||||
default:
|
||||
mesa_loge("unexpected format bpb: %d", format_bpb);
|
||||
assert(false);
|
||||
return ISL_FORMAT_UNSUPPORTED;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
blorp_indirect_vars_init(nir_builder *b, struct blorp_indirect_vars *v)
|
||||
{
|
||||
v->copy_count =
|
||||
BLORP_CREATE_NIR_INPUT(b->shader, indirect.copy_count,
|
||||
glsl_uint_type());
|
||||
v->indirect_buf_addr =
|
||||
BLORP_CREATE_NIR_INPUT(b->shader, indirect.indirect_buf_addr,
|
||||
glsl_uint64_t_type());
|
||||
v->stride =
|
||||
BLORP_CREATE_NIR_INPUT(b->shader, indirect.indirect_buf_stride,
|
||||
glsl_uint64_t_type());
|
||||
v->copy_idx =
|
||||
BLORP_CREATE_NIR_INPUT(b->shader, indirect.copy_idx,
|
||||
glsl_uint_type());
|
||||
v->max_layer =
|
||||
BLORP_CREATE_NIR_INPUT(b->shader, indirect.max_layer,
|
||||
glsl_uint_type());
|
||||
v->x_offset =
|
||||
BLORP_CREATE_NIR_INPUT(b->shader, indirect.x_offset,
|
||||
glsl_uint_type());
|
||||
v->y_offset =
|
||||
BLORP_CREATE_NIR_INPUT(b->shader, indirect.y_offset,
|
||||
glsl_uint_type());
|
||||
}
|
||||
|
||||
static nir_shader *
|
||||
blorp_build_copy_mem_indirect_shader(struct blorp_batch *batch,
|
||||
void *mem_ctx)
|
||||
{
|
||||
|
||||
struct blorp_context *blorp = batch->blorp;
|
||||
|
||||
mesa_shader_stage stage = MESA_SHADER_COMPUTE;
|
||||
const nir_shader_compiler_options *nir_options =
|
||||
blorp->compiler->nir_options(blorp, stage);
|
||||
nir_builder b = nir_builder_init_simple_shader(stage, nir_options,
|
||||
"copy mem indirect");
|
||||
ralloc_steal(mem_ctx, b.shader);
|
||||
|
||||
b.shader->info.workgroup_size[0] = 32;
|
||||
b.shader->info.workgroup_size[1] = 1;
|
||||
b.shader->info.workgroup_size[2] = 1;
|
||||
|
||||
struct blorp_indirect_vars v;
|
||||
blorp_indirect_vars_init(&b, &v);
|
||||
|
||||
/* The indirect buffer is an array containing 'copy_count'
|
||||
* VkCopyMemoryIndirectCommandKHR structures, separated by 'stride' bytes.
|
||||
*/
|
||||
nir_def *copy_count = nir_load_var(&b, v.copy_count);
|
||||
nir_def *indirect_buf_addr = nir_load_var(&b, v.indirect_buf_addr);
|
||||
nir_def *stride = nir_load_var(&b, v.stride);
|
||||
nir_def *global_id =
|
||||
nir_channel(&b, nir_load_global_invocation_id(&b, 32), 0);
|
||||
|
||||
blorp_copy_memory_indirect_shader(&b, indirect_buf_addr, copy_count,
|
||||
stride, global_id);
|
||||
|
||||
return b.shader;
|
||||
}
|
||||
|
||||
static nir_shader *
|
||||
blorp_build_copy_mem2img_indirect_shader(struct blorp_batch *batch,
|
||||
void *mem_ctx,
|
||||
struct blorp_indirect_copy_mem2img_key *key)
|
||||
{
|
||||
struct blorp_context *blorp = batch->blorp;
|
||||
|
||||
mesa_shader_stage stage = MESA_SHADER_COMPUTE;
|
||||
const nir_shader_compiler_options *nir_options =
|
||||
blorp->compiler->nir_options(blorp, stage);
|
||||
nir_builder b = nir_builder_init_simple_shader(stage, nir_options,
|
||||
"copy mem2img indirect");
|
||||
ralloc_steal(mem_ctx, b.shader);
|
||||
|
||||
b.shader->info.workgroup_size[0] = 4;
|
||||
b.shader->info.workgroup_size[1] = 8;
|
||||
b.shader->info.workgroup_size[2] = 1;
|
||||
|
||||
struct blorp_indirect_vars v;
|
||||
blorp_indirect_vars_init(&b, &v);
|
||||
|
||||
/* The indirect buffer is an array containing 'copy_count'
|
||||
* VkCopyMemoryToImageIndirectCommandKHR structures, separated by 'stride'
|
||||
* bytes.
|
||||
*/
|
||||
nir_def *indirect_buf_addr = nir_load_var(&b, v.indirect_buf_addr);
|
||||
nir_def *stride = nir_load_var(&b, v.stride);
|
||||
nir_def *copy_idx = nir_load_var(&b, v.copy_idx);
|
||||
nir_def *max_layer = nir_load_var(&b, v.max_layer);
|
||||
nir_def *dest_coord_offsets_arr[2] = {
|
||||
nir_load_var(&b, v.x_offset),
|
||||
nir_load_var(&b, v.y_offset),
|
||||
};
|
||||
nir_def *dest_coord_offsets = nir_vec(&b, dest_coord_offsets_arr, 2);
|
||||
|
||||
nir_def *global_id = nir_load_global_invocation_id(&b, 32);
|
||||
|
||||
/* Shader keys (constants). */
|
||||
nir_def *dimensions = nir_imm_int(&b, key->dimensions);
|
||||
nir_def *forced_layer_or_z = nir_imm_int(&b, key->forced_layer_or_z);
|
||||
nir_def *format_Bpb = nir_imm_intN_t(&b, key->format_Bpb, 16);
|
||||
nir_def *format_block_size_arr[3] = {
|
||||
nir_imm_int(&b, key->format_bw),
|
||||
nir_imm_int(&b, key->format_bh),
|
||||
nir_imm_int(&b, key->format_bd),
|
||||
};
|
||||
nir_def *format_block_size = nir_vec(&b, format_block_size_arr, 3);
|
||||
|
||||
/* Constants derived from the shader keys. */
|
||||
nir_def *is_block_compressed =
|
||||
nir_imm_bool(&b, key->format_bw > 1 || key->format_bh > 1 ||
|
||||
key->format_bd > 1);
|
||||
|
||||
blorp_copy_memory_to_image_indirect_shader(&b, indirect_buf_addr,
|
||||
stride,
|
||||
copy_idx,
|
||||
max_layer,
|
||||
dest_coord_offsets,
|
||||
global_id,
|
||||
dimensions,
|
||||
forced_layer_or_z,
|
||||
format_Bpb,
|
||||
format_block_size,
|
||||
is_block_compressed);
|
||||
|
||||
return b.shader;
|
||||
}
|
||||
|
||||
static bool
|
||||
blorp_get_copy_mem_indirect_kernel_cs(struct blorp_batch *batch,
|
||||
struct blorp_params *params)
|
||||
{
|
||||
struct blorp_context *blorp = batch->blorp;
|
||||
const char *key = "copy_mem_indirect_kernel_cs";
|
||||
uint32_t key_size = strlen(key);
|
||||
|
||||
if (blorp->lookup_shader(batch, key, key_size, ¶ms->cs_prog_kernel,
|
||||
¶ms->cs_prog_data))
|
||||
return true;
|
||||
|
||||
void *mem_ctx = ralloc_context(NULL);
|
||||
|
||||
nir_shader *nir =
|
||||
blorp_build_copy_mem_indirect_shader(batch, mem_ctx);
|
||||
|
||||
const struct blorp_program prog =
|
||||
blorp_compile_cs(blorp, mem_ctx, nir, key, key_size);
|
||||
|
||||
bool result = blorp->upload_shader(batch, MESA_SHADER_COMPUTE,
|
||||
key, key_size,
|
||||
prog.kernel, prog.kernel_size,
|
||||
prog.prog_data, prog.prog_data_size,
|
||||
¶ms->cs_prog_kernel,
|
||||
¶ms->cs_prog_data);
|
||||
|
||||
ralloc_free(mem_ctx);
|
||||
return result;
|
||||
|
||||
}
|
||||
|
||||
static bool
|
||||
blorp_get_copy_mem2img_indirect_kernel_cs(struct blorp_batch *batch,
|
||||
struct blorp_params *params,
|
||||
struct blorp_indirect_copy_mem2img_key *key)
|
||||
{
|
||||
struct blorp_context *blorp = batch->blorp;
|
||||
|
||||
if (blorp->lookup_shader(batch, key, sizeof(*key), ¶ms->cs_prog_kernel,
|
||||
¶ms->cs_prog_data))
|
||||
return true;
|
||||
|
||||
void *mem_ctx = ralloc_context(NULL);
|
||||
|
||||
nir_shader *nir =
|
||||
blorp_build_copy_mem2img_indirect_shader(batch, mem_ctx, key);
|
||||
|
||||
const struct blorp_program prog =
|
||||
blorp_compile_cs(blorp, mem_ctx, nir, key, sizeof(*key));
|
||||
|
||||
bool result = blorp->upload_shader(batch, MESA_SHADER_COMPUTE,
|
||||
key, sizeof(*key),
|
||||
prog.kernel, prog.kernel_size,
|
||||
prog.prog_data, prog.prog_data_size,
|
||||
¶ms->cs_prog_kernel,
|
||||
¶ms->cs_prog_data);
|
||||
|
||||
ralloc_free(mem_ctx);
|
||||
return result;
|
||||
}
|
||||
|
||||
void
|
||||
blorp_copy_memory_indirect(struct blorp_batch *batch,
|
||||
uint64_t indirect_buf_addr,
|
||||
uint32_t copy_count,
|
||||
uint64_t stride)
|
||||
{
|
||||
struct blorp_params params;
|
||||
blorp_params_init(¶ms);
|
||||
|
||||
params.op = BLORP_OP_COPY_INDIRECT;
|
||||
params.shader_type = BLORP_SHADER_TYPE_COPY_INDIRECT;
|
||||
params.shader_pipeline = BLORP_SHADER_PIPELINE_COMPUTE;
|
||||
|
||||
params.x0 = 0;
|
||||
params.y0 = 0;
|
||||
params.x1 = 1;
|
||||
params.y1 = 1;
|
||||
|
||||
params.wm_inputs.indirect.indirect_buf_addr = indirect_buf_addr;
|
||||
params.wm_inputs.indirect.indirect_buf_stride = stride;
|
||||
params.wm_inputs.indirect.copy_count = copy_count;
|
||||
|
||||
if (!blorp_get_copy_mem_indirect_kernel_cs(batch, ¶ms)) {
|
||||
mesa_loge("failed to get copy_memory_indirect CS kernel");
|
||||
assert(false);
|
||||
return;
|
||||
}
|
||||
|
||||
batch->blorp->exec(batch, ¶ms);
|
||||
}
|
||||
|
||||
void
|
||||
blorp_copy_memory_to_image_indirect(struct blorp_batch *batch,
|
||||
const struct blorp_surf *img_blorp_surf,
|
||||
uint64_t indirect_buf_addr,
|
||||
uint64_t indirect_buf_stride,
|
||||
uint32_t copy_idx,
|
||||
uint32_t img_mip_level,
|
||||
int layer_count,
|
||||
int forced_layer_or_z)
|
||||
{
|
||||
enum isl_format original_format = img_blorp_surf->surf->format;
|
||||
const struct isl_format_layout *fmtl =
|
||||
isl_format_get_layout(original_format);
|
||||
enum isl_format copy_format = get_format_for_copy(fmtl->bpb);
|
||||
int dimensions = img_blorp_surf->surf->dim + 1;
|
||||
|
||||
struct blorp_indirect_copy_mem2img_key key = {
|
||||
.dimensions = dimensions,
|
||||
.forced_layer_or_z = forced_layer_or_z,
|
||||
.format_Bpb = fmtl->bpb / 8,
|
||||
.format_bw = fmtl->bw,
|
||||
.format_bh = fmtl->bh,
|
||||
.format_bd = fmtl->bd,
|
||||
};
|
||||
|
||||
struct blorp_params params;
|
||||
blorp_params_init(¶ms);
|
||||
|
||||
params.op = BLORP_OP_COPY_IMAGE_INDIRECT;
|
||||
params.shader_type = BLORP_SHADER_TYPE_COPY_INDIRECT;
|
||||
params.shader_pipeline = BLORP_SHADER_PIPELINE_COMPUTE;
|
||||
|
||||
params.wm_inputs.indirect.indirect_buf_addr = indirect_buf_addr;
|
||||
params.wm_inputs.indirect.indirect_buf_stride = indirect_buf_stride;
|
||||
params.wm_inputs.indirect.copy_idx = copy_idx;
|
||||
params.wm_inputs.indirect.dimensions = dimensions;
|
||||
params.wm_inputs.indirect.max_layer =
|
||||
img_blorp_surf->surf->logical_level0_px.array_len - 1;
|
||||
params.wm_inputs.indirect.forced_layer_or_z = forced_layer_or_z;
|
||||
|
||||
/* params.dst is our image. */
|
||||
blorp_surface_info_init(batch, ¶ms.dst, img_blorp_surf,
|
||||
img_mip_level,
|
||||
forced_layer_or_z == -1 ? 0 : forced_layer_or_z,
|
||||
copy_format,
|
||||
true /* is_dest */);
|
||||
|
||||
struct isl_extent3d mip_dimensions = {
|
||||
.width = MAX2(params.dst.surf.logical_level0_px.w >> img_mip_level, 1),
|
||||
.height = MAX2(params.dst.surf.logical_level0_px.h >> img_mip_level, 1),
|
||||
.depth = MAX2(params.dst.surf.logical_level0_px.d >> img_mip_level, 1),
|
||||
};
|
||||
|
||||
if (fmtl->bw > 1 || fmtl->bh > 1 || fmtl->bd > 1) {
|
||||
blorp_surf_convert_to_uncompressed(batch->blorp->isl_dev,
|
||||
¶ms.dst, NULL, NULL, NULL, NULL);
|
||||
params.wm_inputs.indirect.x_offset = params.dst.tile_x_sa;
|
||||
params.wm_inputs.indirect.y_offset = params.dst.tile_y_sa;
|
||||
|
||||
mip_dimensions.width = params.dst.surf.logical_level0_px.w;
|
||||
mip_dimensions.height = params.dst.surf.logical_level0_px.h;
|
||||
mip_dimensions.depth = params.dst.surf.logical_level0_px.d;
|
||||
}
|
||||
|
||||
/* These settings control the number of workgroups in the shader, see
|
||||
* blorp_exec_compute(). We don't need to divide by the local sizes here,
|
||||
* this will be done later.
|
||||
*/
|
||||
params.x0 = 0;
|
||||
params.y0 = 0;
|
||||
params.x1 = mip_dimensions.width;
|
||||
params.y1 = mip_dimensions.height;
|
||||
if (forced_layer_or_z == -1) {
|
||||
/* We set this here so blorp_indirect_buf2img_get_dispatch_size() can
|
||||
* read it while figuring out how many shader instances we'll need.
|
||||
*/
|
||||
params.num_layers = layer_count;
|
||||
} else {
|
||||
params.num_layers = 1;
|
||||
}
|
||||
|
||||
if (!blorp_get_copy_mem2img_indirect_kernel_cs(batch, ¶ms, &key)) {
|
||||
mesa_loge("failed to get copy_memory_to_image_indirect CS kernel");
|
||||
assert(false);
|
||||
return;
|
||||
}
|
||||
|
||||
batch->blorp->exec(batch, ¶ms);
|
||||
}
|
||||
|
|
@ -205,17 +205,55 @@ struct blorp_wm_inputs_clear {
|
|||
struct blorp_bounds_rect bounds_rect;
|
||||
};
|
||||
|
||||
/* Parameters using in blorp_indirect_copy.c */
|
||||
struct blorp_wm_inputs_indirect {
|
||||
/* The address of the indirect buffer containing the information about the
|
||||
* indirect copy.
|
||||
*/
|
||||
uint64_t indirect_buf_addr;
|
||||
|
||||
/* How far apart the information about each copy is inside the indirect
|
||||
* buffer.
|
||||
*/
|
||||
uint64_t indirect_buf_stride;
|
||||
|
||||
/* How many copies we have to do. */
|
||||
uint32_t copy_count;
|
||||
|
||||
/* For memory to image copies, we do a single copy per shader. This
|
||||
* represents the index of the copy to be done.
|
||||
*/
|
||||
uint32_t copy_idx;
|
||||
|
||||
/* How many dimensions does our image have? 1, 2 or 3. */
|
||||
uint32_t dimensions;
|
||||
|
||||
/* The maximum array layer of the image. */
|
||||
uint32_t max_layer;
|
||||
|
||||
/* When compressed formats are used, we pretend they are a non-compressed
|
||||
* format, of the same bpb. Since we can't maintain the exact same layout
|
||||
* of mipmap and layer offsets, we're forced to make adjustments to where X
|
||||
* and Y actually start, and are also forced to copy only one layer (or Z
|
||||
* axis position) per shader invocation.
|
||||
*/
|
||||
uint32_t x_offset;
|
||||
uint32_t y_offset;
|
||||
int forced_layer_or_z;
|
||||
};
|
||||
|
||||
struct blorp_wm_inputs
|
||||
{
|
||||
union {
|
||||
struct blorp_wm_inputs_blit blit;
|
||||
struct blorp_wm_inputs_clear clear;
|
||||
struct blorp_wm_inputs_indirect indirect;
|
||||
};
|
||||
|
||||
/* Note: Pad out to an integral number of registers when extending, but
|
||||
* make sure subgroup_id is the last 32-bit item.
|
||||
*/
|
||||
uint32_t pad[2];
|
||||
uint32_t pad[1];
|
||||
uint32_t subgroup_id;
|
||||
};
|
||||
|
||||
|
|
@ -257,6 +295,7 @@ enum blorp_shader_type {
|
|||
BLORP_SHADER_TYPE_MCS_PARTIAL_RESOLVE,
|
||||
BLORP_SHADER_TYPE_LAYER_OFFSET_VS,
|
||||
BLORP_SHADER_TYPE_GFX4_SF,
|
||||
BLORP_SHADER_TYPE_COPY_INDIRECT,
|
||||
};
|
||||
|
||||
enum blorp_shader_pipeline {
|
||||
|
|
@ -467,6 +506,25 @@ struct blorp_blit_prog_key
|
|||
uint8_t local_y;
|
||||
};
|
||||
|
||||
struct blorp_indirect_copy_mem2img_key {
|
||||
/* How many dimensions does our image have? 1, 2 or 3. */
|
||||
uint32_t dimensions;
|
||||
|
||||
/* When compressed formats are used, we pretend they are a non-compressed
|
||||
* format, of the same bpb. Since we can't maintain the exact same layout
|
||||
* of mipmap and layer offsets, we're forced to make adjustments to where X
|
||||
* and Y actually start, and are also forced to copy only one layer (or Z
|
||||
* axis position) per shader invocation.
|
||||
*/
|
||||
int forced_layer_or_z;
|
||||
|
||||
/* Info taken from isl_format_layout. */
|
||||
uint16_t format_Bpb;
|
||||
uint16_t format_bw;
|
||||
uint16_t format_bh;
|
||||
uint16_t format_bd;
|
||||
};
|
||||
|
||||
/**
|
||||
* \name BLORP internals
|
||||
* \{
|
||||
|
|
@ -597,6 +655,19 @@ blorp_op_type_is_clear(enum blorp_op op)
|
|||
}
|
||||
}
|
||||
|
||||
/* This means: blorp->wm_inputs.indirect should be used. */
|
||||
static inline bool
|
||||
blorp_op_type_is_indirect(enum blorp_op op)
|
||||
{
|
||||
switch (op) {
|
||||
case BLORP_OP_COPY_INDIRECT:
|
||||
case BLORP_OP_COPY_IMAGE_INDIRECT:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/* Asserts unless the surface is a buffer to image copy */
|
||||
#define blorp_assert_is_buffer(surf, view) \
|
||||
do { \
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
*/
|
||||
|
||||
#include "compiler/libcl/libcl.h"
|
||||
#include "compiler/libcl/libcl_vk.h"
|
||||
#include "compiler/nir/nir_defines.h"
|
||||
#include "compiler/shader_enums.h"
|
||||
|
||||
|
|
@ -15,3 +16,279 @@ blorp_check_in_bounds(uint4 bounds_rect, uint2 pos)
|
|||
return pos.x >= x0 && pos.x < x1 &&
|
||||
pos.y >= y0 && pos.y < y1;
|
||||
}
|
||||
|
||||
void nir_image_store(uint handle, int4 coords, uint sample_index,
|
||||
uint4 colors, uint lod, uint image_dim,
|
||||
uint image_array, uint format, uint access,
|
||||
uint range_base, uint src_type);
|
||||
|
||||
/* Used by vkCmdCopyMemoryIndirectKHR. */
|
||||
void
|
||||
blorp_copy_memory_indirect_shader(
|
||||
global uint *indirect_buf,
|
||||
uint copy_count,
|
||||
ulong stride_bytes,
|
||||
uint global_id)
|
||||
{
|
||||
for (uint c = 0; c < copy_count; c++) {
|
||||
uint idx = (uint)(c * (stride_bytes / 4));
|
||||
|
||||
/* The spec says the minimum alignment is 4 instead of 8, so we have to
|
||||
* do these tricks.
|
||||
*/
|
||||
VkCopyMemoryIndirectCommandKHR cmd =
|
||||
(*(global VkCopyMemoryIndirectCommandKHR*)&indirect_buf[idx]);
|
||||
|
||||
uint copy_size_ints = cmd.size / 4;
|
||||
|
||||
global int *src = (global int*)cmd.srcAddress;
|
||||
global int *dst = (global int*)cmd.dstAddress;
|
||||
|
||||
if (global_id < copy_size_ints)
|
||||
dst[global_id] = src[global_id];
|
||||
}
|
||||
}
|
||||
|
||||
struct img_copy_params {
|
||||
ulong src_address;
|
||||
uint2 row_size_px;
|
||||
uint base_layer;
|
||||
uint layer_count;
|
||||
uint3 offset;
|
||||
uint3 extent;
|
||||
};
|
||||
|
||||
void
|
||||
read_img_params(
|
||||
global uint *indirect_buf,
|
||||
uint copy_idx,
|
||||
ulong stride_bytes,
|
||||
uint dimensions,
|
||||
uint max_layer,
|
||||
uint3 format_block_size,
|
||||
bool is_block_compressed,
|
||||
struct img_copy_params *p)
|
||||
{
|
||||
uint idx = copy_idx * (stride_bytes / 4);
|
||||
|
||||
VkCopyMemoryToImageIndirectCommandKHR cmd =
|
||||
(*(global VkCopyMemoryToImageIndirectCommandKHR*)&indirect_buf[idx]);
|
||||
|
||||
p->src_address = cmd.srcAddress;
|
||||
p->row_size_px = (uint2)(cmd.bufferRowLength, cmd.bufferImageHeight);
|
||||
|
||||
/* We don't use imageSubresource.aspectMask and imageSubresource.mipLevel,
|
||||
* those are dealt with when the application calls
|
||||
* vkCmdCopyMemoryToImageIndirectKHR().
|
||||
*/
|
||||
|
||||
p->base_layer = cmd.imageSubresource.baseArrayLayer;
|
||||
p->layer_count = cmd.imageSubresource.layerCount;
|
||||
|
||||
p->offset = (uint3)(cmd.imageOffset.x, cmd.imageOffset.y,
|
||||
cmd.imageOffset.z);
|
||||
p->extent = (uint3)(cmd.imageExtent.width, cmd.imageExtent.height,
|
||||
cmd.imageExtent.depth);
|
||||
|
||||
if (p->row_size_px.x == 0)
|
||||
p->row_size_px.x = p->extent.x;
|
||||
if (p->row_size_px.y == 0)
|
||||
p->row_size_px.y = p->extent.y;
|
||||
|
||||
/* Our code deals with blocks, not pixels. */
|
||||
if (is_block_compressed) {
|
||||
p->offset /= format_block_size;
|
||||
p->extent = DIV_ROUND_UP(p->extent, format_block_size);
|
||||
}
|
||||
|
||||
/* Users can pass 3D images with the Z axis as an array layer. */
|
||||
if (dimensions == 3 && (p->base_layer != 0 || p->layer_count != 1)) {
|
||||
p->offset.z = p->base_layer;
|
||||
p->extent.z = p->layer_count;
|
||||
p->base_layer = 0;
|
||||
p->layer_count = 1;
|
||||
}
|
||||
|
||||
/* This handles VK_REMAINING_ARRAY_LAYERS and bugs. */
|
||||
if (p->base_layer + p->layer_count > max_layer + 1)
|
||||
p->layer_count = max_layer - p->base_layer + 1;
|
||||
}
|
||||
|
||||
uint4
|
||||
get_pixel(
|
||||
ulong src_address,
|
||||
uint format_Bpb)
|
||||
{
|
||||
switch (format_Bpb) {
|
||||
case 1: {
|
||||
global uchar *src_buf = (global uchar *)src_address;
|
||||
return (uint4)(src_buf[0], 0, 0, 0);
|
||||
}
|
||||
case 2: {
|
||||
global ushort *src_buf = (global ushort *)src_address;
|
||||
return (uint4)(src_buf[0], 0, 0, 0);
|
||||
}
|
||||
case 4: {
|
||||
global uint *src_buf = (global uint *)src_address;
|
||||
return (uint4)(src_buf[0], 0, 0, 0);
|
||||
}
|
||||
case 8: {
|
||||
global uint *src_buf = (global uint *)src_address;
|
||||
return (uint4)(vload2(0, src_buf), 0, 0);
|
||||
}
|
||||
case 16: {
|
||||
global uint *src_buf = (global uint *)src_address;
|
||||
return (uint4)(vload4(0, src_buf));
|
||||
}
|
||||
default:
|
||||
/* TODO: support 3, 6, 12. */
|
||||
return (uint4)(0xFF, 0, 0, 0xFF);
|
||||
}
|
||||
}
|
||||
|
||||
int4
|
||||
get_coords(
|
||||
uint3 pos,
|
||||
uint layer,
|
||||
uint2 dest_coord_offsets,
|
||||
int dimensions,
|
||||
int forced_layer_or_z)
|
||||
{
|
||||
int4 ret;
|
||||
|
||||
pos.xy += dest_coord_offsets;
|
||||
|
||||
switch (dimensions) {
|
||||
case 1:
|
||||
ret = (int4)(pos.x, layer, 0, 0);
|
||||
break;
|
||||
case 2:
|
||||
ret = (int4)(pos.x, pos.y, layer, 0);
|
||||
break;
|
||||
case 3:
|
||||
default:
|
||||
ret = (int4)(pos.x, pos.y, pos.z, 0);
|
||||
break;
|
||||
}
|
||||
|
||||
if (forced_layer_or_z != -1) {
|
||||
ret.z = 0;
|
||||
ret.w = 0;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void
|
||||
write_pixel(
|
||||
const int4 coords,
|
||||
const uint4 colors)
|
||||
{
|
||||
/* We don't seem to need to set image_array to true if we set sampler_dim
|
||||
* to 3D.
|
||||
* Setting mip_level does not do anything on Intel, we set mip levels
|
||||
* through the bindings.
|
||||
*/
|
||||
nir_image_store(0 /* The image handle. */,
|
||||
coords, /* See get_coords(). */
|
||||
0, /* Sample index for multi-sampling. */
|
||||
colors, /* The RGBA pixels, in src_type. */
|
||||
0 /* mip_level */,
|
||||
GLSL_SAMPLER_DIM_3D,
|
||||
false, /* image_array */
|
||||
0, /* format */
|
||||
ACCESS_NON_READABLE, /* access */
|
||||
0, /* range_base */
|
||||
nir_type_int32 /* src_type */);
|
||||
}
|
||||
|
||||
/* Used by vkCmdCopyMemoryToImageIndirectKHR. */
|
||||
void
|
||||
blorp_copy_memory_to_image_indirect_shader(
|
||||
/* Actual parameters. */
|
||||
global uint *indirect_buf,
|
||||
ulong stride_bytes,
|
||||
uint copy_idx,
|
||||
uint max_layer,
|
||||
uint2 dest_coord_offsets,
|
||||
uint3 global_id,
|
||||
|
||||
/* These are shader keys, they are NIR immediates. */
|
||||
uint dimensions,
|
||||
int forced_layer_or_z,
|
||||
ushort format_Bpb,
|
||||
uint3 format_block_size,
|
||||
bool is_block_compressed)
|
||||
{
|
||||
/* We have one invocation per texel of a given a mip level. This means
|
||||
* that for the pixels outside the copy area, we'll hit the 'continue'
|
||||
* below.
|
||||
*/
|
||||
uint3 src_pos = global_id;
|
||||
uint layer;
|
||||
|
||||
/* 'forced_layer_or_z' means that whatever slice or layer we're
|
||||
* trying to work with is set as part of our binding (view) as depth
|
||||
* 0 or layer 0, depending on the dimensionality. This can happen
|
||||
* when we're trying to pretend a format is something that it's not
|
||||
* (e.g., we're treating a block compressed 4x4 64bpp format as an
|
||||
* r32g32 format).
|
||||
*/
|
||||
if (forced_layer_or_z != -1) {
|
||||
if (dimensions == 3) {
|
||||
src_pos.z = forced_layer_or_z;
|
||||
layer = 0;
|
||||
} else {
|
||||
src_pos.z = 0;
|
||||
layer = forced_layer_or_z;
|
||||
}
|
||||
} else {
|
||||
if (dimensions == 3) {
|
||||
src_pos.z = global_id.z;
|
||||
layer = 0;
|
||||
} else {
|
||||
src_pos.z = 0;
|
||||
layer = global_id.z;
|
||||
}
|
||||
}
|
||||
|
||||
struct img_copy_params p;
|
||||
read_img_params(indirect_buf, copy_idx, stride_bytes, dimensions,
|
||||
max_layer, format_block_size, is_block_compressed, &p);
|
||||
|
||||
if (any(src_pos >= p.extent))
|
||||
return;
|
||||
|
||||
uint3 dst_pos = src_pos + p.offset;
|
||||
if (forced_layer_or_z == -1 && dimensions != 3)
|
||||
layer = global_id.z + p.base_layer;
|
||||
|
||||
if (layer > p.base_layer + p.layer_count - 1)
|
||||
return;
|
||||
|
||||
uint2 row_size_blocks = p.row_size_px;
|
||||
if (is_block_compressed) {
|
||||
row_size_blocks = DIV_ROUND_UP(row_size_blocks,
|
||||
format_block_size.xy);
|
||||
}
|
||||
uint row_length_bytes = row_size_blocks.x * format_Bpb;
|
||||
uint row_height_bytes = row_size_blocks.y * row_length_bytes;
|
||||
|
||||
uint buf_z_offset_bytes = src_pos.z * row_height_bytes;
|
||||
uint buf_y_offset_bytes = src_pos.y * row_length_bytes;
|
||||
uint buf_x_offset_bytes = src_pos.x * format_Bpb;
|
||||
|
||||
uint layer_offset_bytes = (layer - p.base_layer) * row_height_bytes;
|
||||
uint buf_offset_bytes = layer_offset_bytes +
|
||||
buf_z_offset_bytes +
|
||||
buf_y_offset_bytes +
|
||||
buf_x_offset_bytes;
|
||||
|
||||
uint4 colors = get_pixel(p.src_address + buf_offset_bytes,
|
||||
format_Bpb);
|
||||
|
||||
int4 coords = get_coords(dst_pos, layer, dest_coord_offsets, dimensions,
|
||||
forced_layer_or_z);
|
||||
|
||||
write_pixel(coords, colors);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -37,6 +37,7 @@ files_libblorp = files(
|
|||
'blorp.h',
|
||||
'blorp_blit.c',
|
||||
'blorp_clear.c',
|
||||
'blorp_indirect_copy.c',
|
||||
'blorp_nir_builder.h',
|
||||
'blorp_priv.h',
|
||||
)
|
||||
|
|
|
|||
|
|
@ -1066,6 +1066,17 @@ mi_ior(struct mi_builder *b,
|
|||
MI_ALU_STORE, MI_ALU_ACCU);
|
||||
}
|
||||
|
||||
static inline struct mi_value
|
||||
mi_ixor(struct mi_builder *b,
|
||||
struct mi_value src0, struct mi_value src1)
|
||||
{
|
||||
if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
|
||||
return mi_imm(mi_value_to_u64(src0) ^ mi_value_to_u64(src1));
|
||||
|
||||
return mi_math_binop(b, MI_ALU_XOR, src0, src1,
|
||||
MI_ALU_STORE, MI_ALU_ACCU);
|
||||
}
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
static inline struct mi_value
|
||||
mi_ishl(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
|
||||
|
|
@ -1290,6 +1301,40 @@ mi_udiv32_imm(struct mi_builder *b, struct mi_value N, uint32_t D)
|
|||
}
|
||||
}
|
||||
|
||||
/* Finds the maximum between the two specified unsigned numbers. */
|
||||
static inline struct mi_value
|
||||
mi_umax2(struct mi_builder *b, struct mi_value val1, struct mi_value val2)
|
||||
{
|
||||
/* The idea of the alrogithm here is that the value of 'mask' will be
|
||||
* either 0 or ~0 depending on which number is bigger. Then we use AND
|
||||
* operations to ensure the smaller value becomes zero and the bigger value
|
||||
* is preserved, and finally OR both values to the destination (the bigger
|
||||
* and zero).
|
||||
*
|
||||
* In other words:
|
||||
* mask = val1 < val2 ? 0xFFFFFFFF : 0x0;
|
||||
* biggest = (val1 & ~mask) | (val2 & mask);
|
||||
*/
|
||||
|
||||
/* If 'val1' is smaller, 'mask' is ~0, otherwise it's 0. */
|
||||
struct mi_value mask = mi_ult(b, mi_value_ref(b, val1),
|
||||
mi_value_ref(b, val2));
|
||||
struct mi_value notmask = mi_ixor(b, mi_value_ref(b, mask),
|
||||
mi_imm(UINT64_MAX));
|
||||
/* If 'val1' is smaller, 'notmask' is 0, so we zero it, otherwise we
|
||||
* preserve the value by ANDing it with ~0.
|
||||
*/
|
||||
struct mi_value val1_or_zero = mi_iand(b, val1, notmask);
|
||||
/* If 'val2' is smaller, mask is 0, so we zero it, otherwise we preserve
|
||||
* the value.
|
||||
*/
|
||||
struct mi_value val2_or_zero = mi_iand(b, val2, mask);
|
||||
/* The smaller value was zeroed, the other was preserved, so just OR
|
||||
* them now.
|
||||
*/
|
||||
return mi_ior(b, val1_or_zero, val2_or_zero);
|
||||
}
|
||||
|
||||
#endif /* MI_MATH section */
|
||||
|
||||
/* This assumes addresses of strictly more than 32bits (aka. Gfx8+). */
|
||||
|
|
|
|||
|
|
@ -1252,6 +1252,58 @@ TEST_F(mi_builder_test, udiv32_imm)
|
|||
}
|
||||
}
|
||||
|
||||
TEST_F(mi_builder_test, umax2_32)
|
||||
{
|
||||
const uint32_t values[] = {
|
||||
0x01234567,
|
||||
0x42424242,
|
||||
0xffffffff,
|
||||
0xf0000000,
|
||||
0x00000001,
|
||||
0x00000000,
|
||||
0x000f0000,
|
||||
0x000f0001,
|
||||
};
|
||||
memcpy(input, values, sizeof(values));
|
||||
|
||||
uint32_t cmp_val = 0x000f0001;
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(values); i++)
|
||||
mi_store(&b, out_mem64(i * 4), mi_umax2(&b, in_mem32(i * 4), mi_imm(cmp_val)));
|
||||
|
||||
submit_batch();
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
|
||||
EXPECT_EQ(*(uint32_t *)(output + i * 4),
|
||||
values[i] >= cmp_val ? values[i] : cmp_val);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(mi_builder_test, umax2_64)
|
||||
{
|
||||
const uint64_t values[] = {
|
||||
0x0123456789abcdef,
|
||||
0x4242424242424242,
|
||||
0xffffffffffffffff,
|
||||
0xf000000000000000,
|
||||
0x0000000000000001,
|
||||
0x0000000000000000,
|
||||
0x00000000f0000000,
|
||||
0x00000000f0000001,
|
||||
};
|
||||
memcpy(input, values, sizeof(values));
|
||||
|
||||
uint64_t cmp_val = 0x00000000f0000001;
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(values); i++)
|
||||
mi_store(&b, out_mem64(i * 8), mi_umax2(&b, in_mem64(i * 8), mi_imm(cmp_val)));
|
||||
|
||||
submit_batch();
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
|
||||
EXPECT_EQ(*(uint64_t *)(output + i * 8),
|
||||
values[i] >= cmp_val ? values[i] : cmp_val);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(mi_builder_test, store_if)
|
||||
{
|
||||
uint64_t u64 = 0xb453b411deadc0deull;
|
||||
|
|
|
|||
|
|
@ -2811,3 +2811,135 @@ anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
|
|||
|
||||
anv_blorp_batch_finish(&batch);
|
||||
}
|
||||
|
||||
void
|
||||
anv_CmdCopyMemoryIndirectKHR(
|
||||
VkCommandBuffer commandBuffer,
|
||||
const VkCopyMemoryIndirectInfoKHR* pCopyMemoryIndirectInfo)
|
||||
{
|
||||
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
|
||||
if (pCopyMemoryIndirectInfo->copyCount == 0)
|
||||
return;
|
||||
|
||||
const uint64_t indirect_buf_addr =
|
||||
pCopyMemoryIndirectInfo->copyAddressRange.address;
|
||||
const uint32_t copy_count = pCopyMemoryIndirectInfo->copyCount;
|
||||
const uint64_t stride = pCopyMemoryIndirectInfo->copyAddressRange.stride;
|
||||
|
||||
/* These are all restrictions by the spec. */
|
||||
assert((pCopyMemoryIndirectInfo->srcCopyFlags &
|
||||
VK_ADDRESS_COPY_PROTECTED_BIT_KHR) == 0);
|
||||
assert((pCopyMemoryIndirectInfo->dstCopyFlags &
|
||||
VK_ADDRESS_COPY_PROTECTED_BIT_KHR) == 0);
|
||||
assert(pCopyMemoryIndirectInfo->copyAddressRange.size >=
|
||||
pCopyMemoryIndirectInfo->copyCount *
|
||||
pCopyMemoryIndirectInfo->copyAddressRange.stride);
|
||||
|
||||
assert(!anv_cmd_buffer_is_blitter_queue(cmd_buffer));
|
||||
|
||||
enum blorp_batch_flags blorp_flags = BLORP_BATCH_USE_COMPUTE;
|
||||
|
||||
struct blorp_batch batch;
|
||||
anv_blorp_batch_init(cmd_buffer, &batch, blorp_flags);
|
||||
|
||||
blorp_copy_memory_indirect(&batch, indirect_buf_addr, copy_count, stride);
|
||||
|
||||
anv_blorp_batch_finish(&batch);
|
||||
}
|
||||
|
||||
void
|
||||
anv_CmdCopyMemoryToImageIndirectKHR(
|
||||
VkCommandBuffer commandBuffer,
|
||||
const VkCopyMemoryToImageIndirectInfoKHR* pCopyMemoryToImageIndirectInfo)
|
||||
{
|
||||
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
ANV_FROM_HANDLE(anv_image, anv_image,
|
||||
pCopyMemoryToImageIndirectInfo->dstImage);
|
||||
|
||||
if (pCopyMemoryToImageIndirectInfo->copyCount == 0)
|
||||
return;
|
||||
|
||||
const uint64_t indirect_buf_addr =
|
||||
pCopyMemoryToImageIndirectInfo->copyAddressRange.address;
|
||||
const uint32_t copy_count = pCopyMemoryToImageIndirectInfo->copyCount;
|
||||
const uint64_t stride =
|
||||
pCopyMemoryToImageIndirectInfo->copyAddressRange.stride;
|
||||
const VkImageLayout img_layout =
|
||||
pCopyMemoryToImageIndirectInfo->dstImageLayout;
|
||||
|
||||
assert((pCopyMemoryToImageIndirectInfo->srcCopyFlags &
|
||||
VK_ADDRESS_COPY_PROTECTED_BIT_KHR) == 0);
|
||||
assert(pCopyMemoryToImageIndirectInfo->copyAddressRange.size >=
|
||||
pCopyMemoryToImageIndirectInfo->copyCount *
|
||||
pCopyMemoryToImageIndirectInfo->copyAddressRange.stride);
|
||||
|
||||
assert(!anv_cmd_buffer_is_blitter_queue(cmd_buffer));
|
||||
|
||||
enum blorp_batch_flags blorp_flags = BLORP_BATCH_USE_COMPUTE;
|
||||
struct blorp_batch batch;
|
||||
anv_blorp_batch_init(cmd_buffer, &batch, blorp_flags);
|
||||
|
||||
for (int c = 0; c < copy_count; c++) {
|
||||
const VkImageSubresourceLayers *img_subresource =
|
||||
&pCopyMemoryToImageIndirectInfo->pImageSubresources[c];
|
||||
VkImageAspectFlags aspect_mask = img_subresource->aspectMask;
|
||||
uint32_t mip_level = img_subresource->mipLevel;
|
||||
uint32_t base_layer = img_subresource->baseArrayLayer;
|
||||
uint32_t layer_count = img_subresource->layerCount;
|
||||
|
||||
assert(mip_level != VK_REMAINING_MIP_LEVELS);
|
||||
if (layer_count == VK_REMAINING_ARRAY_LAYERS)
|
||||
layer_count = anv_image->vk.array_layers - base_layer;
|
||||
|
||||
const unsigned plane =
|
||||
anv_image_aspect_to_plane(anv_image, aspect_mask);
|
||||
struct isl_surf *img_isl_surf =
|
||||
&anv_image->planes[plane].primary_surface.isl;
|
||||
enum isl_format format = img_isl_surf->format;
|
||||
bool format_is_compressed = isl_format_is_compressed(format);
|
||||
|
||||
struct blorp_surf img_blorp_surf;
|
||||
get_blorp_surf_for_anv_image(cmd_buffer, anv_image,
|
||||
aspect_mask, VK_IMAGE_USAGE_STORAGE_BIT,
|
||||
img_layout,
|
||||
anv_image->planes[plane].aux_usage,
|
||||
format, false /* cross_aspect */,
|
||||
&img_blorp_surf);
|
||||
|
||||
anv_cmd_buffer_mark_image_written(cmd_buffer, anv_image,
|
||||
img_subresource->aspectMask,
|
||||
img_blorp_surf.aux_usage, mip_level,
|
||||
base_layer, layer_count);
|
||||
|
||||
/* If the format is compressed, we will pretend the format is one where
|
||||
* each pixel is the size of the compressed block, so image stores can
|
||||
* work. Unfortunately, that translation only works if we do it for one
|
||||
* mip level of a specific layer, as the complete layout of, say, BC4
|
||||
* and R32G32 formats are not compatible: so we loop through all layers
|
||||
* here.
|
||||
*/
|
||||
if (format_is_compressed) {
|
||||
int min_l_or_z, top_l_or_z;
|
||||
if (img_blorp_surf.surf->dim == ISL_SURF_DIM_3D) {
|
||||
min_l_or_z = 0;
|
||||
top_l_or_z = img_isl_surf->logical_level0_px.depth >> mip_level;
|
||||
} else {
|
||||
min_l_or_z = base_layer;
|
||||
top_l_or_z = base_layer + layer_count;
|
||||
}
|
||||
for (int l_or_z = min_l_or_z; l_or_z < top_l_or_z; l_or_z++) {
|
||||
blorp_copy_memory_to_image_indirect(&batch, &img_blorp_surf,
|
||||
indirect_buf_addr,
|
||||
stride, c, mip_level,
|
||||
layer_count, l_or_z);
|
||||
}
|
||||
} else {
|
||||
blorp_copy_memory_to_image_indirect(&batch, &img_blorp_surf,
|
||||
indirect_buf_addr, stride, c,
|
||||
mip_level, layer_count, -1);
|
||||
}
|
||||
}
|
||||
|
||||
anv_blorp_batch_finish(&batch);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -629,6 +629,39 @@ anv_get_format_aspect(const struct anv_physical_device *device,
|
|||
return anv_get_format_plane(device, vk_format, plane, tiling);
|
||||
}
|
||||
|
||||
static bool
|
||||
anv_format_supports_indirect_copies(const struct anv_physical_device *pdevice,
|
||||
const struct anv_format *anv_format)
|
||||
{
|
||||
const struct isl_format_layout *fmtl =
|
||||
isl_format_get_layout(anv_format->planes[0].isl_format);
|
||||
|
||||
/* CTS insists on it even when we say we don't support it. */
|
||||
if (!pdevice->vk.supported_features.indirectMemoryToImageCopy)
|
||||
return false;
|
||||
|
||||
/* TODO: implement support for this in the copy shader. */
|
||||
if (!util_is_power_of_two_or_zero(fmtl->bpb))
|
||||
return false;
|
||||
|
||||
/* TODO: we use compute for indirect copies, and compute cannot write HiZ,
|
||||
* we could try to support that if we see that applications want it.
|
||||
*/
|
||||
if (vk_format_is_depth_or_stencil(anv_format->vk_format))
|
||||
return false;
|
||||
|
||||
/* Let's leave YCbCr and multi-planar formats out until we have proper
|
||||
* tests to verify they work.
|
||||
*/
|
||||
if (isl_format_is_yuv(anv_format->planes[0].isl_format))
|
||||
return false;
|
||||
|
||||
if (anv_format->n_planes > 1)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Format capabilities
|
||||
|
||||
static bool
|
||||
|
|
@ -715,28 +748,41 @@ anv_color_format_supports_drm_modifier_tiling(const struct anv_physical_device *
|
|||
}
|
||||
|
||||
static VkFormatFeatureFlags2
|
||||
anv_get_compressed_emulated_format_features(const struct anv_format *anv_format,
|
||||
anv_get_compressed_emulated_format_features(const struct anv_physical_device *pdevice,
|
||||
const struct anv_format *anv_format,
|
||||
const VkImageTiling vk_tiling)
|
||||
{
|
||||
VkFormatFeatureFlags2 flags = 0;
|
||||
|
||||
assert(isl_format_is_compressed(anv_format->planes[0].isl_format));
|
||||
|
||||
/* Require optimal tiling so that we can decompress on upload */
|
||||
if (vk_tiling == VK_IMAGE_TILING_OPTIMAL) {
|
||||
switch (vk_tiling) {
|
||||
case VK_IMAGE_TILING_OPTIMAL:
|
||||
/* Required features for compressed formats */
|
||||
return VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT |
|
||||
VK_FORMAT_FEATURE_2_BLIT_SRC_BIT |
|
||||
VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_FILTER_LINEAR_BIT |
|
||||
VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT |
|
||||
VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT |
|
||||
VK_FORMAT_FEATURE_2_HOST_IMAGE_TRANSFER_BIT_EXT;
|
||||
} else if (vk_tiling == VK_IMAGE_TILING_LINEAR) {
|
||||
flags |= VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT |
|
||||
VK_FORMAT_FEATURE_2_BLIT_SRC_BIT |
|
||||
VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_FILTER_LINEAR_BIT |
|
||||
VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT |
|
||||
VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT |
|
||||
VK_FORMAT_FEATURE_2_HOST_IMAGE_TRANSFER_BIT_EXT;
|
||||
break;
|
||||
|
||||
case VK_IMAGE_TILING_LINEAR:
|
||||
/* Images used for transfers */
|
||||
return VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT |
|
||||
VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT |
|
||||
VK_FORMAT_FEATURE_2_HOST_IMAGE_TRANSFER_BIT_EXT;
|
||||
flags |= VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT |
|
||||
VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT |
|
||||
VK_FORMAT_FEATURE_2_HOST_IMAGE_TRANSFER_BIT_EXT;
|
||||
break;
|
||||
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 0;
|
||||
if (anv_format_supports_indirect_copies(pdevice, anv_format))
|
||||
flags |= VK_FORMAT_FEATURE_2_COPY_IMAGE_INDIRECT_DST_BIT_KHR;
|
||||
|
||||
return flags;
|
||||
}
|
||||
|
||||
static VkFormatFeatureFlags2
|
||||
|
|
@ -852,7 +898,8 @@ anv_get_color_format_features(const struct anv_physical_device *physical_device,
|
|||
if (vk_tiling == VK_IMAGE_TILING_LINEAR &&
|
||||
isl_format_get_layout(plane_format.isl_format)->txc == ISL_TXC_ASTC)
|
||||
return VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT |
|
||||
VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT;
|
||||
VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT |
|
||||
VK_FORMAT_FEATURE_2_COPY_IMAGE_INDIRECT_DST_BIT_KHR;
|
||||
|
||||
flags |= VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT |
|
||||
VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_FILTER_MINMAX_BIT |
|
||||
|
|
@ -935,6 +982,10 @@ anv_get_color_format_features(const struct anv_physical_device *physical_device,
|
|||
VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT;
|
||||
}
|
||||
|
||||
if ((flags & VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT) &&
|
||||
anv_format_supports_indirect_copies(physical_device, anv_format))
|
||||
flags |= VK_FORMAT_FEATURE_2_COPY_IMAGE_INDIRECT_DST_BIT_KHR;
|
||||
|
||||
/* XXX: We handle 3-channel formats by switching them out for RGBX or
|
||||
* RGBA formats behind-the-scenes. This works fine for textures
|
||||
* because the upload process will fill in the extra channel.
|
||||
|
|
@ -1125,7 +1176,8 @@ anv_get_image_format_features2(const struct anv_physical_device *physical_device
|
|||
}
|
||||
|
||||
if (anv_is_compressed_format_emulated(physical_device, vk_format)) {
|
||||
return anv_get_compressed_emulated_format_features(anv_format,
|
||||
return anv_get_compressed_emulated_format_features(physical_device,
|
||||
anv_format,
|
||||
vk_tiling);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -152,6 +152,7 @@ get_device_extensions(const struct anv_physical_device *device,
|
|||
.KHR_cooperative_matrix = device->has_cooperative_matrix,
|
||||
.NV_cooperative_matrix2 = device->has_cooperative_matrix,
|
||||
.KHR_copy_commands2 = true,
|
||||
.KHR_copy_memory_indirect = true,
|
||||
.KHR_create_renderpass2 = true,
|
||||
.KHR_dedicated_allocation = true,
|
||||
.KHR_deferred_host_operations = true,
|
||||
|
|
@ -1023,6 +1024,10 @@ get_features(const struct anv_physical_device *pdevice,
|
|||
/* VK_KHR_pipeline_binary */
|
||||
.pipelineBinaries = true,
|
||||
|
||||
/* VK_KHR_copy_memory_indirect */
|
||||
.indirectMemoryCopy = true,
|
||||
.indirectMemoryToImageCopy = pdevice->info.ver >= 12,
|
||||
|
||||
#ifdef ANV_USE_WSI_PLATFORM
|
||||
/* VK_EXT_present_timing */
|
||||
.presentTiming = true,
|
||||
|
|
@ -2205,6 +2210,11 @@ get_properties(const struct anv_physical_device *pdevice,
|
|||
*/
|
||||
props->shaderBinaryVersion = 0;
|
||||
}
|
||||
|
||||
/* VK_KHR_copy_memory_indirect */
|
||||
{
|
||||
props->supportedQueues = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT;
|
||||
}
|
||||
}
|
||||
|
||||
/* This function restricts the maximum size of system memory heap. The
|
||||
|
|
|
|||
|
|
@ -534,6 +534,8 @@ get_color_aux_op(const struct blorp_params *params)
|
|||
case BLORP_OP_SLOW_COLOR_CLEAR:
|
||||
case BLORP_OP_BLIT:
|
||||
case BLORP_OP_COPY:
|
||||
case BLORP_OP_COPY_INDIRECT:
|
||||
case BLORP_OP_COPY_IMAGE_INDIRECT:
|
||||
assert(params->fast_clear_op == ISL_AUX_OP_NONE);
|
||||
return ANV_COLOR_AUX_OP_CLASS_NONE;
|
||||
}
|
||||
|
|
@ -596,3 +598,10 @@ genX(blorp_init_dynamic_states)(struct blorp_context *context)
|
|||
{
|
||||
blorp_init_dynamic_states(context);
|
||||
}
|
||||
|
||||
static bool *
|
||||
blorp_get_write_fencing_status(struct blorp_batch *blorp_batch)
|
||||
{
|
||||
struct anv_cmd_buffer *cmd_buffer = blorp_batch->driver_batch;
|
||||
return &cmd_buffer->batch.write_fence_status;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue