From 3a8c8d25fd7ebce9038872efe50da07d10e210ae Mon Sep 17 00:00:00 2001 From: Jesse Natalie Date: Mon, 10 Jan 2022 14:14:00 -0800 Subject: [PATCH] d3d12: Add a compute transformation to handle indirect draws that need draw params Reviewed-by: Sil Vilerino Part-of: --- .../drivers/d3d12/d3d12_cmd_signature.cpp | 21 +- .../drivers/d3d12/d3d12_cmd_signature.h | 9 +- src/gallium/drivers/d3d12/d3d12_compiler.h | 1 + .../d3d12/d3d12_compute_transforms.cpp | 182 ++++++++++++++++++ .../drivers/d3d12/d3d12_compute_transforms.h | 55 ++++++ src/gallium/drivers/d3d12/d3d12_context.cpp | 3 + src/gallium/drivers/d3d12/d3d12_context.h | 3 + src/gallium/drivers/d3d12/d3d12_draw.cpp | 136 ++++++++++++- src/gallium/drivers/d3d12/meson.build | 1 + 9 files changed, 400 insertions(+), 11 deletions(-) create mode 100644 src/gallium/drivers/d3d12/d3d12_compute_transforms.cpp create mode 100644 src/gallium/drivers/d3d12/d3d12_compute_transforms.h diff --git a/src/gallium/drivers/d3d12/d3d12_cmd_signature.cpp b/src/gallium/drivers/d3d12/d3d12_cmd_signature.cpp index e5436b3c573..5c52429e62d 100644 --- a/src/gallium/drivers/d3d12/d3d12_cmd_signature.cpp +++ b/src/gallium/drivers/d3d12/d3d12_cmd_signature.cpp @@ -27,6 +27,8 @@ #include "util/u_memory.h" +#include + struct d3d12_cmd_signature { struct d3d12_cmd_signature_key key; ID3D12CommandSignature *sig; @@ -36,16 +38,25 @@ static ID3D12CommandSignature * create_cmd_signature(struct d3d12_context *ctx, const struct d3d12_cmd_signature_key *key) { D3D12_COMMAND_SIGNATURE_DESC cmd_sig_desc = {}; - D3D12_INDIRECT_ARGUMENT_DESC indirect_arg = {}; - indirect_arg.Type = key->compute ? 
D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH : + D3D12_INDIRECT_ARGUMENT_DESC indirect_args[2] = {}; + + unsigned num_args = 0; /* argument descs filled in so far */ + if (key->draw_params) { + indirect_args[num_args].Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT; + indirect_args[num_args].Constant.RootParameterIndex = key->draw_params_root_const_param; + indirect_args[num_args].Constant.DestOffsetIn32BitValues = key->draw_params_root_const_offset; + indirect_args[num_args++].Constant.Num32BitValuesToSet = 4; /* draw params occupy four 32-bit root constants */ + } + + indirect_args[num_args++].Type = key->compute ? D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH : key->indexed ? D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED : D3D12_INDIRECT_ARGUMENT_TYPE_DRAW; cmd_sig_desc.ByteStride = key->multi_draw_stride; - cmd_sig_desc.NumArgumentDescs = 1; - cmd_sig_desc.pArgumentDescs = &indirect_arg; + cmd_sig_desc.NumArgumentDescs = num_args; + cmd_sig_desc.pArgumentDescs = indirect_args; ID3D12CommandSignature *ret = nullptr; - d3d12_screen(ctx->base.screen)->dev->CreateCommandSignature(&cmd_sig_desc, nullptr, + d3d12_screen(ctx->base.screen)->dev->CreateCommandSignature(&cmd_sig_desc, key->root_sig, IID_PPV_ARGS(&ret)); return ret; } diff --git a/src/gallium/drivers/d3d12/d3d12_cmd_signature.h b/src/gallium/drivers/d3d12/d3d12_cmd_signature.h index f62d22361ec..dbf65d0318e 100644 --- a/src/gallium/drivers/d3d12/d3d12_cmd_signature.h +++ b/src/gallium/drivers/d3d12/d3d12_cmd_signature.h @@ -29,9 +29,16 @@ struct d3d12_cmd_signature_key { uint8_t compute:1; uint8_t indexed:1; - /* 30 bits padding */ + uint8_t draw_params:1; /* emit a root-constant update ahead of the draw/dispatch argument */ + /* 5 bits padding */ + uint8_t draw_params_root_const_param; /* root parameter index that receives the constants */ + uint8_t draw_params_root_const_offset; /* destination offset, in 32-bit values */ + /* 8 bits padding */ unsigned multi_draw_stride; + + /* Will be zero if no constant/resource updates in the arg buffer */ + ID3D12RootSignature *root_sig; }; void diff --git a/src/gallium/drivers/d3d12/d3d12_compiler.h b/src/gallium/drivers/d3d12/d3d12_compiler.h index 40cc4fba286..fcab4873dd8 100644 --- a/src/gallium/drivers/d3d12/d3d12_compiler.h +++ 
b/src/gallium/drivers/d3d12/d3d12_compiler.h @@ -48,6 +48,7 @@ enum d3d12_state_var { D3D12_MAX_GRAPHICS_STATE_VARS, D3D12_STATE_VAR_NUM_WORKGROUPS = 0, + D3D12_STATE_VAR_TRANSFORM_GENERIC0, D3D12_MAX_COMPUTE_STATE_VARS, D3D12_MAX_STATE_VARS = MAX2(D3D12_MAX_GRAPHICS_STATE_VARS, D3D12_MAX_COMPUTE_STATE_VARS) diff --git a/src/gallium/drivers/d3d12/d3d12_compute_transforms.cpp b/src/gallium/drivers/d3d12/d3d12_compute_transforms.cpp new file mode 100644 index 00000000000..69eb5771f49 --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_compute_transforms.cpp @@ -0,0 +1,182 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "d3d12_compute_transforms.h" +#include "d3d12_nir_passes.h" + +#include "nir.h" +#include "nir_builder.h" + +#include "nir_to_dxil.h" + +#include "util/u_memory.h" + +nir_shader * +get_indirect_draw_base_vertex_transform(const d3d12_compute_transform_key *args) +{ /* Builds a compute shader that copies each indirect draw record to the output buffer, prefixed with four draw-param uints */ + nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, + dxil_get_nir_compiler_options(), "TransformIndirectDrawBaseVertex"); + + if (args->base_vertex.dynamic_count) { + nir_variable *count_ubo = nir_variable_create(b.shader, nir_var_mem_ubo, + glsl_uint_type(), "in_count"); + count_ubo->data.driver_location = 0; + } + + nir_variable *input_ssbo = nir_variable_create(b.shader, nir_var_mem_ssbo, + glsl_array_type(glsl_uint_type(), 0, 0), "input"); + nir_variable *output_ssbo = nir_variable_create(b.shader, nir_var_mem_ssbo, + input_ssbo->type, "output"); + input_ssbo->data.driver_location = 0; + output_ssbo->data.driver_location = 1; + + nir_ssa_def *draw_id = nir_channel(&b, nir_load_global_invocation_id(&b, 32), 0); /* x component of the global invocation ID selects the draw record */ + if (args->base_vertex.dynamic_count) { + nir_ssa_def *count = nir_load_ubo(&b, 1, 32, nir_imm_int(&b, 1), nir_imm_int(&b, 0)); + nir_push_if(&b, nir_ilt(&b, draw_id, count)); /* only process records below the count read from the UBO */ + } + + nir_variable *stride_ubo = NULL; + nir_ssa_def *in_stride_offset_and_base_drawid = d3d12_get_state_var(&b, D3D12_STATE_VAR_TRANSFORM_GENERIC0, "d3d12_Stride", + glsl_uvec4_type(), &stride_ubo); /* x = input stride, y = input offset, z = base draw ID */ + nir_ssa_def *in_offset = nir_iadd(&b, nir_channel(&b, in_stride_offset_and_base_drawid, 1), + nir_imul(&b, nir_channel(&b, in_stride_offset_and_base_drawid, 0), draw_id)); + nir_ssa_def *in_data0 = nir_load_ssbo(&b, 4, 32, nir_imm_int(&b, 0), in_offset, (gl_access_qualifier)0, 4, 0); + + nir_ssa_def *in_data1 = NULL; + nir_ssa_def *base_vertex = NULL, *base_instance = NULL; + if (args->base_vertex.indexed) { + nir_ssa_def *in_offset1 = nir_iadd(&b, in_offset, nir_imm_int(&b, 16)); + in_data1 = nir_load_ssbo(&b, 1, 32, nir_imm_int(&b, 0), in_offset1, (gl_access_qualifier)0, 4, 0); + 
base_vertex = nir_channel(&b, in_data0, 3); + base_instance = in_data1; + } else { + base_vertex = nir_channel(&b, in_data0, 2); + base_instance = nir_channel(&b, in_data0, 3); + } + + /* 4 additional uints for base vertex, base instance, draw ID, and a bool for indexed draw */ + unsigned out_stride = sizeof(uint32_t) * ((args->base_vertex.indexed ? 5 : 4) + 4); + + nir_ssa_def *out_offset = nir_imul(&b, draw_id, nir_imm_int(&b, out_stride)); + nir_ssa_def *out_data0 = nir_vec4(&b, base_vertex, base_instance, + nir_iadd(&b, draw_id, nir_channel(&b, in_stride_offset_and_base_drawid, 2)), + nir_imm_int(&b, args->base_vertex.indexed ? -1 : 0)); /* all-ones when the draw is indexed, zero otherwise */ + nir_ssa_def *out_data1 = in_data0; + + nir_store_ssbo(&b, out_data0, nir_imm_int(&b, 1), out_offset, 0xf, (gl_access_qualifier)0, 4, 0); + nir_store_ssbo(&b, out_data1, nir_imm_int(&b, 1), nir_iadd(&b, out_offset, nir_imm_int(&b, 16)), + (1u << out_data1->num_components) - 1, (gl_access_qualifier)0, 4, 0); /* the original record follows the 16-byte prefix */ + if (args->base_vertex.indexed) + nir_store_ssbo(&b, in_data1, nir_imm_int(&b, 1), nir_iadd(&b, out_offset, nir_imm_int(&b, 32)), 1, (gl_access_qualifier)0, 4, 0); + + if (args->base_vertex.dynamic_count) + nir_pop_if(&b, NULL); + + nir_validate_shader(b.shader, "creation"); + b.shader->info.num_ssbos = 2; + b.shader->info.num_ubos = (args->base_vertex.dynamic_count ? 
1 : 0); + + return b.shader; +} + +static struct nir_shader * +create_compute_transform(const d3d12_compute_transform_key *key) +{ /* dispatches on key->type to the matching shader builder */ + switch (key->type) { + case d3d12_compute_transform_type::base_vertex: + return get_indirect_draw_base_vertex_transform(key); + default: + unreachable("Invalid transform"); + } +} + +struct compute_transform +{ + d3d12_compute_transform_key key; + d3d12_shader_selector *shader; +}; + +d3d12_shader_selector * +d3d12_get_compute_transform(struct d3d12_context *ctx, const d3d12_compute_transform_key *key) +{ /* Looks key up in ctx->compute_transform_cache; on a miss, builds the shader and inserts it. Returns NULL if allocation or shader creation fails */ + struct hash_entry *entry = _mesa_hash_table_search(ctx->compute_transform_cache, key); + if (!entry) { + compute_transform *data = (compute_transform *)MALLOC(sizeof(compute_transform)); + if (!data) + return NULL; + + memcpy(&data->key, key, sizeof(*key)); + nir_shader *s = create_compute_transform(key); + if (!s) { + FREE(data); + return NULL; + } + struct pipe_compute_state shader_args = { PIPE_SHADER_IR_NIR, s }; + data->shader = d3d12_create_compute_shader(ctx, &shader_args); + if (!data->shader) { + ralloc_free(s); + FREE(data); + return NULL; + } + + entry = _mesa_hash_table_insert(ctx->compute_transform_cache, &data->key, data); + assert(entry); + } + + return ((struct compute_transform *)entry->data)->shader; +} + +static uint32_t +hash_compute_transform_key(const void *key) +{ /* hashes the raw bytes of the whole key struct */ + return _mesa_hash_data(key, sizeof(struct d3d12_compute_transform_key)); +} + +static bool +equals_compute_transform_key(const void *a, const void *b) +{ /* bytewise comparison over the whole key struct */ + return memcmp(a, b, sizeof(struct d3d12_compute_transform_key)) == 0; +} + +void +d3d12_compute_transform_cache_init(struct d3d12_context *ctx) +{ + ctx->compute_transform_cache = _mesa_hash_table_create(NULL, + hash_compute_transform_key, + equals_compute_transform_key); +} + +static void +delete_entry(struct hash_entry *entry) +{ /* passed to _mesa_hash_table_destroy: frees the shader and the cache entry */ + struct compute_transform *data = (struct compute_transform *)entry->data; + d3d12_shader_free(data->shader); + FREE(data); +} + +void 
+d3d12_compute_transform_cache_destroy(struct d3d12_context *ctx) +{ + _mesa_hash_table_destroy(ctx->compute_transform_cache, delete_entry); +} diff --git a/src/gallium/drivers/d3d12/d3d12_compute_transforms.h b/src/gallium/drivers/d3d12/d3d12_compute_transforms.h new file mode 100644 index 00000000000..707bf52b0ba --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_compute_transforms.h @@ -0,0 +1,55 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef D3D12_COMPUTE_TRANSFORMS_H +#define D3D12_COMPUTE_TRANSFORMS_H + +#include "d3d12_context.h" +#include "d3d12_compiler.h" + +enum class d3d12_compute_transform_type +{ + base_vertex, /* prefix indirect draw records with draw params */ + max, +}; + +struct d3d12_compute_transform_key +{ /* hashed and compared as raw bytes by the transform cache */ + d3d12_compute_transform_type type; + + struct { + unsigned indexed:1; /* source records are indexed draws (5 dwords instead of 4) */ + unsigned dynamic_count:1; /* draw count is read from a buffer at run time */ + } base_vertex; +}; + +d3d12_shader_selector * +d3d12_get_compute_transform(struct d3d12_context *ctx, const d3d12_compute_transform_key *key); + +void +d3d12_compute_transform_cache_init(struct d3d12_context *ctx); + +void +d3d12_compute_transform_cache_destroy(struct d3d12_context *ctx); + +#endif diff --git a/src/gallium/drivers/d3d12/d3d12_context.cpp b/src/gallium/drivers/d3d12/d3d12_context.cpp index 1bd8e631a76..9e2144135bd 100644 --- a/src/gallium/drivers/d3d12/d3d12_context.cpp +++ b/src/gallium/drivers/d3d12/d3d12_context.cpp @@ -25,6 +25,7 @@ #include "d3d12_cmd_signature.h" #include "d3d12_context.h" #include "d3d12_compiler.h" +#include "d3d12_compute_transforms.h" #include "d3d12_debug.h" #include "d3d12_fence.h" #include "d3d12_format.h" @@ -81,6 +82,7 @@ d3d12_context_destroy(struct pipe_context *pctx) d3d12_compute_pipeline_state_cache_destroy(ctx); d3d12_root_signature_cache_destroy(ctx); d3d12_cmd_signature_cache_destroy(ctx); + d3d12_compute_transform_cache_destroy(ctx); u_suballocator_destroy(&ctx->query_allocator); @@ -2265,6 +2267,7 @@ d3d12_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) d3d12_root_signature_cache_init(ctx); d3d12_cmd_signature_cache_init(ctx); d3d12_gs_variant_cache_init(ctx); + d3d12_compute_transform_cache_init(ctx); util_dl_library *d3d12_mod = util_dl_open(UTIL_DL_PREFIX "d3d12" UTIL_DL_EXT); if (!d3d12_mod) { diff --git a/src/gallium/drivers/d3d12/d3d12_context.h b/src/gallium/drivers/d3d12/d3d12_context.h index c6f4d359fa1..db563e8a2fb 100644 --- a/src/gallium/drivers/d3d12/d3d12_context.h +++ b/src/gallium/drivers/d3d12/d3d12_context.h @@ -173,6 
+173,7 @@ struct d3d12_context { struct hash_table *root_signature_cache; struct hash_table *cmd_signature_cache; struct hash_table *gs_variant_cache; + struct hash_table *compute_transform_cache; struct d3d12_batch batches[4]; unsigned current_batch_idx; @@ -249,6 +250,8 @@ struct d3d12_context { struct d3d12_resource *current_predication; bool predication_condition; + uint32_t transform_state_vars[4]; + #ifdef __cplusplus ResourceStateManager *resource_state_manager; #else diff --git a/src/gallium/drivers/d3d12/d3d12_draw.cpp b/src/gallium/drivers/d3d12/d3d12_draw.cpp index 3cb8c234a6a..f8d778c2600 100644 --- a/src/gallium/drivers/d3d12/d3d12_draw.cpp +++ b/src/gallium/drivers/d3d12/d3d12_draw.cpp @@ -23,6 +23,7 @@ #include "d3d12_cmd_signature.h" #include "d3d12_compiler.h" +#include "d3d12_compute_transforms.h" #include "d3d12_context.h" #include "d3d12_format.h" #include "d3d12_query.h" @@ -344,7 +345,8 @@ fill_graphics_state_vars(struct d3d12_context *ctx, unsigned drawid, const struct pipe_draw_start_count_bias *draw, struct d3d12_shader *shader, - uint32_t *values) + uint32_t *values, + struct d3d12_cmd_signature_key *cmd_sig_key) { unsigned size = 0; @@ -368,6 +370,9 @@ fill_graphics_state_vars(struct d3d12_context *ctx, ptr[1] = dinfo->start_instance; ptr[2] = drawid; ptr[3] = dinfo->index_size ? 
-1 : 0; + cmd_sig_key->draw_params = 1; + cmd_sig_key->root_sig = ctx->gfx_pipeline_state.root_signature; + cmd_sig_key->draw_params_root_const_offset = size; size += 4; break; case D3D12_STATE_VAR_DEPTH_TRANSFORM: @@ -401,6 +406,15 @@ fill_compute_state_vars(struct d3d12_context *ctx, ptr[2] = info->grid[2]; size += 4; break; + case D3D12_STATE_VAR_TRANSFORM_GENERIC0: { + unsigned idx = shader->state_vars[j].var - D3D12_STATE_VAR_TRANSFORM_GENERIC0; + ptr[0] = ctx->transform_state_vars[idx * 4]; + ptr[1] = ctx->transform_state_vars[idx * 4 + 1]; + ptr[2] = ctx->transform_state_vars[idx * 4 + 2]; + ptr[3] = ctx->transform_state_vars[idx * 4 + 3]; + size += 4; + break; + } default: unreachable("unknown state variable"); } @@ -508,7 +522,8 @@ update_graphics_root_parameters(struct d3d12_context *ctx, unsigned drawid, const struct pipe_draw_start_count_bias *draw, D3D12_GPU_DESCRIPTOR_HANDLE root_desc_tables[MAX_DESCRIPTOR_TABLES], - int root_desc_indices[MAX_DESCRIPTOR_TABLES]) + int root_desc_indices[MAX_DESCRIPTOR_TABLES], + struct d3d12_cmd_signature_key *cmd_sig_key) { unsigned num_params = 0; unsigned num_root_descriptors = 0; @@ -522,7 +537,9 @@ update_graphics_root_parameters(struct d3d12_context *ctx, /* TODO Don't always update state vars */ if (shader_sel->current->num_state_vars > 0) { uint32_t constants[D3D12_MAX_GRAPHICS_STATE_VARS * 4]; - unsigned size = fill_graphics_state_vars(ctx, dinfo, drawid, draw, shader_sel->current, constants); + unsigned size = fill_graphics_state_vars(ctx, dinfo, drawid, draw, shader_sel->current, constants, cmd_sig_key); + if (cmd_sig_key->draw_params) + cmd_sig_key->draw_params_root_const_param = num_params; ctx->cmdlist->SetGraphicsRoot32BitConstants(num_params, size, constants, 0); num_params++; } @@ -695,6 +712,123 @@ d3d12_last_vertex_stage(struct d3d12_context *ctx) return sel; } +/* Run a compute pass over an indirect draw's argument buffer, producing a new + * buffer in which every draw record is prefixed with base vertex, base + * instance, draw ID and an is-indexed flag. + * + * Returns false, leaving *indirect_inout alone, when the draw is not indirect + * or the vertex shader reads none of the affected system values. Otherwise + * *indirect_inout is redirected to indirect_out, which describes the new + * buffer; the caller owns the reference in indirect_out->buffer. + */ +static bool +update_draw_indirect_with_sysvals(struct d3d12_context *ctx, + const struct pipe_draw_info *dinfo, + unsigned drawid, + const 
struct pipe_draw_indirect_info **indirect_inout, + struct pipe_draw_indirect_info *indirect_out) +{ + if (*indirect_inout == nullptr || + ctx->gfx_stages[PIPE_SHADER_VERTEX] == nullptr) + return false; + + unsigned sysvals[] = { + SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, + SYSTEM_VALUE_BASE_VERTEX, + SYSTEM_VALUE_FIRST_VERTEX, + SYSTEM_VALUE_BASE_INSTANCE, + SYSTEM_VALUE_DRAW_ID, + }; + bool any = false; + for (unsigned sysval : sysvals) { + any |= (BITSET_TEST(ctx->gfx_stages[PIPE_SHADER_VERTEX]->initial->info.system_values_read, sysval)); + } + if (!any) + return false; + + if (ctx->current_predication) + ctx->cmdlist->SetPredication(nullptr, 0, D3D12_PREDICATION_OP_EQUAL_ZERO); + + auto indirect_in = *indirect_inout; + *indirect_inout = indirect_out; + + d3d12_shader_selector *save_cs = ctx->compute_state; + + pipe_constant_buffer save_cs_cbuf0 = {}; + + pipe_shader_buffer save_cs_ssbos[2] = {}; + for (unsigned i = 0; i < 2; ++i) { + pipe_resource_reference(&save_cs_ssbos[i].buffer, ctx->ssbo_views[PIPE_SHADER_COMPUTE][i].buffer); + save_cs_ssbos[i] = ctx->ssbo_views[PIPE_SHADER_COMPUTE][i]; + } + + d3d12_compute_transform_key key = { + d3d12_compute_transform_type::base_vertex, + }; + key.base_vertex.indexed = dinfo->index_size > 0; + key.base_vertex.dynamic_count = indirect_in->indirect_draw_count != nullptr; + ctx->base.bind_compute_state(&ctx->base, d3d12_get_compute_transform(ctx, &key)); + + ctx->transform_state_vars[0] = indirect_in->stride; + ctx->transform_state_vars[1] = indirect_in->offset; + ctx->transform_state_vars[2] = drawid; + + if (indirect_in->indirect_draw_count) { + pipe_resource_reference(&save_cs_cbuf0.buffer, ctx->cbufs[PIPE_SHADER_COMPUTE][0].buffer); + save_cs_cbuf0 = ctx->cbufs[PIPE_SHADER_COMPUTE][0]; + + pipe_constant_buffer draw_count_cbuf; + draw_count_cbuf.buffer = indirect_in->indirect_draw_count; + draw_count_cbuf.buffer_offset = indirect_in->indirect_draw_count_offset; + draw_count_cbuf.buffer_size = 4; + draw_count_cbuf.user_buffer = 
nullptr; + /* indirect_draw_count is borrowed from the caller's draw info, so the binding must take its own reference */ + ctx->base.set_constant_buffer(&ctx->base, PIPE_SHADER_COMPUTE, 0, false, &draw_count_cbuf); + } + + pipe_shader_buffer new_cs_ssbos[2] = {}; + new_cs_ssbos[0].buffer = indirect_in->buffer; + new_cs_ssbos[0].buffer_offset = 0; + new_cs_ssbos[0].buffer_size = indirect_in->buffer->width0; + + /* 4 additional uints for base vertex, base instance, draw ID, and a bool for indexed draw */ + unsigned out_stride = sizeof(uint32_t) * ((key.base_vertex.indexed ? 5 : 4) + 4); + pipe_resource output_buf_templ = {}; + output_buf_templ.target = PIPE_BUFFER; + output_buf_templ.width0 = out_stride * indirect_in->draw_count; + output_buf_templ.height0 = output_buf_templ.depth0 = output_buf_templ.array_size = + output_buf_templ.last_level = 1; + output_buf_templ.usage = PIPE_USAGE_DEFAULT; + + new_cs_ssbos[1].buffer = ctx->base.screen->resource_create(ctx->base.screen, &output_buf_templ); + new_cs_ssbos[1].buffer_offset = 0; + new_cs_ssbos[1].buffer_size = output_buf_templ.width0; + ctx->base.set_shader_buffers(&ctx->base, PIPE_SHADER_COMPUTE, 0, 2, new_cs_ssbos, 2); + + pipe_grid_info grid = { + .block = {1, 1, 1}, + .grid = {indirect_in->draw_count, 1, 1} + }; + ctx->base.launch_grid(&ctx->base, &grid); + + ctx->base.bind_compute_state(&ctx->base, save_cs); + if (save_cs_cbuf0.buffer) + ctx->base.set_constant_buffer(&ctx->base, PIPE_SHADER_COMPUTE, 0, true, &save_cs_cbuf0); + ctx->base.set_shader_buffers(&ctx->base, PIPE_SHADER_COMPUTE, 0, 2, save_cs_ssbos, 3); + /* The bindings are restored; drop the references that kept the saved SSBOs alive */ + for (unsigned i = 0; i < 2; ++i) + pipe_resource_reference(&save_cs_ssbos[i].buffer, nullptr); + + if (ctx->current_predication) + d3d12_enable_predication(ctx); + + *indirect_out = *indirect_in; + indirect_out->buffer = new_cs_ssbos[1].buffer; + indirect_out->offset = 0; + indirect_out->stride = out_stride; + return true; +} + void d3d12_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *dinfo, @@ -717,6 +851,7 @@ d3d12_draw_vbo(struct pipe_context *pctx, struct pipe_resource *index_buffer = NULL; unsigned index_offset = 0; enum d3d12_surface_conversion_mode conversion_modes[PIPE_MAX_COLOR_BUFS] = 
{}; + struct pipe_draw_indirect_info patched_indirect = {}; if (!prim_supported((enum pipe_prim_type)dinfo->mode) || dinfo->index_size == 1 || @@ -734,6 +869,7 @@ d3d12_draw_vbo(struct pipe_context *pctx, return; } + bool indirect_with_sysvals = update_draw_indirect_with_sysvals(ctx, dinfo, drawid_offset, &indirect, &patched_indirect); struct d3d12_cmd_signature_key cmd_sig_key; memset(&cmd_sig_key, 0, sizeof(cmd_sig_key)); @@ -741,7 +877,8 @@ d3d12_draw_vbo(struct pipe_context *pctx, cmd_sig_key.compute = false; cmd_sig_key.indexed = dinfo->index_size > 0; if (indirect->draw_count > 1 || - indirect->indirect_draw_count) + indirect->indirect_draw_count || + indirect_with_sysvals) cmd_sig_key.multi_draw_stride = indirect->stride; else if (cmd_sig_key.indexed) cmd_sig_key.multi_draw_stride = sizeof(D3D12_DRAW_INDEXED_ARGUMENTS); @@ -871,7 +1008,7 @@ d3d12_draw_vbo(struct pipe_context *pctx, D3D12_GPU_DESCRIPTOR_HANDLE root_desc_tables[MAX_DESCRIPTOR_TABLES]; int root_desc_indices[MAX_DESCRIPTOR_TABLES]; unsigned num_root_descriptors = update_graphics_root_parameters(ctx, dinfo, drawid_offset, &draws[0], - root_desc_tables, root_desc_indices); + root_desc_tables, root_desc_indices, &cmd_sig_key); bool need_zero_one_depth_range = d3d12_need_zero_one_depth_range(ctx); if (need_zero_one_depth_range != ctx->need_zero_one_depth_range) { @@ -1069,6 +1206,8 @@ d3d12_draw_vbo(struct pipe_context *pctx, d3d12_surface_update_post_draw(pctx, surface, conversion_modes[i]); } } + + pipe_resource_reference(&patched_indirect.buffer, NULL); } void diff --git a/src/gallium/drivers/d3d12/meson.build b/src/gallium/drivers/d3d12/meson.build index 1beeb28d48c..591b67955d4 100644 --- a/src/gallium/drivers/d3d12/meson.build +++ b/src/gallium/drivers/d3d12/meson.build @@ -25,6 +25,7 @@ files_libd3d12 = files( 'd3d12_bufmgr.cpp', 'd3d12_cmd_signature.cpp', 'd3d12_compiler.cpp', + 'd3d12_compute_transforms.cpp', 'd3d12_context.cpp', 'd3d12_descriptor_pool.cpp', 'd3d12_draw.cpp',