mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 22:20:14 +01:00
anv: implement generated (indexed) indirect draws
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Rohan Garg <rohan.garg@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15642>
This commit is contained in:
parent
3596a8ea7a
commit
c950fe97a0
15 changed files with 1605 additions and 23 deletions
|
|
@ -7,6 +7,9 @@ Debugging
|
||||||
Here are a few debug environment variables
|
Here are a few debug environment variables
|
||||||
specific to ANV:
|
specific to ANV:
|
||||||
|
|
||||||
|
:envvar:`ANV_ENABLE_GENERATED_INDIRECT_DRAWS`
|
||||||
|
If defined to ``0`` or ``false``, this will disable the generated
|
||||||
|
indirect draw optimization in Anv. This will only affect Gfx11+.
|
||||||
:envvar:`ANV_ENABLE_PIPELINE_CACHE`
|
:envvar:`ANV_ENABLE_PIPELINE_CACHE`
|
||||||
If defined to ``0`` or ``false``, this will disable pipeline
|
If defined to ``0`` or ``false``, this will disable pipeline
|
||||||
caching, forcing ANV to reparse and recompile any VkShaderModule
|
caching, forcing ANV to reparse and recompile any VkShaderModule
|
||||||
|
|
@ -272,3 +275,34 @@ checking for ``ANV_CMD_DIRTY_PIPELINE``. It should only do so if it
|
||||||
requires to know some value that is coming from the
|
requires to know some value that is coming from the
|
||||||
``anv_graphics_pipeline`` object that is not available from
|
``anv_graphics_pipeline`` object that is not available from
|
||||||
``anv_dynamic_state``.
|
``anv_dynamic_state``.
|
||||||
|
|
||||||
|
|
||||||
|
Generated indirect draws optimization
|
||||||
|
-------------------------------------
|
||||||
|
|
||||||
|
Indirect draws have traditionally been implemented on Intel HW by
|
||||||
|
loading the indirect parameters from memory into HW registers using
|
||||||
|
the command streamer's ``MI_LOAD_REGISTER_MEM`` instruction before
|
||||||
|
dispatching a draw call to the 3D pipeline.
|
||||||
|
|
||||||
|
On recent products, it was found that the command streamer is showing
|
||||||
|
up as a performance bottleneck, because it cannot dispatch draw calls fast
|
||||||
|
enough to keep the 3D pipeline busy.
|
||||||
|
|
||||||
|
The solution to this problem is to change the way we deal with
|
||||||
|
indirect draws. Instead of loading HW registers with values using the
|
||||||
|
command streamer, we generate an entire set of ``3DPRIMITIVE``
|
||||||
|
instructions using a shader. The generated instructions contain the
|
||||||
|
entire draw call parameters. This way the command streamer executes
|
||||||
|
only ``3DPRIMITIVE`` instructions and doesn't do any data loading from
|
||||||
|
memory or touch HW registers, feeding the 3D pipeline as fast as it
|
||||||
|
can.
|
||||||
|
|
||||||
|
In Anv this is implemented by using a side batch buffer. When Anv
|
||||||
|
encounters the first indirect draw, it generates a jump into the side
|
||||||
|
batch. The side batch contains a draw call using a generation shader
|
||||||
|
for each indirect draw. We keep adding on more generation draws into
|
||||||
|
the batch until we have to stop due to command buffer end, secondary
|
||||||
|
command buffer calls or a barrier containing the access flag
|
||||||
|
``VK_ACCESS_INDIRECT_COMMAND_READ_BIT``. The side batch buffer jumps
|
||||||
|
back right after the instruction where it was called.
|
||||||
|
|
|
||||||
|
|
@ -84,6 +84,8 @@ def define_tracepoints(args):
|
||||||
Arg(type='enum isl_format', name='src_fmt', var='src_fmt', c_format='%s', to_prim_type='isl_format_get_short_name({})'),
|
Arg(type='enum isl_format', name='src_fmt', var='src_fmt', c_format='%s', to_prim_type='isl_format_get_short_name({})'),
|
||||||
])
|
])
|
||||||
|
|
||||||
|
begin_end_tp('generate_draws')
|
||||||
|
|
||||||
begin_end_tp('draw',
|
begin_end_tp('draw',
|
||||||
tp_args=[Arg(type='uint32_t', var='count', c_format='%u')])
|
tp_args=[Arg(type='uint32_t', var='count', c_format='%u')])
|
||||||
begin_end_tp('draw_multi',
|
begin_end_tp('draw_multi',
|
||||||
|
|
|
||||||
|
|
@ -193,6 +193,14 @@ anv_batch_emit_ensure_space(struct anv_batch *batch, uint32_t size)
|
||||||
return VK_SUCCESS;
|
return VK_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
anv_batch_advance(struct anv_batch *batch, uint32_t size)
|
||||||
|
{
|
||||||
|
assert(batch->next + size <= batch->end);
|
||||||
|
|
||||||
|
batch->next += size;
|
||||||
|
}
|
||||||
|
|
||||||
struct anv_address
|
struct anv_address
|
||||||
anv_batch_address(struct anv_batch *batch, void *batch_location)
|
anv_batch_address(struct anv_batch *batch, void *batch_location)
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -72,6 +72,7 @@ static const driOptionDescription anv_dri_options[] = {
|
||||||
DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS(false)
|
DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS(false)
|
||||||
DRI_CONF_ANV_SAMPLE_MASK_OUT_OPENGL_BEHAVIOUR(false)
|
DRI_CONF_ANV_SAMPLE_MASK_OUT_OPENGL_BEHAVIOUR(false)
|
||||||
DRI_CONF_ANV_FP64_WORKAROUND_ENABLED(false)
|
DRI_CONF_ANV_FP64_WORKAROUND_ENABLED(false)
|
||||||
|
DRI_CONF_ANV_GENERATED_INDIRECT_THRESHOLD(4)
|
||||||
DRI_CONF_SECTION_END
|
DRI_CONF_SECTION_END
|
||||||
|
|
||||||
DRI_CONF_SECTION_DEBUG
|
DRI_CONF_SECTION_DEBUG
|
||||||
|
|
@ -923,6 +924,12 @@ anv_physical_device_try_create(struct vk_instance *vk_instance,
|
||||||
if (debug_get_bool_option("ANV_QUEUE_THREAD_DISABLE", false))
|
if (debug_get_bool_option("ANV_QUEUE_THREAD_DISABLE", false))
|
||||||
device->has_exec_timeline = false;
|
device->has_exec_timeline = false;
|
||||||
|
|
||||||
|
|
||||||
|
device->generated_indirect_draws =
|
||||||
|
device->info.ver >= 11 &&
|
||||||
|
debug_get_bool_option("ANV_ENABLE_GENERATED_INDIRECT_DRAWS",
|
||||||
|
true);
|
||||||
|
|
||||||
unsigned st_idx = 0;
|
unsigned st_idx = 0;
|
||||||
|
|
||||||
device->sync_syncobj_type = vk_drm_syncobj_get_type(fd);
|
device->sync_syncobj_type = vk_drm_syncobj_get_type(fd);
|
||||||
|
|
@ -1104,6 +1111,8 @@ anv_init_dri_options(struct anv_instance *instance)
|
||||||
driQueryOptionf(&instance->dri_options, "lower_depth_range_rate");
|
driQueryOptionf(&instance->dri_options, "lower_depth_range_rate");
|
||||||
instance->fp64_workaround_enabled =
|
instance->fp64_workaround_enabled =
|
||||||
driQueryOptionb(&instance->dri_options, "fp64_workaround_enabled");
|
driQueryOptionb(&instance->dri_options, "fp64_workaround_enabled");
|
||||||
|
instance->generated_indirect_threshold =
|
||||||
|
driQueryOptioni(&instance->dri_options, "generated_indirect_threshold");
|
||||||
}
|
}
|
||||||
|
|
||||||
VkResult anv_CreateInstance(
|
VkResult anv_CreateInstance(
|
||||||
|
|
@ -3660,6 +3669,8 @@ VkResult anv_CreateDevice(
|
||||||
|
|
||||||
anv_device_init_border_colors(device);
|
anv_device_init_border_colors(device);
|
||||||
|
|
||||||
|
anv_device_init_generated_indirect_draws(device);
|
||||||
|
|
||||||
anv_device_perf_init(device);
|
anv_device_perf_init(device);
|
||||||
|
|
||||||
anv_device_utrace_init(device);
|
anv_device_utrace_init(device);
|
||||||
|
|
@ -3747,6 +3758,8 @@ void anv_DestroyDevice(
|
||||||
|
|
||||||
anv_device_finish_rt_shaders(device);
|
anv_device_finish_rt_shaders(device);
|
||||||
|
|
||||||
|
anv_device_finish_generated_indirect_draws(device);
|
||||||
|
|
||||||
vk_pipeline_cache_destroy(device->internal_cache, NULL);
|
vk_pipeline_cache_destroy(device->internal_cache, NULL);
|
||||||
vk_pipeline_cache_destroy(device->default_pipeline_cache, NULL);
|
vk_pipeline_cache_destroy(device->default_pipeline_cache, NULL);
|
||||||
|
|
||||||
|
|
|
||||||
341
src/intel/vulkan/anv_generated_indirect_draws.c
Normal file
341
src/intel/vulkan/anv_generated_indirect_draws.c
Normal file
|
|
@ -0,0 +1,341 @@
|
||||||
|
/*
|
||||||
|
* Copyright © 2022 Intel Corporation
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the next
|
||||||
|
* paragraph) shall be included in all copies or substantial portions of the
|
||||||
|
* Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||||
|
* IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "anv_private.h"
|
||||||
|
|
||||||
|
#include "compiler/brw_compiler.h"
|
||||||
|
#include "compiler/brw_nir.h"
|
||||||
|
#include "compiler/spirv/nir_spirv.h"
|
||||||
|
#include "dev/intel_debug.h"
|
||||||
|
#include "util/macros.h"
|
||||||
|
|
||||||
|
#include "anv_generated_indirect_draws.h"
|
||||||
|
|
||||||
|
#include "shaders/generated_draws_spv.h"
|
||||||
|
#include "shaders/generated_draws_count_spv.h"
|
||||||
|
|
||||||
|
/* This pass takes vulkan descriptor bindings 0 & 1 and turns them into global
|
||||||
|
* 64bit addresses. Binding 2 is left UBO that would normally be accessed
|
||||||
|
* through the binding table but it fully promoted to push constants.
|
||||||
|
*
|
||||||
|
* As a result we're not using the binding table at all which is nice because
|
||||||
|
* of the side command buffer we use for the generating shader does not
|
||||||
|
* interact with the binding table allocation.
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
lower_vulkan_descriptors_instr(nir_builder *b, nir_instr *instr, void *cb_data)
|
||||||
|
{
|
||||||
|
if (instr->type != nir_instr_type_intrinsic)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||||
|
if (intrin->intrinsic != nir_intrinsic_load_vulkan_descriptor)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
nir_instr *res_index_instr = intrin->src[0].ssa->parent_instr;
|
||||||
|
assert(res_index_instr->type == nir_instr_type_intrinsic);
|
||||||
|
nir_intrinsic_instr *res_index_intrin =
|
||||||
|
nir_instr_as_intrinsic(res_index_instr);
|
||||||
|
assert(res_index_intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
|
||||||
|
|
||||||
|
b->cursor = nir_after_instr(instr);
|
||||||
|
|
||||||
|
nir_ssa_def *desc_value = NULL;
|
||||||
|
switch (nir_intrinsic_binding(res_index_intrin)) {
|
||||||
|
case 0: {
|
||||||
|
desc_value =
|
||||||
|
nir_load_ubo(b, 1, 64,
|
||||||
|
nir_imm_int(b, 2),
|
||||||
|
nir_imm_int(b,
|
||||||
|
offsetof(struct anv_generate_indirect_params,
|
||||||
|
indirect_data_addr)),
|
||||||
|
.align_mul = 8,
|
||||||
|
.align_offset = 0,
|
||||||
|
.range_base = 0,
|
||||||
|
.range = ~0);
|
||||||
|
desc_value =
|
||||||
|
nir_vec4(b,
|
||||||
|
nir_unpack_64_2x32_split_x(b, desc_value),
|
||||||
|
nir_unpack_64_2x32_split_y(b, desc_value),
|
||||||
|
nir_imm_int(b, 0),
|
||||||
|
nir_imm_int(b, 0));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case 1: {
|
||||||
|
desc_value =
|
||||||
|
nir_load_ubo(b, 1, 64,
|
||||||
|
nir_imm_int(b, 2),
|
||||||
|
nir_imm_int(b,
|
||||||
|
offsetof(struct anv_generate_indirect_params,
|
||||||
|
generated_cmds_addr)),
|
||||||
|
.align_mul = 8,
|
||||||
|
.align_offset = 0,
|
||||||
|
.range_base = 0,
|
||||||
|
.range = ~0);
|
||||||
|
desc_value =
|
||||||
|
nir_vec4(b,
|
||||||
|
nir_unpack_64_2x32_split_x(b, desc_value),
|
||||||
|
nir_unpack_64_2x32_split_y(b, desc_value),
|
||||||
|
nir_imm_int(b, 0),
|
||||||
|
nir_imm_int(b, 0));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case 2:
|
||||||
|
desc_value =
|
||||||
|
nir_vec2(b,
|
||||||
|
nir_imm_int(b, 2),
|
||||||
|
nir_imm_int(b, 0));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, desc_value);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
lower_vulkan_descriptors(nir_shader *shader)
|
||||||
|
{
|
||||||
|
return nir_shader_instructions_pass(shader,
|
||||||
|
lower_vulkan_descriptors_instr,
|
||||||
|
nir_metadata_block_index |
|
||||||
|
nir_metadata_dominance,
|
||||||
|
NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct anv_shader_bin *
|
||||||
|
compile_upload_spirv(struct anv_device *device,
|
||||||
|
const void *key,
|
||||||
|
uint32_t key_size,
|
||||||
|
const uint32_t *spirv_source,
|
||||||
|
uint32_t spirv_source_size,
|
||||||
|
uint32_t sends_count_expectation)
|
||||||
|
{
|
||||||
|
struct spirv_to_nir_options spirv_options = {
|
||||||
|
.caps = {
|
||||||
|
},
|
||||||
|
.ubo_addr_format = nir_address_format_32bit_index_offset,
|
||||||
|
.ssbo_addr_format = nir_address_format_64bit_global_32bit_offset,
|
||||||
|
.environment = NIR_SPIRV_VULKAN,
|
||||||
|
.create_library = false,
|
||||||
|
};
|
||||||
|
const nir_shader_compiler_options *nir_options =
|
||||||
|
device->physical->compiler->nir_options[MESA_SHADER_FRAGMENT];
|
||||||
|
|
||||||
|
nir_shader* nir =
|
||||||
|
spirv_to_nir(spirv_source, spirv_source_size,
|
||||||
|
NULL, 0, MESA_SHADER_FRAGMENT, "main",
|
||||||
|
&spirv_options, nir_options);
|
||||||
|
|
||||||
|
assert(nir != NULL);
|
||||||
|
|
||||||
|
nir->info.internal = true;
|
||||||
|
|
||||||
|
nir_validate_shader(nir, "after spirv_to_nir");
|
||||||
|
nir_validate_ssa_dominance(nir, "after spirv_to_nir");
|
||||||
|
|
||||||
|
NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
|
||||||
|
NIR_PASS_V(nir, nir_lower_returns);
|
||||||
|
NIR_PASS_V(nir, nir_inline_functions);
|
||||||
|
NIR_PASS_V(nir, nir_opt_deref);
|
||||||
|
|
||||||
|
NIR_PASS_V(nir, nir_lower_vars_to_ssa);
|
||||||
|
NIR_PASS_V(nir, nir_copy_prop);
|
||||||
|
NIR_PASS_V(nir, nir_opt_dce);
|
||||||
|
NIR_PASS_V(nir, nir_opt_cse);
|
||||||
|
NIR_PASS_V(nir, nir_opt_gcm, true);
|
||||||
|
NIR_PASS_V(nir, nir_opt_peephole_select, 1, false, false);
|
||||||
|
NIR_PASS_V(nir, nir_opt_dce);
|
||||||
|
|
||||||
|
NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);
|
||||||
|
|
||||||
|
NIR_PASS_V(nir, nir_split_var_copies);
|
||||||
|
NIR_PASS_V(nir, nir_split_per_member_structs);
|
||||||
|
|
||||||
|
struct brw_compiler *compiler = device->physical->compiler;
|
||||||
|
struct brw_nir_compiler_opts opts = {};
|
||||||
|
brw_preprocess_nir(compiler, nir, &opts);
|
||||||
|
|
||||||
|
NIR_PASS_V(nir, nir_propagate_invariant, false);
|
||||||
|
|
||||||
|
NIR_PASS_V(nir, nir_lower_input_attachments,
|
||||||
|
&(nir_input_attachment_options) {
|
||||||
|
.use_fragcoord_sysval = true,
|
||||||
|
.use_layer_id_sysval = true,
|
||||||
|
});
|
||||||
|
|
||||||
|
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
|
||||||
|
|
||||||
|
/* Do vectorizing here. For some reason when trying to do it in the back
|
||||||
|
* this just isn't working.
|
||||||
|
*/
|
||||||
|
nir_load_store_vectorize_options options = {
|
||||||
|
.modes = nir_var_mem_ubo | nir_var_mem_ssbo,
|
||||||
|
.callback = brw_nir_should_vectorize_mem,
|
||||||
|
.robust_modes = (nir_variable_mode)0,
|
||||||
|
};
|
||||||
|
NIR_PASS_V(nir, nir_opt_load_store_vectorize, &options);
|
||||||
|
|
||||||
|
NIR_PASS_V(nir, lower_vulkan_descriptors);
|
||||||
|
NIR_PASS_V(nir, nir_opt_dce);
|
||||||
|
|
||||||
|
NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo,
|
||||||
|
nir_address_format_32bit_index_offset);
|
||||||
|
NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
|
||||||
|
nir_address_format_64bit_global_32bit_offset);
|
||||||
|
|
||||||
|
NIR_PASS_V(nir, nir_copy_prop);
|
||||||
|
NIR_PASS_V(nir, nir_opt_constant_folding);
|
||||||
|
NIR_PASS_V(nir, nir_opt_dce);
|
||||||
|
|
||||||
|
struct brw_wm_prog_key wm_key;
|
||||||
|
memset(&wm_key, 0, sizeof(wm_key));
|
||||||
|
|
||||||
|
struct brw_wm_prog_data wm_prog_data = {
|
||||||
|
.base.nr_params = nir->num_uniforms / 4,
|
||||||
|
};
|
||||||
|
|
||||||
|
brw_nir_analyze_ubo_ranges(compiler, nir, NULL, wm_prog_data.base.ubo_ranges);
|
||||||
|
|
||||||
|
struct brw_compile_stats stats[3];
|
||||||
|
struct brw_compile_fs_params params = {
|
||||||
|
.nir = nir,
|
||||||
|
.key = &wm_key,
|
||||||
|
.prog_data = &wm_prog_data,
|
||||||
|
.stats = stats,
|
||||||
|
.log_data = device,
|
||||||
|
.debug_flag = DEBUG_WM,
|
||||||
|
};
|
||||||
|
const unsigned *program = brw_compile_fs(compiler, nir, ¶ms);
|
||||||
|
|
||||||
|
if (wm_prog_data.dispatch_8) {
|
||||||
|
assert(stats[0].spills == 0);
|
||||||
|
assert(stats[0].fills == 0);
|
||||||
|
assert(stats[0].sends == sends_count_expectation);
|
||||||
|
}
|
||||||
|
if (wm_prog_data.dispatch_16) {
|
||||||
|
assert(stats[1].spills == 0);
|
||||||
|
assert(stats[1].fills == 0);
|
||||||
|
assert(stats[1].sends == sends_count_expectation);
|
||||||
|
}
|
||||||
|
if (wm_prog_data.dispatch_32) {
|
||||||
|
assert(stats[2].spills == 0);
|
||||||
|
assert(stats[2].fills == 0);
|
||||||
|
assert(stats[2].sends == sends_count_expectation);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct anv_pipeline_bind_map bind_map;
|
||||||
|
memset(&bind_map, 0, sizeof(bind_map));
|
||||||
|
|
||||||
|
struct anv_push_descriptor_info push_desc_info = {};
|
||||||
|
|
||||||
|
struct anv_shader_bin *kernel =
|
||||||
|
anv_device_upload_kernel(device,
|
||||||
|
device->internal_cache,
|
||||||
|
nir->info.stage,
|
||||||
|
key, key_size, program,
|
||||||
|
wm_prog_data.base.program_size,
|
||||||
|
&wm_prog_data.base, sizeof(wm_prog_data),
|
||||||
|
NULL, 0, NULL, &bind_map,
|
||||||
|
&push_desc_info);
|
||||||
|
|
||||||
|
ralloc_free(nir);
|
||||||
|
|
||||||
|
return kernel;
|
||||||
|
}
|
||||||
|
|
||||||
|
VkResult
|
||||||
|
anv_device_init_generated_indirect_draws(struct anv_device *device)
|
||||||
|
{
|
||||||
|
if (device->info->ver < 11)
|
||||||
|
return VK_SUCCESS;
|
||||||
|
|
||||||
|
const struct intel_l3_weights w =
|
||||||
|
intel_get_default_l3_weights(device->info,
|
||||||
|
true /* wants_dc_cache */,
|
||||||
|
false /* needs_slm */);
|
||||||
|
device->generated_draw_l3_config = intel_get_l3_config(device->info, w);
|
||||||
|
|
||||||
|
struct {
|
||||||
|
char name[40];
|
||||||
|
} indirect_draws_key = {
|
||||||
|
.name = "anv-generated-indirect-draws",
|
||||||
|
}, indirect_draws_count_key = {
|
||||||
|
.name = "anv-generated-indirect-draws-count",
|
||||||
|
};
|
||||||
|
|
||||||
|
device->generated_draw_kernel =
|
||||||
|
anv_device_search_for_kernel(device,
|
||||||
|
device->internal_cache,
|
||||||
|
&indirect_draws_key,
|
||||||
|
sizeof(indirect_draws_key),
|
||||||
|
NULL);
|
||||||
|
if (device->generated_draw_kernel == NULL) {
|
||||||
|
device->generated_draw_kernel =
|
||||||
|
compile_upload_spirv(device,
|
||||||
|
&indirect_draws_key,
|
||||||
|
sizeof(indirect_draws_key),
|
||||||
|
generated_draws_spv_source,
|
||||||
|
ARRAY_SIZE(generated_draws_spv_source),
|
||||||
|
10 /* 2 * (2 loads + 3 stores) */);
|
||||||
|
}
|
||||||
|
if (device->generated_draw_kernel == NULL)
|
||||||
|
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||||
|
|
||||||
|
/* The cache already has a reference and it's not going anywhere so there
|
||||||
|
* is no need to hold a second reference.
|
||||||
|
*/
|
||||||
|
anv_shader_bin_unref(device, device->generated_draw_kernel);
|
||||||
|
|
||||||
|
device->generated_draw_count_kernel =
|
||||||
|
anv_device_search_for_kernel(device,
|
||||||
|
device->internal_cache,
|
||||||
|
&indirect_draws_count_key,
|
||||||
|
sizeof(indirect_draws_count_key),
|
||||||
|
NULL);
|
||||||
|
if (device->generated_draw_count_kernel == NULL) {
|
||||||
|
device->generated_draw_count_kernel =
|
||||||
|
compile_upload_spirv(device,
|
||||||
|
&indirect_draws_count_key,
|
||||||
|
sizeof(indirect_draws_count_key),
|
||||||
|
generated_draws_count_spv_source,
|
||||||
|
ARRAY_SIZE(generated_draws_count_spv_source),
|
||||||
|
11 /* 2 * (3 loads + 3 stores) */);
|
||||||
|
}
|
||||||
|
if (device->generated_draw_count_kernel == NULL)
|
||||||
|
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||||
|
|
||||||
|
/* The cache already has a reference and it's not going anywhere so there
|
||||||
|
* is no need to hold a second reference.
|
||||||
|
*/
|
||||||
|
anv_shader_bin_unref(device, device->generated_draw_count_kernel);
|
||||||
|
|
||||||
|
return VK_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Teardown counterpart of anv_device_init_generated_indirect_draws().
 *
 * Currently empty: the init function drops its own references on both
 * kernels right after obtaining them, so there is nothing to release here.
 */
void
|
||||||
|
anv_device_finish_generated_indirect_draws(struct anv_device *device)
|
||||||
|
{
|
||||||
|
}
|
||||||
71
src/intel/vulkan/anv_generated_indirect_draws.h
Normal file
71
src/intel/vulkan/anv_generated_indirect_draws.h
Normal file
|
|
@ -0,0 +1,71 @@
|
||||||
|
/*
|
||||||
|
* Copyright © 2022 Intel Corporation
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the next
|
||||||
|
* paragraph) shall be included in all copies or substantial portions of the
|
||||||
|
* Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||||
|
* IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef ANV_GENERATED_INDIRECT_DRAWS_H
|
||||||
|
#define ANV_GENERATED_INDIRECT_DRAWS_H
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
/* This needs to match generated_draws.glsl :
|
||||||
|
*
|
||||||
|
* layout(set = 0, binding = 2) uniform block
|
||||||
|
*/
|
||||||
|
/* Parameters of the plain (non-count) generation shader; stored in the
 * `draw` member of struct anv_generate_indirect_params. Field order and
 * sizes are part of the shader interface and must not be changed alone.
 */
struct anv_generated_indirect_draw_params {
|
||||||
|
uint32_t is_indexed;
|
||||||
|
uint32_t is_predicated;
|
||||||
|
uint32_t draw_base;
|
||||||
|
uint32_t draw_count;
|
||||||
|
uint32_t instance_multiplier;
|
||||||
|
uint32_t indirect_data_stride;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* This needs to match generated_draws_count.glsl :
|
||||||
|
*
|
||||||
|
* layout(set = 0, binding = 2) uniform block
|
||||||
|
*/
|
||||||
|
/* Parameters of the "count" generation shader; stored in the `draw_count`
 * member of struct anv_generate_indirect_params. Field order and sizes are
 * part of the shader interface and must not be changed alone.
 */
struct anv_generated_indirect_draw_count_params {
|
||||||
|
uint32_t is_indexed;
|
||||||
|
uint32_t is_predicated;
|
||||||
|
uint32_t draw_base;
|
||||||
|
uint32_t item_count;
|
||||||
|
uint32_t draw_count;
|
||||||
|
uint32_t instance_multiplier;
|
||||||
|
uint32_t indirect_data_stride;
|
||||||
|
/* NOTE(review): presumably the low/high dwords of one 64bit address - confirm */
uint32_t end_addr_ldw;
|
||||||
|
uint32_t end_addr_udw;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Complete parameter block for a generation shader: the per-variant
 * parameters, followed by the two 64bit addresses that
 * lower_vulkan_descriptors_instr() loads (via offsetof() on this struct)
 * in place of descriptor bindings 0 and 1.
 */
struct anv_generate_indirect_params {
|
||||||
|
/* Only one member is meaningful, depending on which shader variant runs */
union {
|
||||||
|
struct anv_generated_indirect_draw_params draw;
|
||||||
|
struct anv_generated_indirect_draw_count_params draw_count;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Global address of binding 0 */
|
||||||
|
uint64_t indirect_data_addr;
|
||||||
|
|
||||||
|
/* Global address of binding 1 */
|
||||||
|
uint64_t generated_cmds_addr;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif /* ANV_GENERATED_INDIRECT_DRAWS_H */
|
||||||
|
|
@ -31,7 +31,7 @@
|
||||||
#include "nir/nir_xfb_info.h"
|
#include "nir/nir_xfb_info.h"
|
||||||
#include "vulkan/util/vk_util.h"
|
#include "vulkan/util/vk_util.h"
|
||||||
#include "compiler/spirv/nir_spirv.h"
|
#include "compiler/spirv/nir_spirv.h"
|
||||||
#include "float64_spv.h"
|
#include "shaders/float64_spv.h"
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
anv_shader_bin_serialize(struct vk_pipeline_cache_object *object,
|
anv_shader_bin_serialize(struct vk_pipeline_cache_object *object,
|
||||||
|
|
|
||||||
|
|
@ -1009,6 +1009,15 @@ struct anv_physical_device {
|
||||||
|
|
||||||
bool always_flush_cache;
|
bool always_flush_cache;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* True if the generated indirect draw optimization is turned on.
|
||||||
|
*
|
||||||
|
* This optimization is currently only available on Gfx11+ to avoid
|
||||||
|
* dealing with the annoying Gfx8/9 tracking of vertex buffer for the VF
|
||||||
|
* cache workaround.
|
||||||
|
*/
|
||||||
|
bool generated_indirect_draws;
|
||||||
|
|
||||||
struct {
|
struct {
|
||||||
uint32_t family_count;
|
uint32_t family_count;
|
||||||
struct anv_queue_family families[ANV_MAX_QUEUE_FAMILIES];
|
struct anv_queue_family families[ANV_MAX_QUEUE_FAMILIES];
|
||||||
|
|
@ -1075,6 +1084,7 @@ struct anv_instance {
|
||||||
bool sample_mask_out_opengl_behaviour;
|
bool sample_mask_out_opengl_behaviour;
|
||||||
bool fp64_workaround_enabled;
|
bool fp64_workaround_enabled;
|
||||||
float lower_depth_range_rate;
|
float lower_depth_range_rate;
|
||||||
|
unsigned generated_indirect_threshold;
|
||||||
};
|
};
|
||||||
|
|
||||||
VkResult anv_init_wsi(struct anv_physical_device *physical_device);
|
VkResult anv_init_wsi(struct anv_physical_device *physical_device);
|
||||||
|
|
@ -1241,6 +1251,15 @@ struct anv_device {
|
||||||
|
|
||||||
enum anv_rt_bvh_build_method bvh_build_method;
|
enum anv_rt_bvh_build_method bvh_build_method;
|
||||||
|
|
||||||
|
/** Draw generation shader
|
||||||
|
*
|
||||||
|
* Generates direct draw calls out of indirect parameters. Used to
|
||||||
|
* work around slowness with indirect draw calls.
|
||||||
|
*/
|
||||||
|
struct anv_shader_bin *generated_draw_kernel;
|
||||||
|
struct anv_shader_bin *generated_draw_count_kernel;
|
||||||
|
const struct intel_l3_config *generated_draw_l3_config;
|
||||||
|
|
||||||
pthread_mutex_t mutex;
|
pthread_mutex_t mutex;
|
||||||
pthread_cond_t queue_submit;
|
pthread_cond_t queue_submit;
|
||||||
|
|
||||||
|
|
@ -1462,6 +1481,7 @@ struct anv_batch {
|
||||||
|
|
||||||
void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords);
|
void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords);
|
||||||
VkResult anv_batch_emit_ensure_space(struct anv_batch *batch, uint32_t size);
|
VkResult anv_batch_emit_ensure_space(struct anv_batch *batch, uint32_t size);
|
||||||
|
void anv_batch_advance(struct anv_batch *batch, uint32_t size);
|
||||||
void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other);
|
void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other);
|
||||||
struct anv_address anv_batch_address(struct anv_batch *batch, void *batch_location);
|
struct anv_address anv_batch_address(struct anv_batch *batch, void *batch_location);
|
||||||
|
|
||||||
|
|
@ -2887,6 +2907,13 @@ void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer);
|
||||||
|
|
||||||
void anv_cmd_emit_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer);
|
void anv_cmd_emit_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer);
|
||||||
|
|
||||||
|
static inline unsigned
|
||||||
|
anv_cmd_buffer_get_view_count(struct anv_cmd_buffer *cmd_buffer)
|
||||||
|
{
|
||||||
|
struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
|
||||||
|
return MAX2(1, util_bitcount(gfx->view_mask));
|
||||||
|
}
|
||||||
|
|
||||||
enum anv_bo_sync_state {
|
enum anv_bo_sync_state {
|
||||||
/** Indicates that this is a new (or newly reset fence) */
|
/** Indicates that this is a new (or newly reset fence) */
|
||||||
ANV_BO_SYNC_STATE_RESET,
|
ANV_BO_SYNC_STATE_RESET,
|
||||||
|
|
@ -4139,6 +4166,18 @@ struct anv_memcpy_state {
|
||||||
struct anv_vb_cache_range vb_dirty;
|
struct anv_vb_cache_range vb_dirty;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
VkResult
|
||||||
|
anv_device_init_generated_indirect_draws(struct anv_device *device);
|
||||||
|
void
|
||||||
|
anv_device_finish_generated_indirect_draws(struct anv_device *device);
|
||||||
|
|
||||||
|
static inline bool anv_use_generated_draws(const struct anv_device *device,
|
||||||
|
uint32_t count)
|
||||||
|
{
|
||||||
|
return device->physical->generated_indirect_draws &&
|
||||||
|
count >= device->physical->instance->generated_indirect_threshold;
|
||||||
|
}
|
||||||
|
|
||||||
struct anv_utrace_flush_copy {
|
struct anv_utrace_flush_copy {
|
||||||
/* Needs to be the first field */
|
/* Needs to be the first field */
|
||||||
struct intel_ds_flush_data ds;
|
struct intel_ds_flush_data ds;
|
||||||
|
|
|
||||||
|
|
@ -3413,6 +3413,11 @@ genX(cmd_buffer_flush_gfx_state)(struct anv_cmd_buffer *cmd_buffer)
|
||||||
genX(cmd_buffer_flush_dynamic_state)(cmd_buffer);
|
genX(cmd_buffer_flush_dynamic_state)(cmd_buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Non-zero when this per-generation build supports generated indirect draws
 * (Gfx11+). The expansion is parenthesized so the condition stays intact
 * when combined with other operators, e.g. `!GFX_HAS_GENERATED_CMDS`.
 */
#define GFX_HAS_GENERATED_CMDS (GFX_VER >= 11)
|
||||||
|
#if GFX_VER >= 11
|
||||||
|
#include "genX_cmd_draw_generated_indirect.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
VkResult
|
VkResult
|
||||||
genX(BeginCommandBuffer)(
|
genX(BeginCommandBuffer)(
|
||||||
VkCommandBuffer commandBuffer,
|
VkCommandBuffer commandBuffer,
|
||||||
|
|
@ -3618,6 +3623,10 @@ genX(EndCommandBuffer)(
|
||||||
|
|
||||||
anv_measure_endcommandbuffer(cmd_buffer);
|
anv_measure_endcommandbuffer(cmd_buffer);
|
||||||
|
|
||||||
|
#if GFX_HAS_GENERATED_CMDS
|
||||||
|
genX(cmd_buffer_flush_generated_draws)(cmd_buffer);
|
||||||
|
#endif
|
||||||
|
|
||||||
/* We want every command buffer to start with the PMA fix in a known state,
|
/* We want every command buffer to start with the PMA fix in a known state,
|
||||||
* so we disable it at the end of the command buffer.
|
* so we disable it at the end of the command buffer.
|
||||||
*/
|
*/
|
||||||
|
|
@ -3657,6 +3666,10 @@ genX(CmdExecuteCommands)(
|
||||||
*/
|
*/
|
||||||
genX(cmd_buffer_apply_pipe_flushes)(primary);
|
genX(cmd_buffer_apply_pipe_flushes)(primary);
|
||||||
|
|
||||||
|
#if GFX_HAS_GENERATED_CMDS
|
||||||
|
genX(cmd_buffer_flush_generated_draws)(primary);
|
||||||
|
#endif
|
||||||
|
|
||||||
for (uint32_t i = 0; i < commandBufferCount; i++) {
|
for (uint32_t i = 0; i < commandBufferCount; i++) {
|
||||||
ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]);
|
ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]);
|
||||||
|
|
||||||
|
|
@ -3819,6 +3832,11 @@ cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer,
|
||||||
anv_pipe_flush_bits_for_access_flags(cmd_buffer->device, src_flags) |
|
anv_pipe_flush_bits_for_access_flags(cmd_buffer->device, src_flags) |
|
||||||
anv_pipe_invalidate_bits_for_access_flags(cmd_buffer->device, dst_flags);
|
anv_pipe_invalidate_bits_for_access_flags(cmd_buffer->device, dst_flags);
|
||||||
|
|
||||||
|
#if GFX_HAS_GENERATED_CMDS
|
||||||
|
if (dst_flags & VK_ACCESS_INDIRECT_COMMAND_READ_BIT)
|
||||||
|
genX(cmd_buffer_flush_generated_draws)(cmd_buffer);
|
||||||
|
#endif
|
||||||
|
|
||||||
anv_add_pending_pipe_bits(cmd_buffer, bits, reason);
|
anv_add_pending_pipe_bits(cmd_buffer, bits, reason);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -4442,9 +4460,24 @@ void genX(CmdDrawIndirect)(
|
||||||
drawCount);
|
drawCount);
|
||||||
trace_intel_begin_draw_indirect(&cmd_buffer->trace);
|
trace_intel_begin_draw_indirect(&cmd_buffer->trace);
|
||||||
|
|
||||||
|
#if GFX_HAS_GENERATED_CMDS
|
||||||
|
if (anv_use_generated_draws(cmd_buffer->device, drawCount)) {
|
||||||
|
genX(cmd_buffer_emit_indirect_generated_draws)(
|
||||||
|
cmd_buffer,
|
||||||
|
anv_address_add(buffer->address, offset),
|
||||||
|
MAX2(stride, sizeof(VkDrawIndirectCommand)),
|
||||||
|
drawCount,
|
||||||
|
false /* indexed */);
|
||||||
|
} else {
|
||||||
|
emit_indirect_draws(cmd_buffer,
|
||||||
|
anv_address_add(buffer->address, offset),
|
||||||
|
stride, drawCount, false /* indexed */);
|
||||||
|
}
|
||||||
|
#else
|
||||||
emit_indirect_draws(cmd_buffer,
|
emit_indirect_draws(cmd_buffer,
|
||||||
anv_address_add(buffer->address, offset),
|
anv_address_add(buffer->address, offset),
|
||||||
stride, drawCount, false /* indexed */);
|
stride, drawCount, false /* indexed */);
|
||||||
|
#endif
|
||||||
|
|
||||||
trace_intel_end_draw_indirect(&cmd_buffer->trace, drawCount);
|
trace_intel_end_draw_indirect(&cmd_buffer->trace, drawCount);
|
||||||
}
|
}
|
||||||
|
|
@ -4468,9 +4501,24 @@ void genX(CmdDrawIndexedIndirect)(
|
||||||
drawCount);
|
drawCount);
|
||||||
trace_intel_begin_draw_indexed_indirect(&cmd_buffer->trace);
|
trace_intel_begin_draw_indexed_indirect(&cmd_buffer->trace);
|
||||||
|
|
||||||
|
#if GFX_HAS_GENERATED_CMDS
|
||||||
|
if (anv_use_generated_draws(cmd_buffer->device, drawCount)) {
|
||||||
|
genX(cmd_buffer_emit_indirect_generated_draws)(
|
||||||
|
cmd_buffer,
|
||||||
|
anv_address_add(buffer->address, offset),
|
||||||
|
MAX2(stride, sizeof(VkDrawIndexedIndirectCommand)),
|
||||||
|
drawCount,
|
||||||
|
true /* indexed */);
|
||||||
|
} else {
|
||||||
|
emit_indirect_draws(cmd_buffer,
|
||||||
|
anv_address_add(buffer->address, offset),
|
||||||
|
stride, drawCount, true /* indexed */);
|
||||||
|
}
|
||||||
|
#else
|
||||||
emit_indirect_draws(cmd_buffer,
|
emit_indirect_draws(cmd_buffer,
|
||||||
anv_address_add(buffer->address, offset),
|
anv_address_add(buffer->address, offset),
|
||||||
stride, drawCount, true /* indexed */);
|
stride, drawCount, true /* indexed */);
|
||||||
|
#endif
|
||||||
|
|
||||||
trace_intel_end_draw_indexed_indirect(&cmd_buffer->trace, drawCount);
|
trace_intel_end_draw_indexed_indirect(&cmd_buffer->trace, drawCount);
|
||||||
}
|
}
|
||||||
|
|
@ -4643,12 +4691,37 @@ void genX(CmdDrawIndirectCount)(
|
||||||
0);
|
0);
|
||||||
trace_intel_begin_draw_indirect_count(&cmd_buffer->trace);
|
trace_intel_begin_draw_indirect_count(&cmd_buffer->trace);
|
||||||
|
|
||||||
|
struct anv_address indirect_data_address =
|
||||||
|
anv_address_add(buffer->address, offset);
|
||||||
|
struct anv_address count_address =
|
||||||
|
anv_address_add(count_buffer->address, countBufferOffset);
|
||||||
|
stride = MAX2(stride, sizeof(VkDrawIndirectCommand));
|
||||||
|
|
||||||
|
#if GFX_HAS_GENERATED_CMDS
|
||||||
|
if (anv_use_generated_draws(cmd_buffer->device, maxDrawCount)) {
|
||||||
|
genX(cmd_buffer_emit_indirect_generated_draws_count)(
|
||||||
|
cmd_buffer,
|
||||||
|
indirect_data_address,
|
||||||
|
stride,
|
||||||
|
count_address,
|
||||||
|
maxDrawCount,
|
||||||
|
false /* indexed */);
|
||||||
|
} else {
|
||||||
|
emit_indirect_count_draws(cmd_buffer,
|
||||||
|
indirect_data_address,
|
||||||
|
stride,
|
||||||
|
count_address,
|
||||||
|
maxDrawCount,
|
||||||
|
false /* indexed */);
|
||||||
|
}
|
||||||
|
#else
|
||||||
emit_indirect_count_draws(cmd_buffer,
|
emit_indirect_count_draws(cmd_buffer,
|
||||||
anv_address_add(buffer->address, offset),
|
indirect_data_address,
|
||||||
MAX2(stride, sizeof(VkDrawIndirectCommand)),
|
stride,
|
||||||
anv_address_add(count_buffer->address, countBufferOffset),
|
count_address,
|
||||||
maxDrawCount,
|
maxDrawCount,
|
||||||
false /* indexed */);
|
false /* indexed */);
|
||||||
|
#endif
|
||||||
|
|
||||||
trace_intel_end_draw_indirect_count(&cmd_buffer->trace, maxDrawCount);
|
trace_intel_end_draw_indirect_count(&cmd_buffer->trace, maxDrawCount);
|
||||||
}
|
}
|
||||||
|
|
@ -4675,12 +4748,37 @@ void genX(CmdDrawIndexedIndirectCount)(
|
||||||
0);
|
0);
|
||||||
trace_intel_begin_draw_indexed_indirect_count(&cmd_buffer->trace);
|
trace_intel_begin_draw_indexed_indirect_count(&cmd_buffer->trace);
|
||||||
|
|
||||||
|
struct anv_address indirect_data_address =
|
||||||
|
anv_address_add(buffer->address, offset);
|
||||||
|
struct anv_address count_address =
|
||||||
|
anv_address_add(count_buffer->address, countBufferOffset);
|
||||||
|
stride = MAX2(stride, sizeof(VkDrawIndexedIndirectCommand));
|
||||||
|
|
||||||
|
#if GFX_HAS_GENERATED_CMDS
|
||||||
|
if (anv_use_generated_draws(cmd_buffer->device, maxDrawCount)) {
|
||||||
|
genX(cmd_buffer_emit_indirect_generated_draws_count)(
|
||||||
|
cmd_buffer,
|
||||||
|
indirect_data_address,
|
||||||
|
stride,
|
||||||
|
count_address,
|
||||||
|
maxDrawCount,
|
||||||
|
true /* indexed */);
|
||||||
|
} else {
|
||||||
|
emit_indirect_count_draws(cmd_buffer,
|
||||||
|
indirect_data_address,
|
||||||
|
stride,
|
||||||
|
count_address,
|
||||||
|
maxDrawCount,
|
||||||
|
true /* indexed */);
|
||||||
|
}
|
||||||
|
#else
|
||||||
emit_indirect_count_draws(cmd_buffer,
|
emit_indirect_count_draws(cmd_buffer,
|
||||||
anv_address_add(buffer->address, offset),
|
indirect_data_address,
|
||||||
MAX2(stride, sizeof(VkDrawIndirectCommand)),
|
stride,
|
||||||
anv_address_add(count_buffer->address, countBufferOffset),
|
count_address,
|
||||||
maxDrawCount,
|
maxDrawCount,
|
||||||
true /* indexed */);
|
true /* indexed */);
|
||||||
|
#endif
|
||||||
|
|
||||||
trace_intel_end_draw_indexed_indirect_count(&cmd_buffer->trace, maxDrawCount);
|
trace_intel_end_draw_indexed_indirect_count(&cmd_buffer->trace, maxDrawCount);
|
||||||
|
|
||||||
|
|
|
||||||
704
src/intel/vulkan/genX_cmd_draw_generated_indirect.h
Normal file
704
src/intel/vulkan/genX_cmd_draw_generated_indirect.h
Normal file
|
|
@ -0,0 +1,704 @@
|
||||||
|
/*
|
||||||
|
* Copyright © 2022 Intel Corporation
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the next
|
||||||
|
* paragraph) shall be included in all copies or substantial portions of the
|
||||||
|
* Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||||
|
* IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef GENX_CMD_GENERATED_INDIRECT_DRAW_H
|
||||||
|
#define GENX_CMD_GENERATED_INDIRECT_DRAW_H
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
#include <stdbool.h>
|
||||||
|
|
||||||
|
#include "util/macros.h"
|
||||||
|
|
||||||
|
#include "anv_private.h"
|
||||||
|
#include "anv_generated_indirect_draws.h"
|
||||||
|
|
||||||
|
#if GFX_VER < 11
|
||||||
|
#error "Generated draws optimization not supported prior to Gfx11"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* This is the maximum number of items a fragment shader can generate, due to
 * the viewport size limit.
 */
|
||||||
|
#define MAX_GENERATED_DRAW_COUNT (8192 * 8192)
|
||||||
|
|
||||||
|
static void
|
||||||
|
genX(cmd_buffer_emit_generate_draws_pipeline)(struct anv_cmd_buffer *cmd_buffer)
|
||||||
|
{
|
||||||
|
struct anv_batch *batch = &cmd_buffer->generation_batch;
|
||||||
|
struct anv_device *device = cmd_buffer->device;
|
||||||
|
const struct anv_shader_bin *draw_kernel = device->generated_draw_kernel;
|
||||||
|
const struct brw_wm_prog_data *prog_data =
|
||||||
|
brw_wm_prog_data_const(draw_kernel->prog_data);
|
||||||
|
|
||||||
|
uint32_t *dw = anv_batch_emitn(batch,
|
||||||
|
1 + 2 * GENX(VERTEX_ELEMENT_STATE_length),
|
||||||
|
GENX(3DSTATE_VERTEX_ELEMENTS));
|
||||||
|
/* You might think there is some shady stuff going here and you would be
|
||||||
|
* right. We're setting up 2 VERTEX_ELEMENT_STATE yet we're only providing
|
||||||
|
* 1 (positions) VERTEX_BUFFER_STATE later.
|
||||||
|
*
|
||||||
|
* Find more about how to set up a 3D pipeline with a fragment shader but
|
||||||
|
* without a vertex shader in blorp_emit_vertex_elements() in
|
||||||
|
* blorp_genX_exec.h.
|
||||||
|
*/
|
||||||
|
GENX(VERTEX_ELEMENT_STATE_pack)(
|
||||||
|
batch, dw + 1, &(struct GENX(VERTEX_ELEMENT_STATE)) {
|
||||||
|
.VertexBufferIndex = 1,
|
||||||
|
.Valid = true,
|
||||||
|
.SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT,
|
||||||
|
.SourceElementOffset = 0,
|
||||||
|
.Component0Control = VFCOMP_STORE_SRC,
|
||||||
|
.Component1Control = VFCOMP_STORE_0,
|
||||||
|
.Component2Control = VFCOMP_STORE_0,
|
||||||
|
.Component3Control = VFCOMP_STORE_0,
|
||||||
|
});
|
||||||
|
GENX(VERTEX_ELEMENT_STATE_pack)(
|
||||||
|
batch, dw + 3, &(struct GENX(VERTEX_ELEMENT_STATE)) {
|
||||||
|
.VertexBufferIndex = 0,
|
||||||
|
.Valid = true,
|
||||||
|
.SourceElementFormat = ISL_FORMAT_R32G32B32_FLOAT,
|
||||||
|
.SourceElementOffset = 0,
|
||||||
|
.Component0Control = VFCOMP_STORE_SRC,
|
||||||
|
.Component1Control = VFCOMP_STORE_SRC,
|
||||||
|
.Component2Control = VFCOMP_STORE_SRC,
|
||||||
|
.Component3Control = VFCOMP_STORE_1_FP,
|
||||||
|
});
|
||||||
|
|
||||||
|
anv_batch_emit(batch, GENX(3DSTATE_VF_STATISTICS), vf);
|
||||||
|
anv_batch_emit(batch, GENX(3DSTATE_VF_SGVS), sgvs) {
|
||||||
|
sgvs.InstanceIDEnable = true;
|
||||||
|
sgvs.InstanceIDComponentNumber = COMP_1;
|
||||||
|
sgvs.InstanceIDElementOffset = 0;
|
||||||
|
}
|
||||||
|
anv_batch_emit(batch, GENX(3DSTATE_VF_SGVS_2), sgvs);
|
||||||
|
anv_batch_emit(batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
|
||||||
|
vfi.InstancingEnable = false;
|
||||||
|
vfi.VertexElementIndex = 0;
|
||||||
|
}
|
||||||
|
anv_batch_emit(batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
|
||||||
|
vfi.InstancingEnable = false;
|
||||||
|
vfi.VertexElementIndex = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
anv_batch_emit(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) {
|
||||||
|
topo.PrimitiveTopologyType = _3DPRIM_RECTLIST;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Emit URB setup. We tell it that the VS is active because we want it to
|
||||||
|
* allocate space for the VS. Even though one isn't run, we need VUEs to
|
||||||
|
* store the data that VF is going to pass to SOL.
|
||||||
|
*/
|
||||||
|
const unsigned entry_size[4] = { DIV_ROUND_UP(32, 64), 1, 1, 1 };
|
||||||
|
|
||||||
|
genX(emit_l3_config)(batch, device, device->generated_draw_l3_config);
|
||||||
|
|
||||||
|
cmd_buffer->state.current_l3_config = device->generated_draw_l3_config;
|
||||||
|
|
||||||
|
genX(emit_urb_setup)(device, batch, device->generated_draw_l3_config,
|
||||||
|
VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
|
||||||
|
entry_size, NULL);
|
||||||
|
|
||||||
|
anv_batch_emit(batch, GENX(3DSTATE_PS_BLEND), ps_blend) {
|
||||||
|
ps_blend.HasWriteableRT = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
anv_batch_emit(batch, GENX(3DSTATE_WM_DEPTH_STENCIL), wm);
|
||||||
|
|
||||||
|
#if GFX_VER >= 12
|
||||||
|
anv_batch_emit(batch, GENX(3DSTATE_DEPTH_BOUNDS), db) {
|
||||||
|
db.DepthBoundsTestEnable = false;
|
||||||
|
db.DepthBoundsTestMinValue = 0.0;
|
||||||
|
db.DepthBoundsTestMaxValue = 1.0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
anv_batch_emit(batch, GENX(3DSTATE_MULTISAMPLE), ms);
|
||||||
|
anv_batch_emit(batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
|
||||||
|
sm.SampleMask = 0x1;
|
||||||
|
}
|
||||||
|
|
||||||
|
anv_batch_emit(batch, GENX(3DSTATE_VS), vs);
|
||||||
|
anv_batch_emit(batch, GENX(3DSTATE_HS), hs);
|
||||||
|
anv_batch_emit(batch, GENX(3DSTATE_TE), te);
|
||||||
|
anv_batch_emit(batch, GENX(3DSTATE_DS), DS);
|
||||||
|
|
||||||
|
anv_batch_emit(batch, GENX(3DSTATE_STREAMOUT), so);
|
||||||
|
|
||||||
|
anv_batch_emit(batch, GENX(3DSTATE_GS), gs);
|
||||||
|
|
||||||
|
anv_batch_emit(batch, GENX(3DSTATE_CLIP), clip) {
|
||||||
|
clip.PerspectiveDivideDisable = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
anv_batch_emit(batch, GENX(3DSTATE_SF), sf) {
|
||||||
|
#if GFX_VER >= 12
|
||||||
|
sf.DerefBlockSize = INTEL_URB_DEREF_BLOCK_SIZE_32; // TODO
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
anv_batch_emit(batch, GENX(3DSTATE_RASTER), raster) {
|
||||||
|
raster.CullMode = CULLMODE_NONE;
|
||||||
|
}
|
||||||
|
|
||||||
|
anv_batch_emit(batch, GENX(3DSTATE_SBE), sbe) {
|
||||||
|
sbe.VertexURBEntryReadOffset = 1;
|
||||||
|
sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
|
||||||
|
sbe.VertexURBEntryReadLength = MAX2((prog_data->num_varying_inputs + 1) / 2, 1);
|
||||||
|
sbe.ConstantInterpolationEnable = prog_data->flat_inputs;
|
||||||
|
sbe.ForceVertexURBEntryReadLength = true;
|
||||||
|
sbe.ForceVertexURBEntryReadOffset = true;
|
||||||
|
for (unsigned i = 0; i < 32; i++)
|
||||||
|
sbe.AttributeActiveComponentFormat[i] = ACF_XYZW;
|
||||||
|
}
|
||||||
|
|
||||||
|
anv_batch_emit(batch, GENX(3DSTATE_WM), wm) {
|
||||||
|
//wm.ForceThreadDispatchEnable = ForceON;
|
||||||
|
}
|
||||||
|
|
||||||
|
anv_batch_emit(batch, GENX(3DSTATE_PS_EXTRA), psx) {
|
||||||
|
psx.PixelShaderValid = true;
|
||||||
|
psx.AttributeEnable = prog_data->num_varying_inputs > 0;
|
||||||
|
psx.PixelShaderIsPerSample = prog_data->persample_dispatch;
|
||||||
|
psx.PixelShaderComputedDepthMode = prog_data->computed_depth_mode;
|
||||||
|
psx.PixelShaderComputesStencil = prog_data->computed_stencil;
|
||||||
|
}
|
||||||
|
|
||||||
|
anv_batch_emit(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) {
|
||||||
|
struct anv_state cc_state =
|
||||||
|
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 4 * GENX(CC_VIEWPORT_length), 32);
|
||||||
|
struct GENX(CC_VIEWPORT) cc_viewport = {
|
||||||
|
.MinimumDepth = 0.0f,
|
||||||
|
.MaximumDepth = 1.0f,
|
||||||
|
};
|
||||||
|
GENX(CC_VIEWPORT_pack)(NULL, cc_state.map, &cc_viewport);
|
||||||
|
cc.CCViewportPointer = cc_state.offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if GFX_VER >= 12
|
||||||
|
/* Disable Primitive Replication. */
|
||||||
|
anv_batch_emit(batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
anv_batch_emit(batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS), alloc);
|
||||||
|
anv_batch_emit(batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_HS), alloc);
|
||||||
|
anv_batch_emit(batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_DS), alloc);
|
||||||
|
anv_batch_emit(batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_GS), alloc);
|
||||||
|
anv_batch_emit(batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_PS), alloc) {
|
||||||
|
alloc.ConstantBufferOffset = 0;
|
||||||
|
alloc.ConstantBufferSize = cmd_buffer->device->info->max_constant_urb_size_kb;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if GFX_VERx10 == 125
|
||||||
|
/* DG2: Wa_22011440098
|
||||||
|
* MTL: Wa_18022330953
|
||||||
|
*
|
||||||
|
* In 3D mode, after programming push constant alloc command immediately
|
||||||
|
* program push constant command(ZERO length) without any commit between
|
||||||
|
* them.
|
||||||
|
*/
|
||||||
|
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_ALL), c) {
|
||||||
|
/* Update empty push constants for all stages (bitmask = 11111b) */
|
||||||
|
c.ShaderUpdateEnable = 0x1f;
|
||||||
|
c.MOCS = anv_mocs(cmd_buffer->device, NULL, 0);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
cmd_buffer->state.gfx.vb_dirty = BITFIELD_BIT(0) | BITFIELD_BIT(1);
|
||||||
|
cmd_buffer->state.gfx.dirty |= ~(ANV_CMD_DIRTY_INDEX_BUFFER |
|
||||||
|
ANV_CMD_DIRTY_XFB_ENABLE);
|
||||||
|
cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS;
|
||||||
|
cmd_buffer->state.gfx.push_constant_stages = VK_SHADER_STAGE_FRAGMENT_BIT;
|
||||||
|
vk_dynamic_graphics_state_dirty_all(&cmd_buffer->vk.dynamic_graphics_state);
|
||||||
|
|
||||||
|
anv_add_pending_pipe_bits(cmd_buffer,
|
||||||
|
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
|
||||||
|
ANV_PIPE_STALL_AT_SCOREBOARD_BIT,
|
||||||
|
"after generation batch BTI change");
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
genX(cmd_buffer_emit_generate_draws_vertex)(struct anv_cmd_buffer *cmd_buffer,
|
||||||
|
uint32_t draw_count)
|
||||||
|
{
|
||||||
|
struct anv_batch *batch = &cmd_buffer->generation_batch;
|
||||||
|
struct anv_state vs_data_state =
|
||||||
|
anv_cmd_buffer_alloc_dynamic_state(
|
||||||
|
cmd_buffer, 9 * sizeof(uint32_t), 32);
|
||||||
|
|
||||||
|
float x0 = 0.0f, x1 = MIN2(draw_count, 8192);
|
||||||
|
float y0 = 0.0f, y1 = DIV_ROUND_UP(draw_count, 8192);
|
||||||
|
float z = 0.0f;
|
||||||
|
|
||||||
|
float *vertices = vs_data_state.map;
|
||||||
|
vertices[0] = x1; vertices[1] = y1; vertices[2] = z; /* v0 */
|
||||||
|
vertices[3] = x0; vertices[4] = y1; vertices[5] = z; /* v1 */
|
||||||
|
vertices[6] = x0; vertices[7] = y0; vertices[8] = z; /* v2 */
|
||||||
|
|
||||||
|
uint32_t *dw = anv_batch_emitn(batch,
|
||||||
|
1 + GENX(VERTEX_BUFFER_STATE_length),
|
||||||
|
GENX(3DSTATE_VERTEX_BUFFERS));
|
||||||
|
GENX(VERTEX_BUFFER_STATE_pack)(batch, dw + 1,
|
||||||
|
&(struct GENX(VERTEX_BUFFER_STATE)) {
|
||||||
|
.VertexBufferIndex = 0,
|
||||||
|
.AddressModifyEnable = true,
|
||||||
|
.BufferStartingAddress = (struct anv_address) {
|
||||||
|
.bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
|
||||||
|
.offset = vs_data_state.offset,
|
||||||
|
},
|
||||||
|
.BufferPitch = 3 * sizeof(float),
|
||||||
|
.BufferSize = 9 * sizeof(float),
|
||||||
|
.MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
|
||||||
|
#if GFX_VER >= 12
|
||||||
|
.L3BypassDisable = true,
|
||||||
|
#endif
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct anv_state
|
||||||
|
genX(cmd_buffer_alloc_generated_push_data)(struct anv_cmd_buffer *cmd_buffer)
|
||||||
|
{
|
||||||
|
return anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
|
||||||
|
sizeof(struct anv_generate_indirect_params),
|
||||||
|
ANV_UBO_ALIGNMENT);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static struct anv_state
|
||||||
|
genX(cmd_buffer_emit_generated_push_data)(struct anv_cmd_buffer *cmd_buffer,
|
||||||
|
struct anv_state push_data_state)
|
||||||
|
{
|
||||||
|
struct anv_batch *batch = &cmd_buffer->generation_batch;
|
||||||
|
struct anv_address push_data_addr = anv_state_pool_state_address(
|
||||||
|
&cmd_buffer->device->dynamic_state_pool, push_data_state);
|
||||||
|
|
||||||
|
#if GFX_VER >= 12
|
||||||
|
const uint32_t num_dwords = GENX(3DSTATE_CONSTANT_ALL_length) +
|
||||||
|
GENX(3DSTATE_CONSTANT_ALL_DATA_length);
|
||||||
|
uint32_t *dw =
|
||||||
|
anv_batch_emitn(batch, num_dwords,
|
||||||
|
GENX(3DSTATE_CONSTANT_ALL),
|
||||||
|
.ShaderUpdateEnable = BITFIELD_BIT(MESA_SHADER_FRAGMENT),
|
||||||
|
.PointerBufferMask = 0x1,
|
||||||
|
.MOCS = anv_mocs(cmd_buffer->device, NULL, 0));
|
||||||
|
|
||||||
|
GENX(3DSTATE_CONSTANT_ALL_DATA_pack)(
|
||||||
|
batch, dw + GENX(3DSTATE_CONSTANT_ALL_length),
|
||||||
|
&(struct GENX(3DSTATE_CONSTANT_ALL_DATA)) {
|
||||||
|
.PointerToConstantBuffer = push_data_addr,
|
||||||
|
.ConstantBufferReadLength = DIV_ROUND_UP(push_data_state.alloc_size, 32),
|
||||||
|
});
|
||||||
|
#else
|
||||||
|
anv_batch_emit(batch, GENX(3DSTATE_CONSTANT_PS), c) {
|
||||||
|
c.MOCS = anv_mocs(cmd_buffer->device, NULL, 0);
|
||||||
|
c.ConstantBody.ReadLength[0] = DIV_ROUND_UP(push_data_state.alloc_size, 32);
|
||||||
|
c.ConstantBody.Buffer[0] = push_data_addr;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return push_data_state;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer,
|
||||||
|
struct anv_address generated_cmds_addr,
|
||||||
|
uint32_t generated_cmds_size,
|
||||||
|
struct anv_address indirect_data_addr,
|
||||||
|
uint32_t indirect_data_stride,
|
||||||
|
uint32_t item_base,
|
||||||
|
uint32_t item_count,
|
||||||
|
bool indexed)
|
||||||
|
{
|
||||||
|
struct anv_device *device = cmd_buffer->device;
|
||||||
|
struct anv_batch *batch = &cmd_buffer->generation_batch;
|
||||||
|
const struct anv_shader_bin *draw_kernel = device->generated_draw_kernel;
|
||||||
|
const struct brw_wm_prog_data *prog_data =
|
||||||
|
brw_wm_prog_data_const(draw_kernel->prog_data);
|
||||||
|
|
||||||
|
anv_batch_emit(batch, GENX(3DSTATE_PS), ps) {
|
||||||
|
ps.BindingTableEntryCount = 2;
|
||||||
|
ps.PushConstantEnable = prog_data->base.nr_params > 0 ||
|
||||||
|
prog_data->base.ubo_ranges[0].length;
|
||||||
|
|
||||||
|
ps._8PixelDispatchEnable = prog_data->dispatch_8;
|
||||||
|
ps._16PixelDispatchEnable = prog_data->dispatch_16;
|
||||||
|
ps._32PixelDispatchEnable = prog_data->dispatch_32;
|
||||||
|
|
||||||
|
ps.DispatchGRFStartRegisterForConstantSetupData0 =
|
||||||
|
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0);
|
||||||
|
ps.DispatchGRFStartRegisterForConstantSetupData1 =
|
||||||
|
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 1);
|
||||||
|
ps.DispatchGRFStartRegisterForConstantSetupData2 =
|
||||||
|
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 2);
|
||||||
|
|
||||||
|
ps.KernelStartPointer0 = draw_kernel->kernel.offset +
|
||||||
|
brw_wm_prog_data_prog_offset(prog_data, ps, 0);
|
||||||
|
ps.KernelStartPointer1 = draw_kernel->kernel.offset +
|
||||||
|
brw_wm_prog_data_prog_offset(prog_data, ps, 1);
|
||||||
|
ps.KernelStartPointer2 = draw_kernel->kernel.offset +
|
||||||
|
brw_wm_prog_data_prog_offset(prog_data, ps, 2);
|
||||||
|
|
||||||
|
ps.MaximumNumberofThreadsPerPSD = device->info->max_threads_per_psd - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
genX(cmd_buffer_emit_generate_draws_vertex)(cmd_buffer, item_count);
|
||||||
|
|
||||||
|
struct anv_state push_data_state =
|
||||||
|
genX(cmd_buffer_alloc_generated_push_data)(cmd_buffer);
|
||||||
|
|
||||||
|
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
|
||||||
|
|
||||||
|
struct anv_generate_indirect_params *push_data = push_data_state.map;
|
||||||
|
*push_data = (struct anv_generate_indirect_params) {
|
||||||
|
.draw = {
|
||||||
|
.is_indexed = indexed,
|
||||||
|
.is_predicated = cmd_buffer->state.conditional_render_enabled,
|
||||||
|
.draw_base = item_base,
|
||||||
|
.draw_count = item_count,
|
||||||
|
.instance_multiplier = pipeline->instance_multiplier,
|
||||||
|
.indirect_data_stride = indirect_data_stride,
|
||||||
|
},
|
||||||
|
.indirect_data_addr = anv_address_physical(indirect_data_addr),
|
||||||
|
.generated_cmds_addr = anv_address_physical(generated_cmds_addr),
|
||||||
|
};
|
||||||
|
|
||||||
|
genX(cmd_buffer_emit_generated_push_data)(cmd_buffer, push_data_state);
|
||||||
|
|
||||||
|
anv_batch_emit(batch, GENX(3DPRIMITIVE), prim) {
|
||||||
|
prim.VertexAccessType = SEQUENTIAL;
|
||||||
|
prim.PrimitiveTopologyType = _3DPRIM_RECTLIST;
|
||||||
|
prim.VertexCountPerInstance = 3;
|
||||||
|
prim.InstanceCount = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
genX(cmd_buffer_emit_indirect_generated_draws_init)(struct anv_cmd_buffer *cmd_buffer)
|
||||||
|
{
|
||||||
|
#if GFX_VER >= 12
|
||||||
|
anv_batch_emit(&cmd_buffer->batch, GENX(MI_ARB_CHECK), arb) {
|
||||||
|
arb.PreParserDisableMask = true;
|
||||||
|
arb.PreParserDisable = true;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
anv_batch_emit_ensure_space(&cmd_buffer->generation_batch, 4);
|
||||||
|
|
||||||
|
trace_intel_begin_generate_draws(&cmd_buffer->trace);
|
||||||
|
|
||||||
|
anv_batch_emit(&cmd_buffer->batch, GENX(MI_BATCH_BUFFER_START), bbs) {
|
||||||
|
bbs.AddressSpaceIndicator = ASI_PPGTT;
|
||||||
|
bbs.BatchBufferStartAddress =
|
||||||
|
anv_batch_current_address(&cmd_buffer->generation_batch);
|
||||||
|
}
|
||||||
|
|
||||||
|
cmd_buffer->generation_return_addr = anv_batch_current_address(&cmd_buffer->batch);
|
||||||
|
|
||||||
|
trace_intel_end_generate_draws(&cmd_buffer->trace);
|
||||||
|
|
||||||
|
genX(cmd_buffer_emit_generate_draws_pipeline)(cmd_buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
genX(cmd_buffer_emit_indirect_generated_draws)(struct anv_cmd_buffer *cmd_buffer,
|
||||||
|
struct anv_address indirect_data_addr,
|
||||||
|
uint32_t indirect_data_stride,
|
||||||
|
uint32_t draw_count,
|
||||||
|
bool indexed)
|
||||||
|
{
|
||||||
|
genX(flush_pipeline_select_3d)(cmd_buffer);
|
||||||
|
|
||||||
|
/* Apply the pipeline flush here so the indirect data is available for the
|
||||||
|
* generation shader.
|
||||||
|
*/
|
||||||
|
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
|
||||||
|
|
||||||
|
if (anv_address_is_null(cmd_buffer->generation_return_addr))
|
||||||
|
genX(cmd_buffer_emit_indirect_generated_draws_init)(cmd_buffer);
|
||||||
|
|
||||||
|
/* In order to have the vertex fetch gather the data we need to have a non
|
||||||
|
* 0 stride. It's possible to have a 0 stride given by the application when
|
||||||
|
* draw_count is 1, but we need a correct value for the
|
||||||
|
* VERTEX_BUFFER_STATE::BufferPitch, so ensure the caller set this
|
||||||
|
* correctly :
|
||||||
|
*
|
||||||
|
* Vulkan spec, vkCmdDrawIndirect:
|
||||||
|
*
|
||||||
|
* "If drawCount is less than or equal to one, stride is ignored."
|
||||||
|
*/
|
||||||
|
assert(indirect_data_stride > 0);
|
||||||
|
|
||||||
|
if (cmd_buffer->state.conditional_render_enabled)
|
||||||
|
genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
|
||||||
|
|
||||||
|
/* Emit the 3D state in the main batch. */
|
||||||
|
genX(cmd_buffer_flush_gfx_state)(cmd_buffer);
|
||||||
|
|
||||||
|
const uint32_t draw_cmd_stride = 4 * GENX(3DPRIMITIVE_EXTENDED_length);
|
||||||
|
|
||||||
|
uint32_t item_base = 0;
|
||||||
|
while (item_base < draw_count) {
|
||||||
|
const uint32_t item_count = MIN2(draw_count - item_base,
|
||||||
|
MAX_GENERATED_DRAW_COUNT);
|
||||||
|
const uint32_t draw_cmd_size = item_count * draw_cmd_stride;
|
||||||
|
|
||||||
|
/* Ensure we have enough contiguous space for all the draws so that the
|
||||||
|
* compute shader can edit all the 3DPRIMITIVEs from a single base
|
||||||
|
* address.
|
||||||
|
*
|
||||||
|
* TODO: we might have to split that if the amount of space is to large (at
|
||||||
|
* 1Mb?).
|
||||||
|
*/
|
||||||
|
VkResult result = anv_batch_emit_ensure_space(&cmd_buffer->batch,
|
||||||
|
draw_cmd_size);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
return;
|
||||||
|
|
||||||
|
genX(cmd_buffer_emit_generate_draws)(
|
||||||
|
cmd_buffer,
|
||||||
|
anv_batch_current_address(&cmd_buffer->batch),
|
||||||
|
draw_cmd_size,
|
||||||
|
indirect_data_addr,
|
||||||
|
indirect_data_stride,
|
||||||
|
item_base,
|
||||||
|
item_count,
|
||||||
|
indexed);
|
||||||
|
|
||||||
|
anv_batch_advance(&cmd_buffer->batch, draw_cmd_size);
|
||||||
|
|
||||||
|
item_base += item_count;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
genX(cmd_buffer_emit_generate_draws_count)(struct anv_cmd_buffer *cmd_buffer,
|
||||||
|
struct anv_address generated_cmds_addr,
|
||||||
|
uint32_t generated_cmds_size,
|
||||||
|
struct anv_address indirect_data_addr,
|
||||||
|
uint32_t indirect_data_stride,
|
||||||
|
uint32_t item_base,
|
||||||
|
uint32_t item_count,
|
||||||
|
struct anv_address count_addr,
|
||||||
|
bool indexed)
|
||||||
|
{
|
||||||
|
struct anv_device *device = cmd_buffer->device;
|
||||||
|
struct anv_batch *batch = &cmd_buffer->generation_batch;
|
||||||
|
const struct anv_shader_bin *draw_kernel =
|
||||||
|
device->generated_draw_count_kernel;
|
||||||
|
const struct brw_wm_prog_data *prog_data =
|
||||||
|
brw_wm_prog_data_const(draw_kernel->prog_data);
|
||||||
|
|
||||||
|
anv_batch_emit(batch, GENX(3DSTATE_PS), ps) {
|
||||||
|
ps.BindingTableEntryCount = 2;
|
||||||
|
ps.PushConstantEnable = prog_data->base.nr_params > 0 ||
|
||||||
|
prog_data->base.ubo_ranges[0].length;
|
||||||
|
|
||||||
|
ps._8PixelDispatchEnable = prog_data->dispatch_8;
|
||||||
|
ps._16PixelDispatchEnable = prog_data->dispatch_16;
|
||||||
|
ps._32PixelDispatchEnable = prog_data->dispatch_32;
|
||||||
|
|
||||||
|
ps.DispatchGRFStartRegisterForConstantSetupData0 =
|
||||||
|
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0);
|
||||||
|
ps.DispatchGRFStartRegisterForConstantSetupData1 =
|
||||||
|
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 1);
|
||||||
|
ps.DispatchGRFStartRegisterForConstantSetupData2 =
|
||||||
|
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 2);
|
||||||
|
|
||||||
|
ps.KernelStartPointer0 = draw_kernel->kernel.offset +
|
||||||
|
brw_wm_prog_data_prog_offset(prog_data, ps, 0);
|
||||||
|
ps.KernelStartPointer1 = draw_kernel->kernel.offset +
|
||||||
|
brw_wm_prog_data_prog_offset(prog_data, ps, 1);
|
||||||
|
ps.KernelStartPointer2 = draw_kernel->kernel.offset +
|
||||||
|
brw_wm_prog_data_prog_offset(prog_data, ps, 2);
|
||||||
|
|
||||||
|
ps.MaximumNumberofThreadsPerPSD = device->info->max_threads_per_psd - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
genX(cmd_buffer_emit_generate_draws_vertex)(cmd_buffer, item_count);
|
||||||
|
|
||||||
|
struct anv_state push_data_state =
|
||||||
|
genX(cmd_buffer_alloc_generated_push_data)(cmd_buffer);
|
||||||
|
|
||||||
|
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
|
||||||
|
uint64_t end_cmd_addr =
|
||||||
|
anv_address_physical(
|
||||||
|
anv_address_add(generated_cmds_addr, generated_cmds_size));
|
||||||
|
|
||||||
|
struct anv_generate_indirect_params *push_data = push_data_state.map;
|
||||||
|
*push_data = (struct anv_generate_indirect_params) {
|
||||||
|
.draw_count = {
|
||||||
|
.is_indexed = indexed,
|
||||||
|
.is_predicated = cmd_buffer->state.conditional_render_enabled,
|
||||||
|
.draw_base = item_base,
|
||||||
|
.item_count = item_count,
|
||||||
|
.draw_count = 0, // Edit this through a the command streamer
|
||||||
|
.instance_multiplier = pipeline->instance_multiplier,
|
||||||
|
.indirect_data_stride = indirect_data_stride,
|
||||||
|
.end_addr_ldw = end_cmd_addr & 0xffffffff,
|
||||||
|
.end_addr_udw = end_cmd_addr >> 32,
|
||||||
|
},
|
||||||
|
.indirect_data_addr = anv_address_physical(indirect_data_addr),
|
||||||
|
.generated_cmds_addr = anv_address_physical(generated_cmds_addr),
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Copy the draw count into the push constants so that the generation gets
|
||||||
|
* the value straight away and doesn't even need to access memory.
|
||||||
|
*/
|
||||||
|
struct mi_builder b;
|
||||||
|
mi_builder_init(&b, cmd_buffer->device->info, batch);
|
||||||
|
mi_memcpy(&b,
|
||||||
|
anv_address_add((struct anv_address) {
|
||||||
|
.bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
|
||||||
|
.offset = push_data_state.offset,
|
||||||
|
},
|
||||||
|
offsetof(struct anv_generate_indirect_params, draw_count.draw_count)),
|
||||||
|
count_addr, 4);
|
||||||
|
|
||||||
|
/* Only emit the data after the memcpy above. */
|
||||||
|
genX(cmd_buffer_emit_generated_push_data)(cmd_buffer, push_data_state);
|
||||||
|
|
||||||
|
anv_batch_emit(batch, GENX(3DPRIMITIVE), prim) {
|
||||||
|
prim.VertexAccessType = SEQUENTIAL;
|
||||||
|
prim.PrimitiveTopologyType = _3DPRIM_RECTLIST;
|
||||||
|
prim.VertexCountPerInstance = 3;
|
||||||
|
prim.InstanceCount = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
genX(cmd_buffer_emit_indirect_generated_draws_count)(struct anv_cmd_buffer *cmd_buffer,
|
||||||
|
struct anv_address indirect_data_addr,
|
||||||
|
uint32_t indirect_data_stride,
|
||||||
|
struct anv_address count_addr,
|
||||||
|
uint32_t max_draw_count,
|
||||||
|
bool indexed)
|
||||||
|
{
|
||||||
|
genX(flush_pipeline_select_3d)(cmd_buffer);
|
||||||
|
|
||||||
|
/* Apply the pipeline flush here so the indirect data is available for the
|
||||||
|
* generation shader.
|
||||||
|
*/
|
||||||
|
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
|
||||||
|
|
||||||
|
if (anv_address_is_null(cmd_buffer->generation_return_addr))
|
||||||
|
genX(cmd_buffer_emit_indirect_generated_draws_init)(cmd_buffer);
|
||||||
|
|
||||||
|
/* In order to have the vertex fetch gather the data we need to have a non
|
||||||
|
* 0 stride. It's possible to have a 0 stride given by the application when
|
||||||
|
* draw_count is 1, but we need a correct value for the
|
||||||
|
* VERTEX_BUFFER_STATE::BufferPitch, so ensure the caller set this
|
||||||
|
* correctly :
|
||||||
|
*
|
||||||
|
* Vulkan spec, vkCmdDrawIndirect:
|
||||||
|
*
|
||||||
|
* "If drawCount is less than or equal to one, stride is ignored."
|
||||||
|
*/
|
||||||
|
assert(indirect_data_stride > 0);
|
||||||
|
|
||||||
|
if (cmd_buffer->state.conditional_render_enabled)
|
||||||
|
genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
|
||||||
|
|
||||||
|
/* Emit the 3D state in the main batch. */
|
||||||
|
genX(cmd_buffer_flush_gfx_state)(cmd_buffer);
|
||||||
|
|
||||||
|
const uint32_t draw_cmd_stride = 4 * GENX(3DPRIMITIVE_EXTENDED_length);
|
||||||
|
|
||||||
|
uint32_t item_base = 0;
|
||||||
|
while (item_base < max_draw_count) {
|
||||||
|
const uint32_t item_count = MIN2(max_draw_count - item_base,
|
||||||
|
MAX_GENERATED_DRAW_COUNT);
|
||||||
|
const uint32_t draw_cmd_size = item_count * draw_cmd_stride;
|
||||||
|
|
||||||
|
/* Ensure we have enough contiguous space for all the draws so that the
|
||||||
|
* compute shader can edit all the 3DPRIMITIVEs from a single base
|
||||||
|
* address.
|
||||||
|
*
|
||||||
|
* TODO: we might have to split that if the amount of space is to large (at
|
||||||
|
* 1Mb?).
|
||||||
|
*/
|
||||||
|
VkResult result = anv_batch_emit_ensure_space(&cmd_buffer->batch,
|
||||||
|
draw_cmd_size);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
return;
|
||||||
|
|
||||||
|
genX(cmd_buffer_emit_generate_draws_count)(
|
||||||
|
cmd_buffer,
|
||||||
|
anv_batch_current_address(&cmd_buffer->batch),
|
||||||
|
draw_cmd_size,
|
||||||
|
anv_address_add(indirect_data_addr,
|
||||||
|
item_base * indirect_data_stride),
|
||||||
|
indirect_data_stride,
|
||||||
|
item_base,
|
||||||
|
item_count,
|
||||||
|
count_addr,
|
||||||
|
indexed);
|
||||||
|
|
||||||
|
anv_batch_advance(&cmd_buffer->batch, draw_cmd_size);
|
||||||
|
|
||||||
|
item_base += item_count;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
genX(cmd_buffer_flush_generated_draws)(struct anv_cmd_buffer *cmd_buffer)
|
||||||
|
{
|
||||||
|
/* No return address setup means we don't have to do anything */
|
||||||
|
if (anv_address_is_null(cmd_buffer->generation_return_addr))
|
||||||
|
return;
|
||||||
|
|
||||||
|
struct anv_batch *batch = &cmd_buffer->generation_batch;
|
||||||
|
|
||||||
|
/* Wait for all the generation vertex shader to generate the commands. */
|
||||||
|
genX(emit_apply_pipe_flushes)(batch,
|
||||||
|
cmd_buffer->device,
|
||||||
|
_3D,
|
||||||
|
ANV_PIPE_DATA_CACHE_FLUSH_BIT |
|
||||||
|
ANV_PIPE_CS_STALL_BIT);
|
||||||
|
|
||||||
|
#if GFX_VER >= 12
|
||||||
|
anv_batch_emit(batch, GENX(MI_ARB_CHECK), arb) {
|
||||||
|
arb.PreParserDisableMask = true;
|
||||||
|
arb.PreParserDisable = false;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if GFX_VER < 12
|
||||||
|
/* Prior to Gfx12 we cannot disable the CS prefetch, so we have to emit a
|
||||||
|
* bunch of NOOPs to ensure we do not have generated commands loaded into
|
||||||
|
* the CS cache prior to them having been generated.
|
||||||
|
*/
|
||||||
|
const struct intel_device_info *devinfo = cmd_buffer->device->info;
|
||||||
|
const enum intel_engine_class engine_class = cmd_buffer->queue_family->engine_class;
|
||||||
|
for (uint32_t i = 0; i < devinfo->engine_class_prefetch[engine_class] / 4; i++)
|
||||||
|
anv_batch_emit(batch, GENX(MI_NOOP), noop);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Return to the main batch. */
|
||||||
|
anv_batch_emit(batch, GENX(MI_BATCH_BUFFER_START), bbs) {
|
||||||
|
bbs.AddressSpaceIndicator = ASI_PPGTT;
|
||||||
|
bbs.BatchBufferStartAddress = cmd_buffer->generation_return_addr;
|
||||||
|
}
|
||||||
|
|
||||||
|
cmd_buffer->generation_return_addr = ANV_NULL_ADDRESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* GENX_CMD_GENERATED_INDIRECT_DRAW_H */
|
||||||
|
|
@ -18,6 +18,8 @@
|
||||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
# SOFTWARE.
|
# SOFTWARE.
|
||||||
|
|
||||||
|
subdir('shaders')
|
||||||
|
|
||||||
inc_anv = include_directories('.')
|
inc_anv = include_directories('.')
|
||||||
|
|
||||||
anv_flags = [
|
anv_flags = [
|
||||||
|
|
@ -43,20 +45,6 @@ anv_entrypoints = custom_target(
|
||||||
depend_files : vk_entrypoints_gen_depend_files,
|
depend_files : vk_entrypoints_gen_depend_files,
|
||||||
)
|
)
|
||||||
|
|
||||||
float64_spv_h = custom_target(
|
|
||||||
'float64_spv.h',
|
|
||||||
input : [glsl2spirv, float64_glsl_file],
|
|
||||||
output : 'float64_spv.h',
|
|
||||||
command : [
|
|
||||||
prog_python, '@INPUT@', '@OUTPUT@',
|
|
||||||
prog_glslang,
|
|
||||||
'--create-entry', 'main',
|
|
||||||
'--vn', 'float64_spv_source',
|
|
||||||
'--glsl-version', '450',
|
|
||||||
'-Olib',
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
idep_anv_headers = declare_dependency(
|
idep_anv_headers = declare_dependency(
|
||||||
sources : [anv_entrypoints[0]],
|
sources : [anv_entrypoints[0]],
|
||||||
include_directories : inc_anv,
|
include_directories : inc_anv,
|
||||||
|
|
@ -126,7 +114,8 @@ foreach g : [['90', ['gfx8_cmd_buffer.c']],
|
||||||
_gfx_ver = g[0]
|
_gfx_ver = g[0]
|
||||||
libanv_per_hw_ver_libs += static_library(
|
libanv_per_hw_ver_libs += static_library(
|
||||||
'anv_per_hw_ver@0@'.format(_gfx_ver),
|
'anv_per_hw_ver@0@'.format(_gfx_ver),
|
||||||
[anv_per_hw_ver_files, g[1], anv_entrypoints[0]],
|
[anv_per_hw_ver_files, g[1], anv_entrypoints[0],
|
||||||
|
generated_draws_spv_h, generated_draws_count_spv_h],
|
||||||
include_directories : [
|
include_directories : [
|
||||||
inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_compiler, inc_intel,
|
inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_compiler, inc_intel,
|
||||||
],
|
],
|
||||||
|
|
@ -152,6 +141,7 @@ libanv_files = files(
|
||||||
'anv_descriptor_set.c',
|
'anv_descriptor_set.c',
|
||||||
'anv_device.c',
|
'anv_device.c',
|
||||||
'anv_formats.c',
|
'anv_formats.c',
|
||||||
|
'anv_generated_indirect_draws.c',
|
||||||
'anv_genX.h',
|
'anv_genX.h',
|
||||||
'anv_image.c',
|
'anv_image.c',
|
||||||
'anv_measure.c',
|
'anv_measure.c',
|
||||||
|
|
@ -216,7 +206,7 @@ libanv_common = static_library(
|
||||||
c_args : anv_flags,
|
c_args : anv_flags,
|
||||||
cpp_args : anv_cpp_flags,
|
cpp_args : anv_cpp_flags,
|
||||||
gnu_symbol_visibility : 'hidden',
|
gnu_symbol_visibility : 'hidden',
|
||||||
dependencies : anv_deps,
|
dependencies : anv_deps
|
||||||
)
|
)
|
||||||
|
|
||||||
libvulkan_intel = shared_library(
|
libvulkan_intel = shared_library(
|
||||||
|
|
|
||||||
101
src/intel/vulkan/shaders/generated_draws.glsl
Normal file
101
src/intel/vulkan/shaders/generated_draws.glsl
Normal file
|
|
@ -0,0 +1,101 @@
|
||||||
|
/*
|
||||||
|
* Copyright © 2022 Intel Corporation
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the next
|
||||||
|
* paragraph) shall be included in all copies or substantial portions of the
|
||||||
|
* Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||||
|
* IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#version 450
|
||||||
|
|
||||||
|
/* Both storage buffers are accessed through A64 messages: the first one holds
 * the indirect draw structures, the second one receives the generated
 * commands.
 */
layout(set = 0, binding = 0, std430) buffer Storage0 {
   uint indirect_data[];
};

layout(set = 0, binding = 1, std430) buffer Storage1 {
   uint commands[];
};

/* Generation parameters, provided through push constants. */
layout(set = 0, binding = 2) uniform block {
   uint is_indexed;
   uint is_predicated;
   uint draw_base;
   uint draw_count;
   uint instance_multiplier;
   uint indirect_data_stride;
};

/* Each fragment turns one indirect draw structure into one 10 dword
 * 3DPRIMITIVE command. Fragments whose draw id is not below draw_count write
 * nothing.
 */
void main()
{
   /* Fragments are numbered row by row, 8192 items per row. */
   const uint items_per_row = 8192;
   /* Size of one generated 3DPRIMITIVE, in dwords. */
   const uint cmd_dwords = 10;

   uint item = uint(gl_FragCoord.y) * items_per_row + uint(gl_FragCoord.x);
   uint draw_id = draw_base + item;

   if (draw_id >= draw_count)
      return;

   /* Dword indices of the source structure and of the destination command. */
   uint src = item * indirect_data_stride / 4;
   uint dst = item * cmd_dwords;

   /* The first 3 dwords have the same role in VkDrawIndirectCommand and
    * VkDrawIndexedIndirectCommand.
    */
   uint vertex_count = indirect_data[src + 0];
   uint instance_count = indirect_data[src + 1] * instance_multiplier;
   uint start_vertex = indirect_data[src + 2];

   uint access_type;
   uint start_instance;
   uint base_vertex_location;
   uint base_vertex_param;

   if (is_indexed != 0) {
      /* Remainder of a VkDrawIndexedIndirectCommand */
      uint vertex_offset = indirect_data[src + 3];
      start_instance = indirect_data[src + 4];

      access_type = 1 << 8; /* Indexed */
      base_vertex_location = vertex_offset;
      base_vertex_param = vertex_offset;
   } else {
      /* Remainder of a VkDrawIndirectCommand */
      start_instance = indirect_data[src + 3];

      access_type = 0;
      base_vertex_location = 0;
      base_vertex_param = start_vertex;
   }

   uint header = (3 << 29 |            /* Command Type */
                  3 << 27 |            /* Command SubType */
                  3 << 24 |            /* 3D Command Opcode */
                  1 << 11 |            /* Extended Parameter Enable */
                  is_predicated << 8 |
                  8 << 0);             /* DWord Length */

   commands[dst + 0] = header;
   commands[dst + 1] = access_type;
   commands[dst + 2] = vertex_count;         /* Vertex Count Per Instance */
   commands[dst + 3] = start_vertex;         /* Start Vertex Location */
   commands[dst + 4] = instance_count;       /* Instance Count */
   commands[dst + 5] = start_instance;       /* Start Instance Location */
   commands[dst + 6] = base_vertex_location; /* Base Vertex Location */
   commands[dst + 7] = base_vertex_param;    /* gl_BaseVertex */
   commands[dst + 8] = start_instance;       /* gl_BaseInstance */
   commands[dst + 9] = draw_id;              /* gl_DrawID */
}
|
||||||
118
src/intel/vulkan/shaders/generated_draws_count.glsl
Normal file
118
src/intel/vulkan/shaders/generated_draws_count.glsl
Normal file
|
|
@ -0,0 +1,118 @@
|
||||||
|
/*
|
||||||
|
* Copyright © 2022 Intel Corporation
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the next
|
||||||
|
* paragraph) shall be included in all copies or substantial portions of the
|
||||||
|
* Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||||
|
* IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#version 450
|
||||||
|
|
||||||
|
/* These 2 bindings will be accessed through A64 messages: the first one holds
 * the indirect draw structures, the second one receives the generated
 * commands.
 */
layout(set = 0, binding = 0, std430) buffer Storage0 {
   uint indirect_data[];
};

layout(set = 0, binding = 1, std430) buffer Storage1 {
   uint commands[];
};

/* This data will be provided through push constants. item_count is part of
 * the layout but is not read by this shader.
 */
layout(set = 0, binding = 2) uniform block {
   uint is_indexed;
   uint is_predicated;
   uint draw_base;
   uint item_count;
   uint draw_count;
   uint instance_multiplier;
   uint indirect_data_stride;
   uint end_addr_ldw;
   uint end_addr_udw;
};

/* Each fragment handles one command slot of 10 dwords:
 *
 *    - draw_id <  draw_count : the slot receives a 3DPRIMITIVE built from the
 *                              matching indirect draw structure
 *    - draw_id == draw_count : the slot receives a MI_BATCH_BUFFER_START
 *                              jumping to end_addr (presumably to skip the
 *                              slots left unwritten)
 *    - draw_id >  draw_count : nothing is written
 *
 * The indirect data is only read in the first case, so that no load is issued
 * for slots that have no matching draw structure.
 */
void main()
{
   uint item_idx = uint(gl_FragCoord.y) * 8192 + uint(gl_FragCoord.x);
   uint indirect_data_offset = item_idx * indirect_data_stride / 4;
   uint _3dprim_dw_size = 10;
   uint cmd_idx = item_idx * _3dprim_dw_size;
   uint draw_id = draw_base + item_idx;

   if (draw_id < draw_count) {
      if (is_indexed != 0) {
         /* Loading a VkDrawIndexedIndirectCommand */
         uint index_count    = indirect_data[indirect_data_offset + 0];
         uint instance_count = indirect_data[indirect_data_offset + 1] * instance_multiplier;
         uint first_index    = indirect_data[indirect_data_offset + 2];
         uint vertex_offset  = indirect_data[indirect_data_offset + 3];
         uint first_instance = indirect_data[indirect_data_offset + 4];

         commands[cmd_idx + 0] = (3 << 29 |         /* Command Type */
                                  3 << 27 |         /* Command SubType */
                                  3 << 24 |         /* 3D Command Opcode */
                                  1 << 11 |         /* Extended Parameter Enable */
                                  is_predicated << 8 |
                                  8 << 0);          /* DWord Length */
         commands[cmd_idx + 1] = 1 << 8;            /* Indexed */
         commands[cmd_idx + 2] = index_count;       /* Vertex Count Per Instance */
         commands[cmd_idx + 3] = first_index;       /* Start Vertex Location */
         commands[cmd_idx + 4] = instance_count;    /* Instance Count */
         commands[cmd_idx + 5] = first_instance;    /* Start Instance Location */
         commands[cmd_idx + 6] = vertex_offset;     /* Base Vertex Location */
         commands[cmd_idx + 7] = vertex_offset;     /* gl_BaseVertex */
         commands[cmd_idx + 8] = first_instance;    /* gl_BaseInstance */
         commands[cmd_idx + 9] = draw_id;           /* gl_DrawID */
      } else {
         /* Loading a VkDrawIndirectCommand structure */
         uint vertex_count   = indirect_data[indirect_data_offset + 0];
         uint instance_count = indirect_data[indirect_data_offset + 1] * instance_multiplier;
         uint first_vertex   = indirect_data[indirect_data_offset + 2];
         uint first_instance = indirect_data[indirect_data_offset + 3];

         commands[cmd_idx + 0] = (3 << 29 |         /* Command Type */
                                  3 << 27 |         /* Command SubType */
                                  3 << 24 |         /* 3D Command Opcode */
                                  1 << 11 |         /* Extended Parameter Enable */
                                  is_predicated << 8 |
                                  8 << 0);          /* DWord Length */
         commands[cmd_idx + 1] = 0;
         commands[cmd_idx + 2] = vertex_count;      /* Vertex Count Per Instance */
         commands[cmd_idx + 3] = first_vertex;      /* Start Vertex Location */
         commands[cmd_idx + 4] = instance_count;    /* Instance Count */
         commands[cmd_idx + 5] = first_instance;    /* Start Instance Location */
         commands[cmd_idx + 6] = 0;                 /* Base Vertex Location */
         commands[cmd_idx + 7] = first_vertex;      /* gl_BaseVertex */
         commands[cmd_idx + 8] = first_instance;    /* gl_BaseInstance */
         commands[cmd_idx + 9] = draw_id;           /* gl_DrawID */
      }
   } else if (draw_id == draw_count) {
      /* First slot after the last draw: jump to end_addr. */
      commands[cmd_idx + 0] = (0  << 29 |        /* Command Type */
                               49 << 23 |        /* MI Command Opcode */
                               1  << 8  |        /* Address Space Indicator (PPGTT) */
                               1  << 0);         /* DWord Length */
      commands[cmd_idx + 1] = end_addr_ldw;
      commands[cmd_idx + 2] = end_addr_udw;
   }
}
|
||||||
59
src/intel/vulkan/shaders/meson.build
Normal file
59
src/intel/vulkan/shaders/meson.build
Normal file
|
|
@ -0,0 +1,59 @@
|
||||||
|
# Copyright © 2022 Intel Corporation
|
||||||
|
|
||||||
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
# of this software and associated documentation files (the "Software"), to deal
|
||||||
|
# in the Software without restriction, including without limitation the rights
|
||||||
|
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
# copies of the Software, and to permit persons to whom the Software is
|
||||||
|
# furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
# The above copyright notice and this permission notice shall be included in
|
||||||
|
# all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
# SOFTWARE.
|
||||||
|
|
||||||
|
# SPIR-V header generated from float64_glsl_file, with a 'main' entry point
# created and the -Olib option passed to the glsl2spirv script.
float64_spv_h = custom_target(
  'float64_spv.h',
  input : [glsl2spirv, float64_glsl_file],
  output : 'float64_spv.h',
  command : [
    prog_python, '@INPUT@', '@OUTPUT@',
    prog_glslang,
    '--create-entry', 'main',
    '--vn', 'float64_spv_source',
    '--glsl-version', '450',
    '-Olib',
  ]
)

# Fragment shaders generating the indirect draw commands. Each one is compiled
# into a <name>_spv.h header exposing the SPIR-V as <name>_spv_source, and the
# resulting target is stored in the <name>_spv_h variable.
foreach _shader : ['generated_draws', 'generated_draws_count']
  set_variable(
    _shader + '_spv_h',
    custom_target(
      _shader + '_spv.h',
      input : [glsl2spirv, _shader + '.glsl'],
      output : _shader + '_spv.h',
      command : [
        prog_python, '@INPUT@', '@OUTPUT@',
        prog_glslang,
        '--vn', _shader + '_spv_source',
        '--glsl-version', '450',
        '--stage', 'frag',
      ]
    )
  )
endforeach
|
||||||
|
|
@ -615,4 +615,8 @@
|
||||||
DRI_CONF_OPT_B(fp64_workaround_enabled, def, \
|
DRI_CONF_OPT_B(fp64_workaround_enabled, def, \
|
||||||
"Use softpf64 when the shader uses float64, but the device doesn't support that type")
|
"Use softpf64 when the shader uses float64, but the device doesn't support that type")
|
||||||
|
|
||||||
|
#define DRI_CONF_ANV_GENERATED_INDIRECT_THRESHOLD(def) \
|
||||||
|
DRI_CONF_OPT_I(generated_indirect_threshold, def, 0, INT32_MAX, \
|
||||||
|
"Indirect threshold count above which we start generating commands")
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue