anv: implement generated (indexed) indirect draws

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Rohan Garg <rohan.garg@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15642>
This commit is contained in:
Lionel Landwerlin 2022-02-25 16:56:04 +02:00 committed by Marge Bot
parent 3596a8ea7a
commit c950fe97a0
15 changed files with 1605 additions and 23 deletions

View file

@ -7,6 +7,9 @@ Debugging
Here are a few debug environment variables specific to ANV:
:envvar:`ANV_ENABLE_GENERATED_INDIRECT_DRAWS`
If defined to ``0`` or ``false``, this will disable the generated
indirect draw optimization in ANV. This only affects Gfx11+.
:envvar:`ANV_ENABLE_PIPELINE_CACHE`
If defined to ``0`` or ``false``, this will disable pipeline
caching, forcing ANV to reparse and recompile any VkShaderModule
@ -272,3 +275,34 @@ checking for ``ANV_CMD_DIRTY_PIPELINE``. It should only do so if it
needs to know some value that is coming from the
``anv_graphics_pipeline`` object that is not available from
``anv_dynamic_state``.
Generated indirect draws optimization
-------------------------------------
Indirect draws have traditionally been implemented on Intel HW by
loading the indirect parameters from memory into HW registers using
the command streamer's ``MI_LOAD_REGISTER_MEM`` instruction before
dispatching a draw call to the 3D pipeline.
On recent products, the command streamer was found to be a performance
bottleneck, because it cannot dispatch draw calls fast enough to keep
the 3D pipeline busy.
The solution to this problem is to change the way we deal with
indirect draws. Instead of loading HW registers with values using the
command streamer, we generate the entire set of ``3DPRIMITIVE``
instructions using a shader. The generated instructions contain all of
the draw call parameters. This way the command streamer only executes
``3DPRIMITIVE`` instructions and doesn't do any data loading from
memory or touch HW registers, feeding the 3D pipeline as fast as it
can.
In ANV this is implemented using a side batch buffer. When ANV
encounters the first indirect draw, it emits a jump into the side
batch; for each indirect draw, the side batch contains a draw call
using a generation shader. We keep adding more generation draws into
the side batch until we have to stop due to the command buffer ending,
a secondary command buffer call, or a barrier containing the access
flag ``VK_ACCESS_INDIRECT_COMMAND_READ_BIT``. The side batch buffer
then jumps back right after the instruction from which it was called.
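As a rough illustration (condensed from the new
``genX_cmd_draw_generated_indirect.h`` below, not verbatim driver
code), the jump into the generation batch and the return to the main
batch look like this::

   /* First generated draw: the main batch jumps into the side batch and
    * remembers where to come back. */
   anv_batch_emit(&cmd_buffer->batch, GENX(MI_BATCH_BUFFER_START), bbs) {
      bbs.AddressSpaceIndicator = ASI_PPGTT;
      bbs.BatchBufferStartAddress =
         anv_batch_current_address(&cmd_buffer->generation_batch);
   }
   cmd_buffer->generation_return_addr =
      anv_batch_current_address(&cmd_buffer->batch);

   /* When the generation batch is flushed (end of command buffer,
    * secondary execution or an indirect-read barrier), it jumps back to
    * the main batch. */
   anv_batch_emit(&cmd_buffer->generation_batch, GENX(MI_BATCH_BUFFER_START), bbs) {
      bbs.AddressSpaceIndicator = ASI_PPGTT;
      bbs.BatchBufferStartAddress = cmd_buffer->generation_return_addr;
   }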

View file

@ -84,6 +84,8 @@ def define_tracepoints(args):
Arg(type='enum isl_format', name='src_fmt', var='src_fmt', c_format='%s', to_prim_type='isl_format_get_short_name({})'),
])
begin_end_tp('generate_draws')
begin_end_tp('draw',
tp_args=[Arg(type='uint32_t', var='count', c_format='%u')])
begin_end_tp('draw_multi',

View file

@ -193,6 +193,14 @@ anv_batch_emit_ensure_space(struct anv_batch *batch, uint32_t size)
return VK_SUCCESS;
}
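/* Advance the batch pointer past `size` bytes without writing anything, for
* space that is filled by something other than the CPU (e.g. 3DPRIMITIVE
* packets written by the generated indirect draw shaders). Callers are
* expected to reserve the space first with anv_batch_emit_ensure_space().
*/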
void
anv_batch_advance(struct anv_batch *batch, uint32_t size)
{
assert(batch->next + size <= batch->end);
batch->next += size;
}
struct anv_address
anv_batch_address(struct anv_batch *batch, void *batch_location)
{

View file

@ -72,6 +72,7 @@ static const driOptionDescription anv_dri_options[] = {
DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS(false)
DRI_CONF_ANV_SAMPLE_MASK_OUT_OPENGL_BEHAVIOUR(false)
DRI_CONF_ANV_FP64_WORKAROUND_ENABLED(false)
DRI_CONF_ANV_GENERATED_INDIRECT_THRESHOLD(4)
DRI_CONF_SECTION_END
DRI_CONF_SECTION_DEBUG
@ -923,6 +924,12 @@ anv_physical_device_try_create(struct vk_instance *vk_instance,
if (debug_get_bool_option("ANV_QUEUE_THREAD_DISABLE", false))
device->has_exec_timeline = false;
device->generated_indirect_draws =
device->info.ver >= 11 &&
debug_get_bool_option("ANV_ENABLE_GENERATED_INDIRECT_DRAWS",
true);
unsigned st_idx = 0;
device->sync_syncobj_type = vk_drm_syncobj_get_type(fd);
@ -1104,6 +1111,8 @@ anv_init_dri_options(struct anv_instance *instance)
driQueryOptionf(&instance->dri_options, "lower_depth_range_rate");
instance->fp64_workaround_enabled =
driQueryOptionb(&instance->dri_options, "fp64_workaround_enabled");
instance->generated_indirect_threshold =
driQueryOptioni(&instance->dri_options, "generated_indirect_threshold");
}
VkResult anv_CreateInstance(
@ -3660,6 +3669,8 @@ VkResult anv_CreateDevice(
anv_device_init_border_colors(device);
anv_device_init_generated_indirect_draws(device);
anv_device_perf_init(device);
anv_device_utrace_init(device);
@ -3747,6 +3758,8 @@ void anv_DestroyDevice(
anv_device_finish_rt_shaders(device);
anv_device_finish_generated_indirect_draws(device);
vk_pipeline_cache_destroy(device->internal_cache, NULL);
vk_pipeline_cache_destroy(device->default_pipeline_cache, NULL);

View file

@ -0,0 +1,341 @@
/*
* Copyright © 2022 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_private.h"
#include "compiler/brw_compiler.h"
#include "compiler/brw_nir.h"
#include "compiler/spirv/nir_spirv.h"
#include "dev/intel_debug.h"
#include "util/macros.h"
#include "anv_generated_indirect_draws.h"
#include "shaders/generated_draws_spv.h"
#include "shaders/generated_draws_count_spv.h"
/* This pass takes vulkan descriptor bindings 0 & 1 and turns them into global
* 64bit addresses. Binding 2 is left as a UBO that would normally be accessed
* through the binding table, but it is fully promoted to push constants.
*
* As a result we're not using the binding table at all, which is nice because
* the side command buffer we use for the generation shader does not interact
* with the binding table allocation.
*/
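/* For example, a load_vulkan_descriptor of binding 0 becomes a load_ubo of
* binding 2 at offsetof(struct anv_generate_indirect_params,
* indirect_data_addr): the "descriptor" is simply the 64bit global address
* stored in the push constant block.
*/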
static bool
lower_vulkan_descriptors_instr(nir_builder *b, nir_instr *instr, void *cb_data)
{
if (instr->type != nir_instr_type_intrinsic)
return false;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
if (intrin->intrinsic != nir_intrinsic_load_vulkan_descriptor)
return false;
nir_instr *res_index_instr = intrin->src[0].ssa->parent_instr;
assert(res_index_instr->type == nir_instr_type_intrinsic);
nir_intrinsic_instr *res_index_intrin =
nir_instr_as_intrinsic(res_index_instr);
assert(res_index_intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
b->cursor = nir_after_instr(instr);
nir_ssa_def *desc_value = NULL;
switch (nir_intrinsic_binding(res_index_intrin)) {
case 0: {
desc_value =
nir_load_ubo(b, 1, 64,
nir_imm_int(b, 2),
nir_imm_int(b,
offsetof(struct anv_generate_indirect_params,
indirect_data_addr)),
.align_mul = 8,
.align_offset = 0,
.range_base = 0,
.range = ~0);
desc_value =
nir_vec4(b,
nir_unpack_64_2x32_split_x(b, desc_value),
nir_unpack_64_2x32_split_y(b, desc_value),
nir_imm_int(b, 0),
nir_imm_int(b, 0));
break;
}
case 1: {
desc_value =
nir_load_ubo(b, 1, 64,
nir_imm_int(b, 2),
nir_imm_int(b,
offsetof(struct anv_generate_indirect_params,
generated_cmds_addr)),
.align_mul = 8,
.align_offset = 0,
.range_base = 0,
.range = ~0);
desc_value =
nir_vec4(b,
nir_unpack_64_2x32_split_x(b, desc_value),
nir_unpack_64_2x32_split_y(b, desc_value),
nir_imm_int(b, 0),
nir_imm_int(b, 0));
break;
}
case 2:
desc_value =
nir_vec2(b,
nir_imm_int(b, 2),
nir_imm_int(b, 0));
break;
}
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, desc_value);
return true;
}
static bool
lower_vulkan_descriptors(nir_shader *shader)
{
return nir_shader_instructions_pass(shader,
lower_vulkan_descriptors_instr,
nir_metadata_block_index |
nir_metadata_dominance,
NULL);
}
static struct anv_shader_bin *
compile_upload_spirv(struct anv_device *device,
const void *key,
uint32_t key_size,
const uint32_t *spirv_source,
uint32_t spirv_source_size,
uint32_t sends_count_expectation)
{
struct spirv_to_nir_options spirv_options = {
.caps = {
},
.ubo_addr_format = nir_address_format_32bit_index_offset,
.ssbo_addr_format = nir_address_format_64bit_global_32bit_offset,
.environment = NIR_SPIRV_VULKAN,
.create_library = false,
};
const nir_shader_compiler_options *nir_options =
device->physical->compiler->nir_options[MESA_SHADER_FRAGMENT];
nir_shader* nir =
spirv_to_nir(spirv_source, spirv_source_size,
NULL, 0, MESA_SHADER_FRAGMENT, "main",
&spirv_options, nir_options);
assert(nir != NULL);
nir->info.internal = true;
nir_validate_shader(nir, "after spirv_to_nir");
nir_validate_ssa_dominance(nir, "after spirv_to_nir");
NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
NIR_PASS_V(nir, nir_lower_returns);
NIR_PASS_V(nir, nir_inline_functions);
NIR_PASS_V(nir, nir_opt_deref);
NIR_PASS_V(nir, nir_lower_vars_to_ssa);
NIR_PASS_V(nir, nir_copy_prop);
NIR_PASS_V(nir, nir_opt_dce);
NIR_PASS_V(nir, nir_opt_cse);
NIR_PASS_V(nir, nir_opt_gcm, true);
NIR_PASS_V(nir, nir_opt_peephole_select, 1, false, false);
NIR_PASS_V(nir, nir_opt_dce);
NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);
NIR_PASS_V(nir, nir_split_var_copies);
NIR_PASS_V(nir, nir_split_per_member_structs);
struct brw_compiler *compiler = device->physical->compiler;
struct brw_nir_compiler_opts opts = {};
brw_preprocess_nir(compiler, nir, &opts);
NIR_PASS_V(nir, nir_propagate_invariant, false);
NIR_PASS_V(nir, nir_lower_input_attachments,
&(nir_input_attachment_options) {
.use_fragcoord_sysval = true,
.use_layer_id_sysval = true,
});
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
/* Do vectorizing here. For some reason, when trying to do it in the backend,
* this just isn't working.
*/
nir_load_store_vectorize_options options = {
.modes = nir_var_mem_ubo | nir_var_mem_ssbo,
.callback = brw_nir_should_vectorize_mem,
.robust_modes = (nir_variable_mode)0,
};
NIR_PASS_V(nir, nir_opt_load_store_vectorize, &options);
NIR_PASS_V(nir, lower_vulkan_descriptors);
NIR_PASS_V(nir, nir_opt_dce);
NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo,
nir_address_format_32bit_index_offset);
NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
nir_address_format_64bit_global_32bit_offset);
NIR_PASS_V(nir, nir_copy_prop);
NIR_PASS_V(nir, nir_opt_constant_folding);
NIR_PASS_V(nir, nir_opt_dce);
struct brw_wm_prog_key wm_key;
memset(&wm_key, 0, sizeof(wm_key));
struct brw_wm_prog_data wm_prog_data = {
.base.nr_params = nir->num_uniforms / 4,
};
brw_nir_analyze_ubo_ranges(compiler, nir, NULL, wm_prog_data.base.ubo_ranges);
struct brw_compile_stats stats[3];
struct brw_compile_fs_params params = {
.nir = nir,
.key = &wm_key,
.prog_data = &wm_prog_data,
.stats = stats,
.log_data = device,
.debug_flag = DEBUG_WM,
};
const unsigned *program = brw_compile_fs(compiler, nir, &params);
if (wm_prog_data.dispatch_8) {
assert(stats[0].spills == 0);
assert(stats[0].fills == 0);
assert(stats[0].sends == sends_count_expectation);
}
if (wm_prog_data.dispatch_16) {
assert(stats[1].spills == 0);
assert(stats[1].fills == 0);
assert(stats[1].sends == sends_count_expectation);
}
if (wm_prog_data.dispatch_32) {
assert(stats[2].spills == 0);
assert(stats[2].fills == 0);
assert(stats[2].sends == sends_count_expectation);
}
struct anv_pipeline_bind_map bind_map;
memset(&bind_map, 0, sizeof(bind_map));
struct anv_push_descriptor_info push_desc_info = {};
struct anv_shader_bin *kernel =
anv_device_upload_kernel(device,
device->internal_cache,
nir->info.stage,
key, key_size, program,
wm_prog_data.base.program_size,
&wm_prog_data.base, sizeof(wm_prog_data),
NULL, 0, NULL, &bind_map,
&push_desc_info);
ralloc_free(nir);
return kernel;
}
VkResult
anv_device_init_generated_indirect_draws(struct anv_device *device)
{
if (device->info->ver < 11)
return VK_SUCCESS;
const struct intel_l3_weights w =
intel_get_default_l3_weights(device->info,
true /* wants_dc_cache */,
false /* needs_slm */);
device->generated_draw_l3_config = intel_get_l3_config(device->info, w);
struct {
char name[40];
} indirect_draws_key = {
.name = "anv-generated-indirect-draws",
}, indirect_draws_count_key = {
.name = "anv-generated-indirect-draws-count",
};
device->generated_draw_kernel =
anv_device_search_for_kernel(device,
device->internal_cache,
&indirect_draws_key,
sizeof(indirect_draws_key),
NULL);
if (device->generated_draw_kernel == NULL) {
device->generated_draw_kernel =
compile_upload_spirv(device,
&indirect_draws_key,
sizeof(indirect_draws_key),
generated_draws_spv_source,
ARRAY_SIZE(generated_draws_spv_source),
10 /* 2 * (2 loads + 3 stores) */);
}
if (device->generated_draw_kernel == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
/* The cache already has a reference and it's not going anywhere so there
* is no need to hold a second reference.
*/
anv_shader_bin_unref(device, device->generated_draw_kernel);
device->generated_draw_count_kernel =
anv_device_search_for_kernel(device,
device->internal_cache,
&indirect_draws_count_key,
sizeof(indirect_draws_count_key),
NULL);
if (device->generated_draw_count_kernel == NULL) {
device->generated_draw_count_kernel =
compile_upload_spirv(device,
&indirect_draws_count_key,
sizeof(indirect_draws_count_key),
generated_draws_count_spv_source,
ARRAY_SIZE(generated_draws_count_spv_source),
11 /* 2 * (3 loads + 3 stores) */);
}
if (device->generated_draw_count_kernel == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
/* The cache already has a reference and it's not going anywhere so there
* is no need to hold a second reference.
*/
anv_shader_bin_unref(device, device->generated_draw_count_kernel);
return VK_SUCCESS;
}
void
anv_device_finish_generated_indirect_draws(struct anv_device *device)
{
}

View file

@ -0,0 +1,71 @@
/*
* Copyright © 2022 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef ANV_GENERATED_INDIRECT_DRAWS_H
#define ANV_GENERATED_INDIRECT_DRAWS_H
#include <stdint.h>
/* This needs to match generated_draws.glsl :
*
* layout(set = 0, binding = 2) uniform block
*/
struct anv_generated_indirect_draw_params {
uint32_t is_indexed;
uint32_t is_predicated;
uint32_t draw_base;
uint32_t draw_count;
uint32_t instance_multiplier;
uint32_t indirect_data_stride;
};
/* This needs to match generated_draws_count.glsl :
*
* layout(set = 0, binding = 2) uniform block
*/
struct anv_generated_indirect_draw_count_params {
uint32_t is_indexed;
uint32_t is_predicated;
uint32_t draw_base;
uint32_t item_count;
uint32_t draw_count;
uint32_t instance_multiplier;
uint32_t indirect_data_stride;
uint32_t end_addr_ldw;
uint32_t end_addr_udw;
};
struct anv_generate_indirect_params {
union {
struct anv_generated_indirect_draw_params draw;
struct anv_generated_indirect_draw_count_params draw_count;
};
/* Global address of binding 0 */
uint64_t indirect_data_addr;
/* Global address of binding 1 */
uint64_t generated_cmds_addr;
};
#endif /* ANV_GENERATED_INDIRECT_DRAWS_H */

View file

@ -31,7 +31,7 @@
#include "nir/nir_xfb_info.h"
#include "vulkan/util/vk_util.h"
#include "compiler/spirv/nir_spirv.h"
#include "float64_spv.h"
#include "shaders/float64_spv.h"
static bool
anv_shader_bin_serialize(struct vk_pipeline_cache_object *object,

View file

@ -1009,6 +1009,15 @@ struct anv_physical_device {
bool always_flush_cache;
/**
* True if the generated indirect draw optimization is turned on.
*
* This optimization is currently only available on Gfx11+ to avoid
* dealing with the annoying Gfx8/9 tracking of vertex buffer for the VF
* cache workaround.
*/
bool generated_indirect_draws;
struct {
uint32_t family_count;
struct anv_queue_family families[ANV_MAX_QUEUE_FAMILIES];
@ -1075,6 +1084,7 @@ struct anv_instance {
bool sample_mask_out_opengl_behaviour;
bool fp64_workaround_enabled;
float lower_depth_range_rate;
unsigned generated_indirect_threshold;
};
VkResult anv_init_wsi(struct anv_physical_device *physical_device);
@ -1241,6 +1251,15 @@ struct anv_device {
enum anv_rt_bvh_build_method bvh_build_method;
/** Draw generation shader
*
* Generates direct draw calls out of indirect parameters. Used to
* workaround slowness with indirect draw calls.
*/
struct anv_shader_bin *generated_draw_kernel;
struct anv_shader_bin *generated_draw_count_kernel;
const struct intel_l3_config *generated_draw_l3_config;
pthread_mutex_t mutex;
pthread_cond_t queue_submit;
@ -1462,6 +1481,7 @@ struct anv_batch {
void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords);
VkResult anv_batch_emit_ensure_space(struct anv_batch *batch, uint32_t size);
void anv_batch_advance(struct anv_batch *batch, uint32_t size);
void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other);
struct anv_address anv_batch_address(struct anv_batch *batch, void *batch_location);
@ -2887,6 +2907,13 @@ void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer);
void anv_cmd_emit_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer);
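/* Number of views enabled in the current view mask, or 1 when multiview is
* not in use.
*/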
static inline unsigned
anv_cmd_buffer_get_view_count(struct anv_cmd_buffer *cmd_buffer)
{
struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
return MAX2(1, util_bitcount(gfx->view_mask));
}
enum anv_bo_sync_state {
/** Indicates that this is a new (or newly reset fence) */
ANV_BO_SYNC_STATE_RESET,
@ -4139,6 +4166,18 @@ struct anv_memcpy_state {
struct anv_vb_cache_range vb_dirty;
};
VkResult
anv_device_init_generated_indirect_draws(struct anv_device *device);
void
anv_device_finish_generated_indirect_draws(struct anv_device *device);
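/* The generated draw path is used only when supported by the physical device
* (Gfx11+ and not disabled with ANV_ENABLE_GENERATED_INDIRECT_DRAWS) and when
* the draw count reaches the generated_indirect_threshold drirc option
* (4 by default).
*/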
static inline bool anv_use_generated_draws(const struct anv_device *device,
uint32_t count)
{
return device->physical->generated_indirect_draws &&
count >= device->physical->instance->generated_indirect_threshold;
}
struct anv_utrace_flush_copy {
/* Needs to be the first field */
struct intel_ds_flush_data ds;

View file

@ -3413,6 +3413,11 @@ genX(cmd_buffer_flush_gfx_state)(struct anv_cmd_buffer *cmd_buffer)
genX(cmd_buffer_flush_dynamic_state)(cmd_buffer);
}
#define GFX_HAS_GENERATED_CMDS (GFX_VER >= 11)
#if GFX_VER >= 11
#include "genX_cmd_draw_generated_indirect.h"
#endif
VkResult
genX(BeginCommandBuffer)(
VkCommandBuffer commandBuffer,
@ -3618,6 +3623,10 @@ genX(EndCommandBuffer)(
anv_measure_endcommandbuffer(cmd_buffer);
#if GFX_HAS_GENERATED_CMDS
genX(cmd_buffer_flush_generated_draws)(cmd_buffer);
#endif
/* We want every command buffer to start with the PMA fix in a known state,
* so we disable it at the end of the command buffer.
*/
@ -3657,6 +3666,10 @@ genX(CmdExecuteCommands)(
*/
genX(cmd_buffer_apply_pipe_flushes)(primary);
#if GFX_HAS_GENERATED_CMDS
genX(cmd_buffer_flush_generated_draws)(primary);
#endif
for (uint32_t i = 0; i < commandBufferCount; i++) {
ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]);
@ -3819,6 +3832,11 @@ cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer,
anv_pipe_flush_bits_for_access_flags(cmd_buffer->device, src_flags) |
anv_pipe_invalidate_bits_for_access_flags(cmd_buffer->device, dst_flags);
#if GFX_HAS_GENERATED_CMDS
if (dst_flags & VK_ACCESS_INDIRECT_COMMAND_READ_BIT)
genX(cmd_buffer_flush_generated_draws)(cmd_buffer);
#endif
anv_add_pending_pipe_bits(cmd_buffer, bits, reason);
}
@ -4442,9 +4460,24 @@ void genX(CmdDrawIndirect)(
drawCount);
trace_intel_begin_draw_indirect(&cmd_buffer->trace);
#if GFX_HAS_GENERATED_CMDS
if (anv_use_generated_draws(cmd_buffer->device, drawCount)) {
genX(cmd_buffer_emit_indirect_generated_draws)(
cmd_buffer,
anv_address_add(buffer->address, offset),
MAX2(stride, sizeof(VkDrawIndirectCommand)),
drawCount,
false /* indexed */);
} else {
emit_indirect_draws(cmd_buffer,
anv_address_add(buffer->address, offset),
stride, drawCount, false /* indexed */);
}
#else
emit_indirect_draws(cmd_buffer,
anv_address_add(buffer->address, offset),
stride, drawCount, false /* indexed */);
#endif
trace_intel_end_draw_indirect(&cmd_buffer->trace, drawCount);
}
@ -4468,9 +4501,24 @@ void genX(CmdDrawIndexedIndirect)(
drawCount);
trace_intel_begin_draw_indexed_indirect(&cmd_buffer->trace);
#if GFX_HAS_GENERATED_CMDS
if (anv_use_generated_draws(cmd_buffer->device, drawCount)) {
genX(cmd_buffer_emit_indirect_generated_draws)(
cmd_buffer,
anv_address_add(buffer->address, offset),
MAX2(stride, sizeof(VkDrawIndexedIndirectCommand)),
drawCount,
true /* indexed */);
} else {
emit_indirect_draws(cmd_buffer,
anv_address_add(buffer->address, offset),
stride, drawCount, true /* indexed */);
}
#else
emit_indirect_draws(cmd_buffer,
anv_address_add(buffer->address, offset),
stride, drawCount, true /* indexed */);
#endif
trace_intel_end_draw_indexed_indirect(&cmd_buffer->trace, drawCount);
}
@ -4643,12 +4691,37 @@ void genX(CmdDrawIndirectCount)(
0);
trace_intel_begin_draw_indirect_count(&cmd_buffer->trace);
emit_indirect_count_draws(cmd_buffer,
anv_address_add(buffer->address, offset),
MAX2(stride, sizeof(VkDrawIndirectCommand)),
anv_address_add(count_buffer->address, countBufferOffset),
struct anv_address indirect_data_address =
anv_address_add(buffer->address, offset);
struct anv_address count_address =
anv_address_add(count_buffer->address, countBufferOffset);
stride = MAX2(stride, sizeof(VkDrawIndirectCommand));
#if GFX_HAS_GENERATED_CMDS
if (anv_use_generated_draws(cmd_buffer->device, maxDrawCount)) {
genX(cmd_buffer_emit_indirect_generated_draws_count)(
cmd_buffer,
indirect_data_address,
stride,
count_address,
maxDrawCount,
false /* indexed */);
} else {
emit_indirect_count_draws(cmd_buffer,
indirect_data_address,
stride,
count_address,
maxDrawCount,
false /* indexed */);
}
#else
emit_indirect_count_draws(cmd_buffer,
indirect_data_address,
stride,
count_address,
maxDrawCount,
false /* indexed */);
#endif
trace_intel_end_draw_indirect_count(&cmd_buffer->trace, maxDrawCount);
}
@ -4675,12 +4748,37 @@ void genX(CmdDrawIndexedIndirectCount)(
0);
trace_intel_begin_draw_indexed_indirect_count(&cmd_buffer->trace);
emit_indirect_count_draws(cmd_buffer,
anv_address_add(buffer->address, offset),
MAX2(stride, sizeof(VkDrawIndirectCommand)),
anv_address_add(count_buffer->address, countBufferOffset),
struct anv_address indirect_data_address =
anv_address_add(buffer->address, offset);
struct anv_address count_address =
anv_address_add(count_buffer->address, countBufferOffset);
stride = MAX2(stride, sizeof(VkDrawIndexedIndirectCommand));
#if GFX_HAS_GENERATED_CMDS
if (anv_use_generated_draws(cmd_buffer->device, maxDrawCount)) {
genX(cmd_buffer_emit_indirect_generated_draws_count)(
cmd_buffer,
indirect_data_address,
stride,
count_address,
maxDrawCount,
true /* indexed */);
} else {
emit_indirect_count_draws(cmd_buffer,
indirect_data_address,
stride,
count_address,
maxDrawCount,
true /* indexed */);
}
#else
emit_indirect_count_draws(cmd_buffer,
indirect_data_address,
stride,
count_address,
maxDrawCount,
true /* indexed */);
#endif
trace_intel_end_draw_indexed_indirect_count(&cmd_buffer->trace, maxDrawCount);

View file

@ -0,0 +1,704 @@
/*
* Copyright © 2022 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef GENX_CMD_GENERATED_INDIRECT_DRAW_H
#define GENX_CMD_GENERATED_INDIRECT_DRAW_H
#include <assert.h>
#include <stdbool.h>
#include "util/macros.h"
#include "anv_private.h"
#include "anv_generated_indirect_draws.h"
#if GFX_VER < 11
#error "Generated draws optimization not supported prior to Gfx11"
#endif
/* This is the maximum number of items a fragment shader can generate, due to
* the viewport size.
*/
#define MAX_GENERATED_DRAW_COUNT (8192 * 8192)
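/* The generation shader is dispatched as a RECTLIST covering
* MIN2(item_count, 8192) x DIV_ROUND_UP(item_count, 8192) pixels. Each
* fragment derives its item from gl_FragCoord (item_idx = y * 8192 + x) and
* writes one 3DPRIMITIVE. For example, 20000 draws use an 8192x3 rectangle.
*/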
static void
genX(cmd_buffer_emit_generate_draws_pipeline)(struct anv_cmd_buffer *cmd_buffer)
{
struct anv_batch *batch = &cmd_buffer->generation_batch;
struct anv_device *device = cmd_buffer->device;
const struct anv_shader_bin *draw_kernel = device->generated_draw_kernel;
const struct brw_wm_prog_data *prog_data =
brw_wm_prog_data_const(draw_kernel->prog_data);
uint32_t *dw = anv_batch_emitn(batch,
1 + 2 * GENX(VERTEX_ELEMENT_STATE_length),
GENX(3DSTATE_VERTEX_ELEMENTS));
/* You might think there is some shady stuff going on here and you would be
* right. We're setting up 2 VERTEX_ELEMENT_STATE yet we're only providing
* 1 (positions) VERTEX_BUFFER_STATE later.
*
* You can find more about how to set up a 3D pipeline with a fragment shader
* but without a vertex shader in blorp_emit_vertex_elements() in
* blorp_genX_exec.h.
*/
GENX(VERTEX_ELEMENT_STATE_pack)(
batch, dw + 1, &(struct GENX(VERTEX_ELEMENT_STATE)) {
.VertexBufferIndex = 1,
.Valid = true,
.SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT,
.SourceElementOffset = 0,
.Component0Control = VFCOMP_STORE_SRC,
.Component1Control = VFCOMP_STORE_0,
.Component2Control = VFCOMP_STORE_0,
.Component3Control = VFCOMP_STORE_0,
});
GENX(VERTEX_ELEMENT_STATE_pack)(
batch, dw + 3, &(struct GENX(VERTEX_ELEMENT_STATE)) {
.VertexBufferIndex = 0,
.Valid = true,
.SourceElementFormat = ISL_FORMAT_R32G32B32_FLOAT,
.SourceElementOffset = 0,
.Component0Control = VFCOMP_STORE_SRC,
.Component1Control = VFCOMP_STORE_SRC,
.Component2Control = VFCOMP_STORE_SRC,
.Component3Control = VFCOMP_STORE_1_FP,
});
anv_batch_emit(batch, GENX(3DSTATE_VF_STATISTICS), vf);
anv_batch_emit(batch, GENX(3DSTATE_VF_SGVS), sgvs) {
sgvs.InstanceIDEnable = true;
sgvs.InstanceIDComponentNumber = COMP_1;
sgvs.InstanceIDElementOffset = 0;
}
anv_batch_emit(batch, GENX(3DSTATE_VF_SGVS_2), sgvs);
anv_batch_emit(batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
vfi.InstancingEnable = false;
vfi.VertexElementIndex = 0;
}
anv_batch_emit(batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
vfi.InstancingEnable = false;
vfi.VertexElementIndex = 1;
}
anv_batch_emit(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) {
topo.PrimitiveTopologyType = _3DPRIM_RECTLIST;
}
/* Emit URB setup. We tell it that the VS is active because we want it to
* allocate space for the VS. Even though one isn't run, we need VUEs to
* store the data that VF is going to pass to SOL.
*/
const unsigned entry_size[4] = { DIV_ROUND_UP(32, 64), 1, 1, 1 };
genX(emit_l3_config)(batch, device, device->generated_draw_l3_config);
cmd_buffer->state.current_l3_config = device->generated_draw_l3_config;
genX(emit_urb_setup)(device, batch, device->generated_draw_l3_config,
VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
entry_size, NULL);
anv_batch_emit(batch, GENX(3DSTATE_PS_BLEND), ps_blend) {
ps_blend.HasWriteableRT = true;
}
anv_batch_emit(batch, GENX(3DSTATE_WM_DEPTH_STENCIL), wm);
#if GFX_VER >= 12
anv_batch_emit(batch, GENX(3DSTATE_DEPTH_BOUNDS), db) {
db.DepthBoundsTestEnable = false;
db.DepthBoundsTestMinValue = 0.0;
db.DepthBoundsTestMaxValue = 1.0;
}
#endif
anv_batch_emit(batch, GENX(3DSTATE_MULTISAMPLE), ms);
anv_batch_emit(batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
sm.SampleMask = 0x1;
}
anv_batch_emit(batch, GENX(3DSTATE_VS), vs);
anv_batch_emit(batch, GENX(3DSTATE_HS), hs);
anv_batch_emit(batch, GENX(3DSTATE_TE), te);
anv_batch_emit(batch, GENX(3DSTATE_DS), DS);
anv_batch_emit(batch, GENX(3DSTATE_STREAMOUT), so);
anv_batch_emit(batch, GENX(3DSTATE_GS), gs);
anv_batch_emit(batch, GENX(3DSTATE_CLIP), clip) {
clip.PerspectiveDivideDisable = true;
}
anv_batch_emit(batch, GENX(3DSTATE_SF), sf) {
#if GFX_VER >= 12
sf.DerefBlockSize = INTEL_URB_DEREF_BLOCK_SIZE_32; // TODO
#endif
}
anv_batch_emit(batch, GENX(3DSTATE_RASTER), raster) {
raster.CullMode = CULLMODE_NONE;
}
anv_batch_emit(batch, GENX(3DSTATE_SBE), sbe) {
sbe.VertexURBEntryReadOffset = 1;
sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
sbe.VertexURBEntryReadLength = MAX2((prog_data->num_varying_inputs + 1) / 2, 1);
sbe.ConstantInterpolationEnable = prog_data->flat_inputs;
sbe.ForceVertexURBEntryReadLength = true;
sbe.ForceVertexURBEntryReadOffset = true;
for (unsigned i = 0; i < 32; i++)
sbe.AttributeActiveComponentFormat[i] = ACF_XYZW;
}
anv_batch_emit(batch, GENX(3DSTATE_WM), wm) {
//wm.ForceThreadDispatchEnable = ForceON;
}
anv_batch_emit(batch, GENX(3DSTATE_PS_EXTRA), psx) {
psx.PixelShaderValid = true;
psx.AttributeEnable = prog_data->num_varying_inputs > 0;
psx.PixelShaderIsPerSample = prog_data->persample_dispatch;
psx.PixelShaderComputedDepthMode = prog_data->computed_depth_mode;
psx.PixelShaderComputesStencil = prog_data->computed_stencil;
}
anv_batch_emit(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) {
struct anv_state cc_state =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 4 * GENX(CC_VIEWPORT_length), 32);
struct GENX(CC_VIEWPORT) cc_viewport = {
.MinimumDepth = 0.0f,
.MaximumDepth = 1.0f,
};
GENX(CC_VIEWPORT_pack)(NULL, cc_state.map, &cc_viewport);
cc.CCViewportPointer = cc_state.offset;
}
#if GFX_VER >= 12
/* Disable Primitive Replication. */
anv_batch_emit(batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr);
#endif
anv_batch_emit(batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS), alloc);
anv_batch_emit(batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_HS), alloc);
anv_batch_emit(batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_DS), alloc);
anv_batch_emit(batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_GS), alloc);
anv_batch_emit(batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_PS), alloc) {
alloc.ConstantBufferOffset = 0;
alloc.ConstantBufferSize = cmd_buffer->device->info->max_constant_urb_size_kb;
}
#if GFX_VERx10 == 125
/* DG2: Wa_22011440098
* MTL: Wa_18022330953
*
* In 3D mode, after programming push constant alloc command immediately
* program push constant command(ZERO length) without any commit between
* them.
*/
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_ALL), c) {
/* Update empty push constants for all stages (bitmask = 11111b) */
c.ShaderUpdateEnable = 0x1f;
c.MOCS = anv_mocs(cmd_buffer->device, NULL, 0);
}
#endif
cmd_buffer->state.gfx.vb_dirty = BITFIELD_BIT(0) | BITFIELD_BIT(1);
cmd_buffer->state.gfx.dirty |= ~(ANV_CMD_DIRTY_INDEX_BUFFER |
ANV_CMD_DIRTY_XFB_ENABLE);
cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS;
cmd_buffer->state.gfx.push_constant_stages = VK_SHADER_STAGE_FRAGMENT_BIT;
vk_dynamic_graphics_state_dirty_all(&cmd_buffer->vk.dynamic_graphics_state);
anv_add_pending_pipe_bits(cmd_buffer,
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT,
"after generation batch BTI change");
}
static void
genX(cmd_buffer_emit_generate_draws_vertex)(struct anv_cmd_buffer *cmd_buffer,
uint32_t draw_count)
{
struct anv_batch *batch = &cmd_buffer->generation_batch;
struct anv_state vs_data_state =
anv_cmd_buffer_alloc_dynamic_state(
cmd_buffer, 9 * sizeof(uint32_t), 32);
float x0 = 0.0f, x1 = MIN2(draw_count, 8192);
float y0 = 0.0f, y1 = DIV_ROUND_UP(draw_count, 8192);
float z = 0.0f;
float *vertices = vs_data_state.map;
vertices[0] = x1; vertices[1] = y1; vertices[2] = z; /* v0 */
vertices[3] = x0; vertices[4] = y1; vertices[5] = z; /* v1 */
vertices[6] = x0; vertices[7] = y0; vertices[8] = z; /* v2 */
uint32_t *dw = anv_batch_emitn(batch,
1 + GENX(VERTEX_BUFFER_STATE_length),
GENX(3DSTATE_VERTEX_BUFFERS));
GENX(VERTEX_BUFFER_STATE_pack)(batch, dw + 1,
&(struct GENX(VERTEX_BUFFER_STATE)) {
.VertexBufferIndex = 0,
.AddressModifyEnable = true,
.BufferStartingAddress = (struct anv_address) {
.bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
.offset = vs_data_state.offset,
},
.BufferPitch = 3 * sizeof(float),
.BufferSize = 9 * sizeof(float),
.MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
#if GFX_VER >= 12
.L3BypassDisable = true,
#endif
});
}
static struct anv_state
genX(cmd_buffer_alloc_generated_push_data)(struct anv_cmd_buffer *cmd_buffer)
{
return anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
sizeof(struct anv_generate_indirect_params),
ANV_UBO_ALIGNMENT);
}
static struct anv_state
genX(cmd_buffer_emit_generated_push_data)(struct anv_cmd_buffer *cmd_buffer,
struct anv_state push_data_state)
{
struct anv_batch *batch = &cmd_buffer->generation_batch;
struct anv_address push_data_addr = anv_state_pool_state_address(
&cmd_buffer->device->dynamic_state_pool, push_data_state);
#if GFX_VER >= 12
const uint32_t num_dwords = GENX(3DSTATE_CONSTANT_ALL_length) +
GENX(3DSTATE_CONSTANT_ALL_DATA_length);
uint32_t *dw =
anv_batch_emitn(batch, num_dwords,
GENX(3DSTATE_CONSTANT_ALL),
.ShaderUpdateEnable = BITFIELD_BIT(MESA_SHADER_FRAGMENT),
.PointerBufferMask = 0x1,
.MOCS = anv_mocs(cmd_buffer->device, NULL, 0));
GENX(3DSTATE_CONSTANT_ALL_DATA_pack)(
batch, dw + GENX(3DSTATE_CONSTANT_ALL_length),
&(struct GENX(3DSTATE_CONSTANT_ALL_DATA)) {
.PointerToConstantBuffer = push_data_addr,
.ConstantBufferReadLength = DIV_ROUND_UP(push_data_state.alloc_size, 32),
});
#else
anv_batch_emit(batch, GENX(3DSTATE_CONSTANT_PS), c) {
c.MOCS = anv_mocs(cmd_buffer->device, NULL, 0);
c.ConstantBody.ReadLength[0] = DIV_ROUND_UP(push_data_state.alloc_size, 32);
c.ConstantBody.Buffer[0] = push_data_addr;
}
#endif
return push_data_state;
}
static void
genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer,
struct anv_address generated_cmds_addr,
uint32_t generated_cmds_size,
struct anv_address indirect_data_addr,
uint32_t indirect_data_stride,
uint32_t item_base,
uint32_t item_count,
bool indexed)
{
struct anv_device *device = cmd_buffer->device;
struct anv_batch *batch = &cmd_buffer->generation_batch;
const struct anv_shader_bin *draw_kernel = device->generated_draw_kernel;
const struct brw_wm_prog_data *prog_data =
brw_wm_prog_data_const(draw_kernel->prog_data);
anv_batch_emit(batch, GENX(3DSTATE_PS), ps) {
ps.BindingTableEntryCount = 2;
ps.PushConstantEnable = prog_data->base.nr_params > 0 ||
prog_data->base.ubo_ranges[0].length;
ps._8PixelDispatchEnable = prog_data->dispatch_8;
ps._16PixelDispatchEnable = prog_data->dispatch_16;
ps._32PixelDispatchEnable = prog_data->dispatch_32;
ps.DispatchGRFStartRegisterForConstantSetupData0 =
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0);
ps.DispatchGRFStartRegisterForConstantSetupData1 =
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 1);
ps.DispatchGRFStartRegisterForConstantSetupData2 =
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 2);
ps.KernelStartPointer0 = draw_kernel->kernel.offset +
brw_wm_prog_data_prog_offset(prog_data, ps, 0);
ps.KernelStartPointer1 = draw_kernel->kernel.offset +
brw_wm_prog_data_prog_offset(prog_data, ps, 1);
ps.KernelStartPointer2 = draw_kernel->kernel.offset +
brw_wm_prog_data_prog_offset(prog_data, ps, 2);
ps.MaximumNumberofThreadsPerPSD = device->info->max_threads_per_psd - 1;
}
genX(cmd_buffer_emit_generate_draws_vertex)(cmd_buffer, item_count);
struct anv_state push_data_state =
genX(cmd_buffer_alloc_generated_push_data)(cmd_buffer);
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
struct anv_generate_indirect_params *push_data = push_data_state.map;
*push_data = (struct anv_generate_indirect_params) {
.draw = {
.is_indexed = indexed,
.is_predicated = cmd_buffer->state.conditional_render_enabled,
.draw_base = item_base,
.draw_count = item_count,
.instance_multiplier = pipeline->instance_multiplier,
.indirect_data_stride = indirect_data_stride,
},
.indirect_data_addr = anv_address_physical(indirect_data_addr),
.generated_cmds_addr = anv_address_physical(generated_cmds_addr),
};
genX(cmd_buffer_emit_generated_push_data)(cmd_buffer, push_data_state);
anv_batch_emit(batch, GENX(3DPRIMITIVE), prim) {
prim.VertexAccessType = SEQUENTIAL;
prim.PrimitiveTopologyType = _3DPRIM_RECTLIST;
prim.VertexCountPerInstance = 3;
prim.InstanceCount = 1;
}
}
static void
genX(cmd_buffer_emit_indirect_generated_draws_init)(struct anv_cmd_buffer *cmd_buffer)
{
#if GFX_VER >= 12
anv_batch_emit(&cmd_buffer->batch, GENX(MI_ARB_CHECK), arb) {
arb.PreParserDisableMask = true;
arb.PreParserDisable = true;
}
#endif
anv_batch_emit_ensure_space(&cmd_buffer->generation_batch, 4);
trace_intel_begin_generate_draws(&cmd_buffer->trace);
anv_batch_emit(&cmd_buffer->batch, GENX(MI_BATCH_BUFFER_START), bbs) {
bbs.AddressSpaceIndicator = ASI_PPGTT;
bbs.BatchBufferStartAddress =
anv_batch_current_address(&cmd_buffer->generation_batch);
}
cmd_buffer->generation_return_addr = anv_batch_current_address(&cmd_buffer->batch);
trace_intel_end_generate_draws(&cmd_buffer->trace);
genX(cmd_buffer_emit_generate_draws_pipeline)(cmd_buffer);
}
static void
genX(cmd_buffer_emit_indirect_generated_draws)(struct anv_cmd_buffer *cmd_buffer,
struct anv_address indirect_data_addr,
uint32_t indirect_data_stride,
uint32_t draw_count,
bool indexed)
{
genX(flush_pipeline_select_3d)(cmd_buffer);
/* Apply the pipeline flush here so the indirect data is available for the
* generation shader.
*/
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
if (anv_address_is_null(cmd_buffer->generation_return_addr))
genX(cmd_buffer_emit_indirect_generated_draws_init)(cmd_buffer);
/* In order to have the vertex fetch gather the data we need to have a
* non-zero stride. It's possible for the application to provide a 0 stride
* when draw_count is 1, but we need a correct value for
* VERTEX_BUFFER_STATE::BufferPitch, so ensure the caller sets this
* correctly:
*
* Vulkan spec, vkCmdDrawIndirect:
*
* "If drawCount is less than or equal to one, stride is ignored."
*/
assert(indirect_data_stride > 0);
if (cmd_buffer->state.conditional_render_enabled)
genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
/* Emit the 3D state in the main batch. */
genX(cmd_buffer_flush_gfx_state)(cmd_buffer);
const uint32_t draw_cmd_stride = 4 * GENX(3DPRIMITIVE_EXTENDED_length);
uint32_t item_base = 0;
while (item_base < draw_count) {
const uint32_t item_count = MIN2(draw_count - item_base,
MAX_GENERATED_DRAW_COUNT);
const uint32_t draw_cmd_size = item_count * draw_cmd_stride;
/* Ensure we have enough contiguous space for all the draws so that the
* generation shader can edit all the 3DPRIMITIVEs from a single base
* address.
*
* TODO: we might have to split that if the amount of space is too large
* (at 1MB?).
*/
VkResult result = anv_batch_emit_ensure_space(&cmd_buffer->batch,
draw_cmd_size);
if (result != VK_SUCCESS)
return;
genX(cmd_buffer_emit_generate_draws)(
cmd_buffer,
anv_batch_current_address(&cmd_buffer->batch),
draw_cmd_size,
indirect_data_addr,
indirect_data_stride,
item_base,
item_count,
indexed);
anv_batch_advance(&cmd_buffer->batch, draw_cmd_size);
item_base += item_count;
}
}
static void
genX(cmd_buffer_emit_generate_draws_count)(struct anv_cmd_buffer *cmd_buffer,
struct anv_address generated_cmds_addr,
uint32_t generated_cmds_size,
struct anv_address indirect_data_addr,
uint32_t indirect_data_stride,
uint32_t item_base,
uint32_t item_count,
struct anv_address count_addr,
bool indexed)
{
struct anv_device *device = cmd_buffer->device;
struct anv_batch *batch = &cmd_buffer->generation_batch;
const struct anv_shader_bin *draw_kernel =
device->generated_draw_count_kernel;
const struct brw_wm_prog_data *prog_data =
brw_wm_prog_data_const(draw_kernel->prog_data);
anv_batch_emit(batch, GENX(3DSTATE_PS), ps) {
ps.BindingTableEntryCount = 2;
ps.PushConstantEnable = prog_data->base.nr_params > 0 ||
prog_data->base.ubo_ranges[0].length;
ps._8PixelDispatchEnable = prog_data->dispatch_8;
ps._16PixelDispatchEnable = prog_data->dispatch_16;
ps._32PixelDispatchEnable = prog_data->dispatch_32;
ps.DispatchGRFStartRegisterForConstantSetupData0 =
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0);
ps.DispatchGRFStartRegisterForConstantSetupData1 =
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 1);
ps.DispatchGRFStartRegisterForConstantSetupData2 =
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 2);
ps.KernelStartPointer0 = draw_kernel->kernel.offset +
brw_wm_prog_data_prog_offset(prog_data, ps, 0);
ps.KernelStartPointer1 = draw_kernel->kernel.offset +
brw_wm_prog_data_prog_offset(prog_data, ps, 1);
ps.KernelStartPointer2 = draw_kernel->kernel.offset +
brw_wm_prog_data_prog_offset(prog_data, ps, 2);
ps.MaximumNumberofThreadsPerPSD = device->info->max_threads_per_psd - 1;
}
genX(cmd_buffer_emit_generate_draws_vertex)(cmd_buffer, item_count);
struct anv_state push_data_state =
genX(cmd_buffer_alloc_generated_push_data)(cmd_buffer);
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
uint64_t end_cmd_addr =
anv_address_physical(
anv_address_add(generated_cmds_addr, generated_cmds_size));
struct anv_generate_indirect_params *push_data = push_data_state.map;
*push_data = (struct anv_generate_indirect_params) {
.draw_count = {
.is_indexed = indexed,
.is_predicated = cmd_buffer->state.conditional_render_enabled,
.draw_base = item_base,
.item_count = item_count,
.draw_count = 0, // Edited below by the command streamer (mi_memcpy)
.instance_multiplier = pipeline->instance_multiplier,
.indirect_data_stride = indirect_data_stride,
.end_addr_ldw = end_cmd_addr & 0xffffffff,
.end_addr_udw = end_cmd_addr >> 32,
},
.indirect_data_addr = anv_address_physical(indirect_data_addr),
.generated_cmds_addr = anv_address_physical(generated_cmds_addr),
};
/* Copy the draw count into the push constants so that the generation shader
* gets the value straight away and doesn't even need to access memory.
*/
struct mi_builder b;
mi_builder_init(&b, cmd_buffer->device->info, batch);
mi_memcpy(&b,
anv_address_add((struct anv_address) {
.bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
.offset = push_data_state.offset,
},
offsetof(struct anv_generate_indirect_params, draw_count.draw_count)),
count_addr, 4);
/* Only emit the data after the memcpy above. */
genX(cmd_buffer_emit_generated_push_data)(cmd_buffer, push_data_state);
anv_batch_emit(batch, GENX(3DPRIMITIVE), prim) {
prim.VertexAccessType = SEQUENTIAL;
prim.PrimitiveTopologyType = _3DPRIM_RECTLIST;
prim.VertexCountPerInstance = 3;
prim.InstanceCount = 1;
}
}
static void
genX(cmd_buffer_emit_indirect_generated_draws_count)(struct anv_cmd_buffer *cmd_buffer,
struct anv_address indirect_data_addr,
uint32_t indirect_data_stride,
struct anv_address count_addr,
uint32_t max_draw_count,
bool indexed)
{
genX(flush_pipeline_select_3d)(cmd_buffer);
/* Apply the pipeline flush here so the indirect data is available for the
* generation shader.
*/
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
if (anv_address_is_null(cmd_buffer->generation_return_addr))
genX(cmd_buffer_emit_indirect_generated_draws_init)(cmd_buffer);
/* In order to have the vertex fetch gather the data we need to have a
* non-zero stride. It's possible for the application to provide a 0 stride
* when draw_count is 1, but we need a correct value for
* VERTEX_BUFFER_STATE::BufferPitch, so ensure the caller sets this
* correctly:
*
* Vulkan spec, vkCmdDrawIndirect:
*
* "If drawCount is less than or equal to one, stride is ignored."
*/
assert(indirect_data_stride > 0);
if (cmd_buffer->state.conditional_render_enabled)
genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
/* Emit the 3D state in the main batch. */
genX(cmd_buffer_flush_gfx_state)(cmd_buffer);
const uint32_t draw_cmd_stride = 4 * GENX(3DPRIMITIVE_EXTENDED_length);
uint32_t item_base = 0;
while (item_base < max_draw_count) {
const uint32_t item_count = MIN2(max_draw_count - item_base,
MAX_GENERATED_DRAW_COUNT);
const uint32_t draw_cmd_size = item_count * draw_cmd_stride;
/* Ensure we have enough contiguous space for all the draws so that the
* generation shader can edit all the 3DPRIMITIVEs from a single base
* address.
*
* TODO: we might have to split that if the amount of space is too large
* (at 1MB?).
*/
VkResult result = anv_batch_emit_ensure_space(&cmd_buffer->batch,
draw_cmd_size);
if (result != VK_SUCCESS)
return;
genX(cmd_buffer_emit_generate_draws_count)(
cmd_buffer,
anv_batch_current_address(&cmd_buffer->batch),
draw_cmd_size,
anv_address_add(indirect_data_addr,
item_base * indirect_data_stride),
indirect_data_stride,
item_base,
item_count,
count_addr,
indexed);
anv_batch_advance(&cmd_buffer->batch, draw_cmd_size);
item_base += item_count;
}
}
static void
genX(cmd_buffer_flush_generated_draws)(struct anv_cmd_buffer *cmd_buffer)
{
/* No return address setup means we don't have to do anything */
if (anv_address_is_null(cmd_buffer->generation_return_addr))
return;
struct anv_batch *batch = &cmd_buffer->generation_batch;
/* Wait for all the generation shaders to finish writing the commands. */
genX(emit_apply_pipe_flushes)(batch,
cmd_buffer->device,
_3D,
ANV_PIPE_DATA_CACHE_FLUSH_BIT |
ANV_PIPE_CS_STALL_BIT);
#if GFX_VER >= 12
anv_batch_emit(batch, GENX(MI_ARB_CHECK), arb) {
arb.PreParserDisableMask = true;
arb.PreParserDisable = false;
}
#endif
#if GFX_VER < 12
/* Prior to Gfx12 we cannot disable the CS prefetch, so we have to emit a
* bunch of NOOPs to ensure we do not have generated commands loaded into
* the CS cache prior to them having been generated.
*/
const struct intel_device_info *devinfo = cmd_buffer->device->info;
const enum intel_engine_class engine_class = cmd_buffer->queue_family->engine_class;
for (uint32_t i = 0; i < devinfo->engine_class_prefetch[engine_class] / 4; i++)
anv_batch_emit(batch, GENX(MI_NOOP), noop);
#endif
/* Return to the main batch. */
anv_batch_emit(batch, GENX(MI_BATCH_BUFFER_START), bbs) {
bbs.AddressSpaceIndicator = ASI_PPGTT;
bbs.BatchBufferStartAddress = cmd_buffer->generation_return_addr;
}
cmd_buffer->generation_return_addr = ANV_NULL_ADDRESS;
}
#endif /* GENX_CMD_GENERATED_INDIRECT_DRAW_H */

View file

@ -18,6 +18,8 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
subdir('shaders')
inc_anv = include_directories('.')
anv_flags = [
@ -43,20 +45,6 @@ anv_entrypoints = custom_target(
depend_files : vk_entrypoints_gen_depend_files,
)
float64_spv_h = custom_target(
'float64_spv.h',
input : [glsl2spirv, float64_glsl_file],
output : 'float64_spv.h',
command : [
prog_python, '@INPUT@', '@OUTPUT@',
prog_glslang,
'--create-entry', 'main',
'--vn', 'float64_spv_source',
'--glsl-version', '450',
'-Olib',
]
)
idep_anv_headers = declare_dependency(
sources : [anv_entrypoints[0]],
include_directories : inc_anv,
@ -126,7 +114,8 @@ foreach g : [['90', ['gfx8_cmd_buffer.c']],
_gfx_ver = g[0]
libanv_per_hw_ver_libs += static_library(
'anv_per_hw_ver@0@'.format(_gfx_ver),
[anv_per_hw_ver_files, g[1], anv_entrypoints[0]],
[anv_per_hw_ver_files, g[1], anv_entrypoints[0],
generated_draws_spv_h, generated_draws_count_spv_h],
include_directories : [
inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_compiler, inc_intel,
],
@ -152,6 +141,7 @@ libanv_files = files(
'anv_descriptor_set.c',
'anv_device.c',
'anv_formats.c',
'anv_generated_indirect_draws.c',
'anv_genX.h',
'anv_image.c',
'anv_measure.c',
@ -216,7 +206,7 @@ libanv_common = static_library(
c_args : anv_flags,
cpp_args : anv_cpp_flags,
gnu_symbol_visibility : 'hidden',
dependencies : anv_deps,
dependencies : anv_deps
)
libvulkan_intel = shared_library(

View file

@ -0,0 +1,101 @@
/*
* Copyright © 2022 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#version 450
/* These 2 bindings will be accessed through A64 messages */
layout(set = 0, binding = 0, std430) buffer Storage0 {
uint indirect_data[];
};
layout(set = 0, binding = 1, std430) buffer Storage1 {
uint commands[];
};
/* This data will be provided through push constants. */
layout(set = 0, binding = 2) uniform block {
uint is_indexed;
uint is_predicated;
uint draw_base;
uint draw_count;
uint instance_multiplier;
uint indirect_data_stride;
};
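/* One fragment per potential draw: gl_FragCoord is turned into a draw index
* and, for every index below draw_count, a 10 dword 3DPRIMITIVE (with
* extended parameters enabled) is written into the commands buffer.
*/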
void main()
{
uint item_idx = uint(gl_FragCoord.y) * 8192 + uint(gl_FragCoord.x);
uint indirect_data_offset = item_idx * indirect_data_stride / 4;
uint _3dprim_dw_size = 10;
uint cmd_idx = uint(item_idx) * _3dprim_dw_size;
uint draw_id = draw_base + item_idx;
if (draw_id < draw_count) {
if (is_indexed != 0) {
/* Loading a VkDrawIndexedIndirectCommand */
uint index_count = indirect_data[indirect_data_offset + 0];
uint instance_count = indirect_data[indirect_data_offset + 1] * instance_multiplier;
uint first_index = indirect_data[indirect_data_offset + 2];
uint vertex_offset = indirect_data[indirect_data_offset + 3];
uint first_instance = indirect_data[indirect_data_offset + 4];
commands[cmd_idx + 0] = (3 << 29 | /* Command Type */
3 << 27 | /* Command SubType */
3 << 24 | /* 3D Command Opcode */
1 << 11 | /* Extended Parameter Enable */
is_predicated << 8 |
8 << 0); /* DWord Length */
commands[cmd_idx + 1] = 1 << 8; /* Indexed */
commands[cmd_idx + 2] = index_count; /* Vertex Count Per Instance */
commands[cmd_idx + 3] = first_index; /* Start Vertex Location */
commands[cmd_idx + 4] = instance_count; /* Instance Count */
commands[cmd_idx + 5] = first_instance; /* Start Instance Location */
commands[cmd_idx + 6] = vertex_offset; /* Base Vertex Location */
commands[cmd_idx + 7] = vertex_offset; /* gl_BaseVertex */
commands[cmd_idx + 8] = first_instance; /* gl_BaseInstance */
commands[cmd_idx + 9] = draw_id; /* gl_DrawID */
} else {
/* Loading a VkDrawIndirectCommand structure */
uint vertex_count = indirect_data[indirect_data_offset + 0];
uint instance_count = indirect_data[indirect_data_offset + 1] * instance_multiplier;
uint first_vertex = indirect_data[indirect_data_offset + 2];
uint first_instance = indirect_data[indirect_data_offset + 3];
commands[cmd_idx + 0] = (3 << 29 | /* Command Type */
3 << 27 | /* Command SubType */
3 << 24 | /* 3D Command Opcode */
1 << 11 | /* Extended Parameter Enable */
is_predicated << 8 |
8 << 0); /* DWord Length */
commands[cmd_idx + 1] = 0;
commands[cmd_idx + 2] = vertex_count; /* Vertex Count Per Instance */
commands[cmd_idx + 3] = first_vertex; /* Start Vertex Location */
commands[cmd_idx + 4] = instance_count; /* Instance Count */
commands[cmd_idx + 5] = first_instance; /* Start Instance Location */
commands[cmd_idx + 6] = 0; /* Base Vertex Location */
commands[cmd_idx + 7] = first_vertex; /* gl_BaseVertex */
commands[cmd_idx + 8] = first_instance; /* gl_BaseInstance */
commands[cmd_idx + 9] = draw_id; /* gl_DrawID */
}
}
}

View file

@ -0,0 +1,118 @@
/*
* Copyright © 2022 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#version 450
/* These 2 bindings will be accessed through A64 messages */
layout(set = 0, binding = 0, std430) buffer Storage0 {
uint indirect_data[];
};
layout(set = 0, binding = 1, std430) buffer Storage1 {
uint commands[];
};
/* This data will be provided through push constants. */
layout(set = 0, binding = 2) uniform block {
uint is_indexed;
uint is_predicated;
uint draw_base;
uint item_count;
uint draw_count;
uint instance_multiplier;
uint indirect_data_stride;
uint end_addr_ldw;
uint end_addr_udw;
};
void main()
{
uint item_idx = uint(gl_FragCoord.y) * 8192 + uint(gl_FragCoord.x);
uint indirect_data_offset = item_idx * indirect_data_stride / 4;
uint _3dprim_dw_size = 10;
uint cmd_idx = item_idx * _3dprim_dw_size;
/* Loading a VkDrawIndexedIndirectCommand */
uint index_count = indirect_data[indirect_data_offset + 0];
uint instance_count = indirect_data[indirect_data_offset + 1];
uint first_index = indirect_data[indirect_data_offset + 2];
uint vertex_offset = indirect_data[indirect_data_offset + 3];
uint first_instance = indirect_data[indirect_data_offset + 4];
uint draw_id = draw_base + item_idx;
if (draw_id < draw_count) {
if (is_indexed != 0) {
/* Loading a VkDrawIndexedIndirectCommand */
uint index_count = indirect_data[indirect_data_offset + 0];
uint instance_count = indirect_data[indirect_data_offset + 1] * instance_multiplier;
uint first_index = indirect_data[indirect_data_offset + 2];
uint vertex_offset = indirect_data[indirect_data_offset + 3];
uint first_instance = indirect_data[indirect_data_offset + 4];
commands[cmd_idx + 0] = (3 << 29 | /* Command Type */
3 << 27 | /* Command SubType */
3 << 24 | /* 3D Command Opcode */
1 << 11 | /* Extended Parameter Enable */
is_predicated << 8 |
8 << 0); /* DWord Length */
commands[cmd_idx + 1] = 1 << 8; /* Indexed */
commands[cmd_idx + 2] = index_count; /* Vertex Count Per Instance */
commands[cmd_idx + 3] = first_index; /* Start Vertex Location */
commands[cmd_idx + 4] = instance_count; /* Instance Count */
commands[cmd_idx + 5] = first_instance; /* Start Instance Location */
commands[cmd_idx + 6] = vertex_offset; /* Base Vertex Location */
commands[cmd_idx + 7] = vertex_offset; /* gl_BaseVertex */
commands[cmd_idx + 8] = first_instance; /* gl_BaseInstance */
commands[cmd_idx + 9] = draw_id; /* gl_DrawID */
} else {
/* Loading a VkDrawIndirectCommand structure */
uint vertex_count = indirect_data[indirect_data_offset + 0];
uint instance_count = indirect_data[indirect_data_offset + 1] * instance_multiplier;
uint first_vertex = indirect_data[indirect_data_offset + 2];
uint first_instance = indirect_data[indirect_data_offset + 3];
commands[cmd_idx + 0] = (3 << 29 | /* Command Type */
3 << 27 | /* Command SubType */
3 << 24 | /* 3D Command Opcode */
1 << 11 | /* Extended Parameter Enable */
is_predicated << 8 |
8 << 0); /* DWord Length */
commands[cmd_idx + 1] = 0;
commands[cmd_idx + 2] = vertex_count; /* Vertex Count Per Instance */
commands[cmd_idx + 3] = first_vertex; /* Start Vertex Location */
commands[cmd_idx + 4] = instance_count; /* Instance Count */
commands[cmd_idx + 5] = first_instance; /* Start Instance Location */
commands[cmd_idx + 6] = 0; /* Base Vertex Location */
commands[cmd_idx + 7] = first_vertex; /* gl_BaseVertex */
commands[cmd_idx + 8] = first_instance; /* gl_BaseInstance */
commands[cmd_idx + 9] = draw_id; /* gl_DrawID */
}
} else if (draw_id == draw_count) {
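/* First item past the actual draw count: write an MI_BATCH_BUFFER_START
* jumping to the end of the reserved command space so that the remaining,
* unwritten 3DPRIMITIVE slots are skipped at execution.
*/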
commands[cmd_idx + 0] = (0 << 29 | /* Command Type */
49 << 23 | /* MI Command Opcode */
1 << 8 | /* Address Space Indicator (PPGTT) */
1 << 0); /* DWord Length */
commands[cmd_idx + 1] = end_addr_ldw;
commands[cmd_idx + 2] = end_addr_udw;
}
}

View file

@ -0,0 +1,59 @@
# Copyright © 2022 Intel Corporation
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
float64_spv_h = custom_target(
'float64_spv.h',
input : [glsl2spirv, float64_glsl_file],
output : 'float64_spv.h',
command : [
prog_python, '@INPUT@', '@OUTPUT@',
prog_glslang,
'--create-entry', 'main',
'--vn', 'float64_spv_source',
'--glsl-version', '450',
'-Olib',
]
)
generated_draws_spv_h = custom_target(
'generated_draws_spv.h',
input : [glsl2spirv, 'generated_draws.glsl'],
output : 'generated_draws_spv.h',
command : [
prog_python, '@INPUT@', '@OUTPUT@',
prog_glslang,
'--vn', 'generated_draws_spv_source',
'--glsl-version', '450',
'--stage', 'frag',
]
)
generated_draws_count_spv_h = custom_target(
'generated_draws_count_spv.h',
input : [glsl2spirv, 'generated_draws_count.glsl'],
output : 'generated_draws_count_spv.h',
command : [
prog_python, '@INPUT@', '@OUTPUT@',
prog_glslang,
'--vn', 'generated_draws_count_spv_source',
'--glsl-version', '450',
'--stage', 'frag',
]
)

View file

@ -615,4 +615,8 @@
DRI_CONF_OPT_B(fp64_workaround_enabled, def, \
"Use softpf64 when the shader uses float64, but the device doesn't support that type")
#define DRI_CONF_ANV_GENERATED_INDIRECT_THRESHOLD(def) \
DRI_CONF_OPT_I(generated_indirect_threshold, def, 0, INT32_MAX, \
"Indirect threshold count above which we start generating commands")
#endif