anv: implement Wa_16011107343/22018402687 for generated draws

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32059>
This commit is contained in:
Lionel Landwerlin 2024-11-08 14:49:37 +02:00 committed by Marge Bot
parent 53eed61a90
commit 08530462bd
9 changed files with 114 additions and 3 deletions

View file

@ -104,6 +104,8 @@ endforeach
genX_cl_included_symbols = [
# instructions
'3DSTATE_DS',
'3DSTATE_HS',
'3DSTATE_INDEX_BUFFER',
'3DSTATE_VERTEX_BUFFERS',
'3DPRIMITIVE',

View file

@ -3,6 +3,7 @@
*/
#include "libintel_shaders.h"
#include "dev/intel_wa.h"
static void end_generated_draws(global void *dst_ptr,
uint32_t item_idx,
@ -35,6 +36,7 @@ static void end_generated_draws(global void *dst_ptr,
void
genX(libanv_write_draw)(global void *dst_base,
global void *wa_insts_ptr,
global void *indirect_base,
global void *draw_id_base,
uint32_t indirect_stride,
@ -62,8 +64,28 @@ genX(libanv_write_draw)(global void *dst_base,
bool uses_tbimr = (flags & ANV_GENERATED_FLAG_TBIMR) != 0;
bool uses_base = (flags & ANV_GENERATED_FLAG_BASE) != 0;
bool uses_drawid = (flags & ANV_GENERATED_FLAG_DRAWID) != 0;
uint32_t inst_offset_B = 0;
genX(write_draw)(dst_ptr, indirect_ptr, draw_id_ptr,
#if INTEL_WA_16011107343_GFX_VER
if (flags & ANV_GENERATED_FLAG_WA_16011107343) {
genX(copy_data)(dst_ptr + inst_offset_B,
wa_insts_ptr + inst_offset_B,
GENX(3DSTATE_HS_length) * 4);
inst_offset_B += GENX(3DSTATE_HS_length) * 4;
}
#endif
#if INTEL_WA_22018402687_GFX_VER
if (flags & ANV_GENERATED_FLAG_WA_22018402687) {
genX(copy_data)(dst_ptr + inst_offset_B,
wa_insts_ptr + inst_offset_B,
GENX(3DSTATE_DS_length) * 4);
inst_offset_B += GENX(3DSTATE_DS_length) * 4;
}
#endif
genX(write_draw)(dst_ptr + inst_offset_B,
indirect_ptr, draw_id_ptr,
draw_id, instance_multiplier,
is_indexed, is_predicated,
uses_tbimr, uses_base, uses_drawid,

View file

@ -62,6 +62,10 @@ enum anv_generated_draw_flags {
ANV_GENERATED_FLAG_RING_MODE = BITFIELD_BIT(5),
/* Whether TBIMR tile-based rendering shall be enabled. */
ANV_GENERATED_FLAG_TBIMR = BITFIELD_BIT(6),
/* Wa_16011107343 */
ANV_GENERATED_FLAG_WA_16011107343 = BITFIELD_BIT(7),
/* Wa_22018402687 */
ANV_GENERATED_FLAG_WA_22018402687 = BITFIELD_BIT(8),
};
/**
@ -123,6 +127,10 @@ void genX(write_draw)(global uint32_t *dst_ptr,
bool uses_draw_id,
uint32_t mocs);
/**
 * Copy size bytes from src_ptr to dst_ptr (implemented in util.cl).
 *
 * The copy is done in units of 4 bytes, so size is expected to be a multiple
 * of 4.
 */
void genX(copy_data)(global void *dst_ptr,
global void *src_ptr,
uint32_t size);
#endif /* __OPENCL_VERSION__ */
#endif /* _LIBANV_SHADERS_H_ */

View file

@ -26,6 +26,7 @@ intel_shader_files = files(
'generate_draws_iris.cl',
'memcpy.cl',
'query_copy.cl',
'util.cl',
)
prepended_input_args = []

25
src/intel/shaders/util.cl Normal file
View file

@ -0,0 +1,25 @@
/* Copyright © 2024 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#include "libintel_shaders.h"
/* Copy size bytes from src_ptr to dst_ptr, meant to be run from a single
 * lane.
 *
 * The data is walked in steps of 16 bytes. Each step is moved with a uint4
 * access when at least 16 bytes remain; the tail is moved with a uint3, uint2
 * or uint access depending on whether at least 12, 8 or 4 bytes remain. size
 * is expected to be a multiple of 4: a trailing remainder smaller than 4
 * bytes is not copied.
 */
void genX(copy_data)(global void *dst_ptr,
                     global void *src_ptr,
                     uint32_t size)
{
   uint32_t offset = 0;

   while (offset < size) {
      /* Start of the current chunk in both buffers. */
      global void *dst = dst_ptr + offset;
      global void *src = src_ptr + offset;

      if (offset + 16 <= size) {
         *(global uint4 *)dst = *(global uint4 *)src;
      } else if (offset + 12 <= size) {
         *(global uint3 *)dst = *(global uint3 *)src;
      } else if (offset + 8 <= size) {
         *(global uint2 *)dst = *(global uint2 *)src;
      } else if (offset + 4 <= size) {
         *(global uint *)dst = *(global uint *)src;
      }

      offset += 16;
   }
}

View file

@ -293,7 +293,11 @@ anv_device_get_internal_shader(struct anv_device *device,
* 2 * (2 loads + 3 stores) +
* 3 stores
*/
14),
14) +
/* 3 loads + 3 stores */
(intel_needs_workaround(device->info, 16011107343) ? 6 : 0) +
/* 3 loads + 3 stores */
(intel_needs_workaround(device->info, 22018402687) ? 6 : 0),
},
[ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_COMPUTE] = {
.key = {

View file

@ -33,6 +33,9 @@ struct PACKED anv_gen_indirect_params {
/* Indirect data buffer address (only used on Gfx9) */
uint64_t indirect_data_addr;
/* Address of the workaround instructions to copy ahead of each draw */
uint64_t wa_insts_addr;
/* Stride between each element of the indirect data buffer */
uint32_t indirect_data_stride;

View file

@ -78,8 +78,53 @@ genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer,
draw_count_addr = count_addr;
}
const bool wa_16011107343 =
intel_needs_workaround(device->info, 16011107343) &&
anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL);
const bool wa_22018402687 =
intel_needs_workaround(device->info, 22018402687) &&
anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL);
const uint32_t wa_insts_size =
((wa_16011107343 ? GENX(3DSTATE_HS_length) : 0) +
(wa_22018402687 ? GENX(3DSTATE_HS_length) : 0)) * 4;
UNUSED const bool protected = cmd_buffer->vk.pool->flags &
VK_COMMAND_POOL_CREATE_PROTECTED_BIT;
struct anv_state wa_insts_state =
wa_insts_size ?
anv_cmd_buffer_alloc_temporary_state(cmd_buffer, wa_insts_size, 4) :
ANV_STATE_NULL;
UNUSED uint32_t wa_insts_offset = 0;
#if INTEL_WA_16011107343_GFX_VER
if (wa_16011107343) {
memcpy(wa_insts_state.map + wa_insts_offset,
&pipeline->batch_data[
protected ?
pipeline->final.hs_protected.offset :
pipeline->final.hs.offset],
GENX(3DSTATE_HS_length) * 4);
wa_insts_offset += GENX(3DSTATE_HS_length) * 4;
}
#endif
#if INTEL_WA_22018402687_GFX_VER
if (wa_22018402687) {
memcpy(wa_insts_state.map + wa_insts_offset,
&pipeline->batch_data[
protected ?
pipeline->final.ds_protected.offset :
pipeline->final.ds.offset],
GENX(3DSTATE_DS_length) * 4);
wa_insts_offset += GENX(3DSTATE_DS_length) * 4;
}
#endif
struct anv_gen_indirect_params *push_data = push_data_state.map;
*push_data = (struct anv_gen_indirect_params) {
.wa_insts_addr = anv_address_physical(
anv_cmd_buffer_temporary_state_address(cmd_buffer, wa_insts_state)),
.draw_id_addr = anv_address_physical(draw_id_addr),
.indirect_data_addr = anv_address_physical(indirect_data_addr),
.indirect_data_stride = indirect_data_stride,
@ -96,7 +141,7 @@ genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer,
(ring_count != 0 ? ANV_GENERATED_FLAG_RING_MODE : 0),
.mocs = anv_mocs(device, indirect_data_addr.bo,
ISL_SURF_USAGE_VERTEX_BUFFER_BIT),
.cmd_primitive_size = generated_cmd_stride,
.cmd_primitive_size = wa_insts_size + generated_cmd_stride,
.draw_base = item_base,
.max_draw_count = max_count,
.ring_count = ring_count,

View file

@ -66,6 +66,7 @@ genX(call_internal_shader)(nir_builder *b, enum anv_internal_kernel_name shader_
genX(libanv_write_draw)(
b,
load_param(b, 64, struct anv_gen_indirect_params, generated_cmds_addr),
load_param(b, 64, struct anv_gen_indirect_params, wa_insts_addr),
load_param(b, 64, struct anv_gen_indirect_params, indirect_data_addr),
load_param(b, 64, struct anv_gen_indirect_params, draw_id_addr),
load_param(b, 32, struct anv_gen_indirect_params, indirect_data_stride),