diff --git a/src/intel/genxml/meson.build b/src/intel/genxml/meson.build
index 7173ac5f031..cb06e30ebc7 100644
--- a/src/intel/genxml/meson.build
+++ b/src/intel/genxml/meson.build
@@ -104,6 +104,8 @@ endforeach
 
 genX_cl_included_symbols = [
   # instructions
+  '3DSTATE_DS',
+  '3DSTATE_HS',
   '3DSTATE_INDEX_BUFFER',
   '3DSTATE_VERTEX_BUFFERS',
   '3DPRIMITIVE',
diff --git a/src/intel/shaders/generate_draws.cl b/src/intel/shaders/generate_draws.cl
index eff7aed6bf3..22b6f0485a5 100644
--- a/src/intel/shaders/generate_draws.cl
+++ b/src/intel/shaders/generate_draws.cl
@@ -3,6 +3,7 @@
  */
 
 #include "libintel_shaders.h"
+#include "dev/intel_wa.h"
 
 static void end_generated_draws(global void *dst_ptr,
                                 uint32_t item_idx,
@@ -35,6 +36,7 @@ static void end_generated_draws(global void *dst_ptr,
 
 void
 genX(libanv_write_draw)(global void *dst_base,
+                        global void *wa_insts_ptr,
                         global void *indirect_base,
                         global void *draw_id_base,
                         uint32_t indirect_stride,
@@ -62,8 +64,28 @@ genX(libanv_write_draw)(global void *dst_base,
    bool uses_tbimr = (flags & ANV_GENERATED_FLAG_TBIMR) != 0;
    bool uses_base = (flags & ANV_GENERATED_FLAG_BASE) != 0;
    bool uses_drawid = (flags & ANV_GENERATED_FLAG_DRAWID) != 0;
+   uint32_t inst_offset_B = 0;
 
-   genX(write_draw)(dst_ptr, indirect_ptr, draw_id_ptr,
+#if INTEL_WA_16011107343_GFX_VER
+   if (flags & ANV_GENERATED_FLAG_WA_16011107343) {
+      genX(copy_data)(dst_ptr + inst_offset_B,
+                      wa_insts_ptr + inst_offset_B,
+                      GENX(3DSTATE_HS_length) * 4);
+      inst_offset_B += GENX(3DSTATE_HS_length) * 4;
+   }
+#endif
+
+#if INTEL_WA_22018402687_GFX_VER
+   if (flags & ANV_GENERATED_FLAG_WA_22018402687) {
+      genX(copy_data)(dst_ptr + inst_offset_B,
+                      wa_insts_ptr + inst_offset_B,
+                      GENX(3DSTATE_DS_length) * 4);
+      inst_offset_B += GENX(3DSTATE_DS_length) * 4;
+   }
+#endif
+
+   genX(write_draw)(dst_ptr + inst_offset_B,
+                    indirect_ptr, draw_id_ptr,
                     draw_id, instance_multiplier,
                     is_indexed, is_predicated,
                     uses_tbimr, uses_base, uses_drawid,
diff --git a/src/intel/shaders/libintel_shaders.h b/src/intel/shaders/libintel_shaders.h
index 1f5fca24fd9..44a7a9acc76 100644
--- a/src/intel/shaders/libintel_shaders.h
+++ b/src/intel/shaders/libintel_shaders.h
@@ -62,6 +62,10 @@ enum anv_generated_draw_flags {
    ANV_GENERATED_FLAG_RING_MODE = BITFIELD_BIT(5),
    /* Whether TBIMR tile-based rendering shall be enabled. */
    ANV_GENERATED_FLAG_TBIMR = BITFIELD_BIT(6),
+   /* Wa_16011107343 */
+   ANV_GENERATED_FLAG_WA_16011107343 = BITFIELD_BIT(7),
+   /* Wa_22018402687 */
+   ANV_GENERATED_FLAG_WA_22018402687 = BITFIELD_BIT(8),
 };
 
 /**
@@ -123,6 +127,10 @@ void genX(write_draw)(global uint32_t *dst_ptr,
                       bool uses_draw_id,
                       uint32_t mocs);
 
+void genX(copy_data)(global void *dst_ptr,
+                     global void *src_ptr,
+                     uint32_t size);
+
 #endif /* __OPENCL_VERSION__ */
 
 #endif /* _LIBANV_SHADERS_H_ */
diff --git a/src/intel/shaders/meson.build b/src/intel/shaders/meson.build
index ceb95003a8a..607959e8026 100644
--- a/src/intel/shaders/meson.build
+++ b/src/intel/shaders/meson.build
@@ -26,6 +26,7 @@ intel_shader_files = files(
   'generate_draws_iris.cl',
   'memcpy.cl',
   'query_copy.cl',
+  'util.cl',
 )
 
 prepended_input_args = []
diff --git a/src/intel/shaders/util.cl b/src/intel/shaders/util.cl
new file mode 100644
index 00000000000..580b4b36a20
--- /dev/null
+++ b/src/intel/shaders/util.cl
@@ -0,0 +1,25 @@
+/* Copyright © 2024 Intel Corporation
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "libintel_shaders.h"
+
+/* Copy size bytes from src_ptr to dst_ptr using a single lane. size must be
+ * a multiple of 4.
+ */
+void genX(copy_data)(global void *dst_ptr,
+                     global void *src_ptr,
+                     uint32_t size)
+{
+   for (uint32_t offset = 0; offset < size; offset += 16) {
+      if (offset + 16 <= size) {
+         *(global uint4 *)(dst_ptr + offset) = *(global uint4 *)(src_ptr + offset);
+      } else if (offset + 12 <= size) {
+         *(global uint3 *)(dst_ptr + offset) = *(global uint3 *)(src_ptr + offset);
+      } else if (offset + 8 <= size) {
+         *(global uint2 *)(dst_ptr + offset) = *(global uint2 *)(src_ptr + offset);
+      } else if (offset + 4 <= size) {
+         *(global uint *)(dst_ptr + offset) = *(global uint *)(src_ptr + offset);
+      }
+   }
+}
diff --git a/src/intel/vulkan/anv_internal_kernels.c b/src/intel/vulkan/anv_internal_kernels.c
index 85c4d00610e..4107b62dddb 100644
--- a/src/intel/vulkan/anv_internal_kernels.c
+++ b/src/intel/vulkan/anv_internal_kernels.c
@@ -293,7 +293,11 @@ anv_device_get_internal_shader(struct anv_device *device,
                            * 2 * (2 loads + 3 stores) +
                            * 3 stores
                            */
-                          14),
+                          14) +
+         /* 3 loads + 3 stores */
+         (intel_needs_workaround(device->info, 16011107343) ? 6 : 0) +
+         /* 3 loads + 3 stores */
+         (intel_needs_workaround(device->info, 22018402687) ? 6 : 0),
       },
       [ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_COMPUTE] = {
          .key = {
diff --git a/src/intel/vulkan/anv_internal_kernels.h b/src/intel/vulkan/anv_internal_kernels.h
index 898b96db8f0..0e1797d861b 100644
--- a/src/intel/vulkan/anv_internal_kernels.h
+++ b/src/intel/vulkan/anv_internal_kernels.h
@@ -33,6 +33,9 @@ struct PACKED anv_gen_indirect_params {
    /* Indirect data buffer address (only used on Gfx9) */
    uint64_t indirect_data_addr;
 
+   /* Address of the workaround instructions copied ahead of each draw */
+   uint64_t wa_insts_addr;
+
    /* Stride between each elements of the indirect data buffer */
    uint32_t indirect_data_stride;
 
diff --git a/src/intel/vulkan/genX_cmd_draw_generated_indirect.h b/src/intel/vulkan/genX_cmd_draw_generated_indirect.h
index 11e1b5a5268..78cd1b6fba3 100644
--- a/src/intel/vulkan/genX_cmd_draw_generated_indirect.h
+++ b/src/intel/vulkan/genX_cmd_draw_generated_indirect.h
@@ -78,8 +78,53 @@ genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer,
       draw_count_addr = count_addr;
    }
 
+   const bool wa_16011107343 =
+      intel_needs_workaround(device->info, 16011107343) &&
+      anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL);
+   const bool wa_22018402687 =
+      intel_needs_workaround(device->info, 22018402687) &&
+      anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL);
+
+   const uint32_t wa_insts_size =
+      ((wa_16011107343 ? GENX(3DSTATE_HS_length) : 0) +
+       (wa_22018402687 ? GENX(3DSTATE_DS_length) : 0)) * 4;
+   UNUSED const bool protected = cmd_buffer->vk.pool->flags &
+                                 VK_COMMAND_POOL_CREATE_PROTECTED_BIT;
+
+   struct anv_state wa_insts_state =
+      wa_insts_size ?
+      anv_cmd_buffer_alloc_temporary_state(cmd_buffer, wa_insts_size, 4) :
+      ANV_STATE_NULL;
+   UNUSED uint32_t wa_insts_offset = 0;
+
+#if INTEL_WA_16011107343_GFX_VER
+   if (wa_16011107343) {
+      memcpy(wa_insts_state.map + wa_insts_offset,
+             &pipeline->batch_data[
+                protected ?
+                pipeline->final.hs_protected.offset :
+                pipeline->final.hs.offset],
+             GENX(3DSTATE_HS_length) * 4);
+      wa_insts_offset += GENX(3DSTATE_HS_length) * 4;
+   }
+#endif
+
+#if INTEL_WA_22018402687_GFX_VER
+   if (wa_22018402687) {
+      memcpy(wa_insts_state.map + wa_insts_offset,
+             &pipeline->batch_data[
+                protected ?
+                pipeline->final.ds_protected.offset :
+                pipeline->final.ds.offset],
+             GENX(3DSTATE_DS_length) * 4);
+      wa_insts_offset += GENX(3DSTATE_DS_length) * 4;
+   }
+#endif
+
    struct anv_gen_indirect_params *push_data = push_data_state.map;
    *push_data = (struct anv_gen_indirect_params) {
+      .wa_insts_addr = anv_address_physical(
+         anv_cmd_buffer_temporary_state_address(cmd_buffer, wa_insts_state)),
       .draw_id_addr = anv_address_physical(draw_id_addr),
       .indirect_data_addr = anv_address_physical(indirect_data_addr),
       .indirect_data_stride = indirect_data_stride,
@@ -96,7 +141,9 @@ genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer,
-               (ring_count != 0 ? ANV_GENERATED_FLAG_RING_MODE : 0),
+               (ring_count != 0 ? ANV_GENERATED_FLAG_RING_MODE : 0) |
+               (wa_16011107343 ? ANV_GENERATED_FLAG_WA_16011107343 : 0) |
+               (wa_22018402687 ? ANV_GENERATED_FLAG_WA_22018402687 : 0),
       .mocs = anv_mocs(device, indirect_data_addr.bo,
                        ISL_SURF_USAGE_VERTEX_BUFFER_BIT),
-      .cmd_primitive_size = generated_cmd_stride,
+      .cmd_primitive_size = wa_insts_size + generated_cmd_stride,
       .draw_base = item_base,
       .max_draw_count = max_count,
       .ring_count = ring_count,
diff --git a/src/intel/vulkan/genX_internal_kernels.c b/src/intel/vulkan/genX_internal_kernels.c
index b44acfebf9d..c1738e70050 100644
--- a/src/intel/vulkan/genX_internal_kernels.c
+++ b/src/intel/vulkan/genX_internal_kernels.c
@@ -66,6 +66,7 @@ genX(call_internal_shader)(nir_builder *b, enum anv_internal_kernel_name shader_
       genX(libanv_write_draw)(
          b,
          load_param(b, 64, struct anv_gen_indirect_params, generated_cmds_addr),
+         load_param(b, 64, struct anv_gen_indirect_params, wa_insts_addr),
          load_param(b, 64, struct anv_gen_indirect_params, indirect_data_addr),
          load_param(b, 64, struct anv_gen_indirect_params, draw_id_addr),
          load_param(b, 32, struct anv_gen_indirect_params, indirect_data_stride),