anv: add shader instruction emission

Should replace much of genX_pipeline.c

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34872>
This commit is contained in:
Lionel Landwerlin 2024-08-11 00:05:15 +03:00 committed by Marge Bot
parent 8f4c2bd566
commit 69b6b4cb28
7 changed files with 1763 additions and 6 deletions

View file

@ -107,6 +107,11 @@ void genX(batch_emit_pipeline_vertex_input)(struct anv_batch *batch,
struct anv_graphics_pipeline *pipeline, struct anv_graphics_pipeline *pipeline,
const struct vk_vertex_input_state *vi); const struct vk_vertex_input_state *vi);
/* Per-hardware-generation (genX) entry point that takes an anv_device and an
 * anv_shader instead of the anv_graphics_pipeline used by
 * genX(batch_emit_pipeline_vertex_input) above.
 *
 * NOTE(review): the implementation is not visible here; presumably it emits
 * the vertex input packets described by `vi` into `batch` -- confirm against
 * the genX source.
 */
void genX(batch_emit_vertex_input)(struct anv_batch *batch,
struct anv_device *device,
struct anv_shader *shader,
const struct vk_vertex_input_state *vi);
enum anv_pipe_bits enum anv_pipe_bits
genX(emit_apply_pipe_flushes)(struct anv_batch *batch, genX(emit_apply_pipe_flushes)(struct anv_batch *batch,
struct anv_device *device, struct anv_device *device,
@ -319,6 +324,36 @@ genX(ray_tracing_pipeline_emit)(struct anv_ray_tracing_pipeline *pipeline);
}) })
#endif #endif
/* Build a BINDLESS_SHADER_RECORD value for an anv_shader.
 *
 * `shader` is any expression yielding a pointer to an object with
 * `prog_data` and `kernel.offset` members; `local_arg_offset` must be a
 * multiple of 8 (asserted) and is stored divided by 8.
 *
 * On GFX_VERx10 >= 300 the dispatch mode is always RT_SIMD16 (asserted
 * against prog_data->simd_size) and RegistersPerThread is filled in from
 * prog_data->base.grf_used. On older generations the dispatch mode follows
 * prog_data->simd_size, which must be 8 or 16.
 *
 * Every use of the `shader` argument is parenthesized so that non-trivial
 * expressions (e.g. `*pp` or `cond ? a : b`) expand correctly.
 */
#if GFX_VERx10 >= 300
#define anv_shader_get_bsr(shader, local_arg_offset) ({              \
   assert((local_arg_offset) % 8 == 0);                              \
   const struct brw_bs_prog_data *prog_data =                        \
      brw_bs_prog_data_const((shader)->prog_data);                   \
   assert(prog_data->simd_size == 16);                               \
                                                                     \
   (struct GENX(BINDLESS_SHADER_RECORD)) {                           \
      .OffsetToLocalArguments = (local_arg_offset) / 8,              \
      .BindlessShaderDispatchMode = RT_SIMD16,                       \
      .KernelStartPointer = (shader)->kernel.offset,                 \
      .RegistersPerThread =                                          \
         ptl_register_blocks(prog_data->base.grf_used),              \
   };                                                                \
})
#else
#define anv_shader_get_bsr(shader, local_arg_offset) ({              \
   assert((local_arg_offset) % 8 == 0);                              \
   const struct brw_bs_prog_data *prog_data =                        \
      brw_bs_prog_data_const((shader)->prog_data);                   \
   assert(prog_data->simd_size == 8 || prog_data->simd_size == 16);  \
                                                                     \
   (struct GENX(BINDLESS_SHADER_RECORD)) {                           \
      .OffsetToLocalArguments = (local_arg_offset) / 8,              \
      .BindlessShaderDispatchMode =                                  \
         prog_data->simd_size == 16 ? RT_SIMD16 : RT_SIMD8,          \
      .KernelStartPointer = (shader)->kernel.offset,                 \
   };                                                                \
})
#endif
#if GFX_VERx10 >= 300 #if GFX_VERx10 >= 300
#define anv_shader_bin_get_bsr(bin, local_arg_offset) ({ \ #define anv_shader_bin_get_bsr(bin, local_arg_offset) ({ \
assert((local_arg_offset) % 8 == 0); \ assert((local_arg_offset) % 8 == 0); \
@ -501,3 +536,16 @@ genX(cmd_dispatch_unaligned)(
uint32_t invocations_x, uint32_t invocations_x,
uint32_t invocations_y, uint32_t invocations_y,
uint32_t invocations_z); uint32_t invocations_z);
/* Per-hardware-generation shader command emission. anv_shader_create() calls
 * this with a batch whose storage is the shader's cmd_data array.
 *
 * NOTE(review): presumably packs the stage's HW packets into that storage and
 * records their ranges in the shader's anv_gfx_state_ptr fields -- confirm in
 * genX_shader.c.
 */
void genX(shader_emit)(struct anv_batch *batch,
struct anv_device *device,
struct anv_shader *shader);
/* Per-hardware-generation backend for the write_rt_shader_group hook of
 * anv_device_shader_ops: anv_write_rt_shader_group() forwards `type`,
 * `shaders`, `shader_count` and `output` here unchanged.
 */
void genX(write_rt_shader_group)(struct anv_device *device,
VkRayTracingShaderGroupTypeKHR type,
const struct vk_shader **shaders,
uint32_t shader_count,
void *output);
/* Returns a size in dwords for the given stage: anv_shader_create() uses the
 * result as the number of uint32_t elements to allocate for the shader's
 * cmd_data array.
 */
uint32_t genX(shader_cmd_size)(struct anv_device *device,
mesa_shader_stage stage);

View file

@ -1185,6 +1185,39 @@ struct anv_push_descriptor_info {
uint8_t push_set_buffer; uint8_t push_set_buffer;
}; };
/* Describes a contiguous run of pre-packed command dwords inside a uint32_t
 * array. The anv_batch_emit_shader_state*() macros use it to index
 * anv_shader::cmd_data and copy `len` dwords from there into a batch; a `len`
 * of 0 means there is nothing to emit.
 */
struct anv_gfx_state_ptr {
/* Both in dwords */
uint16_t offset;
uint16_t len;
};
/* Copy the pre-packed dwords described by (shader)->state from
 * (shader)->cmd_data into `batch`.
 *
 * Nothing is emitted when the range is empty, and nothing is copied when
 * anv_batch_emit_dwords() returns NULL.
 */
#define anv_batch_emit_shader_state(batch, shader, state)               \
   do {                                                                 \
      if ((shader)->state.len != 0) {                                   \
         uint32_t *dw =                                                 \
            anv_batch_emit_dwords((batch), (shader)->state.len);        \
         if (dw) {                                                      \
            memcpy(dw, &(shader)->cmd_data[(shader)->state.offset],     \
                   4 * (shader)->state.len);                            \
         }                                                              \
      }                                                                 \
   } while (0)
/* Variant of anv_batch_emit_shader_state() that selects between two
 * pre-packed ranges: (shader)->state##_protected when `protected` is true,
 * (shader)->state otherwise. The selected range is copied from
 * (shader)->cmd_data into `batch`.
 *
 * Nothing is emitted when the selected range is empty, and nothing is copied
 * when anv_batch_emit_dwords() returns NULL.
 *
 * `protected` is parenthesized so any expression can be passed, and the local
 * pointer is const-qualified so the macro also works on a pointer-to-const
 * shader without discarding qualifiers.
 */
#define anv_batch_emit_shader_state_protected(batch, shader,            \
                                              state, protected)         \
   do {                                                                 \
      const struct anv_gfx_state_ptr *_cmd_state = (protected) ?        \
         &(shader)->state##_protected : &(shader)->state;               \
      if (_cmd_state->len == 0)                                         \
         break;                                                         \
      uint32_t *dw;                                                     \
      dw = anv_batch_emit_dwords((batch), _cmd_state->len);             \
      if (!dw)                                                          \
         break;                                                         \
      memcpy(dw, &(shader)->cmd_data[_cmd_state->offset],               \
             4 * _cmd_state->len);                                      \
   } while (0)
struct anv_shader { struct anv_shader {
struct vk_shader vk; struct vk_shader vk;
@ -1212,6 +1245,79 @@ struct anv_shader {
* Array of pointers of length bind_map.embedded_sampler_count * Array of pointers of length bind_map.embedded_sampler_count
*/ */
struct anv_embedded_sampler **embedded_samplers; struct anv_embedded_sampler **embedded_samplers;
struct anv_reloc_list relocs;
union {
struct {
/* Number of elements for application values */
uint32_t input_elements;
/* Number of elements for system generated values */
uint32_t sgvs_count;
uint32_t sgvs_elements[2 * 2 /* 2 internal */];
struct anv_gfx_state_ptr vf_sgvs;
struct anv_gfx_state_ptr vf_sgvs_2;
struct anv_gfx_state_ptr vf_sgvs_instancing;
struct anv_gfx_state_ptr vf_component_packing;
struct anv_gfx_state_ptr vs;
struct anv_gfx_state_ptr vs_protected;
} vs;
struct {
struct anv_gfx_state_ptr hs;
struct anv_gfx_state_ptr hs_protected;
} hs;
struct {
struct anv_gfx_state_ptr te;
struct anv_gfx_state_ptr ds;
struct anv_gfx_state_ptr ds_protected;
} ds;
struct {
struct anv_gfx_state_ptr gs;
struct anv_gfx_state_ptr gs_protected;
} gs;
struct {
struct anv_gfx_state_ptr control;
struct anv_gfx_state_ptr control_protected;
struct anv_gfx_state_ptr shader;
struct anv_gfx_state_ptr redistrib;
} ts;
struct {
struct anv_gfx_state_ptr control;
struct anv_gfx_state_ptr control_protected;
struct anv_gfx_state_ptr shader;
struct anv_gfx_state_ptr distrib;
struct anv_gfx_state_ptr clip;
} ms;
struct {
struct anv_gfx_state_ptr ps;
struct anv_gfx_state_ptr ps_protected;
struct anv_gfx_state_ptr ps_extra;
struct anv_gfx_state_ptr wm;
} ps;
union {
struct {
struct anv_gfx_state_ptr vfe;
uint32_t idd[8];
} gfx9;
struct {
uint32_t compute_walker_body[39];
} gfx125;
} cs;
};
/* This one is shared amongst VS/DS/GS stages */
struct anv_gfx_state_ptr so_decl_list;
struct anv_gfx_state_ptr so;
uint32_t *cmd_data;
}; };
extern struct vk_device_shader_ops anv_device_shader_ops; extern struct vk_device_shader_ops anv_device_shader_ops;
@ -5197,12 +5303,6 @@ struct anv_graphics_lib_pipeline {
bool retain_shaders; bool retain_shaders;
}; };
/* Offset/length pair, both expressed in dwords, locating a run of packed
 * command data inside a uint32_t array.
 */
struct anv_gfx_state_ptr {
/* Both in dwords */
uint16_t offset;
uint16_t len;
};
/* The final graphics pipeline object has all the graphics state ready to be /* The final graphics pipeline object has all the graphics state ready to be
* programmed into HW packets (dynamic_state field) or fully baked in its * programmed into HW packets (dynamic_state field) or fully baked in its
* batch. * batch.
@ -5486,6 +5586,13 @@ get_gfx_##prefix##_prog_data( \
} else { \ } else { \
return NULL; \ return NULL; \
} \ } \
} \
\
static inline const struct brw_##prefix##_prog_data * \
get_shader_##prefix##_prog_data(const struct anv_shader *shader) \
{ \
return (const struct brw_##prefix##_prog_data *) \
shader->prog_data; \
} }
ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(vs, MESA_SHADER_VERTEX) ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(vs, MESA_SHADER_VERTEX)
@ -5525,6 +5632,12 @@ get_cs_prog_data(const struct anv_cmd_compute_state *comp_state)
return (const struct brw_cs_prog_data *) comp_state->shader->prog_data; return (const struct brw_cs_prog_data *) comp_state->shader->prog_data;
} }
/* Return the shader's prog_data pointer viewed as a brw_cs_prog_data.
 *
 * This is a plain cast: the shader's stage is not checked here.
 */
static inline const struct brw_cs_prog_data *
get_shader_cs_prog_data(const struct anv_shader *shader)
{
   const void *prog_data = shader->prog_data;

   return (const struct brw_cs_prog_data *)prog_data;
}
static inline const struct brw_vue_prog_data * static inline const struct brw_vue_prog_data *
anv_pipeline_get_last_vue_prog_data(const struct anv_graphics_pipeline *pipeline) anv_pipeline_get_last_vue_prog_data(const struct anv_graphics_pipeline *pipeline)
{ {

View file

@ -23,6 +23,7 @@ anv_shader_destroy(struct vk_device *vk_device,
anv_embedded_sampler_unref(device, shader->embedded_samplers[i]); anv_embedded_sampler_unref(device, shader->embedded_samplers[i]);
anv_state_pool_free(&device->instruction_state_pool, shader->kernel); anv_state_pool_free(&device->instruction_state_pool, shader->kernel);
anv_reloc_list_finish(&shader->relocs);
vk_shader_free(vk_device, pAllocator, vk_shader); vk_shader_free(vk_device, pAllocator, vk_shader);
} }
@ -670,11 +671,15 @@ anv_shader_create(struct anv_device *device,
shader_data, mem_ctx); shader_data, mem_ctx);
} }
const uint32_t cmd_data_dwords = anv_genX(device->info, shader_cmd_size)(
device, stage);
/* We never need this at runtime */ /* We never need this at runtime */
shader_data->prog_data.base.param = NULL; shader_data->prog_data.base.param = NULL;
VK_MULTIALLOC(ma); VK_MULTIALLOC(ma);
VK_MULTIALLOC_DECL(&ma, struct anv_shader, shader, 1); VK_MULTIALLOC_DECL(&ma, struct anv_shader, shader, 1);
VK_MULTIALLOC_DECL(&ma, uint32_t, cmd_data, cmd_data_dwords);
VK_MULTIALLOC_DECL_SIZE(&ma, void, obj_key_data, brw_prog_key_size(stage)); VK_MULTIALLOC_DECL_SIZE(&ma, void, obj_key_data, brw_prog_key_size(stage));
VK_MULTIALLOC_DECL_SIZE(&ma, struct brw_stage_prog_data, prog_data, VK_MULTIALLOC_DECL_SIZE(&ma, struct brw_stage_prog_data, prog_data,
brw_prog_data_size(stage)); brw_prog_data_size(stage));
@ -782,6 +787,19 @@ anv_shader_create(struct anv_device *device,
shader->vk.scratch_size = shader->prog_data->total_scratch; shader->vk.scratch_size = shader->prog_data->total_scratch;
shader->vk.ray_queries = shader->prog_data->ray_queries; shader->vk.ray_queries = shader->prog_data->ray_queries;
result =
anv_reloc_list_init(&shader->relocs, &device->vk.alloc,
device->physical->uses_relocs);
if (result != VK_SUCCESS)
goto error_embedded_samplers;
struct anv_batch batch = {};
anv_batch_set_storage(&batch, ANV_NULL_ADDRESS,
cmd_data, 4 * cmd_data_dwords);
batch.relocs = &shader->relocs;
shader->cmd_data = cmd_data;
anv_genX(device->info, shader_emit)(&batch, device, shader);
*shader_out = &shader->vk; *shader_out = &shader->vk;
return VK_SUCCESS; return VK_SUCCESS;
@ -791,6 +809,7 @@ anv_shader_create(struct anv_device *device,
anv_embedded_sampler_unref(device, shader->embedded_samplers[s]); anv_embedded_sampler_unref(device, shader->embedded_samplers[s]);
anv_state_pool_free(&device->instruction_state_pool, shader->kernel); anv_state_pool_free(&device->instruction_state_pool, shader->kernel);
error_shader: error_shader:
anv_state_pool_free(&device->instruction_state_pool, shader->kernel);
vk_shader_free(&device->vk, pAllocator, &shader->vk); vk_shader_free(&device->vk, pAllocator, &shader->vk);
return result; return result;
} }

View file

@ -9,6 +9,75 @@
#include "nir/nir_xfb_info.h" #include "nir/nir_xfb_info.h"
/* Account for `n_dwords` about to be written at the current end of `batch`
 * in the range tracked by `ptr`.
 *
 * On the first call for a range (len == 0) the range's offset is set to the
 * batch's current write position in dwords. On later calls the new dwords
 * must directly follow the existing range (asserted). The batch is returned
 * unchanged so the call can be nested inside anv_batch_emit_dwords().
 */
static inline struct anv_batch *
anv_shader_add(struct anv_batch *batch,
               struct anv_gfx_state_ptr *ptr,
               uint32_t n_dwords)
{
   if (ptr->len == 0) {
      /* Start of a new range: remember where it begins. */
      ptr->offset = (batch->next - batch->start) / 4;
   } else {
      /* Extending a range: it must end exactly at the write position. */
      assert((batch->next - batch->start) / 4 == (ptr->offset + ptr->len));
   }

   ptr->len += n_dwords;

   return batch;
}
/* Emit one fixed-length command `cmd` into `batch` and record it in the
 * (shader)->state range.
 *
 * Used as a loop header: the statement that follows runs once with `name`
 * being a `struct cmd` initialized with the command header, so the caller can
 * fill in its fields. Space for __anv_cmd_length(cmd) dwords is reserved with
 * anv_batch_emit_dwords() and accounted through anv_shader_add(). After the
 * body, `name` is packed into the reserved space and _dst is reset to NULL,
 * which ends the loop. If the reservation returns NULL the body is skipped.
 */
#define anv_shader_emit(batch, shader, state, cmd, name) \
for (struct cmd name = { __anv_cmd_header(cmd) }, \
*_dst = anv_batch_emit_dwords( \
anv_shader_add(batch, &(shader)->state, \
__anv_cmd_length(cmd)), \
__anv_cmd_length(cmd)); \
__builtin_expect(_dst != NULL, 1); \
({ __anv_cmd_pack(cmd)(batch, _dst, &name); \
VG(VALGRIND_CHECK_MEM_IS_DEFINED( \
_dst, __anv_cmd_length(cmd) * 4)); \
_dst = NULL; \
}))
/* Emit a variable-length command `cmd` of `n` dwords into `batch` and record
 * it in the (shader)->state range.
 *
 * `n` dwords are reserved with anv_batch_emit_dwords() and accounted through
 * anv_shader_add(). If the reservation succeeds, a template holding the
 * command header, DWordLength = n - __anv_cmd_length_bias(cmd) and any extra
 * designated initializers passed as variadic arguments is packed at the start
 * of the reserved space. The macro evaluates to the reserved pointer (NULL on
 * failure) so the caller can fill in the remaining dwords.
 *
 * `n` is parenthesized wherever it appears in an expression, so arguments
 * such as `cond ? a : b` yield the intended DWordLength. Note that `n` is
 * still evaluated more than once, so it must be free of side effects.
 */
#define anv_shader_emitn(batch, shader, state, n, cmd, ...) ({          \
   void *__dst = anv_batch_emit_dwords(                                 \
      anv_shader_add(batch, &(shader)->state, (n)), (n));               \
   if (__dst) {                                                         \
      struct cmd __template = {                                         \
         __anv_cmd_header(cmd),                                         \
         .DWordLength = (n) - __anv_cmd_length_bias(cmd),               \
         __VA_ARGS__                                                    \
      };                                                                \
      __anv_cmd_pack(cmd)(batch, __dst, &__template);                   \
   }                                                                    \
   __dst;                                                               \
})
/* Pack one fixed-length command `cmd` into caller-provided `storage` instead
 * of reserving space in the batch.
 *
 * Used as a loop header: the statement that follows runs once with `name`
 * being a `struct cmd` initialized with the command header. After the body,
 * `name` is packed into `storage` and _dst is reset to NULL, which ends the
 * loop. `batch` is only passed through to the pack function. If `storage` is
 * NULL the body is skipped.
 *
 * `storage` is parenthesized so that the cast applies to the whole argument
 * expression (e.g. `buf + 4`) rather than to its first operand only.
 */
#define anv_shader_emit_tmp(batch, storage, cmd, name)                  \
   for (struct cmd name = { __anv_cmd_header(cmd) },                    \
           *_dst = (void *)(storage);                                   \
        __builtin_expect(_dst != NULL, 1);                              \
        ({ __anv_cmd_pack(cmd)(batch, _dst, &name);                     \
           VG(VALGRIND_CHECK_MEM_IS_DEFINED(                            \
                 _dst, __anv_cmd_length(cmd) * 4));                     \
           _dst = NULL;                                                 \
         }))
/* Emit one fixed-length command `cmd` into `batch`, OR-ing the caller's
 * fields with an already packed copy of the same command in `dwords`.
 *
 * Used as a loop header: the statement that follows runs once with `name`
 * being a zero-initialized `struct cmd` (no header, since `dwords` is OR-ed
 * in afterwards). Space for __anv_cmd_length(cmd) dwords is reserved with
 * anv_batch_emit_dwords() and accounted through anv_shader_add(); the
 * (shader)->state range must then contain exactly this one command
 * (asserted). After the body, `name` is packed into a temporary and each
 * dword is written out as `packed | dwords[k]`. If the reservation returns
 * NULL the body is skipped.
 *
 * `dwords` is parenthesized and the internal index uses a macro-private name,
 * so a caller expression that itself mentions a variable called `i` is not
 * captured by the merge loop.
 */
#define anv_shader_emit_merge(batch, shader, state, dwords, cmd, name)  \
   for (struct cmd name = { 0 },                                        \
           *_dst = anv_batch_emit_dwords(                               \
              anv_shader_add(batch, &(shader)->state,                   \
                             __anv_cmd_length(cmd)),                    \
              __anv_cmd_length(cmd));                                   \
        __builtin_expect(_dst != NULL, 1);                              \
        ({ uint32_t _partial[__anv_cmd_length(cmd)];                    \
           assert((shader)->state.len == __anv_cmd_length(cmd));        \
           __anv_cmd_pack(cmd)(batch, _partial, &name);                 \
           for (uint32_t _merge_i = 0;                                  \
                _merge_i < __anv_cmd_length(cmd); _merge_i++) {         \
              ((uint32_t *)_dst)[_merge_i] =                            \
                 _partial[_merge_i] | (dwords)[_merge_i];               \
           }                                                            \
           VG(VALGRIND_CHECK_MEM_IS_DEFINED(                            \
                 _dst, __anv_cmd_length(cmd) * 4));                     \
           _dst = NULL;                                                 \
         }))
struct anv_shader_data { struct anv_shader_data {
struct vk_shader_compile_info *info; struct vk_shader_compile_info *info;

View file

@ -1852,6 +1852,30 @@ fail:
return result; return result;
} }
/* vk_device_shader_ops::write_rt_shader_group hook.
 *
 * Recovers the anv_device that embeds `vk_device` and forwards all remaining
 * arguments unchanged to the write_rt_shader_group implementation that
 * anv_genX() selects from the device's info.
 */
static void
anv_write_rt_shader_group(struct vk_device *vk_device,
                          VkRayTracingShaderGroupTypeKHR type,
                          const struct vk_shader **shaders,
                          uint32_t shader_count,
                          void *output)
{
   struct anv_device *anv_dev;

   anv_dev = container_of(vk_device, struct anv_device, vk);

   anv_genX(anv_dev->info, write_rt_shader_group)(
      anv_dev, type, shaders, shader_count, output);
}
/* vk_device_shader_ops::write_rt_shader_group_replay_handle hook.
 *
 * Not implemented: none of the arguments are used and the body is marked
 * UNREACHABLE, so this function is not expected to be called.
 */
static void
anv_write_rt_shader_group_replay_handle(struct vk_device *device,
const struct vk_shader **shaders,
uint32_t shader_count,
void *output)
{
UNREACHABLE("Unimplemented");
}
struct vk_device_shader_ops anv_device_shader_ops = { struct vk_device_shader_ops anv_device_shader_ops = {
.get_nir_options = anv_shader_get_nir_options, .get_nir_options = anv_shader_get_nir_options,
.get_spirv_options = anv_shader_get_spirv_options, .get_spirv_options = anv_shader_get_spirv_options,
@ -1860,5 +1884,7 @@ struct vk_device_shader_ops anv_device_shader_ops = {
.hash_state = anv_shader_hash_state, .hash_state = anv_shader_hash_state,
.compile = anv_shader_compile, .compile = anv_shader_compile,
.deserialize = anv_shader_deserialize, .deserialize = anv_shader_deserialize,
.write_rt_shader_group = anv_write_rt_shader_group,
.write_rt_shader_group_replay_handle = anv_write_rt_shader_group_replay_handle,
.cmd_set_dynamic_graphics_state = vk_cmd_set_dynamic_graphics_state, .cmd_set_dynamic_graphics_state = vk_cmd_set_dynamic_graphics_state,
}; };

File diff suppressed because it is too large Load diff

View file

@ -99,6 +99,7 @@ anv_per_hw_ver_files = files(
'genX_pipeline.c', 'genX_pipeline.c',
'genX_query.c', 'genX_query.c',
'genX_simple_shader.c', 'genX_simple_shader.c',
'genX_shader.c',
) )
if with_intel_vk_rt if with_intel_vk_rt
anv_per_hw_ver_files += files( anv_per_hw_ver_files += files(