anv: enable generation shader calls

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31384>
This commit is contained in:
Lionel Landwerlin 2024-05-28 12:07:24 +03:00 committed by Marge Bot
parent 5c3deebd6f
commit 08c5e2854a
4 changed files with 321 additions and 1 deletions

View file

@ -162,7 +162,7 @@ compile_shader(struct anv_device *device,
const unsigned *program;
if (stage == MESA_SHADER_FRAGMENT) {
struct genisa_stats stats[3];
struct genisa_stats stats[3] = {};
struct brw_compile_fs_params params = {
.base = {
.nir = nir,
@ -189,6 +189,11 @@ compile_shader(struct anv_device *device,
check_sends(&stats[stat_idx++], sends_count_expectation *
(device->info->ver < 20 ? 2 : 1));
}
} else {
for (uint32_t i = 0; i < ARRAY_SIZE(stats); i++) {
assert(stats[i].spills == 0);
assert(stats[i].fills == 0);
}
}
} else {
brw_cs_fill_push_const_info(device->info, &prog_data.cs, -1);
@ -304,6 +309,55 @@ anv_device_get_internal_shader(struct anv_device *device,
10 /* 5 loads (1 pull constants) + 4 stores + 1 EOT */ :
9 /* 4 loads + 4 stores + 1 EOT */,
},
[ANV_INTERNAL_KERNEL_DGC_GFX_COMPUTE] = {
.key = {
.name = "anv-dgc-gfx-compute",
},
.stage = MESA_SHADER_COMPUTE,
.send_count = 0 /* too complex */,
},
[ANV_INTERNAL_KERNEL_DGC_GFX_FRAGMENT] = {
.key = {
.name = "anv-dgc-gfx-fragment",
},
.stage = MESA_SHADER_FRAGMENT,
.send_count = 0 /* too complex */,
},
[ANV_INTERNAL_KERNEL_DGC_CS_COMPUTE] = {
.key = {
.name = "anv-dgc-cs-compute",
},
.stage = MESA_SHADER_COMPUTE,
.send_count = 0 /* too complex */,
},
[ANV_INTERNAL_KERNEL_DGC_CS_FRAGMENT] = {
.key = {
.name = "anv-dgc-cs-fragment",
},
.stage = MESA_SHADER_FRAGMENT,
.send_count = 0 /* too complex */,
},
[ANV_INTERNAL_KERNEL_DGC_CS_POSTPROCESS_COMPUTE] = {
.key = {
.name = "anv-dgc-postprocess-compute",
},
.stage = MESA_SHADER_COMPUTE,
.send_count = device->info->verx10 >= 125 ? 11 : 8,
},
[ANV_INTERNAL_KERNEL_DGC_RT_COMPUTE] = {
.key = {
.name = "anv-dgc-rt-compute",
},
.stage = MESA_SHADER_COMPUTE,
.send_count = 0 /* too complex */,
},
/* Fragment-stage variant of the DGC ray-tracing generation kernel. It has to
 * be declared as a fragment shader like the other *_FRAGMENT entries of this
 * table: call_internal_shader passes load_fragment_index() to the generation
 * function for this kernel, not load_compute_index().
 */
[ANV_INTERNAL_KERNEL_DGC_RT_FRAGMENT] = {
   .key = {
      .name = "anv-dgc-rt-fragment",
   },
   .stage = MESA_SHADER_FRAGMENT,
   .send_count = 0 /* too complex */,
},
};
struct anv_shader_internal *bin =

View file

@ -138,4 +138,166 @@ struct PACKED anv_memcpy_params {
uint64_t dst_addr;
};
/* Parameter block for the DGC graphics generation kernels.
 *
 * For ANV_INTERNAL_KERNEL_DGC_GFX_COMPUTE and
 * ANV_INTERNAL_KERNEL_DGC_GFX_FRAGMENT, call_internal_shader reads every
 * field below with load_param() (64-bit loads for the addresses, 32-bit loads
 * for the other fields), forwards them to libanv_preprocess_gfx_generate and
 * returns sizeof(struct anv_dgc_gfx_params).
 *
 * NOTE(review): the struct is PACKED and read by field offset, so field order
 * and sizes presumably have to stay in sync with whatever fills this block on
 * the host side — confirm before reordering.
 */
struct PACKED anv_dgc_gfx_params {
/* Preprocess command address */
uint64_t cmd_addr;
/* Preprocess data address */
uint64_t data_addr;
/* Sequence buffer address */
uint64_t seq_addr;
/* Sequence count address */
uint64_t seq_count_addr;
/* State address */
uint64_t state_addr;
/* Push constant address */
uint64_t const_addr;
/* Driver constant address */
uint64_t driver_const_addr;
/* Return address in the command buffer */
uint64_t return_addr;
/* Stride between each generated commands for a sequence */
uint32_t cmd_stride;
/* Stride between each data chunks for a sequence */
uint32_t data_stride;
/* Stride between sequences */
uint32_t seq_stride;
/* Max sequence count */
uint32_t max_seq_count;
/* Size of the prolog prior to generated commands */
uint32_t cmd_prolog_size;
/* Size of the prolog prior to generated data */
uint32_t data_prolog_size;
/* Push constant size */
uint32_t const_size;
/* Flags for the generation (ANV_GENERATED_FLAG_*) */
uint32_t flags;
};
/* Parameter block for the DGC compute generation kernels.
 *
 * For ANV_INTERNAL_KERNEL_DGC_CS_COMPUTE and
 * ANV_INTERNAL_KERNEL_DGC_CS_FRAGMENT, call_internal_shader reads every field
 * below with load_param() and forwards them to libanv_preprocess_cs_generate.
 * ANV_INTERNAL_KERNEL_DGC_CS_POSTPROCESS_COMPUTE reads a subset of the same
 * fields and forwards them to libanv_postprocess_cs_generate. Both paths
 * return sizeof(struct anv_dgc_cs_params).
 *
 * NOTE(review): the struct is PACKED and read by field offset, so field order
 * and sizes presumably have to stay in sync with whatever fills this block on
 * the host side — confirm before reordering.
 */
struct PACKED anv_dgc_cs_params {
/* Preprocess command address */
uint64_t cmd_addr;
/* Preprocess data address */
uint64_t data_addr;
/* Sequence buffer address */
uint64_t seq_addr;
/* Sequence count address */
uint64_t seq_count_addr;
/* Layout address */
uint64_t layout_addr;
/* Push constant address */
uint64_t const_addr;
/* Driver constant address */
uint64_t driver_const_addr;
/* Indirect set address */
uint64_t indirect_set_addr;
/* INTERFACE_DESCRIPTOR_DATA prepared data from driver */
uint64_t interface_descriptor_data_addr;
/* Return address in the command buffer */
uint64_t return_addr;
/* Stride between each generated commands for a sequence */
uint32_t cmd_stride;
/* Stride between each data chunks for a sequence */
uint32_t data_stride;
/* Stride between sequences */
uint32_t seq_stride;
/* Max sequence count */
uint32_t max_seq_count;
/* Size of the prolog prior to generated commands */
uint32_t cmd_prolog_size;
/* Size of the prolog prior to generated data */
uint32_t data_prolog_size;
/* Push constant size */
uint32_t const_size;
/* Flags for the generation (ANV_GENERATED_FLAG_*) */
uint32_t flags;
};
/* Parameter block for the DGC ray-tracing generation kernels.
 *
 * For ANV_INTERNAL_KERNEL_DGC_RT_COMPUTE and
 * ANV_INTERNAL_KERNEL_DGC_RT_FRAGMENT (only handled when
 * GFX_VERx10 >= 125), call_internal_shader reads every field below with
 * load_param(), forwards them to libanv_preprocess_rt_generate and returns
 * sizeof(struct anv_dgc_rt_params).
 *
 * NOTE(review): the struct is PACKED and read by field offset, so field order
 * and sizes presumably have to stay in sync with whatever fills this block on
 * the host side — confirm before reordering.
 */
struct PACKED anv_dgc_rt_params {
/* Preprocess command address */
uint64_t cmd_addr;
/* Preprocess data address */
uint64_t data_addr;
/* Sequence buffer address */
uint64_t seq_addr;
/* Sequence count address */
uint64_t seq_count_addr;
/* Layout address */
uint64_t layout_addr;
/* Template COMPUTE_WALKER instruction */
uint64_t compute_walker_addr;
/* Template RT_DISPATCH_GLOBALS structure */
uint64_t rtdg_global_addr;
/* Push constant address */
uint64_t const_addr;
/* Driver constant address */
uint64_t driver_const_addr;
/* Return address in the command buffer */
uint64_t return_addr;
/* Stride between each generated commands for a sequence */
uint32_t cmd_stride;
/* Stride between each data chunks for a sequence */
uint32_t data_stride;
/* Stride between sequences */
uint32_t seq_stride;
/* Max sequence count */
uint32_t max_seq_count;
/* Size of the prolog prior to generated commands */
uint32_t cmd_prolog_size;
/* Size of the prolog prior to generated data */
uint32_t data_prolog_size;
/* Push constant size */
uint32_t const_size;
/* Flags for the generation (ANV_GENERATED_FLAG_*) */
uint32_t flags;
};
#endif /* ANV_GENERATED_INDIRECT_DRAWS_H */

View file

@ -2473,6 +2473,13 @@ enum anv_internal_kernel_name {
ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_COMPUTE,
ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_FRAGMENT,
ANV_INTERNAL_KERNEL_MEMCPY_COMPUTE,
ANV_INTERNAL_KERNEL_DGC_GFX_COMPUTE,
ANV_INTERNAL_KERNEL_DGC_GFX_FRAGMENT,
ANV_INTERNAL_KERNEL_DGC_CS_COMPUTE,
ANV_INTERNAL_KERNEL_DGC_CS_FRAGMENT,
ANV_INTERNAL_KERNEL_DGC_CS_POSTPROCESS_COMPUTE,
ANV_INTERNAL_KERNEL_DGC_RT_COMPUTE,
ANV_INTERNAL_KERNEL_DGC_RT_FRAGMENT,
ANV_INTERNAL_KERNEL_COUNT,
};

View file

@ -110,6 +110,103 @@ genX(call_internal_shader)(nir_builder *b, enum anv_internal_kernel_name shader_
nir_imul_imm(b, load_compute_index(b), 4));
return sizeof(struct anv_memcpy_params);
#if GFX_VER >= 11
case ANV_INTERNAL_KERNEL_DGC_GFX_COMPUTE:
case ANV_INTERNAL_KERNEL_DGC_GFX_FRAGMENT:
genX(libanv_preprocess_gfx_generate)(
b,
load_param(b, 64, struct anv_dgc_gfx_params, cmd_addr),
load_param(b, 32, struct anv_dgc_gfx_params, cmd_stride),
load_param(b, 64, struct anv_dgc_gfx_params, data_addr),
load_param(b, 32, struct anv_dgc_gfx_params, data_stride),
load_param(b, 64, struct anv_dgc_gfx_params, seq_addr),
load_param(b, 32, struct anv_dgc_gfx_params, seq_stride),
load_param(b, 64, struct anv_dgc_gfx_params, seq_count_addr),
load_param(b, 32, struct anv_dgc_gfx_params, max_seq_count),
load_param(b, 32, struct anv_dgc_gfx_params, cmd_prolog_size),
load_param(b, 32, struct anv_dgc_gfx_params, data_prolog_size),
load_param(b, 64, struct anv_dgc_gfx_params, state_addr),
load_param(b, 64, struct anv_dgc_gfx_params, const_addr),
load_param(b, 32, struct anv_dgc_gfx_params, const_size),
load_param(b, 64, struct anv_dgc_gfx_params, driver_const_addr),
load_param(b, 64, struct anv_dgc_gfx_params, return_addr),
load_param(b, 32, struct anv_dgc_gfx_params, flags),
shader_name == ANV_INTERNAL_KERNEL_DGC_GFX_COMPUTE ?
load_compute_index(b) : load_fragment_index(b));
return sizeof(struct anv_dgc_gfx_params);
case ANV_INTERNAL_KERNEL_DGC_CS_COMPUTE:
case ANV_INTERNAL_KERNEL_DGC_CS_FRAGMENT:
genX(libanv_preprocess_cs_generate)(
b,
load_param(b, 64, struct anv_dgc_cs_params, cmd_addr),
load_param(b, 32, struct anv_dgc_cs_params, cmd_stride),
load_param(b, 64, struct anv_dgc_cs_params, data_addr),
load_param(b, 32, struct anv_dgc_cs_params, data_stride),
load_param(b, 64, struct anv_dgc_cs_params, seq_addr),
load_param(b, 32, struct anv_dgc_cs_params, seq_stride),
load_param(b, 64, struct anv_dgc_cs_params, seq_count_addr),
load_param(b, 32, struct anv_dgc_cs_params, max_seq_count),
load_param(b, 32, struct anv_dgc_cs_params, cmd_prolog_size),
load_param(b, 32, struct anv_dgc_cs_params, data_prolog_size),
load_param(b, 64, struct anv_dgc_cs_params, layout_addr),
load_param(b, 64, struct anv_dgc_cs_params, indirect_set_addr),
load_param(b, 64, struct anv_dgc_cs_params, interface_descriptor_data_addr),
load_param(b, 64, struct anv_dgc_cs_params, const_addr),
load_param(b, 32, struct anv_dgc_cs_params, const_size),
load_param(b, 64, struct anv_dgc_cs_params, driver_const_addr),
load_param(b, 64, struct anv_dgc_cs_params, return_addr),
load_param(b, 32, struct anv_dgc_cs_params, flags),
shader_name == ANV_INTERNAL_KERNEL_DGC_CS_COMPUTE ?
load_compute_index(b) : load_fragment_index(b));
return sizeof(struct anv_dgc_cs_params);
/* DGC compute post-process kernel: loads a subset of anv_dgc_cs_params and
 * hands it to libanv_postprocess_cs_generate, always indexed with the compute
 * index (this kernel has no fragment variant in this switch).
 */
case ANV_INTERNAL_KERNEL_DGC_CS_POSTPROCESS_COMPUTE:
genX(libanv_postprocess_cs_generate)(
b,
load_param(b, 64, struct anv_dgc_cs_params, cmd_addr),
load_param(b, 32, struct anv_dgc_cs_params, cmd_stride),
load_param(b, 64, struct anv_dgc_cs_params, data_addr),
load_param(b, 32, struct anv_dgc_cs_params, data_stride),
load_param(b, 64, struct anv_dgc_cs_params, seq_count_addr),
load_param(b, 32, struct anv_dgc_cs_params, max_seq_count),
load_param(b, 32, struct anv_dgc_cs_params, cmd_prolog_size),
load_param(b, 32, struct anv_dgc_cs_params, data_prolog_size),
/* NOTE(review): data_stride is passed a second time here (it is already the
 * 4th argument above). The callee's parameter list is not visible from this
 * file, so this may be intentional or a copy-paste slip for another 32-bit
 * field — confirm against libanv_postprocess_cs_generate's signature.
 */
load_param(b, 32, struct anv_dgc_cs_params, data_stride),
load_param(b, 64, struct anv_dgc_cs_params, indirect_set_addr),
load_param(b, 64, struct anv_dgc_cs_params, return_addr),
load_compute_index(b));
return sizeof(struct anv_dgc_cs_params);
#endif /* GFX_VER >= 11 */
#if GFX_VERx10 >= 125
case ANV_INTERNAL_KERNEL_DGC_RT_COMPUTE:
case ANV_INTERNAL_KERNEL_DGC_RT_FRAGMENT:
genX(libanv_preprocess_rt_generate)(
b,
load_param(b, 64, struct anv_dgc_rt_params, cmd_addr),
load_param(b, 32, struct anv_dgc_rt_params, cmd_stride),
load_param(b, 64, struct anv_dgc_rt_params, data_addr),
load_param(b, 32, struct anv_dgc_rt_params, data_stride),
load_param(b, 64, struct anv_dgc_rt_params, seq_addr),
load_param(b, 32, struct anv_dgc_rt_params, seq_stride),
load_param(b, 64, struct anv_dgc_rt_params, seq_count_addr),
load_param(b, 32, struct anv_dgc_rt_params, max_seq_count),
load_param(b, 32, struct anv_dgc_rt_params, cmd_prolog_size),
load_param(b, 32, struct anv_dgc_rt_params, data_prolog_size),
load_param(b, 64, struct anv_dgc_rt_params, layout_addr),
load_param(b, 64, struct anv_dgc_rt_params, compute_walker_addr),
load_param(b, 64, struct anv_dgc_rt_params, rtdg_global_addr),
load_param(b, 64, struct anv_dgc_rt_params, const_addr),
load_param(b, 32, struct anv_dgc_rt_params, const_size),
load_param(b, 64, struct anv_dgc_rt_params, driver_const_addr),
load_param(b, 64, struct anv_dgc_rt_params, return_addr),
load_param(b, 32, struct anv_dgc_rt_params, flags),
shader_name == ANV_INTERNAL_KERNEL_DGC_RT_COMPUTE ?
load_compute_index(b) : load_fragment_index(b));
return sizeof(struct anv_dgc_rt_params);
#endif /* GFX_VERx10 >= 125 */
default:
UNREACHABLE("Invalid shader name");
break;