hasvk: remove mesh code

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Acked-by: Jason Ekstrand <jason.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18208>
Lionel Landwerlin 2022-08-22 10:09:18 +03:00 committed by Marge Bot
parent 6cbaaf27ab
commit d8e2d227ef
5 changed files with 125 additions and 870 deletions
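The first hunks below drop the environment-variable gate that hid the extension. As a rough standalone sketch of the pattern being deleted (plain getenv() standing in for Mesa's env_var_as_boolean(); the exact truthiness parsing here is an assumption):

#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

/* Returns true when the variable is set to something truthy; mirrors the
 * shape of env_var_as_boolean("ANV_EXPERIMENTAL_NV_MESH_SHADER", false). */
static bool
env_enabled(const char *name, bool default_value)
{
   const char *v = getenv(name);
   if (v == NULL)
      return default_value;
   return strcmp(v, "0") != 0 && strcasecmp(v, "false") != 0;
}

/* .NV_mesh_shader = device->info.has_mesh_shading &&
 *                   env_enabled("ANV_EXPERIMENTAL_NV_MESH_SHADER", false) */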

View file

@@ -182,9 +182,6 @@ get_device_extensions(const struct anv_physical_device *device,
const bool has_syncobj_wait =
(device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT) != 0;
const bool nv_mesh_shading_enabled =
env_var_as_boolean("ANV_EXPERIMENTAL_NV_MESH_SHADER", false);
*ext = (struct vk_device_extension_table) {
.KHR_8bit_storage = device->info.ver >= 8,
.KHR_16bit_storage = device->info.ver >= 8,
@@ -333,8 +330,6 @@ get_device_extensions(const struct anv_physical_device *device,
.INTEL_shader_integer_functions2 = device->info.ver >= 8,
.EXT_multi_draw = true,
.NV_compute_shader_derivatives = true,
.NV_mesh_shader = device->info.has_mesh_shading &&
nv_mesh_shading_enabled,
.VALVE_mutable_descriptor_type = true,
};
}
@@ -1513,14 +1508,6 @@ void anv_GetPhysicalDeviceFeatures2(
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_FEATURES_NV: {
VkPhysicalDeviceMeshShaderFeaturesNV *features =
(VkPhysicalDeviceMeshShaderFeaturesNV *)ext;
features->taskShader = pdevice->vk.supported_extensions.NV_mesh_shader;
features->meshShader = pdevice->vk.supported_extensions.NV_mesh_shader;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_VALVE: {
VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *features =
(VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *)ext;
@@ -1932,10 +1919,6 @@ anv_get_physical_device_properties_1_1(struct anv_physical_device *pdevice,
if (pdevice->compiler->scalar_stage[stage])
scalar_stages |= mesa_to_vk_shader_stage(stage);
}
if (pdevice->vk.supported_extensions.NV_mesh_shader) {
scalar_stages |= VK_SHADER_STAGE_TASK_BIT_NV |
VK_SHADER_STAGE_MESH_BIT_NV;
}
p->subgroupSupportedStages = scalar_stages;
p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
VK_SUBGROUP_FEATURE_VOTE_BIT |
@@ -2104,9 +2087,7 @@ anv_get_physical_device_properties_1_3(struct anv_physical_device *pdevice,
p->minSubgroupSize = 8;
p->maxSubgroupSize = 32;
p->maxComputeWorkgroupSubgroups = pdevice->info.max_cs_workgroup_threads;
p->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT |
VK_SHADER_STAGE_TASK_BIT_NV |
VK_SHADER_STAGE_MESH_BIT_NV;
p->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT;
p->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
p->maxPerStageDescriptorInlineUniformBlocks =
@@ -2340,66 +2321,6 @@ void anv_GetPhysicalDeviceProperties2(
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_PROPERTIES_NV: {
VkPhysicalDeviceMeshShaderPropertiesNV *props =
(VkPhysicalDeviceMeshShaderPropertiesNV *)ext;
/* Bounded by the maximum representable size in
* 3DSTATE_MESH_SHADER_BODY::SharedLocalMemorySize. Same for Task.
*/
const uint32_t max_slm_size = 64 * 1024;
/* Bounded by the maximum representable size in
* 3DSTATE_MESH_SHADER_BODY::LocalXMaximum. Same for Task.
*/
const uint32_t max_workgroup_size = 1 << 10;
/* Bounded by the maximum representable count in
* 3DSTATE_MESH_SHADER_BODY::MaximumPrimitiveCount.
*/
const uint32_t max_primitives = 1024;
/* TODO(mesh): Multiview. */
const uint32_t max_view_count = 1;
props->maxDrawMeshTasksCount = UINT32_MAX;
/* TODO(mesh): Implement workgroup Y and Z sizes larger than one by
* mapping them to/from the single value that HW provides us
* (currently used for X).
*/
props->maxTaskWorkGroupInvocations = max_workgroup_size;
props->maxTaskWorkGroupSize[0] = max_workgroup_size;
props->maxTaskWorkGroupSize[1] = 1;
props->maxTaskWorkGroupSize[2] = 1;
props->maxTaskTotalMemorySize = max_slm_size;
props->maxTaskOutputCount = UINT16_MAX;
props->maxMeshWorkGroupInvocations = max_workgroup_size;
props->maxMeshWorkGroupSize[0] = max_workgroup_size;
props->maxMeshWorkGroupSize[1] = 1;
props->maxMeshWorkGroupSize[2] = 1;
props->maxMeshTotalMemorySize = max_slm_size / max_view_count;
props->maxMeshOutputPrimitives = max_primitives / max_view_count;
props->maxMeshMultiviewViewCount = max_view_count;
/* Depends on what indices can be represented with IndexFormat. For
* now we always use U32, so bound to the maximum unique vertices we
* need for the maximum primitives.
*
* TODO(mesh): Revisit this if we drop "U32" IndexFormat when adding
* support for others.
*/
props->maxMeshOutputVertices = 3 * props->maxMeshOutputPrimitives;
props->meshOutputPerVertexGranularity = 32;
props->meshOutputPerPrimitiveGranularity = 32;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: {
VkPhysicalDevicePCIBusInfoPropertiesEXT *properties =
(VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext;
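The feature and property hunks in this file all edit the same pNext-chain walk; a minimal standalone sketch of that core-Vulkan pattern (not the driver's actual code):

#include <stdbool.h>
#include <vulkan/vulkan.h>

static void
fill_mesh_features(VkPhysicalDeviceFeatures2 *features, bool has_mesh_nv)
{
   /* Walk the caller-provided chain and fill in the structs we recognize. */
   for (VkBaseOutStructure *ext = (VkBaseOutStructure *)features->pNext;
        ext != NULL; ext = ext->pNext) {
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_FEATURES_NV: {
         VkPhysicalDeviceMeshShaderFeaturesNV *f =
            (VkPhysicalDeviceMeshShaderFeaturesNV *)ext;
         f->taskShader = has_mesh_nv;
         f->meshShader = has_mesh_nv;
         break;
      }
      default:
         break; /* leave unknown structs untouched */
      }
   }
}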

View file

@@ -88,7 +88,6 @@ anv_shader_stage_to_nir(struct anv_device *device,
.int64 = pdevice->info.ver >= 8,
.int64_atomics = pdevice->info.ver >= 9 && pdevice->use_softpin,
.integer_functions2 = pdevice->info.ver >= 8,
.mesh_shading_nv = pdevice->vk.supported_extensions.NV_mesh_shader,
.min_lod = true,
.multiview = true,
.physical_storage_buffer_address = pdevice->has_a64_buffer_access,
@@ -398,26 +397,6 @@ pipeline_has_coarse_pixel(const struct anv_graphics_pipeline *pipeline,
return true;
}
static void
populate_task_prog_key(const struct anv_device *device,
bool robust_buffer_access,
struct brw_task_prog_key *key)
{
memset(key, 0, sizeof(*key));
populate_base_prog_key(device, robust_buffer_access, &key->base);
}
static void
populate_mesh_prog_key(const struct anv_device *device,
bool robust_buffer_access,
struct brw_mesh_prog_key *key)
{
memset(key, 0, sizeof(*key));
populate_base_prog_key(device, robust_buffer_access, &key->base);
}
static void
populate_wm_prog_key(const struct anv_graphics_pipeline *pipeline,
bool robust_buffer_acccess,
@@ -742,8 +721,7 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
}
}
if (gl_shader_stage_is_compute(nir->info.stage) ||
gl_shader_stage_is_mesh(nir->info.stage))
if (gl_shader_stage_is_compute(nir->info.stage))
NIR_PASS(_, nir, brw_nir_lower_cs_intrinsics);
stage->nir = nir;
@@ -954,70 +932,6 @@ anv_pipeline_compile_gs(const struct brw_compiler *compiler,
gs_stage->code = brw_compile_gs(compiler, mem_ctx, &params);
}
static void
anv_pipeline_link_task(const struct brw_compiler *compiler,
struct anv_pipeline_stage *task_stage,
struct anv_pipeline_stage *next_stage)
{
assert(next_stage);
assert(next_stage->stage == MESA_SHADER_MESH);
brw_nir_link_shaders(compiler, task_stage->nir, next_stage->nir);
}
static void
anv_pipeline_compile_task(const struct brw_compiler *compiler,
void *mem_ctx,
struct anv_device *device,
struct anv_pipeline_stage *task_stage)
{
task_stage->num_stats = 1;
struct brw_compile_task_params params = {
.nir = task_stage->nir,
.key = &task_stage->key.task,
.prog_data = &task_stage->prog_data.task,
.stats = task_stage->stats,
.log_data = device,
};
task_stage->code = brw_compile_task(compiler, mem_ctx, &params);
}
static void
anv_pipeline_link_mesh(const struct brw_compiler *compiler,
struct anv_pipeline_stage *mesh_stage,
struct anv_pipeline_stage *next_stage)
{
if (next_stage) {
brw_nir_link_shaders(compiler, mesh_stage->nir, next_stage->nir);
}
}
static void
anv_pipeline_compile_mesh(const struct brw_compiler *compiler,
void *mem_ctx,
struct anv_device *device,
struct anv_pipeline_stage *mesh_stage,
struct anv_pipeline_stage *prev_stage)
{
mesh_stage->num_stats = 1;
struct brw_compile_mesh_params params = {
.nir = mesh_stage->nir,
.key = &mesh_stage->key.mesh,
.prog_data = &mesh_stage->prog_data.mesh,
.stats = mesh_stage->stats,
.log_data = device,
};
if (prev_stage) {
assert(prev_stage->stage == MESA_SHADER_TASK);
params.tue_map = &prev_stage->prog_data.task.map;
}
mesh_stage->code = brw_compile_mesh(compiler, mem_ctx, &params);
}
static void
anv_pipeline_link_fs(const struct brw_compiler *compiler,
struct anv_pipeline_stage *stage,
@@ -1102,13 +1016,8 @@ anv_pipeline_compile_fs(const struct brw_compiler *compiler,
.log_data = device,
};
if (prev_stage->stage == MESA_SHADER_MESH) {
params.mue_map = &prev_stage->prog_data.mesh.map;
/* TODO(mesh): Slots valid, do we even use/rely on it? */
} else {
fs_stage->key.wm.input_slots_valid =
prev_stage->prog_data.vue.vue_map.slots_valid;
}
fs_stage->key.wm.input_slots_valid =
prev_stage->prog_data.vue.vue_map.slots_valid;
fs_stage->code = brw_compile_fs(compiler, mem_ctx, &params);
@@ -1291,16 +1200,6 @@ anv_graphics_pipeline_init_keys(struct anv_graphics_pipeline *pipeline,
&stages[s].key.wm);
break;
}
case MESA_SHADER_TASK:
populate_task_prog_key(device,
pipeline->base.device->robust_buffer_access,
&stages[s].key.task);
break;
case MESA_SHADER_MESH:
populate_mesh_prog_key(device,
pipeline->base.device->robust_buffer_access,
&stages[s].key.mesh);
break;
default:
unreachable("Invalid graphics shader stage");
}
@@ -1309,8 +1208,7 @@ anv_graphics_pipeline_init_keys(struct anv_graphics_pipeline *pipeline,
stages[s].feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
}
assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT ||
pipeline->active_stages & VK_SHADER_STAGE_MESH_BIT_NV);
assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);
}
static bool
@@ -1394,9 +1292,6 @@ static const gl_shader_stage graphics_shader_order[] = {
MESA_SHADER_TESS_EVAL,
MESA_SHADER_GEOMETRY,
MESA_SHADER_TASK,
MESA_SHADER_MESH,
MESA_SHADER_FRAGMENT,
};
@@ -1509,12 +1404,6 @@ anv_graphics_pipeline_compile(struct anv_graphics_pipeline *pipeline,
case MESA_SHADER_GEOMETRY:
anv_pipeline_link_gs(compiler, &stages[s], next_stage);
break;
case MESA_SHADER_TASK:
anv_pipeline_link_task(compiler, &stages[s], next_stage);
break;
case MESA_SHADER_MESH:
anv_pipeline_link_mesh(compiler, &stages[s], next_stage);
break;
case MESA_SHADER_FRAGMENT:
anv_pipeline_link_fs(compiler, &stages[s], state->rp);
break;
@@ -1584,8 +1473,7 @@ anv_graphics_pipeline_compile(struct anv_graphics_pipeline *pipeline,
if (devinfo->has_coarse_pixel_primitive_and_cb &&
stages[MESA_SHADER_FRAGMENT].info &&
stages[MESA_SHADER_FRAGMENT].key.wm.coarse_pixel &&
!stages[MESA_SHADER_FRAGMENT].nir->info.fs.uses_sample_shading &&
stages[MESA_SHADER_MESH].info == NULL) {
!stages[MESA_SHADER_FRAGMENT].nir->info.fs.uses_sample_shading) {
struct anv_pipeline_stage *last_psr = NULL;
for (unsigned i = 0; i < ARRAY_SIZE(graphics_shader_order); i++) {
@@ -1631,14 +1519,6 @@ anv_graphics_pipeline_compile(struct anv_graphics_pipeline *pipeline,
anv_pipeline_compile_gs(compiler, stage_ctx, pipeline->base.device,
&stages[s], prev_stage);
break;
case MESA_SHADER_TASK:
anv_pipeline_compile_task(compiler, stage_ctx, pipeline->base.device,
&stages[s]);
break;
case MESA_SHADER_MESH:
anv_pipeline_compile_mesh(compiler, stage_ctx, pipeline->base.device,
&stages[s], prev_stage);
break;
case MESA_SHADER_FRAGMENT:
anv_pipeline_compile_fs(compiler, stage_ctx, pipeline->base.device,
&stages[s], prev_stage);
@@ -2008,9 +1888,6 @@ anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline,
if (pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
pipeline->active_stages |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
if (anv_pipeline_is_mesh(pipeline))
assert(device->physical->vk.supported_extensions.NV_mesh_shader);
pipeline->dynamic_state.ms.sample_locations = &pipeline->sample_locations;
vk_dynamic_graphics_state_fill(&pipeline->dynamic_state, state);
@@ -2026,38 +1903,33 @@ anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline,
anv_pipeline_setup_l3_config(&pipeline->base, false);
if (anv_pipeline_is_primitive(pipeline)) {
const uint64_t inputs_read = get_vs_prog_data(pipeline)->inputs_read;
const uint64_t inputs_read = get_vs_prog_data(pipeline)->inputs_read;
u_foreach_bit(a, state->vi->attributes_valid) {
if (inputs_read & BITFIELD64_BIT(VERT_ATTRIB_GENERIC0 + a))
pipeline->vb_used |= BITFIELD64_BIT(state->vi->attributes[a].binding);
}
u_foreach_bit(b, state->vi->bindings_valid) {
pipeline->vb[b].stride = state->vi->bindings[b].stride;
pipeline->vb[b].instanced = state->vi->bindings[b].input_rate ==
VK_VERTEX_INPUT_RATE_INSTANCE;
pipeline->vb[b].instance_divisor = state->vi->bindings[b].divisor;
}
/* Our implementation of VK_KHR_multiview uses instancing to draw the
* different views when primitive replication cannot be used. If the
* client asks for instancing, we need to multiply by the client's
* instance count at draw time and instance divisor in the vertex
* bindings by the number of views ensure that we repeat the client's
* per-instance data once for each view.
*/
const bool uses_primitive_replication =
anv_pipeline_get_last_vue_prog_data(pipeline)->vue_map.num_pos_slots > 1;
pipeline->instance_multiplier = 1;
if (pipeline->view_mask && !uses_primitive_replication)
pipeline->instance_multiplier = util_bitcount(pipeline->view_mask);
} else {
assert(anv_pipeline_is_mesh(pipeline));
/* TODO(mesh): Mesh vs. Multiview with Instancing. */
u_foreach_bit(a, state->vi->attributes_valid) {
if (inputs_read & BITFIELD64_BIT(VERT_ATTRIB_GENERIC0 + a))
pipeline->vb_used |= BITFIELD64_BIT(state->vi->attributes[a].binding);
}
u_foreach_bit(b, state->vi->bindings_valid) {
pipeline->vb[b].stride = state->vi->bindings[b].stride;
pipeline->vb[b].instanced = state->vi->bindings[b].input_rate ==
VK_VERTEX_INPUT_RATE_INSTANCE;
pipeline->vb[b].instance_divisor = state->vi->bindings[b].divisor;
}
/* Our implementation of VK_KHR_multiview uses instancing to draw the
* different views when primitive replication cannot be used. If the client
* asks for instancing, we need to multiply by the client's instance count
* at draw time and instance divisor in the vertex bindings by the number
* of views ensure that we repeat the client's per-instance data once for
* each view.
*/
const bool uses_primitive_replication =
anv_pipeline_get_last_vue_prog_data(pipeline)->vue_map.num_pos_slots > 1;
pipeline->instance_multiplier = 1;
if (pipeline->view_mask && !uses_primitive_replication)
pipeline->instance_multiplier = util_bitcount(pipeline->view_mask);
pipeline->negative_one_to_one =
state->vp != NULL && state->vp->negative_one_to_one;
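The multiview comment kept above boils down to a small computation; a self-contained sketch, with util_bitcount() replaced by the GCC/Clang builtin:

#include <stdbool.h>
#include <stdint.h>

static uint32_t
instance_multiplier(uint32_t view_mask, bool uses_primitive_replication)
{
   /* No multiview, or views handled by primitive replication: draw once. */
   if (view_mask == 0 || uses_primitive_replication)
      return 1;
   /* Otherwise repeat per-instance data once per enabled view bit. */
   return (uint32_t)__builtin_popcount(view_mask);
}

/* e.g. view_mask = 0x5 (views 0 and 2): client instance counts and vertex
 * binding divisors are multiplied by 2 at draw time. */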

View file

@@ -3131,12 +3131,6 @@ anv_pipeline_is_primitive(const struct anv_graphics_pipeline *pipeline)
return anv_pipeline_has_stage(pipeline, MESA_SHADER_VERTEX);
}
static inline bool
anv_pipeline_is_mesh(const struct anv_graphics_pipeline *pipeline)
{
return anv_pipeline_has_stage(pipeline, MESA_SHADER_MESH);
}
static inline bool
anv_cmd_buffer_all_color_write_masked(const struct anv_cmd_buffer *cmd_buffer)
{
@@ -3175,8 +3169,6 @@ ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tcs, MESA_SHADER_TESS_CTRL)
ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tes, MESA_SHADER_TESS_EVAL)
ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(gs, MESA_SHADER_GEOMETRY)
ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(wm, MESA_SHADER_FRAGMENT)
ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(mesh, MESA_SHADER_MESH)
ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(task, MESA_SHADER_TASK)
static inline const struct brw_cs_prog_data *
get_cs_prog_data(const struct anv_compute_pipeline *pipeline)
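Each ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(...) line above stamps out a typed prog-data accessor via token pasting; a simplified standalone sketch of the idea (the struct layout and stage index here are illustrative assumptions, not the driver's definitions):

struct prog_data_base { unsigned nr_params; };
struct mesh_prog_data { struct prog_data_base base; };

struct graphics_pipeline {
   const struct prog_data_base *prog_data[8]; /* indexed by shader stage */
};

#define DECL_GET_PROG_DATA_FUNC(prefix, stage)                             \
   static inline const struct prefix##_prog_data *                         \
   get_##prefix##_prog_data(const struct graphics_pipeline *pipeline)      \
   {                                                                       \
      return (const struct prefix##_prog_data *)pipeline->prog_data[stage];\
   }

DECL_GET_PROG_DATA_FUNC(mesh, 5) /* declares get_mesh_prog_data() */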

View file

@@ -3404,64 +3404,6 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer,
cmd_buffer->state.push_constants_dirty &= ~flushed;
}
#if GFX_VERx10 >= 125
static void
cmd_buffer_flush_mesh_inline_data(struct anv_cmd_buffer *cmd_buffer,
VkShaderStageFlags dirty_stages)
{
struct anv_cmd_graphics_state *gfx_state = &cmd_buffer->state.gfx;
const struct anv_graphics_pipeline *pipeline = gfx_state->pipeline;
if (dirty_stages & VK_SHADER_STAGE_TASK_BIT_NV &&
anv_pipeline_has_stage(pipeline, MESA_SHADER_TASK)) {
const struct anv_shader_bin *shader = pipeline->shaders[MESA_SHADER_TASK];
const struct anv_pipeline_bind_map *bind_map = &shader->bind_map;
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_TASK_SHADER_DATA), data) {
const struct anv_push_range *range = &bind_map->push_ranges[0];
if (range->length > 0) {
struct anv_address buffer =
get_push_range_address(cmd_buffer, shader, range);
uint64_t addr = anv_address_physical(buffer);
data.InlineData[0] = addr & 0xffffffff;
data.InlineData[1] = addr >> 32;
memcpy(&data.InlineData[BRW_TASK_MESH_PUSH_CONSTANTS_START_DW],
cmd_buffer->state.gfx.base.push_constants.client_data,
BRW_TASK_MESH_PUSH_CONSTANTS_SIZE_DW * 4);
}
}
}
if (dirty_stages & VK_SHADER_STAGE_MESH_BIT_NV &&
anv_pipeline_has_stage(pipeline, MESA_SHADER_MESH)) {
const struct anv_shader_bin *shader = pipeline->shaders[MESA_SHADER_MESH];
const struct anv_pipeline_bind_map *bind_map = &shader->bind_map;
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_MESH_SHADER_DATA), data) {
const struct anv_push_range *range = &bind_map->push_ranges[0];
if (range->length > 0) {
struct anv_address buffer =
get_push_range_address(cmd_buffer, shader, range);
uint64_t addr = anv_address_physical(buffer);
data.InlineData[0] = addr & 0xffffffff;
data.InlineData[1] = addr >> 32;
memcpy(&data.InlineData[BRW_TASK_MESH_PUSH_CONSTANTS_START_DW],
cmd_buffer->state.gfx.base.push_constants.client_data,
BRW_TASK_MESH_PUSH_CONSTANTS_SIZE_DW * 4);
}
}
}
cmd_buffer->state.push_constants_dirty &= ~dirty_stages;
}
#endif
static void
cmd_buffer_emit_clip(struct anv_cmd_buffer *cmd_buffer)
{
@@ -3495,7 +3437,6 @@ cmd_buffer_emit_clip(struct anv_cmd_buffer *cmd_buffer)
};
uint32_t dwords[GENX(3DSTATE_CLIP_length)];
/* TODO(mesh): Multiview. */
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
if (anv_pipeline_is_primitive(pipeline)) {
const struct brw_vue_prog_data *last =
@@ -3504,12 +3445,6 @@ cmd_buffer_emit_clip(struct anv_cmd_buffer *cmd_buffer)
clip.MaximumVPIndex = dyn->vp.viewport_count > 0 ?
dyn->vp.viewport_count - 1 : 0;
}
} else if (anv_pipeline_is_mesh(pipeline)) {
const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
if (mesh_prog_data->map.start_dw[VARYING_SLOT_VIEWPORT] >= 0) {
clip.MaximumVPIndex = dyn->vp.viewport_count > 0 ?
dyn->vp.viewport_count - 1 : 0;
}
}
GENX(3DSTATE_CLIP_pack)(NULL, dwords, &clip);
@@ -3999,11 +3934,6 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
dirty |= cmd_buffer->state.push_constants_dirty;
cmd_buffer_flush_push_constants(cmd_buffer,
dirty & VK_SHADER_STAGE_ALL_GRAPHICS);
#if GFX_VERx10 >= 125
cmd_buffer_flush_mesh_inline_data(
cmd_buffer, dirty & (VK_SHADER_STAGE_TASK_BIT_NV |
VK_SHADER_STAGE_MESH_BIT_NV));
#endif
}
if (dirty & VK_SHADER_STAGE_ALL_GRAPHICS) {
@@ -5008,160 +4938,6 @@ void genX(CmdEndTransformFeedbackEXT)(
cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_XFB_ENABLE;
}
#if GFX_VERx10 >= 125
void
genX(CmdDrawMeshTasksNV)(
VkCommandBuffer commandBuffer,
uint32_t taskCount,
uint32_t firstTask)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
if (anv_batch_has_error(&cmd_buffer->batch))
return;
/* TODO(mesh): Check if this is not emitting more packets than we need. */
genX(cmd_buffer_flush_state)(cmd_buffer);
if (cmd_buffer->state.conditional_render_enabled)
genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
/* BSpec 54016 says: "The values passed for Starting ThreadGroup ID X
* and ThreadGroup Count X shall not cause TGIDs to exceed (2^32)-1."
*/
assert((int64_t)firstTask + taskCount - 1 <= UINT32_MAX);
anv_batch_emit(&cmd_buffer->batch, GENX(3DMESH_1D), m) {
m.PredicateEnable = cmd_buffer->state.conditional_render_enabled;
m.ThreadGroupCountX = taskCount;
m.StartingThreadGroupIDX = firstTask;
}
}
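/* Worked example (added for illustration) of the bound asserted above:
 * firstTask = 0xfffffff0 with taskCount = 0x20 gives a last TGID of
 * 0xfffffff0 + 0x20 - 1 = 0x10000000f, which exceeds (2^32)-1, so the
 * assert rejects it; the int64_t cast keeps the sum from wrapping before
 * the comparison. */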
#define GFX125_3DMESH_TG_COUNT 0x26F0
#define GFX125_3DMESH_STARTING_TGID 0x26F4
#define GFX10_3DPRIM_XP(n) (0x2690 + (n) * 4) /* n = { 0, 1, 2 } */
static void
mesh_load_indirect_parameters(struct anv_cmd_buffer *cmd_buffer,
struct mi_builder *b,
struct anv_address addr,
bool emit_xp0,
uint32_t xp0)
{
const size_t taskCountOff = offsetof(VkDrawMeshTasksIndirectCommandNV, taskCount);
const size_t firstTaskOff = offsetof(VkDrawMeshTasksIndirectCommandNV, firstTask);
mi_store(b, mi_reg32(GFX125_3DMESH_TG_COUNT),
mi_mem32(anv_address_add(addr, taskCountOff)));
mi_store(b, mi_reg32(GFX125_3DMESH_STARTING_TGID),
mi_mem32(anv_address_add(addr, firstTaskOff)));
if (emit_xp0)
mi_store(b, mi_reg32(GFX10_3DPRIM_XP(0)), mi_imm(xp0));
}
static void
emit_indirect_3dmesh_1d(struct anv_batch *batch,
bool predicate_enable,
bool uses_drawid)
{
uint32_t len = GENX(3DMESH_1D_length) + uses_drawid;
uint32_t *dw = anv_batch_emitn(batch, len, GENX(3DMESH_1D),
.PredicateEnable = predicate_enable,
.IndirectParameterEnable = true,
.ExtendedParameter0Present = uses_drawid);
if (uses_drawid)
dw[len - 1] = 0;
}
void
genX(CmdDrawMeshTasksIndirectNV)(
VkCommandBuffer commandBuffer,
VkBuffer _buffer,
VkDeviceSize offset,
uint32_t drawCount,
uint32_t stride)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
const struct brw_task_prog_data *task_prog_data = get_task_prog_data(pipeline);
const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
struct anv_cmd_state *cmd_state = &cmd_buffer->state;
if (anv_batch_has_error(&cmd_buffer->batch))
return;
genX(cmd_buffer_flush_state)(cmd_buffer);
if (cmd_state->conditional_render_enabled)
genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
bool uses_drawid = (task_prog_data && task_prog_data->uses_drawid) ||
mesh_prog_data->uses_drawid;
struct mi_builder b;
mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
for (uint32_t i = 0; i < drawCount; i++) {
struct anv_address draw = anv_address_add(buffer->address, offset);
mesh_load_indirect_parameters(cmd_buffer, &b, draw, uses_drawid, i);
emit_indirect_3dmesh_1d(&cmd_buffer->batch,
cmd_state->conditional_render_enabled, uses_drawid);
offset += stride;
}
}
void
genX(CmdDrawMeshTasksIndirectCountNV)(
VkCommandBuffer commandBuffer,
VkBuffer _buffer,
VkDeviceSize offset,
VkBuffer _countBuffer,
VkDeviceSize countBufferOffset,
uint32_t maxDrawCount,
uint32_t stride)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
ANV_FROM_HANDLE(anv_buffer, count_buffer, _countBuffer);
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
const struct brw_task_prog_data *task_prog_data = get_task_prog_data(pipeline);
const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
if (anv_batch_has_error(&cmd_buffer->batch))
return;
genX(cmd_buffer_flush_state)(cmd_buffer);
bool uses_drawid = (task_prog_data && task_prog_data->uses_drawid) ||
mesh_prog_data->uses_drawid;
struct mi_builder b;
mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
struct mi_value max =
prepare_for_draw_count_predicate(cmd_buffer, &b,
count_buffer, countBufferOffset);
for (uint32_t i = 0; i < maxDrawCount; i++) {
struct anv_address draw = anv_address_add(buffer->address, offset);
emit_draw_count_predicate_cond(cmd_buffer, &b, i, max);
mesh_load_indirect_parameters(cmd_buffer, &b, draw, uses_drawid, i);
emit_indirect_3dmesh_1d(&cmd_buffer->batch, true, uses_drawid);
offset += stride;
}
}
#endif /* GFX_VERx10 >= 125 */
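Both 3DSTATE_*_SHADER_DATA blocks deleted above fill the same inline-data layout; a standalone sketch of that packing, assuming BRW_TASK_MESH_PUSH_CONSTANTS_START_DW is 2 (i.e. the push constants start right after the 64-bit address):

#include <stdint.h>
#include <string.h>

#define PUSH_CONSTANTS_START_DW 2 /* assumed value of BRW_TASK_MESH_PUSH_CONSTANTS_START_DW */

static void
pack_inline_data(uint32_t inline_data[8], uint64_t push_buffer_addr,
                 const uint32_t *client_push, unsigned client_push_dwords)
{
   inline_data[0] = (uint32_t)(push_buffer_addr & 0xffffffff); /* address, low DW */
   inline_data[1] = (uint32_t)(push_buffer_addr >> 32);        /* address, high DW */
   memcpy(&inline_data[PUSH_CONSTANTS_START_DW], client_push,
          client_push_dwords * sizeof(uint32_t));              /* first few push constants */
}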
void
genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
{

View file

@@ -309,71 +309,12 @@ genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
urb.VSNumberofURBEntries = entries[i];
}
}
#if GFX_VERx10 >= 125
if (device->physical->vk.supported_extensions.NV_mesh_shader) {
anv_batch_emit(batch, GENX(3DSTATE_URB_ALLOC_MESH), zero);
anv_batch_emit(batch, GENX(3DSTATE_URB_ALLOC_TASK), zero);
}
#endif
}
#if GFX_VERx10 >= 125
static void
emit_urb_setup_mesh(struct anv_graphics_pipeline *pipeline,
enum intel_urb_deref_block_size *deref_block_size)
{
const struct intel_device_info *devinfo = pipeline->base.device->info;
const struct brw_task_prog_data *task_prog_data =
anv_pipeline_has_stage(pipeline, MESA_SHADER_TASK) ?
get_task_prog_data(pipeline) : NULL;
const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
const struct intel_mesh_urb_allocation alloc =
intel_get_mesh_urb_config(devinfo, pipeline->base.l3_config,
task_prog_data ? task_prog_data->map.size_dw : 0,
mesh_prog_data->map.size_dw);
/* Zero out the primitive pipeline URB allocations. */
for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) {
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_URB_VS), urb) {
urb._3DCommandSubOpcode += i;
}
}
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_URB_ALLOC_TASK), urb) {
if (task_prog_data) {
urb.TASKURBEntryAllocationSize = alloc.task_entry_size_64b - 1;
urb.TASKNumberofURBEntriesSlice0 = alloc.task_entries;
urb.TASKNumberofURBEntriesSliceN = alloc.task_entries;
urb.TASKURBStartingAddressSlice0 = alloc.task_starting_address_8kb;
urb.TASKURBStartingAddressSliceN = alloc.task_starting_address_8kb;
}
}
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_URB_ALLOC_MESH), urb) {
urb.MESHURBEntryAllocationSize = alloc.mesh_entry_size_64b - 1;
urb.MESHNumberofURBEntriesSlice0 = alloc.mesh_entries;
urb.MESHNumberofURBEntriesSliceN = alloc.mesh_entries;
urb.MESHURBStartingAddressSlice0 = alloc.mesh_starting_address_8kb;
urb.MESHURBStartingAddressSliceN = alloc.mesh_starting_address_8kb;
}
*deref_block_size = alloc.deref_block_size;
}
#endif
static void
emit_urb_setup(struct anv_graphics_pipeline *pipeline,
enum intel_urb_deref_block_size *deref_block_size)
{
#if GFX_VERx10 >= 125
if (anv_pipeline_is_mesh(pipeline)) {
emit_urb_setup_mesh(pipeline, deref_block_size);
return;
}
#endif
unsigned entry_size[4];
for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
const struct brw_vue_prog_data *prog_data =
@@ -398,10 +339,6 @@ emit_3dstate_sbe(struct anv_graphics_pipeline *pipeline)
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SBE), sbe);
#if GFX_VER >= 8
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SBE_SWIZ), sbe);
#endif
#if GFX_VERx10 >= 125
if (anv_pipeline_is_mesh(pipeline))
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SBE_MESH), sbe_mesh);
#endif
return;
}
@@ -431,123 +368,75 @@ emit_3dstate_sbe(struct anv_graphics_pipeline *pipeline)
# define swiz sbe
#endif
if (anv_pipeline_is_primitive(pipeline)) {
const struct brw_vue_map *fs_input_map =
&anv_pipeline_get_last_vue_prog_data(pipeline)->vue_map;
const struct brw_vue_map *fs_input_map =
&anv_pipeline_get_last_vue_prog_data(pipeline)->vue_map;
int first_slot = brw_compute_first_urb_slot_required(wm_prog_data->inputs,
fs_input_map);
assert(first_slot % 2 == 0);
unsigned urb_entry_read_offset = first_slot / 2;
int max_source_attr = 0;
for (uint8_t idx = 0; idx < wm_prog_data->urb_setup_attribs_count; idx++) {
uint8_t attr = wm_prog_data->urb_setup_attribs[idx];
int input_index = wm_prog_data->urb_setup[attr];
int first_slot = brw_compute_first_urb_slot_required(wm_prog_data->inputs,
fs_input_map);
assert(first_slot % 2 == 0);
unsigned urb_entry_read_offset = first_slot / 2;
int max_source_attr = 0;
for (uint8_t idx = 0; idx < wm_prog_data->urb_setup_attribs_count; idx++) {
uint8_t attr = wm_prog_data->urb_setup_attribs[idx];
int input_index = wm_prog_data->urb_setup[attr];
assert(0 <= input_index);
assert(0 <= input_index);
/* gl_Viewport, gl_Layer and FragmentShadingRateKHR are stored in the
* VUE header
*/
if (attr == VARYING_SLOT_VIEWPORT ||
attr == VARYING_SLOT_LAYER ||
attr == VARYING_SLOT_PRIMITIVE_SHADING_RATE) {
continue;
}
if (attr == VARYING_SLOT_PNTC) {
sbe.PointSpriteTextureCoordinateEnable = 1 << input_index;
continue;
}
const int slot = fs_input_map->varying_to_slot[attr];
if (slot == -1) {
/* This attribute does not exist in the VUE--that means that the
* vertex shader did not write to it. It could be that it's a
* regular varying read by the fragment shader but not written by
* the vertex shader or it's gl_PrimitiveID. In the first case the
* value is undefined, in the second it needs to be
* gl_PrimitiveID.
*/
swiz.Attribute[input_index].ConstantSource = PRIM_ID;
swiz.Attribute[input_index].ComponentOverrideX = true;
swiz.Attribute[input_index].ComponentOverrideY = true;
swiz.Attribute[input_index].ComponentOverrideZ = true;
swiz.Attribute[input_index].ComponentOverrideW = true;
continue;
}
/* We have to subtract two slots to account for the URB entry output
* read offset in the VS and GS stages.
*/
const int source_attr = slot - 2 * urb_entry_read_offset;
assert(source_attr >= 0 && source_attr < 32);
max_source_attr = MAX2(max_source_attr, source_attr);
/* The hardware can only do overrides on 16 overrides at a time, and the
* other up to 16 have to be lined up so that the input index = the
* output index. We'll need to do some tweaking to make sure that's the
* case.
*/
if (input_index < 16)
swiz.Attribute[input_index].SourceAttribute = source_attr;
else
assert(source_attr == input_index);
/* gl_Viewport, gl_Layer and FragmentShadingRateKHR are stored in the
* VUE header
*/
if (attr == VARYING_SLOT_VIEWPORT ||
attr == VARYING_SLOT_LAYER ||
attr == VARYING_SLOT_PRIMITIVE_SHADING_RATE) {
continue;
}
sbe.VertexURBEntryReadOffset = urb_entry_read_offset;
sbe.VertexURBEntryReadLength = DIV_ROUND_UP(max_source_attr + 1, 2);
#if GFX_VER >= 8
sbe.ForceVertexURBEntryReadOffset = true;
sbe.ForceVertexURBEntryReadLength = true;
#endif
} else {
assert(anv_pipeline_is_mesh(pipeline));
#if GFX_VERx10 >= 125
const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SBE_MESH), sbe_mesh) {
const struct brw_mue_map *mue = &mesh_prog_data->map;
assert(mue->per_vertex_header_size_dw % 8 == 0);
sbe_mesh.PerVertexURBEntryOutputReadOffset = mue->per_vertex_header_size_dw / 8;
sbe_mesh.PerVertexURBEntryOutputReadLength = DIV_ROUND_UP(mue->per_vertex_data_size_dw, 8);
/* Clip distance array is passed in the per-vertex header so that
* it can be consumed by the HW. If user wants to read it in the FS,
* adjust the offset and length to cover it. Conveniently it is at
* the end of the per-vertex header, right before per-vertex
* attributes.
*
* Note that FS attribute reading must be aware that the clip
* distances have fixed position.
*/
if (mue->per_vertex_header_size_dw > 8 &&
(wm_prog_data->urb_setup[VARYING_SLOT_CLIP_DIST0] >= 0 ||
wm_prog_data->urb_setup[VARYING_SLOT_CLIP_DIST1] >= 0)) {
sbe_mesh.PerVertexURBEntryOutputReadOffset -= 1;
sbe_mesh.PerVertexURBEntryOutputReadLength += 1;
}
assert(mue->per_primitive_header_size_dw % 8 == 0);
sbe_mesh.PerPrimitiveURBEntryOutputReadOffset = mue->per_primitive_header_size_dw / 8;
sbe_mesh.PerPrimitiveURBEntryOutputReadLength = DIV_ROUND_UP(mue->per_primitive_data_size_dw, 8);
/* Just like with clip distances, if Primitive Shading Rate,
* Viewport Index or Layer is read back in the FS, adjust
* the offset and length to cover the Primitive Header, where
* PSR, Viewport Index & Layer are stored.
*/
if (wm_prog_data->urb_setup[VARYING_SLOT_VIEWPORT] >= 0 ||
wm_prog_data->urb_setup[VARYING_SLOT_PRIMITIVE_SHADING_RATE] >= 0 ||
wm_prog_data->urb_setup[VARYING_SLOT_LAYER] >= 0) {
assert(sbe_mesh.PerPrimitiveURBEntryOutputReadOffset > 0);
sbe_mesh.PerPrimitiveURBEntryOutputReadOffset -= 1;
sbe_mesh.PerPrimitiveURBEntryOutputReadLength += 1;
}
if (attr == VARYING_SLOT_PNTC) {
sbe.PointSpriteTextureCoordinateEnable = 1 << input_index;
continue;
}
#endif
const int slot = fs_input_map->varying_to_slot[attr];
if (slot == -1) {
/* This attribute does not exist in the VUE--that means that the
* vertex shader did not write to it. It could be that it's a regular
* varying read by the fragment shader but not written by the vertex
* shader or it's gl_PrimitiveID. In the first case the value is
* undefined, in the second it needs to be gl_PrimitiveID.
*/
swiz.Attribute[input_index].ConstantSource = PRIM_ID;
swiz.Attribute[input_index].ComponentOverrideX = true;
swiz.Attribute[input_index].ComponentOverrideY = true;
swiz.Attribute[input_index].ComponentOverrideZ = true;
swiz.Attribute[input_index].ComponentOverrideW = true;
continue;
}
/* We have to subtract two slots to account for the URB entry output
* read offset in the VS and GS stages.
*/
const int source_attr = slot - 2 * urb_entry_read_offset;
assert(source_attr >= 0 && source_attr < 32);
max_source_attr = MAX2(max_source_attr, source_attr);
/* The hardware can only do overrides on 16 overrides at a time, and the
* other up to 16 have to be lined up so that the input index = the
* output index. We'll need to do some tweaking to make sure that's the
* case.
*/
if (input_index < 16)
swiz.Attribute[input_index].SourceAttribute = source_attr;
else
assert(source_attr == input_index);
}
sbe.VertexURBEntryReadOffset = urb_entry_read_offset;
sbe.VertexURBEntryReadLength = DIV_ROUND_UP(max_source_attr + 1, 2);
#if GFX_VER >= 8
sbe.ForceVertexURBEntryReadOffset = true;
sbe.ForceVertexURBEntryReadLength = true;
#endif
uint32_t *dw = anv_batch_emit_dwords(&pipeline->base.batch,
GENX(3DSTATE_SBE_length));
if (!dw)
@@ -571,18 +460,7 @@ VkPolygonMode
genX(raster_polygon_mode)(struct anv_graphics_pipeline *pipeline,
VkPrimitiveTopology primitive_topology)
{
if (anv_pipeline_is_mesh(pipeline)) {
switch (get_mesh_prog_data(pipeline)->primitive_type) {
case SHADER_PRIM_POINTS:
return VK_POLYGON_MODE_POINT;
case SHADER_PRIM_LINES:
return VK_POLYGON_MODE_LINE;
case SHADER_PRIM_TRIANGLES:
return pipeline->polygon_mode;
default:
unreachable("invalid primitive type for mesh");
}
} else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
switch (get_gs_prog_data(pipeline)->output_topology) {
case _3DPRIM_POINTLIST:
return VK_POLYGON_MODE_POINT;
@@ -779,15 +657,9 @@ emit_rs_state(struct anv_graphics_pipeline *pipeline,
#endif
bool point_from_shader;
if (anv_pipeline_is_primitive(pipeline)) {
const struct brw_vue_prog_data *last_vue_prog_data =
anv_pipeline_get_last_vue_prog_data(pipeline);
point_from_shader = last_vue_prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ;
} else {
assert(anv_pipeline_is_mesh(pipeline));
const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
point_from_shader = mesh_prog_data->map.start_dw[VARYING_SLOT_PSIZ] >= 0;
}
const struct brw_vue_prog_data *last_vue_prog_data =
anv_pipeline_get_last_vue_prog_data(pipeline);
point_from_shader = last_vue_prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ;
if (point_from_shader) {
sf.PointWidthSource = Vertex;
@@ -1198,44 +1070,35 @@ emit_3dstate_clip(struct anv_graphics_pipeline *pipeline,
clip.MinimumPointWidth = 0.125;
clip.MaximumPointWidth = 255.875;
/* TODO(mesh): Multiview. */
if (anv_pipeline_is_primitive(pipeline)) {
const struct brw_vue_prog_data *last =
anv_pipeline_get_last_vue_prog_data(pipeline);
const struct brw_vue_prog_data *last =
anv_pipeline_get_last_vue_prog_data(pipeline);
/* From the Vulkan 1.0.45 spec:
*
* "If the last active vertex processing stage shader entry point's
* interface does not include a variable decorated with
* ViewportIndex, then the first viewport is used."
*/
if (vp && (last->vue_map.slots_valid & VARYING_BIT_VIEWPORT)) {
clip.MaximumVPIndex = vp->viewport_count > 0 ?
vp->viewport_count - 1 : 0;
} else {
clip.MaximumVPIndex = 0;
}
/* From the Vulkan 1.0.45 spec:
*
* "If the last active vertex processing stage shader entry point's
* interface does not include a variable decorated with ViewportIndex,
* then the first viewport is used."
*/
if (vp && (last->vue_map.slots_valid & VARYING_BIT_VIEWPORT)) {
clip.MaximumVPIndex = vp->viewport_count > 0 ?
vp->viewport_count - 1 : 0;
} else {
clip.MaximumVPIndex = 0;
}
/* From the Vulkan 1.0.45 spec:
*
* "If the last active vertex processing stage shader entry point's
* interface does not include a variable decorated with Layer, then
* the first layer is used."
*/
clip.ForceZeroRTAIndexEnable =
!(last->vue_map.slots_valid & VARYING_BIT_LAYER);
/* From the Vulkan 1.0.45 spec:
*
* "If the last active vertex processing stage shader entry point's
* interface does not include a variable decorated with Layer, then the
* first layer is used."
*/
clip.ForceZeroRTAIndexEnable =
!(last->vue_map.slots_valid & VARYING_BIT_LAYER);
#if GFX_VER == 7
clip.UserClipDistanceClipTestEnableBitmask = last->clip_distance_mask;
clip.UserClipDistanceCullTestEnableBitmask = last->cull_distance_mask;
clip.UserClipDistanceClipTestEnableBitmask = last->clip_distance_mask;
clip.UserClipDistanceCullTestEnableBitmask = last->cull_distance_mask;
#endif
} else if (anv_pipeline_is_mesh(pipeline)) {
const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
if (vp && vp->viewport_count > 0 &&
mesh_prog_data->map.start_dw[VARYING_SLOT_VIEWPORT] >= 0) {
clip.MaximumVPIndex = vp->viewport_count - 1;
}
}
#if GFX_VER == 7
clip.FrontWinding = genX(vk_to_intel_front_face)[rs->front_face];
@@ -1247,17 +1110,6 @@ emit_3dstate_clip(struct anv_graphics_pipeline *pipeline,
#endif
GENX(3DSTATE_CLIP_pack)(NULL, pipeline->gfx7.clip, &clip);
#if GFX_VERx10 >= 125
if (anv_pipeline_is_mesh(pipeline)) {
const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_CLIP_MESH), clip_mesh) {
clip_mesh.PrimitiveHeaderEnable = mesh_prog_data->map.per_primitive_header_size_dw > 0;
clip_mesh.UserClipDistanceClipTestEnableBitmask = mesh_prog_data->clip_distance_mask;
clip_mesh.UserClipDistanceCullTestEnableBitmask = mesh_prog_data->cull_distance_mask;
}
}
#endif
}
static void
@@ -2168,141 +2020,6 @@ emit_3dstate_primitive_replication(struct anv_graphics_pipeline *pipeline,
}
#endif
#if GFX_VERx10 >= 125
static void
emit_task_state(struct anv_graphics_pipeline *pipeline)
{
assert(anv_pipeline_is_mesh(pipeline));
if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_TASK)) {
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TASK_CONTROL), zero);
return;
}
const struct anv_shader_bin *task_bin = pipeline->shaders[MESA_SHADER_TASK];
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TASK_CONTROL), tc) {
tc.TaskShaderEnable = true;
tc.ScratchSpaceBuffer =
get_scratch_surf(&pipeline->base, MESA_SHADER_TASK, task_bin);
}
const struct intel_device_info *devinfo = pipeline->base.device->info;
const struct brw_task_prog_data *task_prog_data = get_task_prog_data(pipeline);
const struct brw_cs_dispatch_info task_dispatch =
brw_cs_get_dispatch_info(devinfo, &task_prog_data->base, NULL);
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TASK_SHADER), task) {
task.KernelStartPointer = task_bin->kernel.offset;
task.SIMDSize = task_dispatch.simd_size / 16;
task.MessageSIMD = task.SIMDSize;
task.NumberofThreadsinGPGPUThreadGroup = task_dispatch.threads;
task.ExecutionMask = task_dispatch.right_mask;
task.LocalXMaximum = task_dispatch.group_size - 1;
task.EmitLocalIDX = true;
task.NumberofBarriers = task_prog_data->base.uses_barrier;
task.SharedLocalMemorySize =
encode_slm_size(GFX_VER, task_prog_data->base.base.total_shared);
/*
* 3DSTATE_TASK_SHADER_DATA.InlineData[0:1] will be used for an address
* of a buffer with push constants and descriptor set table and
* InlineData[2:7] will be used for first few push constants.
*/
task.EmitInlineParameter = true;
task.XP0Required = task_prog_data->uses_drawid;
}
/* Recommended values from "Task and Mesh Distribution Programming". */
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TASK_REDISTRIB), redistrib) {
redistrib.LocalBOTAccumulatorThreshold = MULTIPLIER_1;
redistrib.SmallTaskThreshold = 1; /* 2^N */
redistrib.TargetMeshBatchSize = devinfo->num_slices > 2 ? 3 : 5; /* 2^N */
redistrib.TaskRedistributionLevel = TASKREDISTRIB_BOM;
/* TODO: We have an unknown issue with Task Payload when task redistribution
* is enabled. Disable it for now.
* See https://gitlab.freedesktop.org/mesa/mesa/-/issues/7141
*/
redistrib.TaskRedistributionMode = TASKREDISTRIB_OFF;
}
}
static void
emit_mesh_state(struct anv_graphics_pipeline *pipeline)
{
assert(anv_pipeline_is_mesh(pipeline));
const struct anv_shader_bin *mesh_bin = pipeline->shaders[MESA_SHADER_MESH];
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_MESH_CONTROL), mc) {
mc.MeshShaderEnable = true;
mc.ScratchSpaceBuffer =
get_scratch_surf(&pipeline->base, MESA_SHADER_MESH, mesh_bin);
/* TODO(mesh): MaximumNumberofThreadGroups. */
}
const struct intel_device_info *devinfo = pipeline->base.device->info;
const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
const struct brw_cs_dispatch_info mesh_dispatch =
brw_cs_get_dispatch_info(devinfo, &mesh_prog_data->base, NULL);
const unsigned output_topology =
mesh_prog_data->primitive_type == SHADER_PRIM_POINTS ? OUTPUT_POINT :
mesh_prog_data->primitive_type == SHADER_PRIM_LINES ? OUTPUT_LINE :
OUTPUT_TRI;
uint32_t index_format;
switch (mesh_prog_data->index_format) {
case BRW_INDEX_FORMAT_U32:
index_format = INDEX_U32;
break;
default:
unreachable("invalid index format");
}
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_MESH_SHADER), mesh) {
mesh.KernelStartPointer = mesh_bin->kernel.offset;
mesh.SIMDSize = mesh_dispatch.simd_size / 16;
mesh.MessageSIMD = mesh.SIMDSize;
mesh.NumberofThreadsinGPGPUThreadGroup = mesh_dispatch.threads;
mesh.ExecutionMask = mesh_dispatch.right_mask;
mesh.LocalXMaximum = mesh_dispatch.group_size - 1;
mesh.EmitLocalIDX = true;
mesh.MaximumPrimitiveCount = mesh_prog_data->map.max_primitives - 1;
mesh.OutputTopology = output_topology;
mesh.PerVertexDataPitch = mesh_prog_data->map.per_vertex_pitch_dw / 8;
mesh.PerPrimitiveDataPresent = mesh_prog_data->map.per_primitive_pitch_dw > 0;
mesh.PerPrimitiveDataPitch = mesh_prog_data->map.per_primitive_pitch_dw / 8;
mesh.IndexFormat = index_format;
mesh.NumberofBarriers = mesh_prog_data->base.uses_barrier;
mesh.SharedLocalMemorySize =
encode_slm_size(GFX_VER, mesh_prog_data->base.base.total_shared);
/*
* 3DSTATE_MESH_SHADER_DATA.InlineData[0:1] will be used for an address
* of a buffer with push constants and descriptor set table and
* InlineData[2:7] will be used for first few push constants.
*/
mesh.EmitInlineParameter = true;
mesh.XP0Required = mesh_prog_data->uses_drawid;
}
/* Recommended values from "Task and Mesh Distribution Programming". */
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_MESH_DISTRIB), distrib) {
distrib.DistributionMode = MESH_RR_FREE;
distrib.TaskDistributionBatchSize = devinfo->num_slices > 2 ? 8 : 9; /* 2^N thread groups */
distrib.MeshDistributionBatchSize = devinfo->num_slices > 2 ? 5 : 3; /* 2^N thread groups */
}
}
#endif
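The SIMDSize assignments in the deleted task and mesh state above use a compact field encoding; a tiny standalone sketch of the mapping:

#include <assert.h>
#include <stdint.h>

static uint32_t
simd_size_field(uint32_t simd_size)
{
   /* Dispatch widths of 8/16/32 lanes encode as 0/1/2 in the SIMDSize field,
    * hence the simd_size / 16 in 3DSTATE_TASK_SHADER and 3DSTATE_MESH_SHADER. */
   assert(simd_size == 8 || simd_size == 16 || simd_size == 32);
   return simd_size / 16;
}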
void
genX(graphics_pipeline_emit)(struct anv_graphics_pipeline *pipeline,
const struct vk_graphics_pipeline_state *state)
@ -2342,38 +2059,15 @@ genX(graphics_pipeline_emit)(struct anv_graphics_pipeline *pipeline,
gfx7_emit_vs_workaround_flush(brw);
#endif
if (anv_pipeline_is_primitive(pipeline)) {
emit_vertex_input(pipeline, state->vi);
emit_vertex_input(pipeline, state->vi);
emit_3dstate_vs(pipeline);
emit_3dstate_hs_te_ds(pipeline, state->ts);
emit_3dstate_gs(pipeline);
emit_3dstate_vs(pipeline);
emit_3dstate_hs_te_ds(pipeline, state->ts);
emit_3dstate_gs(pipeline);
emit_3dstate_vf_statistics(pipeline);
emit_3dstate_vf_statistics(pipeline);
emit_3dstate_streamout(pipeline, state->rs);
#if GFX_VERx10 >= 125
const struct anv_device *device = pipeline->base.device;
/* Disable Mesh. */
if (device->physical->vk.supported_extensions.NV_mesh_shader) {
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_MESH_CONTROL), zero);
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TASK_CONTROL), zero);
}
#endif
} else {
assert(anv_pipeline_is_mesh(pipeline));
/* BSpec 46303 forbids both 3DSTATE_MESH_CONTROL.MeshShaderEnable
* and 3DSTATE_STREAMOUT.SOFunctionEnable to be 1.
*/
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_STREAMOUT), so) {}
#if GFX_VERx10 >= 125
emit_task_state(pipeline);
emit_mesh_state(pipeline);
#endif
}
emit_3dstate_streamout(pipeline, state->rs);
emit_3dstate_sbe(pipeline);
emit_3dstate_wm(pipeline, state->ia, state->rs,