mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 00:00:11 +01:00
hasvk: remove mesh code
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Acked-by: Jason Ekstrand <jason.ekstrand@collabora.com> Acked-by: Jason Ekstrand <jason@jlekstrand.net> Acked-by: Jason Ekstrand <jason.ekstrand@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18208>
This commit is contained in:
parent
6cbaaf27ab
commit
d8e2d227ef
5 changed files with 125 additions and 870 deletions
|
|
@ -182,9 +182,6 @@ get_device_extensions(const struct anv_physical_device *device,
|
|||
const bool has_syncobj_wait =
|
||||
(device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT) != 0;
|
||||
|
||||
const bool nv_mesh_shading_enabled =
|
||||
env_var_as_boolean("ANV_EXPERIMENTAL_NV_MESH_SHADER", false);
|
||||
|
||||
*ext = (struct vk_device_extension_table) {
|
||||
.KHR_8bit_storage = device->info.ver >= 8,
|
||||
.KHR_16bit_storage = device->info.ver >= 8,
|
||||
|
|
@ -333,8 +330,6 @@ get_device_extensions(const struct anv_physical_device *device,
|
|||
.INTEL_shader_integer_functions2 = device->info.ver >= 8,
|
||||
.EXT_multi_draw = true,
|
||||
.NV_compute_shader_derivatives = true,
|
||||
.NV_mesh_shader = device->info.has_mesh_shading &&
|
||||
nv_mesh_shading_enabled,
|
||||
.VALVE_mutable_descriptor_type = true,
|
||||
};
|
||||
}
|
||||
|
|
@ -1513,14 +1508,6 @@ void anv_GetPhysicalDeviceFeatures2(
|
|||
break;
|
||||
}
|
||||
|
||||
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_FEATURES_NV: {
|
||||
VkPhysicalDeviceMeshShaderFeaturesNV *features =
|
||||
(VkPhysicalDeviceMeshShaderFeaturesNV *)ext;
|
||||
features->taskShader = pdevice->vk.supported_extensions.NV_mesh_shader;
|
||||
features->meshShader = pdevice->vk.supported_extensions.NV_mesh_shader;
|
||||
break;
|
||||
}
|
||||
|
||||
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_VALVE: {
|
||||
VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *features =
|
||||
(VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *)ext;
|
||||
|
|
@ -1932,10 +1919,6 @@ anv_get_physical_device_properties_1_1(struct anv_physical_device *pdevice,
|
|||
if (pdevice->compiler->scalar_stage[stage])
|
||||
scalar_stages |= mesa_to_vk_shader_stage(stage);
|
||||
}
|
||||
if (pdevice->vk.supported_extensions.NV_mesh_shader) {
|
||||
scalar_stages |= VK_SHADER_STAGE_TASK_BIT_NV |
|
||||
VK_SHADER_STAGE_MESH_BIT_NV;
|
||||
}
|
||||
p->subgroupSupportedStages = scalar_stages;
|
||||
p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
|
||||
VK_SUBGROUP_FEATURE_VOTE_BIT |
|
||||
|
|
@ -2104,9 +2087,7 @@ anv_get_physical_device_properties_1_3(struct anv_physical_device *pdevice,
|
|||
p->minSubgroupSize = 8;
|
||||
p->maxSubgroupSize = 32;
|
||||
p->maxComputeWorkgroupSubgroups = pdevice->info.max_cs_workgroup_threads;
|
||||
p->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT |
|
||||
VK_SHADER_STAGE_TASK_BIT_NV |
|
||||
VK_SHADER_STAGE_MESH_BIT_NV;
|
||||
p->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
|
||||
p->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
|
||||
p->maxPerStageDescriptorInlineUniformBlocks =
|
||||
|
|
@ -2340,66 +2321,6 @@ void anv_GetPhysicalDeviceProperties2(
|
|||
break;
|
||||
}
|
||||
|
||||
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_PROPERTIES_NV: {
|
||||
VkPhysicalDeviceMeshShaderPropertiesNV *props =
|
||||
(VkPhysicalDeviceMeshShaderPropertiesNV *)ext;
|
||||
|
||||
/* Bounded by the maximum representable size in
|
||||
* 3DSTATE_MESH_SHADER_BODY::SharedLocalMemorySize. Same for Task.
|
||||
*/
|
||||
const uint32_t max_slm_size = 64 * 1024;
|
||||
|
||||
/* Bounded by the maximum representable size in
|
||||
* 3DSTATE_MESH_SHADER_BODY::LocalXMaximum. Same for Task.
|
||||
*/
|
||||
const uint32_t max_workgroup_size = 1 << 10;
|
||||
|
||||
/* Bounded by the maximum representable count in
|
||||
* 3DSTATE_MESH_SHADER_BODY::MaximumPrimitiveCount.
|
||||
*/
|
||||
const uint32_t max_primitives = 1024;
|
||||
|
||||
/* TODO(mesh): Multiview. */
|
||||
const uint32_t max_view_count = 1;
|
||||
|
||||
props->maxDrawMeshTasksCount = UINT32_MAX;
|
||||
|
||||
/* TODO(mesh): Implement workgroup Y and Z sizes larger than one by
|
||||
* mapping them to/from the single value that HW provides us
|
||||
* (currently used for X).
|
||||
*/
|
||||
|
||||
props->maxTaskWorkGroupInvocations = max_workgroup_size;
|
||||
props->maxTaskWorkGroupSize[0] = max_workgroup_size;
|
||||
props->maxTaskWorkGroupSize[1] = 1;
|
||||
props->maxTaskWorkGroupSize[2] = 1;
|
||||
props->maxTaskTotalMemorySize = max_slm_size;
|
||||
props->maxTaskOutputCount = UINT16_MAX;
|
||||
|
||||
props->maxMeshWorkGroupInvocations = max_workgroup_size;
|
||||
props->maxMeshWorkGroupSize[0] = max_workgroup_size;
|
||||
props->maxMeshWorkGroupSize[1] = 1;
|
||||
props->maxMeshWorkGroupSize[2] = 1;
|
||||
props->maxMeshTotalMemorySize = max_slm_size / max_view_count;
|
||||
props->maxMeshOutputPrimitives = max_primitives / max_view_count;
|
||||
props->maxMeshMultiviewViewCount = max_view_count;
|
||||
|
||||
/* Depends on what indices can be represented with IndexFormat. For
|
||||
* now we always use U32, so bound to the maximum unique vertices we
|
||||
* need for the maximum primitives.
|
||||
*
|
||||
* TODO(mesh): Revisit this if we drop "U32" IndexFormat when adding
|
||||
* support for others.
|
||||
*/
|
||||
props->maxMeshOutputVertices = 3 * props->maxMeshOutputPrimitives;
|
||||
|
||||
|
||||
props->meshOutputPerVertexGranularity = 32;
|
||||
props->meshOutputPerPrimitiveGranularity = 32;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: {
|
||||
VkPhysicalDevicePCIBusInfoPropertiesEXT *properties =
|
||||
(VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext;
|
||||
|
|
|
|||
|
|
@ -88,7 +88,6 @@ anv_shader_stage_to_nir(struct anv_device *device,
|
|||
.int64 = pdevice->info.ver >= 8,
|
||||
.int64_atomics = pdevice->info.ver >= 9 && pdevice->use_softpin,
|
||||
.integer_functions2 = pdevice->info.ver >= 8,
|
||||
.mesh_shading_nv = pdevice->vk.supported_extensions.NV_mesh_shader,
|
||||
.min_lod = true,
|
||||
.multiview = true,
|
||||
.physical_storage_buffer_address = pdevice->has_a64_buffer_access,
|
||||
|
|
@ -398,26 +397,6 @@ pipeline_has_coarse_pixel(const struct anv_graphics_pipeline *pipeline,
|
|||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
populate_task_prog_key(const struct anv_device *device,
|
||||
bool robust_buffer_access,
|
||||
struct brw_task_prog_key *key)
|
||||
{
|
||||
memset(key, 0, sizeof(*key));
|
||||
|
||||
populate_base_prog_key(device, robust_buffer_access, &key->base);
|
||||
}
|
||||
|
||||
static void
|
||||
populate_mesh_prog_key(const struct anv_device *device,
|
||||
bool robust_buffer_access,
|
||||
struct brw_mesh_prog_key *key)
|
||||
{
|
||||
memset(key, 0, sizeof(*key));
|
||||
|
||||
populate_base_prog_key(device, robust_buffer_access, &key->base);
|
||||
}
|
||||
|
||||
static void
|
||||
populate_wm_prog_key(const struct anv_graphics_pipeline *pipeline,
|
||||
bool robust_buffer_acccess,
|
||||
|
|
@ -742,8 +721,7 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
|
|||
}
|
||||
}
|
||||
|
||||
if (gl_shader_stage_is_compute(nir->info.stage) ||
|
||||
gl_shader_stage_is_mesh(nir->info.stage))
|
||||
if (gl_shader_stage_is_compute(nir->info.stage))
|
||||
NIR_PASS(_, nir, brw_nir_lower_cs_intrinsics);
|
||||
|
||||
stage->nir = nir;
|
||||
|
|
@ -954,70 +932,6 @@ anv_pipeline_compile_gs(const struct brw_compiler *compiler,
|
|||
gs_stage->code = brw_compile_gs(compiler, mem_ctx, ¶ms);
|
||||
}
|
||||
|
||||
static void
|
||||
anv_pipeline_link_task(const struct brw_compiler *compiler,
|
||||
struct anv_pipeline_stage *task_stage,
|
||||
struct anv_pipeline_stage *next_stage)
|
||||
{
|
||||
assert(next_stage);
|
||||
assert(next_stage->stage == MESA_SHADER_MESH);
|
||||
brw_nir_link_shaders(compiler, task_stage->nir, next_stage->nir);
|
||||
}
|
||||
|
||||
static void
|
||||
anv_pipeline_compile_task(const struct brw_compiler *compiler,
|
||||
void *mem_ctx,
|
||||
struct anv_device *device,
|
||||
struct anv_pipeline_stage *task_stage)
|
||||
{
|
||||
task_stage->num_stats = 1;
|
||||
|
||||
struct brw_compile_task_params params = {
|
||||
.nir = task_stage->nir,
|
||||
.key = &task_stage->key.task,
|
||||
.prog_data = &task_stage->prog_data.task,
|
||||
.stats = task_stage->stats,
|
||||
.log_data = device,
|
||||
};
|
||||
|
||||
task_stage->code = brw_compile_task(compiler, mem_ctx, ¶ms);
|
||||
}
|
||||
|
||||
static void
|
||||
anv_pipeline_link_mesh(const struct brw_compiler *compiler,
|
||||
struct anv_pipeline_stage *mesh_stage,
|
||||
struct anv_pipeline_stage *next_stage)
|
||||
{
|
||||
if (next_stage) {
|
||||
brw_nir_link_shaders(compiler, mesh_stage->nir, next_stage->nir);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
anv_pipeline_compile_mesh(const struct brw_compiler *compiler,
|
||||
void *mem_ctx,
|
||||
struct anv_device *device,
|
||||
struct anv_pipeline_stage *mesh_stage,
|
||||
struct anv_pipeline_stage *prev_stage)
|
||||
{
|
||||
mesh_stage->num_stats = 1;
|
||||
|
||||
struct brw_compile_mesh_params params = {
|
||||
.nir = mesh_stage->nir,
|
||||
.key = &mesh_stage->key.mesh,
|
||||
.prog_data = &mesh_stage->prog_data.mesh,
|
||||
.stats = mesh_stage->stats,
|
||||
.log_data = device,
|
||||
};
|
||||
|
||||
if (prev_stage) {
|
||||
assert(prev_stage->stage == MESA_SHADER_TASK);
|
||||
params.tue_map = &prev_stage->prog_data.task.map;
|
||||
}
|
||||
|
||||
mesh_stage->code = brw_compile_mesh(compiler, mem_ctx, ¶ms);
|
||||
}
|
||||
|
||||
static void
|
||||
anv_pipeline_link_fs(const struct brw_compiler *compiler,
|
||||
struct anv_pipeline_stage *stage,
|
||||
|
|
@ -1102,13 +1016,8 @@ anv_pipeline_compile_fs(const struct brw_compiler *compiler,
|
|||
.log_data = device,
|
||||
};
|
||||
|
||||
if (prev_stage->stage == MESA_SHADER_MESH) {
|
||||
params.mue_map = &prev_stage->prog_data.mesh.map;
|
||||
/* TODO(mesh): Slots valid, do we even use/rely on it? */
|
||||
} else {
|
||||
fs_stage->key.wm.input_slots_valid =
|
||||
prev_stage->prog_data.vue.vue_map.slots_valid;
|
||||
}
|
||||
fs_stage->key.wm.input_slots_valid =
|
||||
prev_stage->prog_data.vue.vue_map.slots_valid;
|
||||
|
||||
fs_stage->code = brw_compile_fs(compiler, mem_ctx, ¶ms);
|
||||
|
||||
|
|
@ -1291,16 +1200,6 @@ anv_graphics_pipeline_init_keys(struct anv_graphics_pipeline *pipeline,
|
|||
&stages[s].key.wm);
|
||||
break;
|
||||
}
|
||||
case MESA_SHADER_TASK:
|
||||
populate_task_prog_key(device,
|
||||
pipeline->base.device->robust_buffer_access,
|
||||
&stages[s].key.task);
|
||||
break;
|
||||
case MESA_SHADER_MESH:
|
||||
populate_mesh_prog_key(device,
|
||||
pipeline->base.device->robust_buffer_access,
|
||||
&stages[s].key.mesh);
|
||||
break;
|
||||
default:
|
||||
unreachable("Invalid graphics shader stage");
|
||||
}
|
||||
|
|
@ -1309,8 +1208,7 @@ anv_graphics_pipeline_init_keys(struct anv_graphics_pipeline *pipeline,
|
|||
stages[s].feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
|
||||
}
|
||||
|
||||
assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT ||
|
||||
pipeline->active_stages & VK_SHADER_STAGE_MESH_BIT_NV);
|
||||
assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);
|
||||
}
|
||||
|
||||
static bool
|
||||
|
|
@ -1394,9 +1292,6 @@ static const gl_shader_stage graphics_shader_order[] = {
|
|||
MESA_SHADER_TESS_EVAL,
|
||||
MESA_SHADER_GEOMETRY,
|
||||
|
||||
MESA_SHADER_TASK,
|
||||
MESA_SHADER_MESH,
|
||||
|
||||
MESA_SHADER_FRAGMENT,
|
||||
};
|
||||
|
||||
|
|
@ -1509,12 +1404,6 @@ anv_graphics_pipeline_compile(struct anv_graphics_pipeline *pipeline,
|
|||
case MESA_SHADER_GEOMETRY:
|
||||
anv_pipeline_link_gs(compiler, &stages[s], next_stage);
|
||||
break;
|
||||
case MESA_SHADER_TASK:
|
||||
anv_pipeline_link_task(compiler, &stages[s], next_stage);
|
||||
break;
|
||||
case MESA_SHADER_MESH:
|
||||
anv_pipeline_link_mesh(compiler, &stages[s], next_stage);
|
||||
break;
|
||||
case MESA_SHADER_FRAGMENT:
|
||||
anv_pipeline_link_fs(compiler, &stages[s], state->rp);
|
||||
break;
|
||||
|
|
@ -1584,8 +1473,7 @@ anv_graphics_pipeline_compile(struct anv_graphics_pipeline *pipeline,
|
|||
if (devinfo->has_coarse_pixel_primitive_and_cb &&
|
||||
stages[MESA_SHADER_FRAGMENT].info &&
|
||||
stages[MESA_SHADER_FRAGMENT].key.wm.coarse_pixel &&
|
||||
!stages[MESA_SHADER_FRAGMENT].nir->info.fs.uses_sample_shading &&
|
||||
stages[MESA_SHADER_MESH].info == NULL) {
|
||||
!stages[MESA_SHADER_FRAGMENT].nir->info.fs.uses_sample_shading) {
|
||||
struct anv_pipeline_stage *last_psr = NULL;
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(graphics_shader_order); i++) {
|
||||
|
|
@ -1631,14 +1519,6 @@ anv_graphics_pipeline_compile(struct anv_graphics_pipeline *pipeline,
|
|||
anv_pipeline_compile_gs(compiler, stage_ctx, pipeline->base.device,
|
||||
&stages[s], prev_stage);
|
||||
break;
|
||||
case MESA_SHADER_TASK:
|
||||
anv_pipeline_compile_task(compiler, stage_ctx, pipeline->base.device,
|
||||
&stages[s]);
|
||||
break;
|
||||
case MESA_SHADER_MESH:
|
||||
anv_pipeline_compile_mesh(compiler, stage_ctx, pipeline->base.device,
|
||||
&stages[s], prev_stage);
|
||||
break;
|
||||
case MESA_SHADER_FRAGMENT:
|
||||
anv_pipeline_compile_fs(compiler, stage_ctx, pipeline->base.device,
|
||||
&stages[s], prev_stage);
|
||||
|
|
@ -2008,9 +1888,6 @@ anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline,
|
|||
if (pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
|
||||
pipeline->active_stages |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
|
||||
|
||||
if (anv_pipeline_is_mesh(pipeline))
|
||||
assert(device->physical->vk.supported_extensions.NV_mesh_shader);
|
||||
|
||||
pipeline->dynamic_state.ms.sample_locations = &pipeline->sample_locations;
|
||||
vk_dynamic_graphics_state_fill(&pipeline->dynamic_state, state);
|
||||
|
||||
|
|
@ -2026,38 +1903,33 @@ anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline,
|
|||
|
||||
anv_pipeline_setup_l3_config(&pipeline->base, false);
|
||||
|
||||
if (anv_pipeline_is_primitive(pipeline)) {
|
||||
const uint64_t inputs_read = get_vs_prog_data(pipeline)->inputs_read;
|
||||
const uint64_t inputs_read = get_vs_prog_data(pipeline)->inputs_read;
|
||||
|
||||
u_foreach_bit(a, state->vi->attributes_valid) {
|
||||
if (inputs_read & BITFIELD64_BIT(VERT_ATTRIB_GENERIC0 + a))
|
||||
pipeline->vb_used |= BITFIELD64_BIT(state->vi->attributes[a].binding);
|
||||
}
|
||||
|
||||
u_foreach_bit(b, state->vi->bindings_valid) {
|
||||
pipeline->vb[b].stride = state->vi->bindings[b].stride;
|
||||
pipeline->vb[b].instanced = state->vi->bindings[b].input_rate ==
|
||||
VK_VERTEX_INPUT_RATE_INSTANCE;
|
||||
pipeline->vb[b].instance_divisor = state->vi->bindings[b].divisor;
|
||||
}
|
||||
|
||||
/* Our implementation of VK_KHR_multiview uses instancing to draw the
|
||||
* different views when primitive replication cannot be used. If the
|
||||
* client asks for instancing, we need to multiply by the client's
|
||||
* instance count at draw time and instance divisor in the vertex
|
||||
* bindings by the number of views ensure that we repeat the client's
|
||||
* per-instance data once for each view.
|
||||
*/
|
||||
const bool uses_primitive_replication =
|
||||
anv_pipeline_get_last_vue_prog_data(pipeline)->vue_map.num_pos_slots > 1;
|
||||
pipeline->instance_multiplier = 1;
|
||||
if (pipeline->view_mask && !uses_primitive_replication)
|
||||
pipeline->instance_multiplier = util_bitcount(pipeline->view_mask);
|
||||
} else {
|
||||
assert(anv_pipeline_is_mesh(pipeline));
|
||||
/* TODO(mesh): Mesh vs. Multiview with Instancing. */
|
||||
u_foreach_bit(a, state->vi->attributes_valid) {
|
||||
if (inputs_read & BITFIELD64_BIT(VERT_ATTRIB_GENERIC0 + a))
|
||||
pipeline->vb_used |= BITFIELD64_BIT(state->vi->attributes[a].binding);
|
||||
}
|
||||
|
||||
u_foreach_bit(b, state->vi->bindings_valid) {
|
||||
pipeline->vb[b].stride = state->vi->bindings[b].stride;
|
||||
pipeline->vb[b].instanced = state->vi->bindings[b].input_rate ==
|
||||
VK_VERTEX_INPUT_RATE_INSTANCE;
|
||||
pipeline->vb[b].instance_divisor = state->vi->bindings[b].divisor;
|
||||
}
|
||||
|
||||
/* Our implementation of VK_KHR_multiview uses instancing to draw the
|
||||
* different views when primitive replication cannot be used. If the client
|
||||
* asks for instancing, we need to multiply by the client's instance count
|
||||
* at draw time and instance divisor in the vertex bindings by the number
|
||||
* of views ensure that we repeat the client's per-instance data once for
|
||||
* each view.
|
||||
*/
|
||||
const bool uses_primitive_replication =
|
||||
anv_pipeline_get_last_vue_prog_data(pipeline)->vue_map.num_pos_slots > 1;
|
||||
pipeline->instance_multiplier = 1;
|
||||
if (pipeline->view_mask && !uses_primitive_replication)
|
||||
pipeline->instance_multiplier = util_bitcount(pipeline->view_mask);
|
||||
|
||||
pipeline->negative_one_to_one =
|
||||
state->vp != NULL && state->vp->negative_one_to_one;
|
||||
|
||||
|
|
|
|||
|
|
@ -3131,12 +3131,6 @@ anv_pipeline_is_primitive(const struct anv_graphics_pipeline *pipeline)
|
|||
return anv_pipeline_has_stage(pipeline, MESA_SHADER_VERTEX);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
anv_pipeline_is_mesh(const struct anv_graphics_pipeline *pipeline)
|
||||
{
|
||||
return anv_pipeline_has_stage(pipeline, MESA_SHADER_MESH);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
anv_cmd_buffer_all_color_write_masked(const struct anv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
|
|
@ -3175,8 +3169,6 @@ ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tcs, MESA_SHADER_TESS_CTRL)
|
|||
ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tes, MESA_SHADER_TESS_EVAL)
|
||||
ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(gs, MESA_SHADER_GEOMETRY)
|
||||
ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(wm, MESA_SHADER_FRAGMENT)
|
||||
ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(mesh, MESA_SHADER_MESH)
|
||||
ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(task, MESA_SHADER_TASK)
|
||||
|
||||
static inline const struct brw_cs_prog_data *
|
||||
get_cs_prog_data(const struct anv_compute_pipeline *pipeline)
|
||||
|
|
|
|||
|
|
@ -3404,64 +3404,6 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer,
|
|||
cmd_buffer->state.push_constants_dirty &= ~flushed;
|
||||
}
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
static void
|
||||
cmd_buffer_flush_mesh_inline_data(struct anv_cmd_buffer *cmd_buffer,
|
||||
VkShaderStageFlags dirty_stages)
|
||||
{
|
||||
struct anv_cmd_graphics_state *gfx_state = &cmd_buffer->state.gfx;
|
||||
const struct anv_graphics_pipeline *pipeline = gfx_state->pipeline;
|
||||
|
||||
if (dirty_stages & VK_SHADER_STAGE_TASK_BIT_NV &&
|
||||
anv_pipeline_has_stage(pipeline, MESA_SHADER_TASK)) {
|
||||
|
||||
const struct anv_shader_bin *shader = pipeline->shaders[MESA_SHADER_TASK];
|
||||
const struct anv_pipeline_bind_map *bind_map = &shader->bind_map;
|
||||
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_TASK_SHADER_DATA), data) {
|
||||
const struct anv_push_range *range = &bind_map->push_ranges[0];
|
||||
if (range->length > 0) {
|
||||
struct anv_address buffer =
|
||||
get_push_range_address(cmd_buffer, shader, range);
|
||||
|
||||
uint64_t addr = anv_address_physical(buffer);
|
||||
data.InlineData[0] = addr & 0xffffffff;
|
||||
data.InlineData[1] = addr >> 32;
|
||||
|
||||
memcpy(&data.InlineData[BRW_TASK_MESH_PUSH_CONSTANTS_START_DW],
|
||||
cmd_buffer->state.gfx.base.push_constants.client_data,
|
||||
BRW_TASK_MESH_PUSH_CONSTANTS_SIZE_DW * 4);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (dirty_stages & VK_SHADER_STAGE_MESH_BIT_NV &&
|
||||
anv_pipeline_has_stage(pipeline, MESA_SHADER_MESH)) {
|
||||
|
||||
const struct anv_shader_bin *shader = pipeline->shaders[MESA_SHADER_MESH];
|
||||
const struct anv_pipeline_bind_map *bind_map = &shader->bind_map;
|
||||
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_MESH_SHADER_DATA), data) {
|
||||
const struct anv_push_range *range = &bind_map->push_ranges[0];
|
||||
if (range->length > 0) {
|
||||
struct anv_address buffer =
|
||||
get_push_range_address(cmd_buffer, shader, range);
|
||||
|
||||
uint64_t addr = anv_address_physical(buffer);
|
||||
data.InlineData[0] = addr & 0xffffffff;
|
||||
data.InlineData[1] = addr >> 32;
|
||||
|
||||
memcpy(&data.InlineData[BRW_TASK_MESH_PUSH_CONSTANTS_START_DW],
|
||||
cmd_buffer->state.gfx.base.push_constants.client_data,
|
||||
BRW_TASK_MESH_PUSH_CONSTANTS_SIZE_DW * 4);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cmd_buffer->state.push_constants_dirty &= ~dirty_stages;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void
|
||||
cmd_buffer_emit_clip(struct anv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
|
|
@ -3495,7 +3437,6 @@ cmd_buffer_emit_clip(struct anv_cmd_buffer *cmd_buffer)
|
|||
};
|
||||
uint32_t dwords[GENX(3DSTATE_CLIP_length)];
|
||||
|
||||
/* TODO(mesh): Multiview. */
|
||||
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
|
||||
if (anv_pipeline_is_primitive(pipeline)) {
|
||||
const struct brw_vue_prog_data *last =
|
||||
|
|
@ -3504,12 +3445,6 @@ cmd_buffer_emit_clip(struct anv_cmd_buffer *cmd_buffer)
|
|||
clip.MaximumVPIndex = dyn->vp.viewport_count > 0 ?
|
||||
dyn->vp.viewport_count - 1 : 0;
|
||||
}
|
||||
} else if (anv_pipeline_is_mesh(pipeline)) {
|
||||
const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
|
||||
if (mesh_prog_data->map.start_dw[VARYING_SLOT_VIEWPORT] >= 0) {
|
||||
clip.MaximumVPIndex = dyn->vp.viewport_count > 0 ?
|
||||
dyn->vp.viewport_count - 1 : 0;
|
||||
}
|
||||
}
|
||||
|
||||
GENX(3DSTATE_CLIP_pack)(NULL, dwords, &clip);
|
||||
|
|
@ -3999,11 +3934,6 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
|
|||
dirty |= cmd_buffer->state.push_constants_dirty;
|
||||
cmd_buffer_flush_push_constants(cmd_buffer,
|
||||
dirty & VK_SHADER_STAGE_ALL_GRAPHICS);
|
||||
#if GFX_VERx10 >= 125
|
||||
cmd_buffer_flush_mesh_inline_data(
|
||||
cmd_buffer, dirty & (VK_SHADER_STAGE_TASK_BIT_NV |
|
||||
VK_SHADER_STAGE_MESH_BIT_NV));
|
||||
#endif
|
||||
}
|
||||
|
||||
if (dirty & VK_SHADER_STAGE_ALL_GRAPHICS) {
|
||||
|
|
@ -5008,160 +4938,6 @@ void genX(CmdEndTransformFeedbackEXT)(
|
|||
cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_XFB_ENABLE;
|
||||
}
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
void
|
||||
genX(CmdDrawMeshTasksNV)(
|
||||
VkCommandBuffer commandBuffer,
|
||||
uint32_t taskCount,
|
||||
uint32_t firstTask)
|
||||
{
|
||||
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
|
||||
if (anv_batch_has_error(&cmd_buffer->batch))
|
||||
return;
|
||||
|
||||
/* TODO(mesh): Check if this is not emitting more packets than we need. */
|
||||
genX(cmd_buffer_flush_state)(cmd_buffer);
|
||||
|
||||
if (cmd_buffer->state.conditional_render_enabled)
|
||||
genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
|
||||
|
||||
/* BSpec 54016 says: "The values passed for Starting ThreadGroup ID X
|
||||
* and ThreadGroup Count X shall not cause TGIDs to exceed (2^32)-1."
|
||||
*/
|
||||
assert((int64_t)firstTask + taskCount - 1 <= UINT32_MAX);
|
||||
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(3DMESH_1D), m) {
|
||||
m.PredicateEnable = cmd_buffer->state.conditional_render_enabled;
|
||||
m.ThreadGroupCountX = taskCount;
|
||||
m.StartingThreadGroupIDX = firstTask;
|
||||
}
|
||||
}
|
||||
|
||||
#define GFX125_3DMESH_TG_COUNT 0x26F0
|
||||
#define GFX125_3DMESH_STARTING_TGID 0x26F4
|
||||
#define GFX10_3DPRIM_XP(n) (0x2690 + (n) * 4) /* n = { 0, 1, 2 } */
|
||||
|
||||
static void
|
||||
mesh_load_indirect_parameters(struct anv_cmd_buffer *cmd_buffer,
|
||||
struct mi_builder *b,
|
||||
struct anv_address addr,
|
||||
bool emit_xp0,
|
||||
uint32_t xp0)
|
||||
{
|
||||
const size_t taskCountOff = offsetof(VkDrawMeshTasksIndirectCommandNV, taskCount);
|
||||
const size_t firstTaskOff = offsetof(VkDrawMeshTasksIndirectCommandNV, firstTask);
|
||||
|
||||
mi_store(b, mi_reg32(GFX125_3DMESH_TG_COUNT),
|
||||
mi_mem32(anv_address_add(addr, taskCountOff)));
|
||||
|
||||
mi_store(b, mi_reg32(GFX125_3DMESH_STARTING_TGID),
|
||||
mi_mem32(anv_address_add(addr, firstTaskOff)));
|
||||
|
||||
if (emit_xp0)
|
||||
mi_store(b, mi_reg32(GFX10_3DPRIM_XP(0)), mi_imm(xp0));
|
||||
}
|
||||
|
||||
static void
|
||||
emit_indirect_3dmesh_1d(struct anv_batch *batch,
|
||||
bool predicate_enable,
|
||||
bool uses_drawid)
|
||||
{
|
||||
uint32_t len = GENX(3DMESH_1D_length) + uses_drawid;
|
||||
uint32_t *dw = anv_batch_emitn(batch, len, GENX(3DMESH_1D),
|
||||
.PredicateEnable = predicate_enable,
|
||||
.IndirectParameterEnable = true,
|
||||
.ExtendedParameter0Present = uses_drawid);
|
||||
if (uses_drawid)
|
||||
dw[len - 1] = 0;
|
||||
}
|
||||
|
||||
void
|
||||
genX(CmdDrawMeshTasksIndirectNV)(
|
||||
VkCommandBuffer commandBuffer,
|
||||
VkBuffer _buffer,
|
||||
VkDeviceSize offset,
|
||||
uint32_t drawCount,
|
||||
uint32_t stride)
|
||||
{
|
||||
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
|
||||
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
|
||||
const struct brw_task_prog_data *task_prog_data = get_task_prog_data(pipeline);
|
||||
const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
|
||||
struct anv_cmd_state *cmd_state = &cmd_buffer->state;
|
||||
|
||||
if (anv_batch_has_error(&cmd_buffer->batch))
|
||||
return;
|
||||
|
||||
genX(cmd_buffer_flush_state)(cmd_buffer);
|
||||
|
||||
if (cmd_state->conditional_render_enabled)
|
||||
genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
|
||||
|
||||
bool uses_drawid = (task_prog_data && task_prog_data->uses_drawid) ||
|
||||
mesh_prog_data->uses_drawid;
|
||||
struct mi_builder b;
|
||||
mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
|
||||
|
||||
for (uint32_t i = 0; i < drawCount; i++) {
|
||||
struct anv_address draw = anv_address_add(buffer->address, offset);
|
||||
|
||||
mesh_load_indirect_parameters(cmd_buffer, &b, draw, uses_drawid, i);
|
||||
|
||||
emit_indirect_3dmesh_1d(&cmd_buffer->batch,
|
||||
cmd_state->conditional_render_enabled, uses_drawid);
|
||||
|
||||
offset += stride;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
genX(CmdDrawMeshTasksIndirectCountNV)(
|
||||
VkCommandBuffer commandBuffer,
|
||||
VkBuffer _buffer,
|
||||
VkDeviceSize offset,
|
||||
VkBuffer _countBuffer,
|
||||
VkDeviceSize countBufferOffset,
|
||||
uint32_t maxDrawCount,
|
||||
uint32_t stride)
|
||||
{
|
||||
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
|
||||
ANV_FROM_HANDLE(anv_buffer, count_buffer, _countBuffer);
|
||||
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
|
||||
const struct brw_task_prog_data *task_prog_data = get_task_prog_data(pipeline);
|
||||
const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
|
||||
|
||||
if (anv_batch_has_error(&cmd_buffer->batch))
|
||||
return;
|
||||
|
||||
genX(cmd_buffer_flush_state)(cmd_buffer);
|
||||
|
||||
bool uses_drawid = (task_prog_data && task_prog_data->uses_drawid) ||
|
||||
mesh_prog_data->uses_drawid;
|
||||
|
||||
struct mi_builder b;
|
||||
mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
|
||||
|
||||
struct mi_value max =
|
||||
prepare_for_draw_count_predicate(cmd_buffer, &b,
|
||||
count_buffer, countBufferOffset);
|
||||
|
||||
for (uint32_t i = 0; i < maxDrawCount; i++) {
|
||||
struct anv_address draw = anv_address_add(buffer->address, offset);
|
||||
|
||||
emit_draw_count_predicate_cond(cmd_buffer, &b, i, max);
|
||||
|
||||
mesh_load_indirect_parameters(cmd_buffer, &b, draw, uses_drawid, i);
|
||||
|
||||
emit_indirect_3dmesh_1d(&cmd_buffer->batch, true, uses_drawid);
|
||||
|
||||
offset += stride;
|
||||
}
|
||||
}
|
||||
#endif /* GFX_VERx10 >= 125 */
|
||||
|
||||
void
|
||||
genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -309,71 +309,12 @@ genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
|
|||
urb.VSNumberofURBEntries = entries[i];
|
||||
}
|
||||
}
|
||||
#if GFX_VERx10 >= 125
|
||||
if (device->physical->vk.supported_extensions.NV_mesh_shader) {
|
||||
anv_batch_emit(batch, GENX(3DSTATE_URB_ALLOC_MESH), zero);
|
||||
anv_batch_emit(batch, GENX(3DSTATE_URB_ALLOC_TASK), zero);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
static void
|
||||
emit_urb_setup_mesh(struct anv_graphics_pipeline *pipeline,
|
||||
enum intel_urb_deref_block_size *deref_block_size)
|
||||
{
|
||||
const struct intel_device_info *devinfo = pipeline->base.device->info;
|
||||
|
||||
const struct brw_task_prog_data *task_prog_data =
|
||||
anv_pipeline_has_stage(pipeline, MESA_SHADER_TASK) ?
|
||||
get_task_prog_data(pipeline) : NULL;
|
||||
const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
|
||||
|
||||
const struct intel_mesh_urb_allocation alloc =
|
||||
intel_get_mesh_urb_config(devinfo, pipeline->base.l3_config,
|
||||
task_prog_data ? task_prog_data->map.size_dw : 0,
|
||||
mesh_prog_data->map.size_dw);
|
||||
|
||||
/* Zero out the primitive pipeline URB allocations. */
|
||||
for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) {
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_URB_VS), urb) {
|
||||
urb._3DCommandSubOpcode += i;
|
||||
}
|
||||
}
|
||||
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_URB_ALLOC_TASK), urb) {
|
||||
if (task_prog_data) {
|
||||
urb.TASKURBEntryAllocationSize = alloc.task_entry_size_64b - 1;
|
||||
urb.TASKNumberofURBEntriesSlice0 = alloc.task_entries;
|
||||
urb.TASKNumberofURBEntriesSliceN = alloc.task_entries;
|
||||
urb.TASKURBStartingAddressSlice0 = alloc.task_starting_address_8kb;
|
||||
urb.TASKURBStartingAddressSliceN = alloc.task_starting_address_8kb;
|
||||
}
|
||||
}
|
||||
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_URB_ALLOC_MESH), urb) {
|
||||
urb.MESHURBEntryAllocationSize = alloc.mesh_entry_size_64b - 1;
|
||||
urb.MESHNumberofURBEntriesSlice0 = alloc.mesh_entries;
|
||||
urb.MESHNumberofURBEntriesSliceN = alloc.mesh_entries;
|
||||
urb.MESHURBStartingAddressSlice0 = alloc.mesh_starting_address_8kb;
|
||||
urb.MESHURBStartingAddressSliceN = alloc.mesh_starting_address_8kb;
|
||||
}
|
||||
|
||||
*deref_block_size = alloc.deref_block_size;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void
|
||||
emit_urb_setup(struct anv_graphics_pipeline *pipeline,
|
||||
enum intel_urb_deref_block_size *deref_block_size)
|
||||
{
|
||||
#if GFX_VERx10 >= 125
|
||||
if (anv_pipeline_is_mesh(pipeline)) {
|
||||
emit_urb_setup_mesh(pipeline, deref_block_size);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
unsigned entry_size[4];
|
||||
for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
|
||||
const struct brw_vue_prog_data *prog_data =
|
||||
|
|
@ -398,10 +339,6 @@ emit_3dstate_sbe(struct anv_graphics_pipeline *pipeline)
|
|||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SBE), sbe);
|
||||
#if GFX_VER >= 8
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SBE_SWIZ), sbe);
|
||||
#endif
|
||||
#if GFX_VERx10 >= 125
|
||||
if (anv_pipeline_is_mesh(pipeline))
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SBE_MESH), sbe_mesh);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
|
@ -431,123 +368,75 @@ emit_3dstate_sbe(struct anv_graphics_pipeline *pipeline)
|
|||
# define swiz sbe
|
||||
#endif
|
||||
|
||||
if (anv_pipeline_is_primitive(pipeline)) {
|
||||
const struct brw_vue_map *fs_input_map =
|
||||
&anv_pipeline_get_last_vue_prog_data(pipeline)->vue_map;
|
||||
const struct brw_vue_map *fs_input_map =
|
||||
&anv_pipeline_get_last_vue_prog_data(pipeline)->vue_map;
|
||||
|
||||
int first_slot = brw_compute_first_urb_slot_required(wm_prog_data->inputs,
|
||||
fs_input_map);
|
||||
assert(first_slot % 2 == 0);
|
||||
unsigned urb_entry_read_offset = first_slot / 2;
|
||||
int max_source_attr = 0;
|
||||
for (uint8_t idx = 0; idx < wm_prog_data->urb_setup_attribs_count; idx++) {
|
||||
uint8_t attr = wm_prog_data->urb_setup_attribs[idx];
|
||||
int input_index = wm_prog_data->urb_setup[attr];
|
||||
int first_slot = brw_compute_first_urb_slot_required(wm_prog_data->inputs,
|
||||
fs_input_map);
|
||||
assert(first_slot % 2 == 0);
|
||||
unsigned urb_entry_read_offset = first_slot / 2;
|
||||
int max_source_attr = 0;
|
||||
for (uint8_t idx = 0; idx < wm_prog_data->urb_setup_attribs_count; idx++) {
|
||||
uint8_t attr = wm_prog_data->urb_setup_attribs[idx];
|
||||
int input_index = wm_prog_data->urb_setup[attr];
|
||||
|
||||
assert(0 <= input_index);
|
||||
assert(0 <= input_index);
|
||||
|
||||
/* gl_Viewport, gl_Layer and FragmentShadingRateKHR are stored in the
|
||||
* VUE header
|
||||
*/
|
||||
if (attr == VARYING_SLOT_VIEWPORT ||
|
||||
attr == VARYING_SLOT_LAYER ||
|
||||
attr == VARYING_SLOT_PRIMITIVE_SHADING_RATE) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (attr == VARYING_SLOT_PNTC) {
|
||||
sbe.PointSpriteTextureCoordinateEnable = 1 << input_index;
|
||||
continue;
|
||||
}
|
||||
|
||||
const int slot = fs_input_map->varying_to_slot[attr];
|
||||
|
||||
if (slot == -1) {
|
||||
/* This attribute does not exist in the VUE--that means that the
|
||||
* vertex shader did not write to it. It could be that it's a
|
||||
* regular varying read by the fragment shader but not written by
|
||||
* the vertex shader or it's gl_PrimitiveID. In the first case the
|
||||
* value is undefined, in the second it needs to be
|
||||
* gl_PrimitiveID.
|
||||
*/
|
||||
swiz.Attribute[input_index].ConstantSource = PRIM_ID;
|
||||
swiz.Attribute[input_index].ComponentOverrideX = true;
|
||||
swiz.Attribute[input_index].ComponentOverrideY = true;
|
||||
swiz.Attribute[input_index].ComponentOverrideZ = true;
|
||||
swiz.Attribute[input_index].ComponentOverrideW = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* We have to subtract two slots to account for the URB entry output
|
||||
* read offset in the VS and GS stages.
|
||||
*/
|
||||
const int source_attr = slot - 2 * urb_entry_read_offset;
|
||||
assert(source_attr >= 0 && source_attr < 32);
|
||||
max_source_attr = MAX2(max_source_attr, source_attr);
|
||||
/* The hardware can only do overrides on 16 overrides at a time, and the
|
||||
* other up to 16 have to be lined up so that the input index = the
|
||||
* output index. We'll need to do some tweaking to make sure that's the
|
||||
* case.
|
||||
*/
|
||||
if (input_index < 16)
|
||||
swiz.Attribute[input_index].SourceAttribute = source_attr;
|
||||
else
|
||||
assert(source_attr == input_index);
|
||||
/* gl_Viewport, gl_Layer and FragmentShadingRateKHR are stored in the
|
||||
* VUE header
|
||||
*/
|
||||
if (attr == VARYING_SLOT_VIEWPORT ||
|
||||
attr == VARYING_SLOT_LAYER ||
|
||||
attr == VARYING_SLOT_PRIMITIVE_SHADING_RATE) {
|
||||
continue;
|
||||
}
|
||||
|
||||
sbe.VertexURBEntryReadOffset = urb_entry_read_offset;
|
||||
sbe.VertexURBEntryReadLength = DIV_ROUND_UP(max_source_attr + 1, 2);
|
||||
#if GFX_VER >= 8
|
||||
sbe.ForceVertexURBEntryReadOffset = true;
|
||||
sbe.ForceVertexURBEntryReadLength = true;
|
||||
#endif
|
||||
} else {
|
||||
assert(anv_pipeline_is_mesh(pipeline));
|
||||
#if GFX_VERx10 >= 125
|
||||
const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SBE_MESH), sbe_mesh) {
|
||||
const struct brw_mue_map *mue = &mesh_prog_data->map;
|
||||
|
||||
assert(mue->per_vertex_header_size_dw % 8 == 0);
|
||||
sbe_mesh.PerVertexURBEntryOutputReadOffset = mue->per_vertex_header_size_dw / 8;
|
||||
sbe_mesh.PerVertexURBEntryOutputReadLength = DIV_ROUND_UP(mue->per_vertex_data_size_dw, 8);
|
||||
|
||||
/* Clip distance array is passed in the per-vertex header so that
|
||||
* it can be consumed by the HW. If user wants to read it in the FS,
|
||||
* adjust the offset and length to cover it. Conveniently it is at
|
||||
* the end of the per-vertex header, right before per-vertex
|
||||
* attributes.
|
||||
*
|
||||
* Note that FS attribute reading must be aware that the clip
|
||||
* distances have fixed position.
|
||||
*/
|
||||
if (mue->per_vertex_header_size_dw > 8 &&
|
||||
(wm_prog_data->urb_setup[VARYING_SLOT_CLIP_DIST0] >= 0 ||
|
||||
wm_prog_data->urb_setup[VARYING_SLOT_CLIP_DIST1] >= 0)) {
|
||||
sbe_mesh.PerVertexURBEntryOutputReadOffset -= 1;
|
||||
sbe_mesh.PerVertexURBEntryOutputReadLength += 1;
|
||||
}
|
||||
|
||||
assert(mue->per_primitive_header_size_dw % 8 == 0);
|
||||
sbe_mesh.PerPrimitiveURBEntryOutputReadOffset = mue->per_primitive_header_size_dw / 8;
|
||||
sbe_mesh.PerPrimitiveURBEntryOutputReadLength = DIV_ROUND_UP(mue->per_primitive_data_size_dw, 8);
|
||||
|
||||
/* Just like with clip distances, if Primitive Shading Rate,
|
||||
* Viewport Index or Layer is read back in the FS, adjust
|
||||
* the offset and length to cover the Primitive Header, where
|
||||
* PSR, Viewport Index & Layer are stored.
|
||||
*/
|
||||
if (wm_prog_data->urb_setup[VARYING_SLOT_VIEWPORT] >= 0 ||
|
||||
wm_prog_data->urb_setup[VARYING_SLOT_PRIMITIVE_SHADING_RATE] >= 0 ||
|
||||
wm_prog_data->urb_setup[VARYING_SLOT_LAYER] >= 0) {
|
||||
assert(sbe_mesh.PerPrimitiveURBEntryOutputReadOffset > 0);
|
||||
sbe_mesh.PerPrimitiveURBEntryOutputReadOffset -= 1;
|
||||
sbe_mesh.PerPrimitiveURBEntryOutputReadLength += 1;
|
||||
}
|
||||
if (attr == VARYING_SLOT_PNTC) {
|
||||
sbe.PointSpriteTextureCoordinateEnable = 1 << input_index;
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
|
||||
const int slot = fs_input_map->varying_to_slot[attr];
|
||||
|
||||
if (slot == -1) {
|
||||
/* This attribute does not exist in the VUE--that means that the
|
||||
* vertex shader did not write to it. It could be that it's a regular
|
||||
* varying read by the fragment shader but not written by the vertex
|
||||
* shader or it's gl_PrimitiveID. In the first case the value is
|
||||
* undefined, in the second it needs to be gl_PrimitiveID.
|
||||
*/
|
||||
swiz.Attribute[input_index].ConstantSource = PRIM_ID;
|
||||
swiz.Attribute[input_index].ComponentOverrideX = true;
|
||||
swiz.Attribute[input_index].ComponentOverrideY = true;
|
||||
swiz.Attribute[input_index].ComponentOverrideZ = true;
|
||||
swiz.Attribute[input_index].ComponentOverrideW = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* We have to subtract two slots to account for the URB entry output
|
||||
* read offset in the VS and GS stages.
|
||||
*/
|
||||
const int source_attr = slot - 2 * urb_entry_read_offset;
|
||||
assert(source_attr >= 0 && source_attr < 32);
|
||||
max_source_attr = MAX2(max_source_attr, source_attr);
|
||||
/* The hardware can only do overrides on 16 overrides at a time, and the
|
||||
* other up to 16 have to be lined up so that the input index = the
|
||||
* output index. We'll need to do some tweaking to make sure that's the
|
||||
* case.
|
||||
*/
|
||||
if (input_index < 16)
|
||||
swiz.Attribute[input_index].SourceAttribute = source_attr;
|
||||
else
|
||||
assert(source_attr == input_index);
|
||||
}
|
||||
|
||||
sbe.VertexURBEntryReadOffset = urb_entry_read_offset;
|
||||
sbe.VertexURBEntryReadLength = DIV_ROUND_UP(max_source_attr + 1, 2);
|
||||
#if GFX_VER >= 8
|
||||
sbe.ForceVertexURBEntryReadOffset = true;
|
||||
sbe.ForceVertexURBEntryReadLength = true;
|
||||
#endif
|
||||
|
||||
uint32_t *dw = anv_batch_emit_dwords(&pipeline->base.batch,
|
||||
GENX(3DSTATE_SBE_length));
|
||||
if (!dw)
|
||||
|
|
@ -571,18 +460,7 @@ VkPolygonMode
|
|||
genX(raster_polygon_mode)(struct anv_graphics_pipeline *pipeline,
|
||||
VkPrimitiveTopology primitive_topology)
|
||||
{
|
||||
if (anv_pipeline_is_mesh(pipeline)) {
|
||||
switch (get_mesh_prog_data(pipeline)->primitive_type) {
|
||||
case SHADER_PRIM_POINTS:
|
||||
return VK_POLYGON_MODE_POINT;
|
||||
case SHADER_PRIM_LINES:
|
||||
return VK_POLYGON_MODE_LINE;
|
||||
case SHADER_PRIM_TRIANGLES:
|
||||
return pipeline->polygon_mode;
|
||||
default:
|
||||
unreachable("invalid primitive type for mesh");
|
||||
}
|
||||
} else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
|
||||
if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
|
||||
switch (get_gs_prog_data(pipeline)->output_topology) {
|
||||
case _3DPRIM_POINTLIST:
|
||||
return VK_POLYGON_MODE_POINT;
|
||||
|
|
@ -779,15 +657,9 @@ emit_rs_state(struct anv_graphics_pipeline *pipeline,
|
|||
#endif
|
||||
|
||||
bool point_from_shader;
|
||||
if (anv_pipeline_is_primitive(pipeline)) {
|
||||
const struct brw_vue_prog_data *last_vue_prog_data =
|
||||
anv_pipeline_get_last_vue_prog_data(pipeline);
|
||||
point_from_shader = last_vue_prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ;
|
||||
} else {
|
||||
assert(anv_pipeline_is_mesh(pipeline));
|
||||
const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
|
||||
point_from_shader = mesh_prog_data->map.start_dw[VARYING_SLOT_PSIZ] >= 0;
|
||||
}
|
||||
const struct brw_vue_prog_data *last_vue_prog_data =
|
||||
anv_pipeline_get_last_vue_prog_data(pipeline);
|
||||
point_from_shader = last_vue_prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ;
|
||||
|
||||
if (point_from_shader) {
|
||||
sf.PointWidthSource = Vertex;
|
||||
|
|
@ -1198,44 +1070,35 @@ emit_3dstate_clip(struct anv_graphics_pipeline *pipeline,
|
|||
clip.MinimumPointWidth = 0.125;
|
||||
clip.MaximumPointWidth = 255.875;
|
||||
|
||||
/* TODO(mesh): Multiview. */
|
||||
if (anv_pipeline_is_primitive(pipeline)) {
|
||||
const struct brw_vue_prog_data *last =
|
||||
anv_pipeline_get_last_vue_prog_data(pipeline);
|
||||
const struct brw_vue_prog_data *last =
|
||||
anv_pipeline_get_last_vue_prog_data(pipeline);
|
||||
|
||||
/* From the Vulkan 1.0.45 spec:
|
||||
*
|
||||
* "If the last active vertex processing stage shader entry point's
|
||||
* interface does not include a variable decorated with
|
||||
* ViewportIndex, then the first viewport is used."
|
||||
*/
|
||||
if (vp && (last->vue_map.slots_valid & VARYING_BIT_VIEWPORT)) {
|
||||
clip.MaximumVPIndex = vp->viewport_count > 0 ?
|
||||
vp->viewport_count - 1 : 0;
|
||||
} else {
|
||||
clip.MaximumVPIndex = 0;
|
||||
}
|
||||
/* From the Vulkan 1.0.45 spec:
|
||||
*
|
||||
* "If the last active vertex processing stage shader entry point's
|
||||
* interface does not include a variable decorated with ViewportIndex,
|
||||
* then the first viewport is used."
|
||||
*/
|
||||
if (vp && (last->vue_map.slots_valid & VARYING_BIT_VIEWPORT)) {
|
||||
clip.MaximumVPIndex = vp->viewport_count > 0 ?
|
||||
vp->viewport_count - 1 : 0;
|
||||
} else {
|
||||
clip.MaximumVPIndex = 0;
|
||||
}
|
||||
|
||||
/* From the Vulkan 1.0.45 spec:
|
||||
*
|
||||
* "If the last active vertex processing stage shader entry point's
|
||||
* interface does not include a variable decorated with Layer, then
|
||||
* the first layer is used."
|
||||
*/
|
||||
clip.ForceZeroRTAIndexEnable =
|
||||
!(last->vue_map.slots_valid & VARYING_BIT_LAYER);
|
||||
/* From the Vulkan 1.0.45 spec:
|
||||
*
|
||||
* "If the last active vertex processing stage shader entry point's
|
||||
* interface does not include a variable decorated with Layer, then the
|
||||
* first layer is used."
|
||||
*/
|
||||
clip.ForceZeroRTAIndexEnable =
|
||||
!(last->vue_map.slots_valid & VARYING_BIT_LAYER);
|
||||
|
||||
#if GFX_VER == 7
|
||||
clip.UserClipDistanceClipTestEnableBitmask = last->clip_distance_mask;
|
||||
clip.UserClipDistanceCullTestEnableBitmask = last->cull_distance_mask;
|
||||
clip.UserClipDistanceClipTestEnableBitmask = last->clip_distance_mask;
|
||||
clip.UserClipDistanceCullTestEnableBitmask = last->cull_distance_mask;
|
||||
#endif
|
||||
} else if (anv_pipeline_is_mesh(pipeline)) {
|
||||
const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
|
||||
if (vp && vp->viewport_count > 0 &&
|
||||
mesh_prog_data->map.start_dw[VARYING_SLOT_VIEWPORT] >= 0) {
|
||||
clip.MaximumVPIndex = vp->viewport_count - 1;
|
||||
}
|
||||
}
|
||||
|
||||
#if GFX_VER == 7
|
||||
clip.FrontWinding = genX(vk_to_intel_front_face)[rs->front_face];
|
||||
|
|
@ -1247,17 +1110,6 @@ emit_3dstate_clip(struct anv_graphics_pipeline *pipeline,
|
|||
#endif
|
||||
|
||||
GENX(3DSTATE_CLIP_pack)(NULL, pipeline->gfx7.clip, &clip);
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
if (anv_pipeline_is_mesh(pipeline)) {
|
||||
const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_CLIP_MESH), clip_mesh) {
|
||||
clip_mesh.PrimitiveHeaderEnable = mesh_prog_data->map.per_primitive_header_size_dw > 0;
|
||||
clip_mesh.UserClipDistanceClipTestEnableBitmask = mesh_prog_data->clip_distance_mask;
|
||||
clip_mesh.UserClipDistanceCullTestEnableBitmask = mesh_prog_data->cull_distance_mask;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -2168,141 +2020,6 @@ emit_3dstate_primitive_replication(struct anv_graphics_pipeline *pipeline,
|
|||
}
|
||||
#endif
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
static void
|
||||
emit_task_state(struct anv_graphics_pipeline *pipeline)
|
||||
{
|
||||
assert(anv_pipeline_is_mesh(pipeline));
|
||||
|
||||
if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_TASK)) {
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TASK_CONTROL), zero);
|
||||
return;
|
||||
}
|
||||
|
||||
const struct anv_shader_bin *task_bin = pipeline->shaders[MESA_SHADER_TASK];
|
||||
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TASK_CONTROL), tc) {
|
||||
tc.TaskShaderEnable = true;
|
||||
tc.ScratchSpaceBuffer =
|
||||
get_scratch_surf(&pipeline->base, MESA_SHADER_TASK, task_bin);
|
||||
}
|
||||
|
||||
const struct intel_device_info *devinfo = pipeline->base.device->info;
|
||||
const struct brw_task_prog_data *task_prog_data = get_task_prog_data(pipeline);
|
||||
const struct brw_cs_dispatch_info task_dispatch =
|
||||
brw_cs_get_dispatch_info(devinfo, &task_prog_data->base, NULL);
|
||||
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TASK_SHADER), task) {
|
||||
task.KernelStartPointer = task_bin->kernel.offset;
|
||||
task.SIMDSize = task_dispatch.simd_size / 16;
|
||||
task.MessageSIMD = task.SIMDSize;
|
||||
task.NumberofThreadsinGPGPUThreadGroup = task_dispatch.threads;
|
||||
task.ExecutionMask = task_dispatch.right_mask;
|
||||
task.LocalXMaximum = task_dispatch.group_size - 1;
|
||||
task.EmitLocalIDX = true;
|
||||
|
||||
task.NumberofBarriers = task_prog_data->base.uses_barrier;
|
||||
task.SharedLocalMemorySize =
|
||||
encode_slm_size(GFX_VER, task_prog_data->base.base.total_shared);
|
||||
|
||||
/*
|
||||
* 3DSTATE_TASK_SHADER_DATA.InlineData[0:1] will be used for an address
|
||||
* of a buffer with push constants and descriptor set table and
|
||||
* InlineData[2:7] will be used for first few push constants.
|
||||
*/
|
||||
task.EmitInlineParameter = true;
|
||||
|
||||
task.XP0Required = task_prog_data->uses_drawid;
|
||||
}
|
||||
|
||||
/* Recommended values from "Task and Mesh Distribution Programming". */
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TASK_REDISTRIB), redistrib) {
|
||||
redistrib.LocalBOTAccumulatorThreshold = MULTIPLIER_1;
|
||||
redistrib.SmallTaskThreshold = 1; /* 2^N */
|
||||
redistrib.TargetMeshBatchSize = devinfo->num_slices > 2 ? 3 : 5; /* 2^N */
|
||||
redistrib.TaskRedistributionLevel = TASKREDISTRIB_BOM;
|
||||
|
||||
/* TODO: We have an unknown issue with Task Payload when task redistribution
|
||||
* is enabled. Disable it for now.
|
||||
* See https://gitlab.freedesktop.org/mesa/mesa/-/issues/7141
|
||||
*/
|
||||
redistrib.TaskRedistributionMode = TASKREDISTRIB_OFF;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
emit_mesh_state(struct anv_graphics_pipeline *pipeline)
|
||||
{
|
||||
assert(anv_pipeline_is_mesh(pipeline));
|
||||
|
||||
const struct anv_shader_bin *mesh_bin = pipeline->shaders[MESA_SHADER_MESH];
|
||||
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_MESH_CONTROL), mc) {
|
||||
mc.MeshShaderEnable = true;
|
||||
mc.ScratchSpaceBuffer =
|
||||
get_scratch_surf(&pipeline->base, MESA_SHADER_MESH, mesh_bin);
|
||||
|
||||
/* TODO(mesh): MaximumNumberofThreadGroups. */
|
||||
}
|
||||
|
||||
const struct intel_device_info *devinfo = pipeline->base.device->info;
|
||||
const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
|
||||
const struct brw_cs_dispatch_info mesh_dispatch =
|
||||
brw_cs_get_dispatch_info(devinfo, &mesh_prog_data->base, NULL);
|
||||
|
||||
const unsigned output_topology =
|
||||
mesh_prog_data->primitive_type == SHADER_PRIM_POINTS ? OUTPUT_POINT :
|
||||
mesh_prog_data->primitive_type == SHADER_PRIM_LINES ? OUTPUT_LINE :
|
||||
OUTPUT_TRI;
|
||||
|
||||
uint32_t index_format;
|
||||
switch (mesh_prog_data->index_format) {
|
||||
case BRW_INDEX_FORMAT_U32:
|
||||
index_format = INDEX_U32;
|
||||
break;
|
||||
default:
|
||||
unreachable("invalid index format");
|
||||
}
|
||||
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_MESH_SHADER), mesh) {
|
||||
mesh.KernelStartPointer = mesh_bin->kernel.offset;
|
||||
mesh.SIMDSize = mesh_dispatch.simd_size / 16;
|
||||
mesh.MessageSIMD = mesh.SIMDSize;
|
||||
mesh.NumberofThreadsinGPGPUThreadGroup = mesh_dispatch.threads;
|
||||
mesh.ExecutionMask = mesh_dispatch.right_mask;
|
||||
mesh.LocalXMaximum = mesh_dispatch.group_size - 1;
|
||||
mesh.EmitLocalIDX = true;
|
||||
|
||||
mesh.MaximumPrimitiveCount = mesh_prog_data->map.max_primitives - 1;
|
||||
mesh.OutputTopology = output_topology;
|
||||
mesh.PerVertexDataPitch = mesh_prog_data->map.per_vertex_pitch_dw / 8;
|
||||
mesh.PerPrimitiveDataPresent = mesh_prog_data->map.per_primitive_pitch_dw > 0;
|
||||
mesh.PerPrimitiveDataPitch = mesh_prog_data->map.per_primitive_pitch_dw / 8;
|
||||
mesh.IndexFormat = index_format;
|
||||
|
||||
mesh.NumberofBarriers = mesh_prog_data->base.uses_barrier;
|
||||
mesh.SharedLocalMemorySize =
|
||||
encode_slm_size(GFX_VER, mesh_prog_data->base.base.total_shared);
|
||||
|
||||
/*
|
||||
* 3DSTATE_MESH_SHADER_DATA.InlineData[0:1] will be used for an address
|
||||
* of a buffer with push constants and descriptor set table and
|
||||
* InlineData[2:7] will be used for first few push constants.
|
||||
*/
|
||||
mesh.EmitInlineParameter = true;
|
||||
|
||||
mesh.XP0Required = mesh_prog_data->uses_drawid;
|
||||
}
|
||||
|
||||
/* Recommended values from "Task and Mesh Distribution Programming". */
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_MESH_DISTRIB), distrib) {
|
||||
distrib.DistributionMode = MESH_RR_FREE;
|
||||
distrib.TaskDistributionBatchSize = devinfo->num_slices > 2 ? 8 : 9; /* 2^N thread groups */
|
||||
distrib.MeshDistributionBatchSize = devinfo->num_slices > 2 ? 5 : 3; /* 2^N thread groups */
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void
|
||||
genX(graphics_pipeline_emit)(struct anv_graphics_pipeline *pipeline,
|
||||
const struct vk_graphics_pipeline_state *state)
|
||||
|
|
@ -2342,38 +2059,15 @@ genX(graphics_pipeline_emit)(struct anv_graphics_pipeline *pipeline,
|
|||
gfx7_emit_vs_workaround_flush(brw);
|
||||
#endif
|
||||
|
||||
if (anv_pipeline_is_primitive(pipeline)) {
|
||||
emit_vertex_input(pipeline, state->vi);
|
||||
emit_vertex_input(pipeline, state->vi);
|
||||
|
||||
emit_3dstate_vs(pipeline);
|
||||
emit_3dstate_hs_te_ds(pipeline, state->ts);
|
||||
emit_3dstate_gs(pipeline);
|
||||
emit_3dstate_vs(pipeline);
|
||||
emit_3dstate_hs_te_ds(pipeline, state->ts);
|
||||
emit_3dstate_gs(pipeline);
|
||||
|
||||
emit_3dstate_vf_statistics(pipeline);
|
||||
emit_3dstate_vf_statistics(pipeline);
|
||||
|
||||
emit_3dstate_streamout(pipeline, state->rs);
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
const struct anv_device *device = pipeline->base.device;
|
||||
/* Disable Mesh. */
|
||||
if (device->physical->vk.supported_extensions.NV_mesh_shader) {
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_MESH_CONTROL), zero);
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TASK_CONTROL), zero);
|
||||
}
|
||||
#endif
|
||||
} else {
|
||||
assert(anv_pipeline_is_mesh(pipeline));
|
||||
|
||||
/* BSpec 46303 forbids both 3DSTATE_MESH_CONTROL.MeshShaderEnable
|
||||
* and 3DSTATE_STREAMOUT.SOFunctionEnable to be 1.
|
||||
*/
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_STREAMOUT), so) {}
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
emit_task_state(pipeline);
|
||||
emit_mesh_state(pipeline);
|
||||
#endif
|
||||
}
|
||||
emit_3dstate_streamout(pipeline, state->rs);
|
||||
|
||||
emit_3dstate_sbe(pipeline);
|
||||
emit_3dstate_wm(pipeline, state->ia, state->rs,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue