diff --git a/src/intel/vulkan_hasvk/anv_device.c b/src/intel/vulkan_hasvk/anv_device.c index e167a5c671d..c52aece7e45 100644 --- a/src/intel/vulkan_hasvk/anv_device.c +++ b/src/intel/vulkan_hasvk/anv_device.c @@ -182,9 +182,6 @@ get_device_extensions(const struct anv_physical_device *device, const bool has_syncobj_wait = (device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT) != 0; - const bool nv_mesh_shading_enabled = - env_var_as_boolean("ANV_EXPERIMENTAL_NV_MESH_SHADER", false); - *ext = (struct vk_device_extension_table) { .KHR_8bit_storage = device->info.ver >= 8, .KHR_16bit_storage = device->info.ver >= 8, @@ -333,8 +330,6 @@ get_device_extensions(const struct anv_physical_device *device, .INTEL_shader_integer_functions2 = device->info.ver >= 8, .EXT_multi_draw = true, .NV_compute_shader_derivatives = true, - .NV_mesh_shader = device->info.has_mesh_shading && - nv_mesh_shading_enabled, .VALVE_mutable_descriptor_type = true, }; } @@ -1513,14 +1508,6 @@ void anv_GetPhysicalDeviceFeatures2( break; } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_FEATURES_NV: { - VkPhysicalDeviceMeshShaderFeaturesNV *features = - (VkPhysicalDeviceMeshShaderFeaturesNV *)ext; - features->taskShader = pdevice->vk.supported_extensions.NV_mesh_shader; - features->meshShader = pdevice->vk.supported_extensions.NV_mesh_shader; - break; - } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_VALVE: { VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *features = (VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *)ext; @@ -1932,10 +1919,6 @@ anv_get_physical_device_properties_1_1(struct anv_physical_device *pdevice, if (pdevice->compiler->scalar_stage[stage]) scalar_stages |= mesa_to_vk_shader_stage(stage); } - if (pdevice->vk.supported_extensions.NV_mesh_shader) { - scalar_stages |= VK_SHADER_STAGE_TASK_BIT_NV | - VK_SHADER_STAGE_MESH_BIT_NV; - } p->subgroupSupportedStages = scalar_stages; p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_VOTE_BIT | @@ -2104,9 +2087,7 @@ anv_get_physical_device_properties_1_3(struct anv_physical_device *pdevice, p->minSubgroupSize = 8; p->maxSubgroupSize = 32; p->maxComputeWorkgroupSubgroups = pdevice->info.max_cs_workgroup_threads; - p->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT | - VK_SHADER_STAGE_TASK_BIT_NV | - VK_SHADER_STAGE_MESH_BIT_NV; + p->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT; p->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE; p->maxPerStageDescriptorInlineUniformBlocks = @@ -2340,66 +2321,6 @@ void anv_GetPhysicalDeviceProperties2( break; } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_PROPERTIES_NV: { - VkPhysicalDeviceMeshShaderPropertiesNV *props = - (VkPhysicalDeviceMeshShaderPropertiesNV *)ext; - - /* Bounded by the maximum representable size in - * 3DSTATE_MESH_SHADER_BODY::SharedLocalMemorySize. Same for Task. - */ - const uint32_t max_slm_size = 64 * 1024; - - /* Bounded by the maximum representable size in - * 3DSTATE_MESH_SHADER_BODY::LocalXMaximum. Same for Task. - */ - const uint32_t max_workgroup_size = 1 << 10; - - /* Bounded by the maximum representable count in - * 3DSTATE_MESH_SHADER_BODY::MaximumPrimitiveCount. - */ - const uint32_t max_primitives = 1024; - - /* TODO(mesh): Multiview. */ - const uint32_t max_view_count = 1; - - props->maxDrawMeshTasksCount = UINT32_MAX; - - /* TODO(mesh): Implement workgroup Y and Z sizes larger than one by - * mapping them to/from the single value that HW provides us - * (currently used for X). - */ - - props->maxTaskWorkGroupInvocations = max_workgroup_size; - props->maxTaskWorkGroupSize[0] = max_workgroup_size; - props->maxTaskWorkGroupSize[1] = 1; - props->maxTaskWorkGroupSize[2] = 1; - props->maxTaskTotalMemorySize = max_slm_size; - props->maxTaskOutputCount = UINT16_MAX; - - props->maxMeshWorkGroupInvocations = max_workgroup_size; - props->maxMeshWorkGroupSize[0] = max_workgroup_size; - props->maxMeshWorkGroupSize[1] = 1; - props->maxMeshWorkGroupSize[2] = 1; - props->maxMeshTotalMemorySize = max_slm_size / max_view_count; - props->maxMeshOutputPrimitives = max_primitives / max_view_count; - props->maxMeshMultiviewViewCount = max_view_count; - - /* Depends on what indices can be represented with IndexFormat. For - * now we always use U32, so bound to the maximum unique vertices we - * need for the maximum primitives. - * - * TODO(mesh): Revisit this if we drop "U32" IndexFormat when adding - * support for others. - */ - props->maxMeshOutputVertices = 3 * props->maxMeshOutputPrimitives; - - - props->meshOutputPerVertexGranularity = 32; - props->meshOutputPerPrimitiveGranularity = 32; - - break; - } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: { VkPhysicalDevicePCIBusInfoPropertiesEXT *properties = (VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext; diff --git a/src/intel/vulkan_hasvk/anv_pipeline.c b/src/intel/vulkan_hasvk/anv_pipeline.c index 66dd6f37a96..f8de43afe04 100644 --- a/src/intel/vulkan_hasvk/anv_pipeline.c +++ b/src/intel/vulkan_hasvk/anv_pipeline.c @@ -88,7 +88,6 @@ anv_shader_stage_to_nir(struct anv_device *device, .int64 = pdevice->info.ver >= 8, .int64_atomics = pdevice->info.ver >= 9 && pdevice->use_softpin, .integer_functions2 = pdevice->info.ver >= 8, - .mesh_shading_nv = pdevice->vk.supported_extensions.NV_mesh_shader, .min_lod = true, .multiview = true, .physical_storage_buffer_address = pdevice->has_a64_buffer_access, @@ -398,26 +397,6 @@ pipeline_has_coarse_pixel(const struct anv_graphics_pipeline *pipeline, return true; } -static void -populate_task_prog_key(const struct anv_device *device, - bool robust_buffer_access, - struct brw_task_prog_key *key) -{ - memset(key, 0, sizeof(*key)); - - populate_base_prog_key(device, robust_buffer_access, &key->base); -} - -static void -populate_mesh_prog_key(const struct anv_device *device, - bool robust_buffer_access, - struct brw_mesh_prog_key *key) -{ - memset(key, 0, sizeof(*key)); - - populate_base_prog_key(device, robust_buffer_access, &key->base); -} - static void populate_wm_prog_key(const struct anv_graphics_pipeline *pipeline, bool robust_buffer_acccess, @@ -742,8 +721,7 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline, } } - if (gl_shader_stage_is_compute(nir->info.stage) || - gl_shader_stage_is_mesh(nir->info.stage)) + if (gl_shader_stage_is_compute(nir->info.stage)) NIR_PASS(_, nir, brw_nir_lower_cs_intrinsics); stage->nir = nir; @@ -954,70 +932,6 @@ anv_pipeline_compile_gs(const struct brw_compiler *compiler, gs_stage->code = brw_compile_gs(compiler, mem_ctx, ¶ms); } -static void -anv_pipeline_link_task(const struct brw_compiler *compiler, - struct anv_pipeline_stage *task_stage, - struct anv_pipeline_stage *next_stage) -{ - assert(next_stage); - assert(next_stage->stage == MESA_SHADER_MESH); - brw_nir_link_shaders(compiler, task_stage->nir, next_stage->nir); -} - -static void -anv_pipeline_compile_task(const struct brw_compiler *compiler, - void *mem_ctx, - struct anv_device *device, - struct anv_pipeline_stage *task_stage) -{ - task_stage->num_stats = 1; - - struct brw_compile_task_params params = { - .nir = task_stage->nir, - .key = &task_stage->key.task, - .prog_data = &task_stage->prog_data.task, - .stats = task_stage->stats, - .log_data = device, - }; - - task_stage->code = brw_compile_task(compiler, mem_ctx, ¶ms); -} - -static void -anv_pipeline_link_mesh(const struct brw_compiler *compiler, - struct anv_pipeline_stage *mesh_stage, - struct anv_pipeline_stage *next_stage) -{ - if (next_stage) { - brw_nir_link_shaders(compiler, mesh_stage->nir, next_stage->nir); - } -} - -static void -anv_pipeline_compile_mesh(const struct brw_compiler *compiler, - void *mem_ctx, - struct anv_device *device, - struct anv_pipeline_stage *mesh_stage, - struct anv_pipeline_stage *prev_stage) -{ - mesh_stage->num_stats = 1; - - struct brw_compile_mesh_params params = { - .nir = mesh_stage->nir, - .key = &mesh_stage->key.mesh, - .prog_data = &mesh_stage->prog_data.mesh, - .stats = mesh_stage->stats, - .log_data = device, - }; - - if (prev_stage) { - assert(prev_stage->stage == MESA_SHADER_TASK); - params.tue_map = &prev_stage->prog_data.task.map; - } - - mesh_stage->code = brw_compile_mesh(compiler, mem_ctx, ¶ms); -} - static void anv_pipeline_link_fs(const struct brw_compiler *compiler, struct anv_pipeline_stage *stage, @@ -1102,13 +1016,8 @@ anv_pipeline_compile_fs(const struct brw_compiler *compiler, .log_data = device, }; - if (prev_stage->stage == MESA_SHADER_MESH) { - params.mue_map = &prev_stage->prog_data.mesh.map; - /* TODO(mesh): Slots valid, do we even use/rely on it? */ - } else { - fs_stage->key.wm.input_slots_valid = - prev_stage->prog_data.vue.vue_map.slots_valid; - } + fs_stage->key.wm.input_slots_valid = + prev_stage->prog_data.vue.vue_map.slots_valid; fs_stage->code = brw_compile_fs(compiler, mem_ctx, ¶ms); @@ -1291,16 +1200,6 @@ anv_graphics_pipeline_init_keys(struct anv_graphics_pipeline *pipeline, &stages[s].key.wm); break; } - case MESA_SHADER_TASK: - populate_task_prog_key(device, - pipeline->base.device->robust_buffer_access, - &stages[s].key.task); - break; - case MESA_SHADER_MESH: - populate_mesh_prog_key(device, - pipeline->base.device->robust_buffer_access, - &stages[s].key.mesh); - break; default: unreachable("Invalid graphics shader stage"); } @@ -1309,8 +1208,7 @@ anv_graphics_pipeline_init_keys(struct anv_graphics_pipeline *pipeline, stages[s].feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT; } - assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT || - pipeline->active_stages & VK_SHADER_STAGE_MESH_BIT_NV); + assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT); } static bool @@ -1394,9 +1292,6 @@ static const gl_shader_stage graphics_shader_order[] = { MESA_SHADER_TESS_EVAL, MESA_SHADER_GEOMETRY, - MESA_SHADER_TASK, - MESA_SHADER_MESH, - MESA_SHADER_FRAGMENT, }; @@ -1509,12 +1404,6 @@ anv_graphics_pipeline_compile(struct anv_graphics_pipeline *pipeline, case MESA_SHADER_GEOMETRY: anv_pipeline_link_gs(compiler, &stages[s], next_stage); break; - case MESA_SHADER_TASK: - anv_pipeline_link_task(compiler, &stages[s], next_stage); - break; - case MESA_SHADER_MESH: - anv_pipeline_link_mesh(compiler, &stages[s], next_stage); - break; case MESA_SHADER_FRAGMENT: anv_pipeline_link_fs(compiler, &stages[s], state->rp); break; @@ -1584,8 +1473,7 @@ anv_graphics_pipeline_compile(struct anv_graphics_pipeline *pipeline, if (devinfo->has_coarse_pixel_primitive_and_cb && stages[MESA_SHADER_FRAGMENT].info && stages[MESA_SHADER_FRAGMENT].key.wm.coarse_pixel && - !stages[MESA_SHADER_FRAGMENT].nir->info.fs.uses_sample_shading && - stages[MESA_SHADER_MESH].info == NULL) { + !stages[MESA_SHADER_FRAGMENT].nir->info.fs.uses_sample_shading) { struct anv_pipeline_stage *last_psr = NULL; for (unsigned i = 0; i < ARRAY_SIZE(graphics_shader_order); i++) { @@ -1631,14 +1519,6 @@ anv_graphics_pipeline_compile(struct anv_graphics_pipeline *pipeline, anv_pipeline_compile_gs(compiler, stage_ctx, pipeline->base.device, &stages[s], prev_stage); break; - case MESA_SHADER_TASK: - anv_pipeline_compile_task(compiler, stage_ctx, pipeline->base.device, - &stages[s]); - break; - case MESA_SHADER_MESH: - anv_pipeline_compile_mesh(compiler, stage_ctx, pipeline->base.device, - &stages[s], prev_stage); - break; case MESA_SHADER_FRAGMENT: anv_pipeline_compile_fs(compiler, stage_ctx, pipeline->base.device, &stages[s], prev_stage); @@ -2008,9 +1888,6 @@ anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline, if (pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) pipeline->active_stages |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; - if (anv_pipeline_is_mesh(pipeline)) - assert(device->physical->vk.supported_extensions.NV_mesh_shader); - pipeline->dynamic_state.ms.sample_locations = &pipeline->sample_locations; vk_dynamic_graphics_state_fill(&pipeline->dynamic_state, state); @@ -2026,38 +1903,33 @@ anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline, anv_pipeline_setup_l3_config(&pipeline->base, false); - if (anv_pipeline_is_primitive(pipeline)) { - const uint64_t inputs_read = get_vs_prog_data(pipeline)->inputs_read; + const uint64_t inputs_read = get_vs_prog_data(pipeline)->inputs_read; - u_foreach_bit(a, state->vi->attributes_valid) { - if (inputs_read & BITFIELD64_BIT(VERT_ATTRIB_GENERIC0 + a)) - pipeline->vb_used |= BITFIELD64_BIT(state->vi->attributes[a].binding); - } - - u_foreach_bit(b, state->vi->bindings_valid) { - pipeline->vb[b].stride = state->vi->bindings[b].stride; - pipeline->vb[b].instanced = state->vi->bindings[b].input_rate == - VK_VERTEX_INPUT_RATE_INSTANCE; - pipeline->vb[b].instance_divisor = state->vi->bindings[b].divisor; - } - - /* Our implementation of VK_KHR_multiview uses instancing to draw the - * different views when primitive replication cannot be used. If the - * client asks for instancing, we need to multiply by the client's - * instance count at draw time and instance divisor in the vertex - * bindings by the number of views ensure that we repeat the client's - * per-instance data once for each view. - */ - const bool uses_primitive_replication = - anv_pipeline_get_last_vue_prog_data(pipeline)->vue_map.num_pos_slots > 1; - pipeline->instance_multiplier = 1; - if (pipeline->view_mask && !uses_primitive_replication) - pipeline->instance_multiplier = util_bitcount(pipeline->view_mask); - } else { - assert(anv_pipeline_is_mesh(pipeline)); - /* TODO(mesh): Mesh vs. Multiview with Instancing. */ + u_foreach_bit(a, state->vi->attributes_valid) { + if (inputs_read & BITFIELD64_BIT(VERT_ATTRIB_GENERIC0 + a)) + pipeline->vb_used |= BITFIELD64_BIT(state->vi->attributes[a].binding); } + u_foreach_bit(b, state->vi->bindings_valid) { + pipeline->vb[b].stride = state->vi->bindings[b].stride; + pipeline->vb[b].instanced = state->vi->bindings[b].input_rate == + VK_VERTEX_INPUT_RATE_INSTANCE; + pipeline->vb[b].instance_divisor = state->vi->bindings[b].divisor; + } + + /* Our implementation of VK_KHR_multiview uses instancing to draw the + * different views when primitive replication cannot be used. If the client + * asks for instancing, we need to multiply by the client's instance count + * at draw time and instance divisor in the vertex bindings by the number + * of views ensure that we repeat the client's per-instance data once for + * each view. + */ + const bool uses_primitive_replication = + anv_pipeline_get_last_vue_prog_data(pipeline)->vue_map.num_pos_slots > 1; + pipeline->instance_multiplier = 1; + if (pipeline->view_mask && !uses_primitive_replication) + pipeline->instance_multiplier = util_bitcount(pipeline->view_mask); + pipeline->negative_one_to_one = state->vp != NULL && state->vp->negative_one_to_one; diff --git a/src/intel/vulkan_hasvk/anv_private.h b/src/intel/vulkan_hasvk/anv_private.h index a66e57e0542..2d5aebbf91f 100644 --- a/src/intel/vulkan_hasvk/anv_private.h +++ b/src/intel/vulkan_hasvk/anv_private.h @@ -3131,12 +3131,6 @@ anv_pipeline_is_primitive(const struct anv_graphics_pipeline *pipeline) return anv_pipeline_has_stage(pipeline, MESA_SHADER_VERTEX); } -static inline bool -anv_pipeline_is_mesh(const struct anv_graphics_pipeline *pipeline) -{ - return anv_pipeline_has_stage(pipeline, MESA_SHADER_MESH); -} - static inline bool anv_cmd_buffer_all_color_write_masked(const struct anv_cmd_buffer *cmd_buffer) { @@ -3175,8 +3169,6 @@ ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tcs, MESA_SHADER_TESS_CTRL) ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tes, MESA_SHADER_TESS_EVAL) ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(gs, MESA_SHADER_GEOMETRY) ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(wm, MESA_SHADER_FRAGMENT) -ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(mesh, MESA_SHADER_MESH) -ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(task, MESA_SHADER_TASK) static inline const struct brw_cs_prog_data * get_cs_prog_data(const struct anv_compute_pipeline *pipeline) diff --git a/src/intel/vulkan_hasvk/genX_cmd_buffer.c b/src/intel/vulkan_hasvk/genX_cmd_buffer.c index 147d256cff5..e71f7075690 100644 --- a/src/intel/vulkan_hasvk/genX_cmd_buffer.c +++ b/src/intel/vulkan_hasvk/genX_cmd_buffer.c @@ -3404,64 +3404,6 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer, cmd_buffer->state.push_constants_dirty &= ~flushed; } -#if GFX_VERx10 >= 125 -static void -cmd_buffer_flush_mesh_inline_data(struct anv_cmd_buffer *cmd_buffer, - VkShaderStageFlags dirty_stages) -{ - struct anv_cmd_graphics_state *gfx_state = &cmd_buffer->state.gfx; - const struct anv_graphics_pipeline *pipeline = gfx_state->pipeline; - - if (dirty_stages & VK_SHADER_STAGE_TASK_BIT_NV && - anv_pipeline_has_stage(pipeline, MESA_SHADER_TASK)) { - - const struct anv_shader_bin *shader = pipeline->shaders[MESA_SHADER_TASK]; - const struct anv_pipeline_bind_map *bind_map = &shader->bind_map; - - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_TASK_SHADER_DATA), data) { - const struct anv_push_range *range = &bind_map->push_ranges[0]; - if (range->length > 0) { - struct anv_address buffer = - get_push_range_address(cmd_buffer, shader, range); - - uint64_t addr = anv_address_physical(buffer); - data.InlineData[0] = addr & 0xffffffff; - data.InlineData[1] = addr >> 32; - - memcpy(&data.InlineData[BRW_TASK_MESH_PUSH_CONSTANTS_START_DW], - cmd_buffer->state.gfx.base.push_constants.client_data, - BRW_TASK_MESH_PUSH_CONSTANTS_SIZE_DW * 4); - } - } - } - - if (dirty_stages & VK_SHADER_STAGE_MESH_BIT_NV && - anv_pipeline_has_stage(pipeline, MESA_SHADER_MESH)) { - - const struct anv_shader_bin *shader = pipeline->shaders[MESA_SHADER_MESH]; - const struct anv_pipeline_bind_map *bind_map = &shader->bind_map; - - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_MESH_SHADER_DATA), data) { - const struct anv_push_range *range = &bind_map->push_ranges[0]; - if (range->length > 0) { - struct anv_address buffer = - get_push_range_address(cmd_buffer, shader, range); - - uint64_t addr = anv_address_physical(buffer); - data.InlineData[0] = addr & 0xffffffff; - data.InlineData[1] = addr >> 32; - - memcpy(&data.InlineData[BRW_TASK_MESH_PUSH_CONSTANTS_START_DW], - cmd_buffer->state.gfx.base.push_constants.client_data, - BRW_TASK_MESH_PUSH_CONSTANTS_SIZE_DW * 4); - } - } - } - - cmd_buffer->state.push_constants_dirty &= ~dirty_stages; -} -#endif - static void cmd_buffer_emit_clip(struct anv_cmd_buffer *cmd_buffer) { @@ -3495,7 +3437,6 @@ cmd_buffer_emit_clip(struct anv_cmd_buffer *cmd_buffer) }; uint32_t dwords[GENX(3DSTATE_CLIP_length)]; - /* TODO(mesh): Multiview. */ struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; if (anv_pipeline_is_primitive(pipeline)) { const struct brw_vue_prog_data *last = @@ -3504,12 +3445,6 @@ cmd_buffer_emit_clip(struct anv_cmd_buffer *cmd_buffer) clip.MaximumVPIndex = dyn->vp.viewport_count > 0 ? dyn->vp.viewport_count - 1 : 0; } - } else if (anv_pipeline_is_mesh(pipeline)) { - const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline); - if (mesh_prog_data->map.start_dw[VARYING_SLOT_VIEWPORT] >= 0) { - clip.MaximumVPIndex = dyn->vp.viewport_count > 0 ? - dyn->vp.viewport_count - 1 : 0; - } } GENX(3DSTATE_CLIP_pack)(NULL, dwords, &clip); @@ -3999,11 +3934,6 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) dirty |= cmd_buffer->state.push_constants_dirty; cmd_buffer_flush_push_constants(cmd_buffer, dirty & VK_SHADER_STAGE_ALL_GRAPHICS); -#if GFX_VERx10 >= 125 - cmd_buffer_flush_mesh_inline_data( - cmd_buffer, dirty & (VK_SHADER_STAGE_TASK_BIT_NV | - VK_SHADER_STAGE_MESH_BIT_NV)); -#endif } if (dirty & VK_SHADER_STAGE_ALL_GRAPHICS) { @@ -5008,160 +4938,6 @@ void genX(CmdEndTransformFeedbackEXT)( cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_XFB_ENABLE; } -#if GFX_VERx10 >= 125 -void -genX(CmdDrawMeshTasksNV)( - VkCommandBuffer commandBuffer, - uint32_t taskCount, - uint32_t firstTask) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - - if (anv_batch_has_error(&cmd_buffer->batch)) - return; - - /* TODO(mesh): Check if this is not emitting more packets than we need. */ - genX(cmd_buffer_flush_state)(cmd_buffer); - - if (cmd_buffer->state.conditional_render_enabled) - genX(cmd_emit_conditional_render_predicate)(cmd_buffer); - - /* BSpec 54016 says: "The values passed for Starting ThreadGroup ID X - * and ThreadGroup Count X shall not cause TGIDs to exceed (2^32)-1." - */ - assert((int64_t)firstTask + taskCount - 1 <= UINT32_MAX); - - anv_batch_emit(&cmd_buffer->batch, GENX(3DMESH_1D), m) { - m.PredicateEnable = cmd_buffer->state.conditional_render_enabled; - m.ThreadGroupCountX = taskCount; - m.StartingThreadGroupIDX = firstTask; - } -} - -#define GFX125_3DMESH_TG_COUNT 0x26F0 -#define GFX125_3DMESH_STARTING_TGID 0x26F4 -#define GFX10_3DPRIM_XP(n) (0x2690 + (n) * 4) /* n = { 0, 1, 2 } */ - -static void -mesh_load_indirect_parameters(struct anv_cmd_buffer *cmd_buffer, - struct mi_builder *b, - struct anv_address addr, - bool emit_xp0, - uint32_t xp0) -{ - const size_t taskCountOff = offsetof(VkDrawMeshTasksIndirectCommandNV, taskCount); - const size_t firstTaskOff = offsetof(VkDrawMeshTasksIndirectCommandNV, firstTask); - - mi_store(b, mi_reg32(GFX125_3DMESH_TG_COUNT), - mi_mem32(anv_address_add(addr, taskCountOff))); - - mi_store(b, mi_reg32(GFX125_3DMESH_STARTING_TGID), - mi_mem32(anv_address_add(addr, firstTaskOff))); - - if (emit_xp0) - mi_store(b, mi_reg32(GFX10_3DPRIM_XP(0)), mi_imm(xp0)); -} - -static void -emit_indirect_3dmesh_1d(struct anv_batch *batch, - bool predicate_enable, - bool uses_drawid) -{ - uint32_t len = GENX(3DMESH_1D_length) + uses_drawid; - uint32_t *dw = anv_batch_emitn(batch, len, GENX(3DMESH_1D), - .PredicateEnable = predicate_enable, - .IndirectParameterEnable = true, - .ExtendedParameter0Present = uses_drawid); - if (uses_drawid) - dw[len - 1] = 0; -} - -void -genX(CmdDrawMeshTasksIndirectNV)( - VkCommandBuffer commandBuffer, - VkBuffer _buffer, - VkDeviceSize offset, - uint32_t drawCount, - uint32_t stride) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; - const struct brw_task_prog_data *task_prog_data = get_task_prog_data(pipeline); - const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline); - struct anv_cmd_state *cmd_state = &cmd_buffer->state; - - if (anv_batch_has_error(&cmd_buffer->batch)) - return; - - genX(cmd_buffer_flush_state)(cmd_buffer); - - if (cmd_state->conditional_render_enabled) - genX(cmd_emit_conditional_render_predicate)(cmd_buffer); - - bool uses_drawid = (task_prog_data && task_prog_data->uses_drawid) || - mesh_prog_data->uses_drawid; - struct mi_builder b; - mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch); - - for (uint32_t i = 0; i < drawCount; i++) { - struct anv_address draw = anv_address_add(buffer->address, offset); - - mesh_load_indirect_parameters(cmd_buffer, &b, draw, uses_drawid, i); - - emit_indirect_3dmesh_1d(&cmd_buffer->batch, - cmd_state->conditional_render_enabled, uses_drawid); - - offset += stride; - } -} - -void -genX(CmdDrawMeshTasksIndirectCountNV)( - VkCommandBuffer commandBuffer, - VkBuffer _buffer, - VkDeviceSize offset, - VkBuffer _countBuffer, - VkDeviceSize countBufferOffset, - uint32_t maxDrawCount, - uint32_t stride) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - ANV_FROM_HANDLE(anv_buffer, count_buffer, _countBuffer); - struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; - const struct brw_task_prog_data *task_prog_data = get_task_prog_data(pipeline); - const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline); - - if (anv_batch_has_error(&cmd_buffer->batch)) - return; - - genX(cmd_buffer_flush_state)(cmd_buffer); - - bool uses_drawid = (task_prog_data && task_prog_data->uses_drawid) || - mesh_prog_data->uses_drawid; - - struct mi_builder b; - mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch); - - struct mi_value max = - prepare_for_draw_count_predicate(cmd_buffer, &b, - count_buffer, countBufferOffset); - - for (uint32_t i = 0; i < maxDrawCount; i++) { - struct anv_address draw = anv_address_add(buffer->address, offset); - - emit_draw_count_predicate_cond(cmd_buffer, &b, i, max); - - mesh_load_indirect_parameters(cmd_buffer, &b, draw, uses_drawid, i); - - emit_indirect_3dmesh_1d(&cmd_buffer->batch, true, uses_drawid); - - offset += stride; - } -} -#endif /* GFX_VERx10 >= 125 */ - void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) { diff --git a/src/intel/vulkan_hasvk/genX_pipeline.c b/src/intel/vulkan_hasvk/genX_pipeline.c index 65b8e25f568..379051ca94d 100644 --- a/src/intel/vulkan_hasvk/genX_pipeline.c +++ b/src/intel/vulkan_hasvk/genX_pipeline.c @@ -309,71 +309,12 @@ genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch, urb.VSNumberofURBEntries = entries[i]; } } -#if GFX_VERx10 >= 125 - if (device->physical->vk.supported_extensions.NV_mesh_shader) { - anv_batch_emit(batch, GENX(3DSTATE_URB_ALLOC_MESH), zero); - anv_batch_emit(batch, GENX(3DSTATE_URB_ALLOC_TASK), zero); - } -#endif } -#if GFX_VERx10 >= 125 -static void -emit_urb_setup_mesh(struct anv_graphics_pipeline *pipeline, - enum intel_urb_deref_block_size *deref_block_size) -{ - const struct intel_device_info *devinfo = pipeline->base.device->info; - - const struct brw_task_prog_data *task_prog_data = - anv_pipeline_has_stage(pipeline, MESA_SHADER_TASK) ? - get_task_prog_data(pipeline) : NULL; - const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline); - - const struct intel_mesh_urb_allocation alloc = - intel_get_mesh_urb_config(devinfo, pipeline->base.l3_config, - task_prog_data ? task_prog_data->map.size_dw : 0, - mesh_prog_data->map.size_dw); - - /* Zero out the primitive pipeline URB allocations. */ - for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) { - anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_URB_VS), urb) { - urb._3DCommandSubOpcode += i; - } - } - - anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_URB_ALLOC_TASK), urb) { - if (task_prog_data) { - urb.TASKURBEntryAllocationSize = alloc.task_entry_size_64b - 1; - urb.TASKNumberofURBEntriesSlice0 = alloc.task_entries; - urb.TASKNumberofURBEntriesSliceN = alloc.task_entries; - urb.TASKURBStartingAddressSlice0 = alloc.task_starting_address_8kb; - urb.TASKURBStartingAddressSliceN = alloc.task_starting_address_8kb; - } - } - - anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_URB_ALLOC_MESH), urb) { - urb.MESHURBEntryAllocationSize = alloc.mesh_entry_size_64b - 1; - urb.MESHNumberofURBEntriesSlice0 = alloc.mesh_entries; - urb.MESHNumberofURBEntriesSliceN = alloc.mesh_entries; - urb.MESHURBStartingAddressSlice0 = alloc.mesh_starting_address_8kb; - urb.MESHURBStartingAddressSliceN = alloc.mesh_starting_address_8kb; - } - - *deref_block_size = alloc.deref_block_size; -} -#endif - static void emit_urb_setup(struct anv_graphics_pipeline *pipeline, enum intel_urb_deref_block_size *deref_block_size) { -#if GFX_VERx10 >= 125 - if (anv_pipeline_is_mesh(pipeline)) { - emit_urb_setup_mesh(pipeline, deref_block_size); - return; - } -#endif - unsigned entry_size[4]; for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { const struct brw_vue_prog_data *prog_data = @@ -398,10 +339,6 @@ emit_3dstate_sbe(struct anv_graphics_pipeline *pipeline) anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SBE), sbe); #if GFX_VER >= 8 anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SBE_SWIZ), sbe); -#endif -#if GFX_VERx10 >= 125 - if (anv_pipeline_is_mesh(pipeline)) - anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SBE_MESH), sbe_mesh); #endif return; } @@ -431,123 +368,75 @@ emit_3dstate_sbe(struct anv_graphics_pipeline *pipeline) # define swiz sbe #endif - if (anv_pipeline_is_primitive(pipeline)) { - const struct brw_vue_map *fs_input_map = - &anv_pipeline_get_last_vue_prog_data(pipeline)->vue_map; + const struct brw_vue_map *fs_input_map = + &anv_pipeline_get_last_vue_prog_data(pipeline)->vue_map; - int first_slot = brw_compute_first_urb_slot_required(wm_prog_data->inputs, - fs_input_map); - assert(first_slot % 2 == 0); - unsigned urb_entry_read_offset = first_slot / 2; - int max_source_attr = 0; - for (uint8_t idx = 0; idx < wm_prog_data->urb_setup_attribs_count; idx++) { - uint8_t attr = wm_prog_data->urb_setup_attribs[idx]; - int input_index = wm_prog_data->urb_setup[attr]; + int first_slot = brw_compute_first_urb_slot_required(wm_prog_data->inputs, + fs_input_map); + assert(first_slot % 2 == 0); + unsigned urb_entry_read_offset = first_slot / 2; + int max_source_attr = 0; + for (uint8_t idx = 0; idx < wm_prog_data->urb_setup_attribs_count; idx++) { + uint8_t attr = wm_prog_data->urb_setup_attribs[idx]; + int input_index = wm_prog_data->urb_setup[attr]; - assert(0 <= input_index); + assert(0 <= input_index); - /* gl_Viewport, gl_Layer and FragmentShadingRateKHR are stored in the - * VUE header - */ - if (attr == VARYING_SLOT_VIEWPORT || - attr == VARYING_SLOT_LAYER || - attr == VARYING_SLOT_PRIMITIVE_SHADING_RATE) { - continue; - } - - if (attr == VARYING_SLOT_PNTC) { - sbe.PointSpriteTextureCoordinateEnable = 1 << input_index; - continue; - } - - const int slot = fs_input_map->varying_to_slot[attr]; - - if (slot == -1) { - /* This attribute does not exist in the VUE--that means that the - * vertex shader did not write to it. It could be that it's a - * regular varying read by the fragment shader but not written by - * the vertex shader or it's gl_PrimitiveID. In the first case the - * value is undefined, in the second it needs to be - * gl_PrimitiveID. - */ - swiz.Attribute[input_index].ConstantSource = PRIM_ID; - swiz.Attribute[input_index].ComponentOverrideX = true; - swiz.Attribute[input_index].ComponentOverrideY = true; - swiz.Attribute[input_index].ComponentOverrideZ = true; - swiz.Attribute[input_index].ComponentOverrideW = true; - continue; - } - - /* We have to subtract two slots to account for the URB entry output - * read offset in the VS and GS stages. - */ - const int source_attr = slot - 2 * urb_entry_read_offset; - assert(source_attr >= 0 && source_attr < 32); - max_source_attr = MAX2(max_source_attr, source_attr); - /* The hardware can only do overrides on 16 overrides at a time, and the - * other up to 16 have to be lined up so that the input index = the - * output index. We'll need to do some tweaking to make sure that's the - * case. - */ - if (input_index < 16) - swiz.Attribute[input_index].SourceAttribute = source_attr; - else - assert(source_attr == input_index); + /* gl_Viewport, gl_Layer and FragmentShadingRateKHR are stored in the + * VUE header + */ + if (attr == VARYING_SLOT_VIEWPORT || + attr == VARYING_SLOT_LAYER || + attr == VARYING_SLOT_PRIMITIVE_SHADING_RATE) { + continue; } - sbe.VertexURBEntryReadOffset = urb_entry_read_offset; - sbe.VertexURBEntryReadLength = DIV_ROUND_UP(max_source_attr + 1, 2); -#if GFX_VER >= 8 - sbe.ForceVertexURBEntryReadOffset = true; - sbe.ForceVertexURBEntryReadLength = true; -#endif - } else { - assert(anv_pipeline_is_mesh(pipeline)); -#if GFX_VERx10 >= 125 - const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline); - anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SBE_MESH), sbe_mesh) { - const struct brw_mue_map *mue = &mesh_prog_data->map; - - assert(mue->per_vertex_header_size_dw % 8 == 0); - sbe_mesh.PerVertexURBEntryOutputReadOffset = mue->per_vertex_header_size_dw / 8; - sbe_mesh.PerVertexURBEntryOutputReadLength = DIV_ROUND_UP(mue->per_vertex_data_size_dw, 8); - - /* Clip distance array is passed in the per-vertex header so that - * it can be consumed by the HW. If user wants to read it in the FS, - * adjust the offset and length to cover it. Conveniently it is at - * the end of the per-vertex header, right before per-vertex - * attributes. - * - * Note that FS attribute reading must be aware that the clip - * distances have fixed position. - */ - if (mue->per_vertex_header_size_dw > 8 && - (wm_prog_data->urb_setup[VARYING_SLOT_CLIP_DIST0] >= 0 || - wm_prog_data->urb_setup[VARYING_SLOT_CLIP_DIST1] >= 0)) { - sbe_mesh.PerVertexURBEntryOutputReadOffset -= 1; - sbe_mesh.PerVertexURBEntryOutputReadLength += 1; - } - - assert(mue->per_primitive_header_size_dw % 8 == 0); - sbe_mesh.PerPrimitiveURBEntryOutputReadOffset = mue->per_primitive_header_size_dw / 8; - sbe_mesh.PerPrimitiveURBEntryOutputReadLength = DIV_ROUND_UP(mue->per_primitive_data_size_dw, 8); - - /* Just like with clip distances, if Primitive Shading Rate, - * Viewport Index or Layer is read back in the FS, adjust - * the offset and length to cover the Primitive Header, where - * PSR, Viewport Index & Layer are stored. - */ - if (wm_prog_data->urb_setup[VARYING_SLOT_VIEWPORT] >= 0 || - wm_prog_data->urb_setup[VARYING_SLOT_PRIMITIVE_SHADING_RATE] >= 0 || - wm_prog_data->urb_setup[VARYING_SLOT_LAYER] >= 0) { - assert(sbe_mesh.PerPrimitiveURBEntryOutputReadOffset > 0); - sbe_mesh.PerPrimitiveURBEntryOutputReadOffset -= 1; - sbe_mesh.PerPrimitiveURBEntryOutputReadLength += 1; - } + if (attr == VARYING_SLOT_PNTC) { + sbe.PointSpriteTextureCoordinateEnable = 1 << input_index; + continue; } -#endif + + const int slot = fs_input_map->varying_to_slot[attr]; + + if (slot == -1) { + /* This attribute does not exist in the VUE--that means that the + * vertex shader did not write to it. It could be that it's a regular + * varying read by the fragment shader but not written by the vertex + * shader or it's gl_PrimitiveID. In the first case the value is + * undefined, in the second it needs to be gl_PrimitiveID. + */ + swiz.Attribute[input_index].ConstantSource = PRIM_ID; + swiz.Attribute[input_index].ComponentOverrideX = true; + swiz.Attribute[input_index].ComponentOverrideY = true; + swiz.Attribute[input_index].ComponentOverrideZ = true; + swiz.Attribute[input_index].ComponentOverrideW = true; + continue; + } + + /* We have to subtract two slots to account for the URB entry output + * read offset in the VS and GS stages. + */ + const int source_attr = slot - 2 * urb_entry_read_offset; + assert(source_attr >= 0 && source_attr < 32); + max_source_attr = MAX2(max_source_attr, source_attr); + /* The hardware can only do overrides on 16 overrides at a time, and the + * other up to 16 have to be lined up so that the input index = the + * output index. We'll need to do some tweaking to make sure that's the + * case. + */ + if (input_index < 16) + swiz.Attribute[input_index].SourceAttribute = source_attr; + else + assert(source_attr == input_index); } + sbe.VertexURBEntryReadOffset = urb_entry_read_offset; + sbe.VertexURBEntryReadLength = DIV_ROUND_UP(max_source_attr + 1, 2); +#if GFX_VER >= 8 + sbe.ForceVertexURBEntryReadOffset = true; + sbe.ForceVertexURBEntryReadLength = true; +#endif + uint32_t *dw = anv_batch_emit_dwords(&pipeline->base.batch, GENX(3DSTATE_SBE_length)); if (!dw) @@ -571,18 +460,7 @@ VkPolygonMode genX(raster_polygon_mode)(struct anv_graphics_pipeline *pipeline, VkPrimitiveTopology primitive_topology) { - if (anv_pipeline_is_mesh(pipeline)) { - switch (get_mesh_prog_data(pipeline)->primitive_type) { - case SHADER_PRIM_POINTS: - return VK_POLYGON_MODE_POINT; - case SHADER_PRIM_LINES: - return VK_POLYGON_MODE_LINE; - case SHADER_PRIM_TRIANGLES: - return pipeline->polygon_mode; - default: - unreachable("invalid primitive type for mesh"); - } - } else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) { + if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) { switch (get_gs_prog_data(pipeline)->output_topology) { case _3DPRIM_POINTLIST: return VK_POLYGON_MODE_POINT; @@ -779,15 +657,9 @@ emit_rs_state(struct anv_graphics_pipeline *pipeline, #endif bool point_from_shader; - if (anv_pipeline_is_primitive(pipeline)) { - const struct brw_vue_prog_data *last_vue_prog_data = - anv_pipeline_get_last_vue_prog_data(pipeline); - point_from_shader = last_vue_prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ; - } else { - assert(anv_pipeline_is_mesh(pipeline)); - const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline); - point_from_shader = mesh_prog_data->map.start_dw[VARYING_SLOT_PSIZ] >= 0; - } + const struct brw_vue_prog_data *last_vue_prog_data = + anv_pipeline_get_last_vue_prog_data(pipeline); + point_from_shader = last_vue_prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ; if (point_from_shader) { sf.PointWidthSource = Vertex; @@ -1198,44 +1070,35 @@ emit_3dstate_clip(struct anv_graphics_pipeline *pipeline, clip.MinimumPointWidth = 0.125; clip.MaximumPointWidth = 255.875; - /* TODO(mesh): Multiview. */ - if (anv_pipeline_is_primitive(pipeline)) { - const struct brw_vue_prog_data *last = - anv_pipeline_get_last_vue_prog_data(pipeline); + const struct brw_vue_prog_data *last = + anv_pipeline_get_last_vue_prog_data(pipeline); - /* From the Vulkan 1.0.45 spec: - * - * "If the last active vertex processing stage shader entry point's - * interface does not include a variable decorated with - * ViewportIndex, then the first viewport is used." - */ - if (vp && (last->vue_map.slots_valid & VARYING_BIT_VIEWPORT)) { - clip.MaximumVPIndex = vp->viewport_count > 0 ? - vp->viewport_count - 1 : 0; - } else { - clip.MaximumVPIndex = 0; - } + /* From the Vulkan 1.0.45 spec: + * + * "If the last active vertex processing stage shader entry point's + * interface does not include a variable decorated with ViewportIndex, + * then the first viewport is used." + */ + if (vp && (last->vue_map.slots_valid & VARYING_BIT_VIEWPORT)) { + clip.MaximumVPIndex = vp->viewport_count > 0 ? + vp->viewport_count - 1 : 0; + } else { + clip.MaximumVPIndex = 0; + } - /* From the Vulkan 1.0.45 spec: - * - * "If the last active vertex processing stage shader entry point's - * interface does not include a variable decorated with Layer, then - * the first layer is used." - */ - clip.ForceZeroRTAIndexEnable = - !(last->vue_map.slots_valid & VARYING_BIT_LAYER); + /* From the Vulkan 1.0.45 spec: + * + * "If the last active vertex processing stage shader entry point's + * interface does not include a variable decorated with Layer, then the + * first layer is used." + */ + clip.ForceZeroRTAIndexEnable = + !(last->vue_map.slots_valid & VARYING_BIT_LAYER); #if GFX_VER == 7 - clip.UserClipDistanceClipTestEnableBitmask = last->clip_distance_mask; - clip.UserClipDistanceCullTestEnableBitmask = last->cull_distance_mask; + clip.UserClipDistanceClipTestEnableBitmask = last->clip_distance_mask; + clip.UserClipDistanceCullTestEnableBitmask = last->cull_distance_mask; #endif - } else if (anv_pipeline_is_mesh(pipeline)) { - const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline); - if (vp && vp->viewport_count > 0 && - mesh_prog_data->map.start_dw[VARYING_SLOT_VIEWPORT] >= 0) { - clip.MaximumVPIndex = vp->viewport_count - 1; - } - } #if GFX_VER == 7 clip.FrontWinding = genX(vk_to_intel_front_face)[rs->front_face]; @@ -1247,17 +1110,6 @@ emit_3dstate_clip(struct anv_graphics_pipeline *pipeline, #endif GENX(3DSTATE_CLIP_pack)(NULL, pipeline->gfx7.clip, &clip); - -#if GFX_VERx10 >= 125 - if (anv_pipeline_is_mesh(pipeline)) { - const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline); - anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_CLIP_MESH), clip_mesh) { - clip_mesh.PrimitiveHeaderEnable = mesh_prog_data->map.per_primitive_header_size_dw > 0; - clip_mesh.UserClipDistanceClipTestEnableBitmask = mesh_prog_data->clip_distance_mask; - clip_mesh.UserClipDistanceCullTestEnableBitmask = mesh_prog_data->cull_distance_mask; - } - } -#endif } static void @@ -2168,141 +2020,6 @@ emit_3dstate_primitive_replication(struct anv_graphics_pipeline *pipeline, } #endif -#if GFX_VERx10 >= 125 -static void -emit_task_state(struct anv_graphics_pipeline *pipeline) -{ - assert(anv_pipeline_is_mesh(pipeline)); - - if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_TASK)) { - anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TASK_CONTROL), zero); - return; - } - - const struct anv_shader_bin *task_bin = pipeline->shaders[MESA_SHADER_TASK]; - - anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TASK_CONTROL), tc) { - tc.TaskShaderEnable = true; - tc.ScratchSpaceBuffer = - get_scratch_surf(&pipeline->base, MESA_SHADER_TASK, task_bin); - } - - const struct intel_device_info *devinfo = pipeline->base.device->info; - const struct brw_task_prog_data *task_prog_data = get_task_prog_data(pipeline); - const struct brw_cs_dispatch_info task_dispatch = - brw_cs_get_dispatch_info(devinfo, &task_prog_data->base, NULL); - - anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TASK_SHADER), task) { - task.KernelStartPointer = task_bin->kernel.offset; - task.SIMDSize = task_dispatch.simd_size / 16; - task.MessageSIMD = task.SIMDSize; - task.NumberofThreadsinGPGPUThreadGroup = task_dispatch.threads; - task.ExecutionMask = task_dispatch.right_mask; - task.LocalXMaximum = task_dispatch.group_size - 1; - task.EmitLocalIDX = true; - - task.NumberofBarriers = task_prog_data->base.uses_barrier; - task.SharedLocalMemorySize = - encode_slm_size(GFX_VER, task_prog_data->base.base.total_shared); - - /* - * 3DSTATE_TASK_SHADER_DATA.InlineData[0:1] will be used for an address - * of a buffer with push constants and descriptor set table and - * InlineData[2:7] will be used for first few push constants. - */ - task.EmitInlineParameter = true; - - task.XP0Required = task_prog_data->uses_drawid; - } - - /* Recommended values from "Task and Mesh Distribution Programming". */ - anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TASK_REDISTRIB), redistrib) { - redistrib.LocalBOTAccumulatorThreshold = MULTIPLIER_1; - redistrib.SmallTaskThreshold = 1; /* 2^N */ - redistrib.TargetMeshBatchSize = devinfo->num_slices > 2 ? 3 : 5; /* 2^N */ - redistrib.TaskRedistributionLevel = TASKREDISTRIB_BOM; - - /* TODO: We have an unknown issue with Task Payload when task redistribution - * is enabled. Disable it for now. - * See https://gitlab.freedesktop.org/mesa/mesa/-/issues/7141 - */ - redistrib.TaskRedistributionMode = TASKREDISTRIB_OFF; - } -} - -static void -emit_mesh_state(struct anv_graphics_pipeline *pipeline) -{ - assert(anv_pipeline_is_mesh(pipeline)); - - const struct anv_shader_bin *mesh_bin = pipeline->shaders[MESA_SHADER_MESH]; - - anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_MESH_CONTROL), mc) { - mc.MeshShaderEnable = true; - mc.ScratchSpaceBuffer = - get_scratch_surf(&pipeline->base, MESA_SHADER_MESH, mesh_bin); - - /* TODO(mesh): MaximumNumberofThreadGroups. */ - } - - const struct intel_device_info *devinfo = pipeline->base.device->info; - const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline); - const struct brw_cs_dispatch_info mesh_dispatch = - brw_cs_get_dispatch_info(devinfo, &mesh_prog_data->base, NULL); - - const unsigned output_topology = - mesh_prog_data->primitive_type == SHADER_PRIM_POINTS ? OUTPUT_POINT : - mesh_prog_data->primitive_type == SHADER_PRIM_LINES ? OUTPUT_LINE : - OUTPUT_TRI; - - uint32_t index_format; - switch (mesh_prog_data->index_format) { - case BRW_INDEX_FORMAT_U32: - index_format = INDEX_U32; - break; - default: - unreachable("invalid index format"); - } - - anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_MESH_SHADER), mesh) { - mesh.KernelStartPointer = mesh_bin->kernel.offset; - mesh.SIMDSize = mesh_dispatch.simd_size / 16; - mesh.MessageSIMD = mesh.SIMDSize; - mesh.NumberofThreadsinGPGPUThreadGroup = mesh_dispatch.threads; - mesh.ExecutionMask = mesh_dispatch.right_mask; - mesh.LocalXMaximum = mesh_dispatch.group_size - 1; - mesh.EmitLocalIDX = true; - - mesh.MaximumPrimitiveCount = mesh_prog_data->map.max_primitives - 1; - mesh.OutputTopology = output_topology; - mesh.PerVertexDataPitch = mesh_prog_data->map.per_vertex_pitch_dw / 8; - mesh.PerPrimitiveDataPresent = mesh_prog_data->map.per_primitive_pitch_dw > 0; - mesh.PerPrimitiveDataPitch = mesh_prog_data->map.per_primitive_pitch_dw / 8; - mesh.IndexFormat = index_format; - - mesh.NumberofBarriers = mesh_prog_data->base.uses_barrier; - mesh.SharedLocalMemorySize = - encode_slm_size(GFX_VER, mesh_prog_data->base.base.total_shared); - - /* - * 3DSTATE_MESH_SHADER_DATA.InlineData[0:1] will be used for an address - * of a buffer with push constants and descriptor set table and - * InlineData[2:7] will be used for first few push constants. - */ - mesh.EmitInlineParameter = true; - - mesh.XP0Required = mesh_prog_data->uses_drawid; - } - - /* Recommended values from "Task and Mesh Distribution Programming". */ - anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_MESH_DISTRIB), distrib) { - distrib.DistributionMode = MESH_RR_FREE; - distrib.TaskDistributionBatchSize = devinfo->num_slices > 2 ? 8 : 9; /* 2^N thread groups */ - distrib.MeshDistributionBatchSize = devinfo->num_slices > 2 ? 5 : 3; /* 2^N thread groups */ - } -} -#endif - void genX(graphics_pipeline_emit)(struct anv_graphics_pipeline *pipeline, const struct vk_graphics_pipeline_state *state) @@ -2342,38 +2059,15 @@ genX(graphics_pipeline_emit)(struct anv_graphics_pipeline *pipeline, gfx7_emit_vs_workaround_flush(brw); #endif - if (anv_pipeline_is_primitive(pipeline)) { - emit_vertex_input(pipeline, state->vi); + emit_vertex_input(pipeline, state->vi); - emit_3dstate_vs(pipeline); - emit_3dstate_hs_te_ds(pipeline, state->ts); - emit_3dstate_gs(pipeline); + emit_3dstate_vs(pipeline); + emit_3dstate_hs_te_ds(pipeline, state->ts); + emit_3dstate_gs(pipeline); - emit_3dstate_vf_statistics(pipeline); + emit_3dstate_vf_statistics(pipeline); - emit_3dstate_streamout(pipeline, state->rs); - -#if GFX_VERx10 >= 125 - const struct anv_device *device = pipeline->base.device; - /* Disable Mesh. */ - if (device->physical->vk.supported_extensions.NV_mesh_shader) { - anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_MESH_CONTROL), zero); - anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TASK_CONTROL), zero); - } -#endif - } else { - assert(anv_pipeline_is_mesh(pipeline)); - - /* BSpec 46303 forbids both 3DSTATE_MESH_CONTROL.MeshShaderEnable - * and 3DSTATE_STREAMOUT.SOFunctionEnable to be 1. - */ - anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_STREAMOUT), so) {} - -#if GFX_VERx10 >= 125 - emit_task_state(pipeline); - emit_mesh_state(pipeline); -#endif - } + emit_3dstate_streamout(pipeline, state->rs); emit_3dstate_sbe(pipeline); emit_3dstate_wm(pipeline, state->ia, state->rs,