diff --git a/.pick_status.json b/.pick_status.json index c833019b7bf..e655ab76aa4 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -32089,7 +32089,7 @@ "description": "radv: simplify the NGG vs legacy pipelinestat query path", "nominated": false, "nomination_type": null, - "resolution": 4, + "resolution": 1, "main_sha": null, "because_sha": null }, diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index d0de76332e6..0ddac8bd72c 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -43,6 +43,13 @@ /* TODO: Add support for mesh/task queries on GFX11 */ static const unsigned pipeline_statistics_indices[] = {7, 6, 3, 4, 5, 2, 1, 0, 8, 9, 10}; +static unsigned +radv_get_pipelinestat_query_offset(VkQueryPipelineStatisticFlagBits query) +{ + uint32_t idx = ffs(query) - 1; + return pipeline_statistics_indices[idx] * 8; +} + static unsigned radv_get_pipelinestat_query_size(struct radv_device *device) { @@ -285,25 +292,14 @@ build_pipeline_statistics_query_shader(struct radv_device *device) nir_ssa_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 4); nir_ssa_def *stats_mask = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 8), .range = 12); nir_ssa_def *avail_offset = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16); - nir_ssa_def *uses_gds = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 16), .range = 20); nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0); nir_ssa_def *src_buf = radv_meta_load_descriptor(&b, 0, 1); nir_ssa_def *global_id = get_global_ids(&b, 1); - nir_variable *input_stride = nir_local_variable_create(b.impl, glsl_int_type(), "input_stride"); - nir_push_if(&b, nir_ine(&b, uses_gds, nir_imm_int(&b, 0))); - { - nir_store_var(&b, input_stride, nir_imm_int(&b, pipelinestat_block_size * 2 + 8 * 2), 0x1); - } - nir_push_else(&b, NULL); - { - nir_store_var(&b, input_stride, nir_imm_int(&b, pipelinestat_block_size * 2), 0x1); - } - nir_pop_if(&b, NULL); - - nir_ssa_def *input_base = nir_imul(&b, nir_load_var(&b, input_stride), global_id); + nir_ssa_def *input_stride = nir_imm_int(&b, pipelinestat_block_size * 2); + nir_ssa_def *input_base = nir_imul(&b, input_stride, global_id); nir_ssa_def *output_stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 4), .range = 8); nir_ssa_def *output_base = nir_imul(&b, output_stride, global_id); @@ -334,25 +330,6 @@ build_pipeline_statistics_query_shader(struct radv_device *device) nir_store_var(&b, result, nir_isub(&b, end, start), 0x1); - nir_push_if(&b, nir_iand(&b, nir_i2b(&b, uses_gds), - nir_ieq(&b, nir_imm_int(&b, 1u << i), - nir_imm_int(&b, VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT)))); - { - /* Compute the GDS result if needed. */ - nir_ssa_def *gds_start_offset = - nir_iadd(&b, input_base, nir_imm_int(&b, pipelinestat_block_size * 2)); - nir_ssa_def *gds_start = nir_load_ssbo(&b, 1, 64, src_buf, gds_start_offset); - - nir_ssa_def *gds_end_offset = - nir_iadd(&b, input_base, nir_imm_int(&b, pipelinestat_block_size * 2 + 8)); - nir_ssa_def *gds_end = nir_load_ssbo(&b, 1, 64, src_buf, gds_end_offset); - - nir_ssa_def *ngg_gds_result = nir_isub(&b, gds_end, gds_start); - - nir_store_var(&b, result, nir_iadd(&b, nir_load_var(&b, result), ngg_gds_result), 0x1); - } - nir_pop_if(&b, NULL); - /* Store result */ nir_push_if(&b, result_is_64bit); @@ -1139,12 +1116,6 @@ radv_create_query_pool(struct radv_device *device, const VkQueryPoolCreateInfo * break; case VK_QUERY_TYPE_PIPELINE_STATISTICS: pool->stride = radv_get_pipelinestat_query_size(device) * 2; - if (pool->uses_gds) { - /* When the query pool needs GDS (for counting the number of primitives generated by a - * geometry shader with NGG), allocate 2x64-bit values for begin/end. - */ - pool->stride += 8 * 2; - } break; case VK_QUERY_TYPE_TIMESTAMP: case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR: @@ -1316,7 +1287,6 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first unsigned pipelinestat_block_size = radv_get_pipelinestat_query_size(device); const uint32_t *avail_ptr = (const uint32_t *)(pool->ptr + pool->availability_offset + 4 * query); - uint64_t ngg_gds_result = 0; do { available = p_atomic_read(avail_ptr); @@ -1325,14 +1295,6 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT)) result = VK_NOT_READY; - if (pool->uses_gds) { - /* Compute the result that was copied from GDS. */ - const uint64_t *gds_start = (uint64_t *)(src + pipelinestat_block_size * 2); - const uint64_t *gds_stop = (uint64_t *)(src + pipelinestat_block_size * 2 + 8); - - ngg_gds_result = gds_stop[0] - gds_start[0]; - } - const uint64_t *start = (uint64_t *)src; const uint64_t *stop = (uint64_t *)(src + pipelinestat_block_size); if (flags & VK_QUERY_RESULT_64_BIT) { @@ -1341,13 +1303,7 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first for (int i = 0; i < ARRAY_SIZE(pipeline_statistics_indices); ++i) { if (pool->pipeline_stats_mask & (1u << i)) { if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT)) { - *dst = stop[pipeline_statistics_indices[i]] - - start[pipeline_statistics_indices[i]]; - - if (pool->uses_gds && - (1u << i) == VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT) { - *dst += ngg_gds_result; - } + *dst = stop[pipeline_statistics_indices[i]] - start[pipeline_statistics_indices[i]]; } dst++; } @@ -1359,13 +1315,7 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first for (int i = 0; i < ARRAY_SIZE(pipeline_statistics_indices); ++i) { if (pool->pipeline_stats_mask & (1u << i)) { if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT)) { - *dst = stop[pipeline_statistics_indices[i]] - - start[pipeline_statistics_indices[i]]; - - if (pool->uses_gds && - (1u << i) == VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT) { - *dst += ngg_gds_result; - } + *dst = stop[pipeline_statistics_indices[i]] - start[pipeline_statistics_indices[i]]; } dst++; } @@ -1588,11 +1538,10 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, avail_va, 1, 0xffffffff); } } - radv_query_shader( - cmd_buffer, &cmd_buffer->device->meta_state.query.pipeline_statistics_query_pipeline, - pool->bo, dst_buffer->bo, firstQuery * pool->stride, dst_buffer->offset + dstOffset, - pool->stride, stride, dst_size, queryCount, flags, pool->pipeline_stats_mask, - pool->availability_offset + 4 * firstQuery, pool->uses_gds); + radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.pipeline_statistics_query_pipeline, pool->bo, + dst_buffer->bo, firstQuery * pool->stride, dst_buffer->offset + dstOffset, pool->stride, stride, + dst_size, queryCount, flags, pool->pipeline_stats_mask, + pool->availability_offset + 4 * firstQuery, false); break; case VK_QUERY_TYPE_TIMESTAMP: case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR: @@ -1838,8 +1787,6 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo radeon_emit(cs, va >> 32); break; case VK_QUERY_TYPE_PIPELINE_STATISTICS: { - unsigned pipelinestat_block_size = radv_get_pipelinestat_query_size(cmd_buffer->device); - radeon_check_space(cmd_buffer->device->ws, cs, 4); ++cmd_buffer->state.active_pipeline_queries; @@ -1854,7 +1801,10 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo radeon_emit(cs, va >> 32); if (pool->uses_gds) { - va += pipelinestat_block_size * 2; + uint32_t gs_prim_offset = + radv_get_pipelinestat_query_offset(VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT); + + va += gs_prim_offset; /* pipeline statistics counter for all streams */ gfx10_copy_gds_query(cmd_buffer, RADV_NGG_QUERY_PIPELINE_STAT_OFFSET, va); @@ -1996,7 +1946,10 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool, cmd_buffer->gfx9_eop_bug_va); if (pool->uses_gds) { - va += pipelinestat_block_size + 8; + uint32_t gs_prim_offset = + radv_get_pipelinestat_query_offset(VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT); + + va += gs_prim_offset; /* pipeline statistics counter for all streams */ gfx10_copy_gds_query(cmd_buffer, RADV_NGG_QUERY_PIPELINE_STAT_OFFSET, va);