panvk: indexed draw support (vkCmdBindIndexBuffer / vkCmdDrawIndexed)

Records the bound index buffer in the command buffer state, routes CmdDraw
and CmdDrawIndexed through a shared panvk_cmd_draw() helper, feeds
first_vertex/base_vertex/base_instance to the
PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS sysval, and enables EXT_index_type_uint8.

Review fixes folded into this version of the patch:
 - panvk_cmd_draw(): the offsets dirty check compared ib.base_vertex against
   draw->first_instance; it now compares ib.base_instance, so a draw that
   only changes firstInstance marks the sysval dirty.
 - panvk_index_minmax_search(): the index buffer pointers are asserted
   before they are dereferenced instead of after.

diff --git a/src/panfrost/ci/deqp-panfrost-g52-vk.toml b/src/panfrost/ci/deqp-panfrost-g52-vk.toml
index fbafaf8daef..c9a918157ef 100644
--- a/src/panfrost/ci/deqp-panfrost-g52-vk.toml
+++ b/src/panfrost/ci/deqp-panfrost-g52-vk.toml
@@ -10,6 +10,10 @@ include = [
     "dEQP-VK.glsl.builtin.function.integer.usubborrow.*",
     "dEQP-VK.glsl.builtin.precision.frexp.*",
     "dEQP-VK.glsl.builtin.precision.ldexp.*",
+    "dEQP-VK.glsl.conversions.*",
+    "dEQP-VK.glsl.derivate.*.constant.*",
+    "dEQP-VK.glsl.derivate.*.linear.*",
+    "dEQP-VK.glsl.derivate.*.uniform_*",
     "dEQP-VK.image.load_store.with_format.*",
     "dEQP-VK.pipeline.sampler.view_type.*.format.r*.address_modes.all_mode_clamp_to_border*",
     "dEQP-VK.ssbo.layout.single_basic_type.*",
diff --git a/src/panfrost/vulkan/panvk_cmd_buffer.c b/src/panfrost/vulkan/panvk_cmd_buffer.c
index 16ed06f90c2..ab420b0e2de 100644
--- a/src/panfrost/vulkan/panvk_cmd_buffer.c
+++ b/src/panfrost/vulkan/panvk_cmd_buffer.c
@@ -63,7 +63,27 @@ panvk_CmdBindIndexBuffer(VkCommandBuffer commandBuffer,
                          VkDeviceSize offset,
                          VkIndexType indexType)
 {
-   panvk_stub();
+   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
+   VK_FROM_HANDLE(panvk_buffer, buf, buffer);
+
+   cmdbuf->state.ib.buffer = buf;
+   cmdbuf->state.ib.offset = offset;
+   switch (indexType) {
+   case VK_INDEX_TYPE_UINT16:
+      cmdbuf->state.ib.index_size = 16;
+      break;
+   case VK_INDEX_TYPE_UINT32:
+      cmdbuf->state.ib.index_size = 32;
+      break;
+   case VK_INDEX_TYPE_NONE_KHR:
+      cmdbuf->state.ib.index_size = 0;
+      break;
+   case VK_INDEX_TYPE_UINT8_EXT:
+      cmdbuf->state.ib.index_size = 8;
+      break;
+   default:
+      unreachable("Invalid index type\n");
+   }
 }
 
 void
@@ -530,17 +550,6 @@ panvk_cmd_open_batch(struct panvk_cmd_buffer *cmdbuf)
    return cmdbuf->state.batch;
 }
 
-void
-panvk_CmdDrawIndexed(VkCommandBuffer commandBuffer,
-                     uint32_t indexCount,
-                     uint32_t instanceCount,
-                     uint32_t firstIndex,
-                     int32_t vertexOffset,
-                     uint32_t firstInstance)
-{
-   panvk_stub();
-}
-
 void
 panvk_CmdDrawIndirect(VkCommandBuffer commandBuffer,
                       VkBuffer _buffer,
diff --git a/src/panfrost/vulkan/panvk_device.c b/src/panfrost/vulkan/panvk_device.c
index 499ab2a7029..d1ce47e7cc0 100644
--- a/src/panfrost/vulkan/panvk_device.c
+++ b/src/panfrost/vulkan/panvk_device.c
@@ -151,6 +151,7 @@ panvk_get_device_extensions(const struct panvk_physical_device *device,
       .KHR_swapchain = true,
 #endif
       .EXT_custom_border_color = true,
+      .EXT_index_type_uint8 = true,
    };
 }
 
diff --git a/src/panfrost/vulkan/panvk_private.h b/src/panfrost/vulkan/panvk_private.h
index 74686d0d2b0..d4dbd74c935 100644
--- a/src/panfrost/vulkan/panvk_private.h
+++ b/src/panfrost/vulkan/panvk_private.h
@@ -497,7 +497,8 @@ enum panvk_dynamic_state_bits {
    PANVK_DYNAMIC_STENCIL_REFERENCE = 1 << 8,
    PANVK_DYNAMIC_DISCARD_RECTANGLE = 1 << 9,
    PANVK_DYNAMIC_SSBO = 1 << 10,
-   PANVK_DYNAMIC_ALL = (1 << 11) - 1,
+   PANVK_DYNAMIC_VERTEX_INSTANCE_OFFSETS = 1 << 11,
+   PANVK_DYNAMIC_ALL = (1 << 12) - 1,
 };
 
 struct panvk_descriptor_state {
@@ -523,8 +524,10 @@ struct panvk_descriptor_state {
 struct panvk_draw_info {
    unsigned first_index;
    unsigned index_count;
+   unsigned index_size;
    unsigned first_vertex;
    unsigned vertex_count;
+   unsigned vertex_range;
    unsigned padded_vertex_count;
    unsigned first_instance;
    unsigned instance_count;
@@ -542,6 +545,7 @@ struct panvk_draw_info {
    mali_ptr samplers;
    mali_ptr ubos;
    mali_ptr position;
+   mali_ptr indices;
    union {
       mali_ptr psiz;
       float line_width;
@@ -626,10 +630,8 @@ struct panvk_cmd_state {
    struct {
       struct panvk_buffer *buffer;
       uint64_t offset;
-      uint32_t type;
-      uint32_t max_index_count;
       uint8_t index_size;
-      uint64_t index_va;
+      uint32_t first_vertex, base_vertex, base_instance;
    } ib;
 
    struct {
diff --git a/src/panfrost/vulkan/panvk_vX_cmd_buffer.c b/src/panfrost/vulkan/panvk_vX_cmd_buffer.c
index a5a14cb2d28..cb1598626fe 100644
--- a/src/panfrost/vulkan/panvk_vX_cmd_buffer.c
+++ b/src/panfrost/vulkan/panvk_vX_cmd_buffer.c
@@ -339,8 +339,9 @@ panvk_cmd_upload_sysval(struct panvk_cmd_buffer *cmdbuf,
       panvk_sysval_upload_viewport_offset(&cmdbuf->state.viewport, data);
       break;
    case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS:
-      /* TODO: support base_{vertex,instance} */
-      data->u32[0] = data->u32[1] = data->u32[2] = 0;
+      data->u32[0] = cmdbuf->state.ib.first_vertex;
+      data->u32[1] = cmdbuf->state.ib.base_vertex;
+      data->u32[2] = cmdbuf->state.ib.base_instance;
       break;
    case PAN_SYSVAL_BLEND_CONSTANTS:
       memcpy(data->f32, cmdbuf->state.blend.constants, sizeof(data->f32));
@@ -639,7 +640,7 @@ panvk_draw_prepare_varyings(struct panvk_cmd_buffer *cmdbuf,
    struct panvk_varyings_info *varyings = &cmdbuf->state.varyings;
 
    panvk_varyings_alloc(varyings, &cmdbuf->varying_pool.base,
-                        draw->vertex_count);
+                        draw->padded_vertex_count * draw->instance_count);
 
    unsigned buf_count = panvk_varyings_buf_count(varyings);
    struct panfrost_ptr bufs =
@@ -756,7 +757,7 @@ panvk_draw_prepare_vs_attribs(struct panvk_cmd_buffer *cmdbuf,
    unsigned num_imgs =
       pipeline->img_access_mask & BITFIELD_BIT(MESA_SHADER_VERTEX) ?
       pipeline->layout->num_imgs : 0;
-   unsigned attrib_count = pipeline->attribs.buf_count + num_imgs;
+   unsigned attrib_count = pipeline->attribs.attrib_count + num_imgs;
 
    if (desc_state->vs_attribs || !attrib_count)
       return;
@@ -768,7 +769,7 @@ panvk_draw_prepare_vs_attribs(struct panvk_cmd_buffer *cmdbuf,
       return;
    }
 
-   unsigned attrib_buf_count = attrib_count * 2;
+   unsigned attrib_buf_count = pipeline->attribs.buf_count * 2;
    struct panfrost_ptr bufs =
       pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base,
                                 attrib_buf_count + (PAN_ARCH >= 6 ? 1 : 0),
@@ -880,18 +881,10 @@ panvk_draw_prepare_tiler_job(struct panvk_cmd_buffer *cmdbuf,
    panvk_per_arch(emit_tiler_job)(pipeline, draw, ptr.cpu);
 }
 
-void
-panvk_per_arch(CmdDraw)(VkCommandBuffer commandBuffer,
-                        uint32_t vertexCount,
-                        uint32_t instanceCount,
-                        uint32_t firstVertex,
-                        uint32_t firstInstance)
+static void
+panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf,
+               struct panvk_draw_info *draw)
 {
-   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
-
-   if (instanceCount == 0 || vertexCount == 0)
-      return;
-
    struct panvk_batch *batch = cmdbuf->state.batch;
    struct panvk_cmd_bind_point_state *bind_point_state =
       panvk_cmd_get_bind_point_state(cmdbuf, GRAPHICS);
@@ -911,6 +904,20 @@ panvk_per_arch(CmdDraw)(VkCommandBuffer commandBuffer,
 
    panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);
    panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true);
+
+   /* Flag PANVK_DYNAMIC_VERTEX_INSTANCE_OFFSETS only when one of the three
+    * cached values (first_vertex, base_vertex, base_instance) changes.
+    */
+   unsigned base_vertex = draw->index_size ? draw->vertex_offset : 0;
+   if (cmdbuf->state.ib.first_vertex != draw->offset_start ||
+       cmdbuf->state.ib.base_vertex != base_vertex ||
+       cmdbuf->state.ib.base_instance != draw->first_instance) {
+      cmdbuf->state.ib.base_vertex = base_vertex;
+      cmdbuf->state.ib.base_instance = draw->first_instance;
+      cmdbuf->state.ib.first_vertex = draw->offset_start;
+      cmdbuf->state.dirty |= PANVK_DYNAMIC_VERTEX_INSTANCE_OFFSETS;
+   }
+
    panvk_cmd_prepare_ubos(cmdbuf, bind_point_state);
    panvk_cmd_prepare_textures(cmdbuf, bind_point_state);
    panvk_cmd_prepare_samplers(cmdbuf, bind_point_state);
@@ -919,48 +926,154 @@ panvk_per_arch(CmdDraw)(VkCommandBuffer commandBuffer,
    struct panvk_descriptor_state *desc_state =
       panvk_cmd_get_desc_state(cmdbuf, GRAPHICS);
 
-   struct panvk_draw_info draw = {
-      .first_vertex = firstVertex,
-      .vertex_count = vertexCount,
-      .first_instance = firstInstance,
-      .instance_count = instanceCount,
-      .padded_vertex_count = panfrost_padded_vertex_count(vertexCount),
-      .offset_start = firstVertex,
-      .tls = batch->tls.gpu,
-      .fb = batch->fb.desc.gpu,
-      .ubos = desc_state->ubos,
-      .textures = desc_state->textures,
-      .samplers = desc_state->samplers,
-   };
+   draw->tls = batch->tls.gpu;
+   draw->fb = batch->fb.desc.gpu;
+   draw->ubos = desc_state->ubos;
+   draw->textures = desc_state->textures;
+   draw->samplers = desc_state->samplers;
 
-   STATIC_ASSERT(sizeof(draw.invocation) >= sizeof(struct mali_invocation_packed));
-   panfrost_pack_work_groups_compute((struct mali_invocation_packed *)&draw.invocation,
-                                     1, vertexCount, instanceCount, 1, 1, 1, true, false);
-   panvk_draw_prepare_fs_rsd(cmdbuf, &draw);
-   panvk_draw_prepare_varyings(cmdbuf, &draw);
-   panvk_draw_prepare_attributes(cmdbuf, &draw);
-   panvk_draw_prepare_viewport(cmdbuf, &draw);
-   panvk_draw_prepare_tiler_context(cmdbuf, &draw);
-   panvk_draw_prepare_vertex_job(cmdbuf, &draw);
-   panvk_draw_prepare_tiler_job(cmdbuf, &draw);
+   STATIC_ASSERT(sizeof(draw->invocation) >= sizeof(struct mali_invocation_packed));
+   panfrost_pack_work_groups_compute((struct mali_invocation_packed *)&draw->invocation,
+                                     1, draw->vertex_range, draw->instance_count,
+                                     1, 1, 1, true, false);
+
+   panvk_draw_prepare_fs_rsd(cmdbuf, draw);
+   panvk_draw_prepare_varyings(cmdbuf, draw);
+   panvk_draw_prepare_attributes(cmdbuf, draw);
+   panvk_draw_prepare_viewport(cmdbuf, draw);
+   panvk_draw_prepare_tiler_context(cmdbuf, draw);
+   panvk_draw_prepare_vertex_job(cmdbuf, draw);
+   panvk_draw_prepare_tiler_job(cmdbuf, draw);
    batch->tlsinfo.tls.size = MAX2(pipeline->tls_size, batch->tlsinfo.tls.size);
    assert(!pipeline->wls_size);
 
    unsigned vjob_id =
       panfrost_add_job(&cmdbuf->desc_pool.base, &batch->scoreboard,
                        MALI_JOB_TYPE_VERTEX, false, false, 0, 0,
-                       &draw.jobs.vertex, false);
+                       &draw->jobs.vertex, false);
 
    if (pipeline->fs.required) {
       panfrost_add_job(&cmdbuf->desc_pool.base, &batch->scoreboard,
                        MALI_JOB_TYPE_TILER, false, false, vjob_id, 0,
-                       &draw.jobs.tiler, false);
+                       &draw->jobs.tiler, false);
    }
 
    /* Clear the dirty flags all at once */
    desc_state->dirty = cmdbuf->state.dirty = 0;
 }
 
+void
+panvk_per_arch(CmdDraw)(VkCommandBuffer commandBuffer,
+                        uint32_t vertexCount,
+                        uint32_t instanceCount,
+                        uint32_t firstVertex,
+                        uint32_t firstInstance)
+{
+   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
+
+   if (instanceCount == 0 || vertexCount == 0)
+      return;
+
+   struct panvk_draw_info draw = {
+      .first_vertex = firstVertex,
+      .vertex_count = vertexCount,
+      .vertex_range = vertexCount,
+      .first_instance = firstInstance,
+      .instance_count = instanceCount,
+      .padded_vertex_count = instanceCount > 1 ?
+                             panfrost_padded_vertex_count(vertexCount) :
+                             vertexCount,
+      .offset_start = firstVertex,
+   };
+
+   panvk_cmd_draw(cmdbuf, &draw);
+}
+
+/* Scan `count` indices starting at index `start` of the currently bound index
+ * buffer through its CPU mapping and return the smallest and largest index
+ * values in *min and *max.
+ */
+static void
+panvk_index_minmax_search(struct panvk_cmd_buffer *cmdbuf,
+                          uint32_t start, uint32_t count,
+                          uint32_t *min, uint32_t *max)
+{
+   assert(cmdbuf->state.ib.buffer);
+   assert(cmdbuf->state.ib.buffer->bo);
+   assert(cmdbuf->state.ib.buffer->bo->ptr.cpu);
+
+   void *ptr = cmdbuf->state.ib.buffer->bo->ptr.cpu +
+               cmdbuf->state.ib.buffer->bo_offset +
+               cmdbuf->state.ib.offset;
+
+   fprintf(stderr, "WARNING: Crawling index buffers from the CPU isn't valid in Vulkan\n");
+
+   *max = 0;
+
+   /* TODO: Use panfrost_minmax_cache */
+   /* TODO: Read full cacheline of data to mitigate the uncached
+    * mapping slowness.
+    */
+   switch (cmdbuf->state.ib.index_size) {
+#define MINMAX_SEARCH_CASE(sz) \
+   case sz: { \
+      uint ## sz ## _t *indices = ptr; \
+      *min = UINT ## sz ## _MAX; \
+      for (uint32_t i = 0; i < count; i++) { \
+         *min = MIN2(indices[i + start], *min); \
+         *max = MAX2(indices[i + start], *max); \
+      } \
+      break; \
+   }
+   MINMAX_SEARCH_CASE(32)
+   MINMAX_SEARCH_CASE(16)
+   MINMAX_SEARCH_CASE(8)
+#undef MINMAX_SEARCH_CASE
+   default:
+      unreachable("Invalid index size");
+   }
+}
+
+void
+panvk_per_arch(CmdDrawIndexed)(VkCommandBuffer commandBuffer,
+                               uint32_t indexCount,
+                               uint32_t instanceCount,
+                               uint32_t firstIndex,
+                               int32_t vertexOffset,
+                               uint32_t firstInstance)
+{
+   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
+   uint32_t min_vertex, max_vertex;
+
+   if (instanceCount == 0 || indexCount == 0)
+      return;
+
+   panvk_index_minmax_search(cmdbuf, firstIndex, indexCount,
+                             &min_vertex, &max_vertex);
+
+   unsigned vertex_range = max_vertex - min_vertex + 1;
+   struct panvk_draw_info draw = {
+      .index_size = cmdbuf->state.ib.index_size,
+      .first_index = firstIndex,
+      .index_count = indexCount,
+      .vertex_offset = vertexOffset,
+      .first_instance = firstInstance,
+      .instance_count = instanceCount,
+      .vertex_range = vertex_range,
+      .vertex_count = indexCount + abs(vertexOffset),
+      .padded_vertex_count = instanceCount > 1 ?
+                             panfrost_padded_vertex_count(vertex_range) :
+                             vertex_range,
+      .offset_start = min_vertex + vertexOffset,
+      .indices = cmdbuf->state.ib.buffer->bo->ptr.gpu +
+                 cmdbuf->state.ib.buffer->bo_offset +
+                 cmdbuf->state.ib.offset +
+                 (firstIndex * (cmdbuf->state.ib.index_size / 8)),
+   };
+
+   panvk_cmd_draw(cmdbuf, &draw);
+}
+
 VkResult
 panvk_per_arch(EndCommandBuffer)(VkCommandBuffer commandBuffer)
 {
diff --git a/src/panfrost/vulkan/panvk_vX_cs.c b/src/panfrost/vulkan/panvk_vX_cs.c
index 3bc37ac9a1a..5db6ccd311f 100644
--- a/src/panfrost/vulkan/panvk_vX_cs.c
+++ b/src/panfrost/vulkan/panvk_vX_cs.c
@@ -350,7 +350,7 @@ panvk_emit_attrib(const struct panvk_device *dev,
    pan_pack(attrib, ATTRIBUTE, cfg) {
       cfg.buffer_index = buf_idx * 2;
       cfg.offset = attribs->attrib[idx].offset +
-                   (bufs[cfg.buffer_index].address & 63);
+                   (bufs[buf_idx].address & 63);
 
       if (buf_info->per_instance)
          cfg.offset += draw->first_instance * buf_info->stride;
@@ -513,8 +513,22 @@ panvk_emit_tiler_primitive(const struct panvk_pipeline *pipeline,
       if (pipeline->ia.primitive_restart)
          cfg.primitive_restart = MALI_PRIMITIVE_RESTART_IMPLICIT;
       cfg.job_task_split = 6;
-      /* TODO: indexed draws */
-      cfg.index_count = draw->vertex_count;
+
+      if (draw->index_size) {
+         cfg.index_count = draw->index_count;
+         cfg.indices = draw->indices;
+         cfg.base_vertex_offset = draw->vertex_offset - draw->offset_start;
+
+         switch (draw->index_size) {
+         case 32: cfg.index_type = MALI_INDEX_TYPE_UINT32; break;
+         case 16: cfg.index_type = MALI_INDEX_TYPE_UINT16; break;
+         case 8: cfg.index_type = MALI_INDEX_TYPE_UINT8; break;
+         default: unreachable("Invalid index size");
+         }
+      } else {
+         cfg.index_count = draw->vertex_count;
+         cfg.index_type = MALI_INDEX_TYPE_NONE;
+      }
    }
 }
 
diff --git a/src/panfrost/vulkan/panvk_vX_pipeline.c b/src/panfrost/vulkan/panvk_vX_pipeline.c
index 70daa5d5a8f..32b64f3de58 100644
--- a/src/panfrost/vulkan/panvk_vX_pipeline.c
+++ b/src/panfrost/vulkan/panvk_vX_pipeline.c
@@ -244,6 +244,9 @@ panvk_pipeline_builder_alloc_static_state_bo(struct panvk_pipeline_builder *buil
       case PAN_SYSVAL_SSBO:
          pipeline->sysvals[i].dirty_mask |= PANVK_DYNAMIC_SSBO;
          break;
+      case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS:
+         pipeline->sysvals[i].dirty_mask |= PANVK_DYNAMIC_VERTEX_INSTANCE_OFFSETS;
+         break;
       default:
          break;
       }