diff --git a/src/broadcom/vulkan/v3dv_cmd_buffer.c b/src/broadcom/vulkan/v3dv_cmd_buffer.c
index f8090181604..c207c532926 100644
--- a/src/broadcom/vulkan/v3dv_cmd_buffer.c
+++ b/src/broadcom/vulkan/v3dv_cmd_buffer.c
@@ -161,7 +161,7 @@ v3dv_job_destroy(struct v3dv_job *job)
 
    list_del(&job->list_link);
 
-   if (job->type == V3DV_JOB_TYPE_GPU) {
+   if (job->type == V3DV_JOB_TYPE_GPU_CL) {
       v3dv_cl_destroy(&job->bcl);
       v3dv_cl_destroy(&job->rcl);
       v3dv_cl_destroy(&job->indirect);
@@ -553,7 +553,7 @@ v3dv_job_init(struct v3dv_job *job,
    job->device = device;
    job->cmd_buffer = cmd_buffer;
 
-   if (type == V3DV_JOB_TYPE_GPU) {
+   if (type == V3DV_JOB_TYPE_GPU_CL) {
       job->bos =
          _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
       job->bo_count = 0;
@@ -623,7 +623,7 @@ v3dv_cmd_buffer_start_job(struct v3dv_cmd_buffer *cmd_buffer,
       return NULL;
    }
 
-   v3dv_job_init(job, V3DV_JOB_TYPE_GPU, cmd_buffer->device,
+   v3dv_job_init(job, V3DV_JOB_TYPE_GPU_CL, cmd_buffer->device,
                  cmd_buffer, subpass_idx);
 
    return job;
@@ -3504,6 +3504,19 @@ v3dv_cmd_buffer_copy_query_results(struct v3dv_cmd_buffer *cmd_buffer,
    list_addtail(&job->list_link, &cmd_buffer->submit_jobs);
 }
 
+void
+v3dv_cmd_buffer_add_tfu_job(struct v3dv_cmd_buffer *cmd_buffer,
+                            struct drm_v3d_submit_tfu *tfu)
+{
+   struct v3dv_device *device = cmd_buffer->device;
+   struct v3dv_job *job = vk_zalloc(&device->alloc,
+                                    sizeof(struct v3dv_job), 8,
+                                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   v3dv_job_init(job, V3DV_JOB_TYPE_GPU_TFU, device, cmd_buffer, -1);
+   job->tfu = *tfu;
+   list_addtail(&job->list_link, &cmd_buffer->submit_jobs);
+}
+
 void
 v3dv_CmdSetEvent(VkCommandBuffer commandBuffer,
                  VkEvent event,
@@ -3535,15 +3548,3 @@ v3dv_CmdWaitEvents(VkCommandBuffer commandBuffer,
 {
    assert(!"vkCmdWaitEvents not implemented yet");
 }
-
-void
-v3dv_CmdBlitImage(VkCommandBuffer commandBuffer,
-                  VkImage srcImage,
-                  VkImageLayout srcImageLayout,
-                  VkImage dstImage,
-                  VkImageLayout dstImageLayout,
-                  uint32_t regionCount,
-                  const VkImageBlit* pRegions, VkFilter filter)
-{
-   assert(!"vkCmdBlitImage not implemented yet");
-}
diff --git a/src/broadcom/vulkan/v3dv_formats.c b/src/broadcom/vulkan/v3dv_formats.c
index d577792cfe0..1197c667665 100644
--- a/src/broadcom/vulkan/v3dv_formats.c
+++ b/src/broadcom/vulkan/v3dv_formats.c
@@ -319,6 +319,41 @@ v3dv_get_tex_return_size(const struct v3dv_format *vf,
    return vf->return_size;
 }
 
+bool
+v3dv_tfu_supports_tex_format(const struct v3d_device_info *devinfo,
+                             uint32_t tex_format)
+{
+   assert(devinfo->ver >= 41);
+
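+   /* This should be the set of texture data formats the TFU engine can
+    * write. Blits on any other format (e.g. depth/stencil or compressed
+    * formats) cannot use the TFU path and need the (not yet implemented)
+    * fallback.
+    */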
+   switch (tex_format) {
+   case TEXTURE_DATA_FORMAT_R8:
+   case TEXTURE_DATA_FORMAT_R8_SNORM:
+   case TEXTURE_DATA_FORMAT_RG8:
+   case TEXTURE_DATA_FORMAT_RG8_SNORM:
+   case TEXTURE_DATA_FORMAT_RGBA8:
+   case TEXTURE_DATA_FORMAT_RGBA8_SNORM:
+   case TEXTURE_DATA_FORMAT_RGB565:
+   case TEXTURE_DATA_FORMAT_RGBA4:
+   case TEXTURE_DATA_FORMAT_RGB5_A1:
+   case TEXTURE_DATA_FORMAT_RGB10_A2:
+   case TEXTURE_DATA_FORMAT_R16:
+   case TEXTURE_DATA_FORMAT_R16_SNORM:
+   case TEXTURE_DATA_FORMAT_RG16:
+   case TEXTURE_DATA_FORMAT_RG16_SNORM:
+   case TEXTURE_DATA_FORMAT_RGBA16:
+   case TEXTURE_DATA_FORMAT_RGBA16_SNORM:
+   case TEXTURE_DATA_FORMAT_R16F:
+   case TEXTURE_DATA_FORMAT_RG16F:
+   case TEXTURE_DATA_FORMAT_RGBA16F:
+   case TEXTURE_DATA_FORMAT_R11F_G11F_B10F:
+   case TEXTURE_DATA_FORMAT_R4:
+      return true;
+   default:
+      return false;
+   }
+}
+
+
 static bool
 format_supports_blending(const struct v3dv_format *format)
 {
diff --git a/src/broadcom/vulkan/v3dv_meta_copy.c b/src/broadcom/vulkan/v3dv_meta_copy.c
index 723498f938a..1b4403e31e7 100644
--- a/src/broadcom/vulkan/v3dv_meta_copy.c
+++ b/src/broadcom/vulkan/v3dv_meta_copy.c
@@ -1680,3 +1680,223 @@ v3dv_CmdCopyBufferToImage(VkCommandBuffer commandBuffer,
       }
    }
 }
+
+/* Disable level 0 write, just write following mipmaps */
+#define V3D_TFU_IOA_DIMTW (1 << 0)
+#define V3D_TFU_IOA_FORMAT_SHIFT 3
+#define V3D_TFU_IOA_FORMAT_LINEARTILE 3
+#define V3D_TFU_IOA_FORMAT_UBLINEAR_1_COLUMN 4
+#define V3D_TFU_IOA_FORMAT_UBLINEAR_2_COLUMN 5
+#define V3D_TFU_IOA_FORMAT_UIF_NO_XOR 6
+#define V3D_TFU_IOA_FORMAT_UIF_XOR 7
+
+#define V3D_TFU_ICFG_NUMMM_SHIFT 5
+#define V3D_TFU_ICFG_TTYPE_SHIFT 9
+
+#define V3D_TFU_ICFG_OPAD_SHIFT 22
+
+#define V3D_TFU_ICFG_FORMAT_SHIFT 18
+#define V3D_TFU_ICFG_FORMAT_RASTER 0
+#define V3D_TFU_ICFG_FORMAT_SAND_128 1
+#define V3D_TFU_ICFG_FORMAT_SAND_256 2
+#define V3D_TFU_ICFG_FORMAT_LINEARTILE 11
+#define V3D_TFU_ICFG_FORMAT_UBLINEAR_1_COLUMN 12
+#define V3D_TFU_ICFG_FORMAT_UBLINEAR_2_COLUMN 13
+#define V3D_TFU_ICFG_FORMAT_UIF_NO_XOR 14
+#define V3D_TFU_ICFG_FORMAT_UIF_XOR 15
+
+static void
+emit_tfu_job(struct v3dv_cmd_buffer *cmd_buffer,
+             struct v3dv_image *dst,
+             uint32_t dst_mip_level,
+             uint32_t dst_layer,
+             struct v3dv_image *src,
+             uint32_t src_mip_level,
+             uint32_t src_layer,
+             uint32_t width,
+             uint32_t height)
+{
+   /* Blit jobs can only happen outside a render pass */
+   assert(cmd_buffer->state.pass == NULL);
+   assert(cmd_buffer->state.job == NULL);
+
+   const struct v3d_resource_slice *src_slice = &src->slices[src_mip_level];
+   const struct v3d_resource_slice *dst_slice = &dst->slices[dst_mip_level];
+
+   assert(dst->mem && dst->mem->bo);
+   const struct v3dv_bo *dst_bo = dst->mem->bo;
+
+   assert(src->mem && src->mem->bo);
+   const struct v3dv_bo *src_bo = src->mem->bo;
+
+   struct drm_v3d_submit_tfu tfu = {
+      .ios = (height << 16) | width,
+      .bo_handles = {
+         dst_bo->handle,
+         src != dst ? src_bo->handle : 0
+      },
+   };
+
+   const uint32_t src_offset =
+      src_bo->offset + v3dv_layer_offset(src, src_mip_level, src_layer);
+   tfu.iia |= src_offset;
+
+   uint32_t icfg;
+   if (src_slice->tiling == VC5_TILING_RASTER) {
+      icfg = V3D_TFU_ICFG_FORMAT_RASTER;
+   } else {
+      icfg = V3D_TFU_ICFG_FORMAT_LINEARTILE +
+             (src_slice->tiling - VC5_TILING_LINEARTILE);
+   }
+   tfu.icfg |= icfg << V3D_TFU_ICFG_FORMAT_SHIFT;
+
+   const uint32_t dst_offset =
+      dst_bo->offset + v3dv_layer_offset(dst, dst_mip_level, dst_layer);
+   tfu.ioa |= dst_offset;
+
+   tfu.ioa |= (V3D_TFU_IOA_FORMAT_LINEARTILE +
+               (dst_slice->tiling - VC5_TILING_LINEARTILE)) <<
+                V3D_TFU_IOA_FORMAT_SHIFT;
+   tfu.icfg |= dst->format->tex_type << V3D_TFU_ICFG_TTYPE_SHIFT;
+
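+   /* IIS holds the input image stride: in pixels (stride / cpp) for
+    * raster images, and in UIF blocks (a UIF block being two utiles
+    * stacked vertically, hence the 2 * v3d_utile_height() term) for
+    * UIF images.
+    */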
+   switch (src_slice->tiling) {
+   case VC5_TILING_UIF_NO_XOR:
+   case VC5_TILING_UIF_XOR:
+      tfu.iis |= src_slice->padded_height / (2 * v3d_utile_height(src->cpp));
+      break;
+   case VC5_TILING_RASTER:
+      tfu.iis |= src_slice->stride / src->cpp;
+      break;
+   default:
+      break;
+   }
+
+   /* If we're writing level 0 (!IOA_DIMTW), then we need to supply the
+    * OPAD field for the destination (how many extra UIF blocks beyond
+    * those necessary to cover the height).
+    */
+   if (dst_slice->tiling == VC5_TILING_UIF_NO_XOR ||
+       dst_slice->tiling == VC5_TILING_UIF_XOR) {
+      uint32_t uif_block_h = 2 * v3d_utile_height(dst->cpp);
+      uint32_t implicit_padded_height = align(height, uif_block_h);
+      uint32_t icfg =
+         (dst_slice->padded_height - implicit_padded_height) / uif_block_h;
+      tfu.icfg |= icfg << V3D_TFU_ICFG_OPAD_SHIFT;
+   }
+
+   v3dv_cmd_buffer_add_tfu_job(cmd_buffer, &tfu);
+}
+
+static bool
+blit_tfu(struct v3dv_cmd_buffer *cmd_buffer,
+         struct v3dv_image *dst,
+         struct v3dv_image *src,
+         const VkImageBlit *region,
+         VkFilter filter)
+{
+   /* FIXME? The v3d driver seems to ignore filtering completely! */
+   if (filter != VK_FILTER_NEAREST)
+      return false;
+
+   /* Format must match */
+   if (src->vk_format != dst->vk_format)
+      return false;
+
+   VkFormat vk_format = dst->vk_format;
+   const struct v3dv_format *format = dst->format;
+
+   /* Format must be supported for texturing */
+   if (!v3dv_tfu_supports_tex_format(&cmd_buffer->device->devinfo,
+                                     format->tex_type)) {
+      return false;
+   }
+
+   /* Only color formats */
+   if (vk_format_is_depth_or_stencil(vk_format))
+      return false;
+
+#if 0
+   /* FIXME: Only 2D images? */
+   if (dst->type != VK_IMAGE_TYPE_2D || src->type != VK_IMAGE_TYPE_2D)
+      return false;
+#endif
+
+   /* Destination can't be raster format */
+   const uint32_t dst_mip_level = region->dstSubresource.mipLevel;
+   if (dst->slices[dst_mip_level].tiling == VC5_TILING_RASTER)
+      return false;
+
+   /* Source region must start at (0,0) */
+   if (region->srcOffsets[0].x != 0 || region->srcOffsets[0].y != 0)
+      return false;
+
+   /* Destination region must cover the complete image */
+   if (region->dstOffsets[0].x != 0 || region->dstOffsets[0].y != 0)
+      return false;
+
+   const uint32_t dst_width = u_minify(dst->extent.width, dst_mip_level);
+   const uint32_t dst_height = u_minify(dst->extent.height, dst_mip_level);
+   if (region->dstOffsets[1].x < dst_width - 1 ||
+       region->dstOffsets[1].y < dst_height - 1) {
+      return false;
+   }
+
+   /* No scaling */
+   if (region->srcOffsets[1].x != region->dstOffsets[1].x ||
+       region->srcOffsets[1].y != region->dstOffsets[1].y) {
+      return false;
+   }
+
+   /* Emit a TFU job for each layer to blit */
+   assert(region->dstSubresource.layerCount ==
+          region->srcSubresource.layerCount);
+   const uint32_t layer_count = region->dstSubresource.layerCount;
+   const uint32_t src_mip_level = region->srcSubresource.mipLevel;
+   for (uint32_t i = 0; i < layer_count; i++) {
+      uint32_t src_layer, dst_layer;
+      if (src->type == VK_IMAGE_TYPE_3D) {
+         assert(layer_count == 1);
+         src_layer = u_minify(src->extent.depth, src_mip_level);
+      } else {
+         src_layer = region->srcSubresource.baseArrayLayer + i;
+      }
+
+      if (dst->type == VK_IMAGE_TYPE_3D) {
+         assert(layer_count == 1);
+         dst_layer = u_minify(dst->extent.depth, dst_mip_level);
+      } else {
+         dst_layer = region->dstSubresource.baseArrayLayer + i;
+      }
+
+      emit_tfu_job(cmd_buffer,
+                   dst, dst_mip_level, dst_layer,
+                   src, src_mip_level, src_layer,
+                   dst_width, dst_height);
+   }
+
+   return true;
+}
+
+void
+v3dv_CmdBlitImage(VkCommandBuffer commandBuffer,
+                  VkImage srcImage,
+                  VkImageLayout srcImageLayout,
+                  VkImage dstImage,
+                  VkImageLayout dstImageLayout,
+                  uint32_t regionCount,
+                  const VkImageBlit* pRegions,
+                  VkFilter filter)
+{
+   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
+   V3DV_FROM_HANDLE(v3dv_image, src, srcImage);
+   V3DV_FROM_HANDLE(v3dv_image, dst, dstImage);
+
+   /* From the Vulkan 1.0 spec, vkCmdBlitImage valid usage */
+   assert(dst->samples == VK_SAMPLE_COUNT_1_BIT &&
+          src->samples == VK_SAMPLE_COUNT_1_BIT);
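+
+   /* Only TFU-compatible blits are supported so far: if blit_tfu()
+    * rejects a region there is no fallback path yet, so we hit the
+    * assert below.
+    */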
+   for (uint32_t i = 0; i < regionCount; i++) {
+      if (!blit_tfu(cmd_buffer, dst, src, &pRegions[i], filter))
+         assert(!"Fallback path for vkCmdBlitImage not implemented.");
+   }
+}
diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h
index e27f3af6726..95caeface3f 100644
--- a/src/broadcom/vulkan/v3dv_private.h
+++ b/src/broadcom/vulkan/v3dv_private.h
@@ -63,6 +63,8 @@
 #include "v3dv_extensions.h"
 #include "v3dv_bo.h"
 
+#include "drm-uapi/v3d_drm.h"
+
 /* FIXME: hooks for the packet definition functions. */
 static inline void pack_emit_reloc(void *cl, const void *reloc) {}
 
@@ -590,7 +592,8 @@ enum v3dv_ez_state {
 };
 
 enum v3dv_job_type {
-   V3DV_JOB_TYPE_GPU = 0,
+   V3DV_JOB_TYPE_GPU_CL = 0,
+   V3DV_JOB_TYPE_GPU_TFU,
    V3DV_JOB_TYPE_CPU_RESET_QUERIES,
    V3DV_JOB_TYPE_CPU_END_QUERY,
    V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS,
@@ -677,6 +680,9 @@ struct v3dv_job {
       struct v3dv_end_query_cpu_job_info          query_end;
       struct v3dv_copy_query_results_cpu_job_info query_copy_results;
    } cpu;
+
+   /* Job specs for TFU jobs */
+   struct drm_v3d_submit_tfu tfu;
 };
 
 void v3dv_job_init(struct v3dv_job *job,
@@ -894,6 +900,9 @@ void v3dv_cmd_buffer_copy_query_results(struct v3dv_cmd_buffer *cmd_buffer,
                                         uint32_t stride,
                                         VkQueryResultFlags flags);
 
+void v3dv_cmd_buffer_add_tfu_job(struct v3dv_cmd_buffer *cmd_buffer,
+                                 struct drm_v3d_submit_tfu *tfu);
+
 struct v3dv_semaphore {
    /* A syncobject handle associated with this semaphore */
    uint32_t sync;
@@ -1334,6 +1343,9 @@ const struct v3dv_format *v3dv_get_format(VkFormat);
 const uint8_t *v3dv_get_format_swizzle(VkFormat f);
 void v3dv_get_internal_type_bpp_for_output_format(uint32_t format, uint32_t *type, uint32_t *bpp);
 uint8_t v3dv_get_tex_return_size(const struct v3dv_format *vf, bool compare_enable);
+bool v3dv_tfu_supports_tex_format(const struct v3d_device_info *devinfo,
+                                  uint32_t tex_format);
+
 uint32_t v3d_utile_width(int cpp);
diff --git a/src/broadcom/vulkan/v3dv_queue.c b/src/broadcom/vulkan/v3dv_queue.c
index 8d2dc80dbde..63ab2973987 100644
--- a/src/broadcom/vulkan/v3dv_queue.c
+++ b/src/broadcom/vulkan/v3dv_queue.c
@@ -156,21 +156,6 @@ handle_copy_query_results_cpu_job(struct v3dv_job *job)
    return VK_SUCCESS;
 }
 
-static VkResult
-handle_cpu_job(struct v3dv_job *job)
-{
-   switch (job->type) {
-   case V3DV_JOB_TYPE_CPU_RESET_QUERIES:
-      return handle_reset_query_cpu_job(job);
-   case V3DV_JOB_TYPE_CPU_END_QUERY:
-      return handle_end_query_cpu_job(job);
-   case V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS:
-      return handle_copy_query_results_cpu_job(job);
-   default:
-      unreachable("Unhandled job type");
-   }
-}
-
 static VkResult
 process_semaphores_to_signal(struct v3dv_device *device,
                              uint32_t count, const VkSemaphore *sems)
@@ -227,15 +212,10 @@ process_fence_to_signal(struct v3dv_device *device, VkFence _fence)
 }
 
 static VkResult
-queue_submit_job(struct v3dv_queue *queue,
-                 struct v3dv_job *job,
-                 bool do_wait)
+handle_cl_job(struct v3dv_queue *queue,
+              struct v3dv_job *job,
+              bool do_wait)
 {
-   assert(job);
-
-   if (job->type != V3DV_JOB_TYPE_GPU)
-      return handle_cpu_job(job);
-
    struct v3dv_device *device = queue->device;
 
    struct drm_v3d_submit_cl submit;
@@ -302,6 +282,48 @@ queue_submit_job(struct v3dv_queue *queue,
    return VK_SUCCESS;
 }
 
+static VkResult
+handle_tfu_job(struct v3dv_queue *queue,
+               struct v3dv_job *job,
+               bool do_wait)
+{
+   const struct v3dv_device *device = queue->device;
+
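+   /* Serialize the TFU job against the last submitted job through the
+    * device syncobj: wait on it only if requested, but always signal it
+    * so later jobs can in turn wait on this one.
+    */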
+   job->tfu.in_sync = do_wait ? device->last_job_sync : 0;
+   job->tfu.out_sync = device->last_job_sync;
+
+   int ret = v3dv_ioctl(device->render_fd,
+                        DRM_IOCTL_V3D_SUBMIT_TFU, &job->tfu);
+   if (ret != 0) {
+      fprintf(stderr, "Failed to submit TFU job: %d\n", ret);
+      return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
+   }
+
+   return VK_SUCCESS;
+}
+
+static VkResult
+queue_submit_job(struct v3dv_queue *queue,
+                 struct v3dv_job *job,
+                 bool do_wait)
+{
+   assert(job);
+
+   switch (job->type) {
+   case V3DV_JOB_TYPE_GPU_CL:
+      return handle_cl_job(queue, job, do_wait);
+   case V3DV_JOB_TYPE_GPU_TFU:
+      return handle_tfu_job(queue, job, do_wait);
+   case V3DV_JOB_TYPE_CPU_RESET_QUERIES:
+      return handle_reset_query_cpu_job(job);
+   case V3DV_JOB_TYPE_CPU_END_QUERY:
+      return handle_end_query_cpu_job(job);
+   case V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS:
+      return handle_copy_query_results_cpu_job(job);
+   default:
+      unreachable("Unhandled job type");
+   }
+}
+
 static void
 emit_noop_bin(struct v3dv_job *job)
 {
@@ -395,7 +417,7 @@ queue_create_noop_job(struct v3dv_queue *queue, struct v3dv_job **job)
                        VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
    if (!*job)
       return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
-   v3dv_job_init(*job, V3DV_JOB_TYPE_GPU, device, NULL, -1);
+   v3dv_job_init(*job, V3DV_JOB_TYPE_GPU_CL, device, NULL, -1);
 
    emit_noop_bin(*job);
    emit_noop_render(*job);