diff --git a/src/nouveau/nil/format.rs b/src/nouveau/nil/format.rs
index c742136fb53..25da0b895a2 100644
--- a/src/nouveau/nil/format.rs
+++ b/src/nouveau/nil/format.rs
@@ -122,6 +122,11 @@ impl Format {
         }
         self.info().support() & NIL_FORMAT_SUPPORTS_DEPTH_STENCIL_BIT != 0
     }
+
+    /// Returns true if the format table flags this format as usable by the 2D engine.
+    pub fn supports_2d_engine(&self, _dev: &nv_device_info) -> bool {
+        self.info().support() & NIL_FORMAT_SUPPORTS_2D_ENGINE_BIT != 0
+    }
 }
 
 #[no_mangle]
@@ -180,6 +185,15 @@ pub extern "C" fn nil_format_supports_depth_stencil(
     Format::try_from(p_format).is_ok_and(|f| f.supports_depth_stencil(dev))
 }
 
+/// C entry point for `Format::supports_2d_engine`; unknown pipe formats yield false.
+#[no_mangle]
+pub extern "C" fn nil_format_supports_2d_engine(
+    dev: &nv_device_info,
+    p_format: pipe_format,
+) -> bool {
+    Format::try_from(p_format).is_ok_and(|f| f.supports_2d_engine(dev))
+}
+
 #[no_mangle]
 pub extern "C" fn nil_format_to_color_target(p_format: pipe_format) -> u32 {
     Format::try_from(p_format).unwrap().info().czt()
diff --git a/src/nouveau/nil/nil_format_table_gen.py b/src/nouveau/nil/nil_format_table_gen.py
index ba8b64d968b..469e06c5841 100644
--- a/src/nouveau/nil/nil_format_table_gen.py
+++ b/src/nouveau/nil/nil_format_table_gen.py
@@ -27,6 +27,7 @@ enum nil_format_support_flags {
    NIL_FORMAT_SUPPORTS_ALPHA_BLEND_BIT = BITFIELD_BIT(4),
    NIL_FORMAT_SUPPORTS_DEPTH_STENCIL_BIT = BITFIELD_BIT(5),
    NIL_FORMAT_SUPPORTS_SCANOUT_BIT = BITFIELD_BIT(6),
+   NIL_FORMAT_SUPPORTS_2D_ENGINE_BIT = BITFIELD_BIT(7),
 };
 
 struct nil_tic_format {
@@ -62,6 +63,9 @@ TEMPLATE_C = template.Template(text="""\
 
 #include "nil_format_table.h"
 
+#include "nvtypes.h"
+
+#include "cl902d.h"
 #include "cl9097.h"
 #include "cl9097tex.h"
 #include "clb097.h"
@@ -75,7 +79,13 @@ const struct nil_format_info nil_format_table[PIPE_FORMAT_COUNT] = {
 % for f in formats:
     [PIPE_FORMAT_${f.pipe}] = {
         .czt = ${f.czt()},
-        .support = ${f.support()},
+        .support = ${f.support()} |
+        #if defined(${f.twod()}) && ${f.twod()} == ${f.czt()}
+            NIL_FORMAT_SUPPORTS_2D_ENGINE_BIT
+        #else
+            0
+        #endif
+        ,
         .tic_v2_data_type = ${f.v2_data_type()},
         .tic = {
             .comp_sizes = ${f.tcs()},
@@ -106,6 +116,8 @@ ZT_FORMAT_PREFIX = {
     'tk1' : 'NVB097_SET_ZT_FORMAT_V_',
 }
 
+TWOD_FORMAT_PREFIX = "NV902D_SET_DST_FORMAT_V_"
+
 TCS_PREFIX = {
     None : 'NV9097_TEXHEADV2_0_COMPONENT_SIZES_',
     'maxwella' : 'NVB097_TEXHEAD_BL_COMPONENTS_SIZES_',
@@ -188,6 +200,10 @@ class Format(object):
         else:
             return DATA_TYPES[self._types[0]]
 
+    def twod(self):
+        """Name of the NV902D destination-format macro matching this format's color target."""
+        return TWOD_FORMAT_PREFIX + self._czt
+
     def v2_data_type(self):
         return V2_DATA_TYPES[self._data_type]
 
diff --git a/src/nouveau/vulkan/meson.build b/src/nouveau/vulkan/meson.build
index 21b66e8a3b2..2ec82d459fa 100644
--- a/src/nouveau/vulkan/meson.build
+++ b/src/nouveau/vulkan/meson.build
@@ -6,6 +6,7 @@ nvk_files = files(
   'nvk_buffer.h',
   'nvk_buffer_view.c',
   'nvk_buffer_view.h',
+  'nvk_cmd_2d.c',
   'nvk_cmd_buffer.c',
   'nvk_cmd_buffer.h',
   'nvk_cmd_clear.c',
diff --git a/src/nouveau/vulkan/nvk_cmd_2d.c b/src/nouveau/vulkan/nvk_cmd_2d.c
new file mode 100644
index 00000000000..46b7e5ee3f8
--- /dev/null
+++ b/src/nouveau/vulkan/nvk_cmd_2d.c
@@ -0,0 +1,359 @@
+/*
+ * Copyright © 2025 Valve Corporation
+ * SPDX-License-Identifier: MIT
+ */
+#include "nvk_cmd_buffer.h"
+#include "nvk_entrypoints.h"
+#include "nvk_format.h"
+
+#include "nv_push_cl902d.h"
+#include "clc697.h"
+
+/**
+ * Emit the one-time 2D engine setup for a queue: bind the 2D class object,
+ * disable clipping and program the PIXELS_FROM_MEMORY corral size.
+ */
+VkResult
+nvk_push_2d_state_init(struct nvk_queue *queue, struct nv_push *p)
+{
+   struct nvk_device *dev = nvk_queue_device(queue);
+   const struct nvk_physical_device *pdev = nvk_device_physical(dev);
+
+   /* 2D state */
+   P_MTHD(p, NV902D, SET_OBJECT);
+   P_NV902D_SET_OBJECT(p, {
+      .class_id = pdev->info.cls_eng2d,
+      .engine_id = 0,
+   });
+
+   P_IMMD(p, NV902D, SET_CLIP_ENABLE, V_FALSE);
+   P_IMMD(p, NV902D, SET_PIXELS_FROM_MEMORY_CORRAL_SIZE,
+          (pdev->info.cls_eng3d >= AMPERE_A) ? 0x3ff : 0x3f);
+
+   return VK_SUCCESS;
+}
+
+/**
+ * Divide x by y, rounding to the nearest integer
+ */
+static int64_t
+div_round(int64_t x, int64_t y) {
+   assert(y > 0);
+   if (x >= 0) {
+      return (x + y / 2) / y;
+   } else {
+      return (x - y / 2) / y;
+   }
+}
+
+/**
+ * Convert an integer to 32.32 fixed point
+ *
+ * Multiplies rather than shifts because x may be negative (flipped regions)
+ * and left-shifting a negative value is undefined behavior in C.
+ */
+static int64_t
+int_to_fixed(int64_t x)
+{
+   assert(INT32_MIN <= x);
+   assert(x <= INT32_MAX);
+   return x * ((int64_t)1 << 32);
+}
+
+/**
+ * Compute offset and scale for one dimension
+ *
+ * dst0_out, dst1_out are integer coordinates
+ * src0_out, scale_out are 32.32 fixed point
+ */
+static inline void
+compute_off_scale(uint32_t src0, uint32_t src1,
+                  uint32_t dst0, uint32_t dst1,
+                  uint32_t *dst0_out, uint32_t *dst1_out,
+                  int64_t *src0_out, int64_t *scale_out)
+{
+   if (dst0 < dst1) {
+      *dst0_out = dst0;
+      *dst1_out = dst1;
+   } else {
+      *dst0_out = dst1;
+      *dst1_out = dst0;
+
+      /* Flip the source region */
+      SWAP(src0, src1);
+   }
+
+   int64_t src_region_size = (int64_t)src1 - (int64_t)src0;
+   assert(src_region_size != 0);
+
+   int64_t dst_region_size = (int64_t)*dst1_out - (int64_t)*dst0_out;
+   assert(dst_region_size > 0);
+
+   /* Divide with result in 32.32 fixed point */
+   int64_t scale = div_round(int_to_fixed(src_region_size), dst_region_size);
+
+   /* Based on the equations in the spec for vkCmdBlitImage, we set i = x_dst0
+    * to get the starting texel, which gives us:
+    *    i = x_dst0
+    *    u_base = x_dst0 + 1/2
+    *    u_offset = (x_dst0 + 1/2) - x_dst0 = 1/2
+    *    u_scaled = u_offset * scale_u = scale_u / 2
+    *    u = u_scaled + x_src0 = x_src0 + scale_u / 2
+    * Thanks to maxImageDimension, this should be nowhere near overflow.
+    */
+   int64_t src_offset = int_to_fixed(src0) + div_round(scale, 2);
+
+   *scale_out = scale;
+   *src0_out = src_offset;
+}
+
+/**
+ * Push the destination rectangle, scale factors and source origin for one
+ * blit region as a single 12-method NV902D burst.
+ */
+static void
+nvk_2d_blit_rect(struct nvk_cmd_buffer *cmd, const VkImageBlit2 *region)
+{
+   struct nv_push *p = nvk_cmd_buffer_push(cmd, 13);
+   uint32_t dst_x0, dst_y0, dst_x1, dst_y1;
+   int64_t src_x0, src_y0, du_dx, dv_dy;
+
+   compute_off_scale(region->srcOffsets[0].x,
+                     region->srcOffsets[1].x,
+                     region->dstOffsets[0].x,
+                     region->dstOffsets[1].x,
+                     &dst_x0, &dst_x1,
+                     &src_x0, &du_dx);
+   compute_off_scale(region->srcOffsets[0].y,
+                     region->srcOffsets[1].y,
+                     region->dstOffsets[0].y,
+                     region->dstOffsets[1].y,
+                     &dst_y0, &dst_y1,
+                     &src_y0, &dv_dy);
+
+   P_MTHD(p, NV902D, SET_PIXELS_FROM_MEMORY_DST_X0);
+   P_NV902D_SET_PIXELS_FROM_MEMORY_DST_X0(p, dst_x0);
+   P_NV902D_SET_PIXELS_FROM_MEMORY_DST_Y0(p, dst_y0);
+   P_NV902D_SET_PIXELS_FROM_MEMORY_DST_WIDTH(p, dst_x1 - dst_x0);
+   P_NV902D_SET_PIXELS_FROM_MEMORY_DST_HEIGHT(p, dst_y1 - dst_y0);
+
+   P_NV902D_SET_PIXELS_FROM_MEMORY_DU_DX_FRAC(p, du_dx);
+   P_NV902D_SET_PIXELS_FROM_MEMORY_DU_DX_INT(p, du_dx >> 32);
+   P_NV902D_SET_PIXELS_FROM_MEMORY_DV_DY_FRAC(p, dv_dy);
+   P_NV902D_SET_PIXELS_FROM_MEMORY_DV_DY_INT(p, dv_dy >> 32);
+
+   P_NV902D_SET_PIXELS_FROM_MEMORY_SRC_X0_FRAC(p, src_x0);
+   P_NV902D_SET_PIXELS_FROM_MEMORY_SRC_X0_INT(p, src_x0 >> 32);
+   P_NV902D_SET_PIXELS_FROM_MEMORY_SRC_Y0_FRAC(p, src_y0);
+   P_NV902D_PIXELS_FROM_MEMORY_SRC_Y0_INT(p, src_y0 >> 32);
+}
+
+/**
+ * Returns true if the image can be bound as a 2D engine surface: it must be
+ * single-plane, not 3D, and use a format flagged as 2D-engine capable.
+ */
+static bool
+nvk_2d_can_set_target(const struct nvk_physical_device *pdev,
+                      const struct nvk_image *image)
+{
+   if (image->plane_count != 1)
+      return false;
+
+   const struct nvk_image_plane *plane = &image->planes[0];
+   const struct nil_image *nil_image = &plane->nil;
+
+   if (nil_image->dim == NIL_IMAGE_DIM_3D)
+      return false;
+
+   enum pipe_format p_format =
+      nvk_format_to_pipe_format(image->vk.format);
+   if (!nil_format_supports_2d_engine(&pdev->info, p_format))
+      return false;
+
+   return true;
+}
+
+/**
+ * Program the 2D engine source (is_src) or destination surface to point at
+ * one mip level and array layer of the image.
+ */
+static void
+nvk_2d_set_target(struct nvk_cmd_buffer *cmd, struct nvk_image *image,
+                  const VkImageSubresourceLayers *subresource, bool is_src)
+{
+   assert(image->plane_count == 1);
+   const struct nvk_image_plane *plane = &image->planes[0];
+   const struct nil_image *nil_image = &plane->nil;
+   const struct nil_image_level *level =
+      &nil_image->levels[subresource->mipLevel];
+
+   enum pipe_format p_format =
+      nvk_format_to_pipe_format(image->vk.format);
+
+   struct nil_Extent4D_Samples level_extent_sa =
+      nil_image_level_extent_sa(nil_image, subresource->mipLevel);
+
+   uint64_t addr = nvk_image_plane_base_address(plane) + level->offset_B;
+
+   assert(nil_image->dim != NIL_IMAGE_DIM_3D);
+   assert(subresource->layerCount == 1);
+   addr += subresource->baseArrayLayer *
+           (uint64_t)nil_image->array_stride_B;
+
+   struct nv_push *p = nvk_cmd_buffer_push(cmd, 11);
+
+   if (is_src) {
+      P_MTHD(p, NV902D, SET_SRC_FORMAT);
+   } else {
+      P_MTHD(p, NV902D, SET_DST_FORMAT);
+   }
+
+#define SET(n, x...) do {                       \
+      if (is_src) {                             \
+         P_NV902D_SET_SRC_##n(p, x);            \
+      } else {                                  \
+         P_NV902D_SET_DST_##n(p, x);            \
+      }                                         \
+   } while (0)
+
+   uint8_t ct_format = nil_format_to_color_target(p_format);
+   SET(FORMAT, ct_format);
+
+   if (level->tiling.gob_type != NIL_GOB_TYPE_LINEAR) {
+      SET(MEMORY_LAYOUT, V_BLOCKLINEAR);
+   } else {
+      SET(MEMORY_LAYOUT, V_PITCH);
+   }
+
+   SET(BLOCK_SIZE, {
+      .height = level->tiling.y_log2,
+      .depth = level->tiling.z_log2,
+   });
+   SET(DEPTH, level_extent_sa.depth);
+   if (is_src) {
+      P_MTHD(p, NV902D, SET_SRC_PITCH);
+   } else {
+      P_NV902D_SET_DST_LAYER(p, 0);
+   }
+
+   if (level->tiling.gob_type != NIL_GOB_TYPE_LINEAR) {
+      const uint32_t row_stride_el =
+         level->row_stride_B / util_format_get_blocksize(p_format);
+      SET(PITCH, 0);
+      SET(WIDTH, row_stride_el);
+   } else {
+      uint32_t pitch = level->row_stride_B;
+      assert(pitch % 32 == 0);
+      SET(PITCH, pitch);
+      SET(WIDTH, level_extent_sa.width);
+   }
+   SET(HEIGHT, level_extent_sa.height);
+
+   assert(addr % 32 == 0);
+   SET(OFFSET_UPPER, addr >> 32);
+   SET(OFFSET_LOWER, addr);
+#undef SET
+}
+
+/**
+ * Execute every region of a blit on the 2D engine
+ *
+ * Only valid for blits that can_use_2d_blit() has accepted.
+ */
+static void
+nvk_2d_blit(struct nvk_cmd_buffer *cmd,
+            const VkBlitImageInfo2 *pBlitImageInfo)
+{
+   VK_FROM_HANDLE(nvk_image, dst_image, pBlitImageInfo->dstImage);
+   VK_FROM_HANDLE(nvk_image, src_image, pBlitImageInfo->srcImage);
+
+   {
+      assert(pBlitImageInfo->filter == VK_FILTER_NEAREST ||
+             pBlitImageInfo->filter == VK_FILTER_LINEAR);
+      bool nearest = pBlitImageInfo->filter == VK_FILTER_NEAREST;
+
+      struct nv_push *p = nvk_cmd_buffer_push(cmd, 6);
+      P_IMMD(p, NV902D, SET_OPERATION, V_SRCCOPY);
+      P_IMMD(p, NV902D, SET_PIXELS_FROM_MEMORY_SAMPLE_MODE, {
+         .filter = nearest ? FILTER_POINT : FILTER_BILINEAR,
+         .origin = ORIGIN_CORNER,
+      });
+      P_IMMD(p, NV902D, SET_COMPRESSION, dst_image->is_compressed);
+   }
+
+   for (uint32_t r = 0; r < pBlitImageInfo->regionCount; r++) {
+      const VkImageBlit2 *region = &pBlitImageInfo->pRegions[r];
+
+      assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
+
+      nvk_2d_set_target(cmd, dst_image, &region->dstSubresource, false);
+      nvk_2d_set_target(cmd, src_image, &region->srcSubresource, true);
+
+      nvk_2d_blit_rect(cmd, region);
+   }
+}
+
+/**
+ * Returns true if the whole blit can be done on the 2D engine; otherwise the
+ * caller falls back to nvk_meta_blit().
+ */
+static bool
+can_use_2d_blit(const struct nvk_physical_device *pdev,
+                const VkBlitImageInfo2 *pBlitImageInfo)
+{
+   VK_FROM_HANDLE(nvk_image, dst_image, pBlitImageInfo->dstImage);
+   VK_FROM_HANDLE(nvk_image, src_image, pBlitImageInfo->srcImage);
+
+   if (!nvk_2d_can_set_target(pdev, dst_image) ||
+       !nvk_2d_can_set_target(pdev, src_image)) {
+      return false;
+   }
+
+   for (uint32_t r = 0; r < pBlitImageInfo->regionCount; r++) {
+      const VkImageBlit2 *region = &pBlitImageInfo->pRegions[r];
+      if (region->dstSubresource.layerCount != 1 ||
+          region->srcSubresource.layerCount != 1 ||
+          region->dstSubresource.aspectMask != VK_IMAGE_ASPECT_COLOR_BIT ||
+          region->srcSubresource.aspectMask != VK_IMAGE_ASPECT_COLOR_BIT) {
+         return false;
+      }
+   }
+
+   enum pipe_format src_p_format =
+      nvk_format_to_pipe_format(src_image->vk.format);
+   enum pipe_format dst_p_format =
+      nvk_format_to_pipe_format(dst_image->vk.format);
+
+   if (util_format_is_red(src_p_format) &&
+       !util_format_is_red(dst_p_format)) {
+      /* The 2D engine always treats single component formats as
+       * luminance rather than red
+       */
+      return false;
+   }
+
+   if (util_format_is_alpha(src_p_format) &&
+       !util_format_is_alpha(dst_p_format)) {
+      /* Alpha copies seem to leave other channels unchanged, which
+       * isn't what we want
+       */
+      return false;
+   }
+
+   return true;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+nvk_CmdBlitImage2(VkCommandBuffer commandBuffer,
+                  const VkBlitImageInfo2 *pBlitImageInfo)
+{
+   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
+   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
+   const struct nvk_physical_device *pdev = nvk_device_physical(dev);
+
+   if (can_use_2d_blit(pdev, pBlitImageInfo)) {
+      nvk_2d_blit(cmd, pBlitImageInfo);
+   } else {
+      nvk_meta_blit(cmd, pBlitImageInfo);
+   }
+}
diff --git a/src/nouveau/vulkan/nvk_cmd_buffer.c b/src/nouveau/vulkan/nvk_cmd_buffer.c
index 4f1d4b319a0..b5330b9f0a7 100644
--- a/src/nouveau/vulkan/nvk_cmd_buffer.c
+++ b/src/nouveau/vulkan/nvk_cmd_buffer.c
@@ -22,6 +22,7 @@
 #include "clb097.h"
 #include "clcb97.h"
 
+#include "nv_push_cl902d.h"
 #include "nv_push_cl906f.h"
 #include "nv_push_cla16f.h"
 #include "nv_push_cl9097.h"
@@ -598,6 +599,11 @@ nvk_cmd_flush_wait_dep(struct nvk_cmd_buffer *cmd,
        * a WFI from the channel switch.
        */
       switch (nvk_cmd_buffer_last_subchannel(cmd)) {
+      case SUBC_NV902D: {
+         struct nv_push *p = nvk_cmd_buffer_push(cmd, 2);
+         P_IMMD(p, NV902D, WAIT_FOR_IDLE, 0);
+         break;
+      }
       case SUBC_NV9097: {
          struct nv_push *p = nvk_cmd_buffer_push(cmd, 2);
          P_IMMD(p, NV9097, WAIT_FOR_IDLE, 0);
diff --git a/src/nouveau/vulkan/nvk_cmd_buffer.h b/src/nouveau/vulkan/nvk_cmd_buffer.h
index 47356580e31..dff8719082b 100644
--- a/src/nouveau/vulkan/nvk_cmd_buffer.h
+++ b/src/nouveau/vulkan/nvk_cmd_buffer.h
@@ -441,6 +441,8 @@ void nvk_cmd_fill_memory(struct nvk_cmd_buffer *cmd,
                          uint64_t dst_addr, uint64_t size,
                          uint32_t data);
 
+void nvk_meta_blit(struct nvk_cmd_buffer *cmd,
+                   const VkBlitImageInfo2 *pBlitImageInfo);
 void nvk_meta_resolve_rendering(struct nvk_cmd_buffer *cmd,
                                 const VkRenderingInfo *pRenderingInfo);
 
diff --git a/src/nouveau/vulkan/nvk_cmd_meta.c b/src/nouveau/vulkan/nvk_cmd_meta.c
index d58f2ca4df2..241ae96ffca 100644
--- a/src/nouveau/vulkan/nvk_cmd_meta.c
+++ b/src/nouveau/vulkan/nvk_cmd_meta.c
@@ -7,6 +7,7 @@
 #include "nvk_descriptor_set.h"
 #include "nvk_device.h"
 #include "nvk_entrypoints.h"
+#include "nvk_format.h"
 #include "nvk_image.h"
 #include "nvk_physical_device.h"
 
@@ -214,11 +215,10 @@ nvk_meta_end(struct nvk_cmd_buffer *cmd,
    P_IMMD(p, NV9097, SET_RENDER_ENABLE_OVERRIDE, MODE_USE_RENDER_ENABLE);
 }
 
-VKAPI_ATTR void VKAPI_CALL
-nvk_CmdBlitImage2(VkCommandBuffer commandBuffer,
-                  const VkBlitImageInfo2 *pBlitImageInfo)
+void
+nvk_meta_blit(struct nvk_cmd_buffer *cmd,
+              const VkBlitImageInfo2 *pBlitImageInfo)
 {
-   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
    struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
    struct nvk_meta_save save;
 
diff --git a/src/nouveau/vulkan/nvk_event.c b/src/nouveau/vulkan/nvk_event.c
index 2a671c4be45..cd47851553f 100644
--- a/src/nouveau/vulkan/nvk_event.c
+++ b/src/nouveau/vulkan/nvk_event.c
@@ -189,8 +189,7 @@ nvk_event_report_semaphore(struct nvk_cmd_buffer *cmd,
          .operation = OPERATION_RELEASE,
          .structure_size = STRUCTURE_SIZE_ONE_WORD,
       });
-   } else {
-      assert(subc == SUBC_NV90B5);
+   } else if (subc == SUBC_NV90B5) {
       struct nv_push *p = nvk_cmd_buffer_push(cmd, 6);
 
       P_MTHD(p, NV90B5, SET_SEMAPHORE_A);
@@ -204,6 +203,22 @@ nvk_event_report_semaphore(struct nvk_cmd_buffer *cmd,
          .flush_enable = FLUSH_ENABLE_TRUE,
          /* Note: FLUSH_TYPE=SYS implicitly for NVC3B5+ */
       });
+   } else {
+      /* This should work for any engine. This assert is here as a reminder
+       * to check for an engine-specific version, since those will typically
+       * have perf benefits.
+       */
+      assert(subc == SUBC_NV902D);
+      struct nv_push *p = nvk_cmd_buffer_push(cmd, 5);
+      __push_mthd(p, nvk_cmd_buffer_last_subchannel(cmd), NV906F_SEMAPHOREA);
+      P_NV906F_SEMAPHOREA(p, addr >> 32);
+      P_NV906F_SEMAPHOREB(p, (addr & UINT32_MAX) >> 2);
+      P_NV906F_SEMAPHOREC(p, value);
+      P_NV906F_SEMAPHORED(p, {
+         .operation = OPERATION_RELEASE,
+         .release_wfi = RELEASE_WFI_EN,
+         .release_size = RELEASE_SIZE_4BYTE,
+      });
    }
 }
 
diff --git a/src/nouveau/vulkan/nvk_queue.c b/src/nouveau/vulkan/nvk_queue.c
index 4061705eb56..cd1088e4db1 100644
--- a/src/nouveau/vulkan/nvk_queue.c
+++ b/src/nouveau/vulkan/nvk_queue.c
@@ -355,6 +355,12 @@ nvk_queue_init_context_state(struct nvk_queue *queue)
          return result;
    }
 
+   if (queue->engines & NVKMD_ENGINE_2D) {
+      result = nvk_push_2d_state_init(queue, p);
+      if (result != VK_SUCCESS)
+         return result;
+   }
+
    if (queue->engines & NVKMD_ENGINE_COMPUTE) {
       result = nvk_push_dispatch_state_init(queue, p);
       if (result != VK_SUCCESS)
diff --git a/src/nouveau/vulkan/nvk_queue.h b/src/nouveau/vulkan/nvk_queue.h
index 1e72efc4fd6..7e8235d2ae1 100644
--- a/src/nouveau/vulkan/nvk_queue.h
+++ b/src/nouveau/vulkan/nvk_queue.h
@@ -64,7 +64,7 @@ nvk_queue_engines_from_queue_flags(VkQueueFlags queue_flags)
 {
    enum nvkmd_engines engines = 0;
    if (queue_flags & VK_QUEUE_GRAPHICS_BIT) {
-      engines |= NVKMD_ENGINE_3D;
+      engines |= NVKMD_ENGINE_3D | NVKMD_ENGINE_2D;
       /* We rely on compute shaders for queries */
       engines |= NVKMD_ENGINE_COMPUTE;
    }
@@ -112,6 +112,9 @@ void nvk_queue_destroy(struct nvk_device *dev, struct nvk_queue *queue);
 VkResult nvk_push_draw_state_init(struct nvk_queue *queue,
                                   struct nv_push *p);
 
+VkResult nvk_push_2d_state_init(struct nvk_queue *queue,
+                                struct nv_push *p);
+
 VkResult nvk_push_dispatch_state_init(struct nvk_queue *queue,
                                       struct nv_push *p);
 
diff --git a/src/util/format/u_format.c b/src/util/format/u_format.c
index 8f6defb3291..953e08f16fd 100644
--- a/src/util/format/u_format.c
+++ b/src/util/format/u_format.c
@@ -302,6 +302,27 @@ util_format_is_luminance_alpha(enum pipe_format format)
    return false;
 }
 
+/**
+ * Returns true for RGB/sRGB colorspace formats whose channel swizzle is
+ * exactly (X, 0, 0, 1), i.e. only a red channel is stored.
+ */
+bool
+util_format_is_red(enum pipe_format format)
+{
+   const struct util_format_description *desc =
+      util_format_description(format);
+
+   if ((desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
+        desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) &&
+       desc->swizzle[0] == PIPE_SWIZZLE_X &&
+       desc->swizzle[1] == PIPE_SWIZZLE_0 &&
+       desc->swizzle[2] == PIPE_SWIZZLE_0 &&
+       desc->swizzle[3] == PIPE_SWIZZLE_1) {
+      return true;
+   }
+   return false;
+}
+
 bool
 util_format_is_red_alpha(enum pipe_format format)
 {
diff --git a/src/util/format/u_format.h b/src/util/format/u_format.h
index 2614a16150d..74dd51e9951 100644
--- a/src/util/format/u_format.h
+++ b/src/util/format/u_format.h
@@ -805,6 +805,9 @@ util_format_is_alpha(enum pipe_format format) ATTRIBUTE_CONST;
 bool
 util_format_is_luminance_alpha(enum pipe_format format) ATTRIBUTE_CONST;
 
+bool
+util_format_is_red(enum pipe_format format) ATTRIBUTE_CONST;
+
 bool
 util_format_is_red_alpha(enum pipe_format format) ATTRIBUTE_CONST;
 