From dcba32bac08abfe71768a8b42e003b9350f3fef9 Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Thu, 17 Sep 2020 10:16:42 -0400 Subject: [PATCH] turnip: implement VK_EXT_extended_dynamic_state Passes dEQP-VK.pipeline.extended_dynamic_state.* Signed-off-by: Jonathan Marek Part-of: --- src/freedreno/vulkan/tu_cmd_buffer.c | 299 ++++++++++++++++++++++++-- src/freedreno/vulkan/tu_device.c | 5 + src/freedreno/vulkan/tu_extensions.py | 1 + src/freedreno/vulkan/tu_pipeline.c | 217 ++++++++++++------- src/freedreno/vulkan/tu_private.h | 39 +++- 5 files changed, 452 insertions(+), 109 deletions(-) diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c index 1ded67762c6..380ef74eb18 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.c +++ b/src/freedreno/vulkan/tu_cmd_buffer.c @@ -464,6 +464,8 @@ tu_cs_emit_draw_state(struct tu_cs *cs, uint32_t id, struct tu_draw_state state) break; } + STATIC_ASSERT(TU_DRAW_STATE_COUNT <= 32); + /* We need to reload the descriptors every time the descriptor sets * change. 
However, the commands we send only depend on the pipeline * because the whole point is to cache descriptors which are used by the @@ -1567,6 +1569,19 @@ tu_CmdBindVertexBuffers(VkCommandBuffer commandBuffer, uint32_t bindingCount, const VkBuffer *pBuffers, const VkDeviceSize *pOffsets) +{ + tu_CmdBindVertexBuffers2EXT(commandBuffer, firstBinding, bindingCount, + pBuffers, pOffsets, NULL, NULL); +} + +void +tu_CmdBindVertexBuffers2EXT(VkCommandBuffer commandBuffer, + uint32_t firstBinding, + uint32_t bindingCount, + const VkBuffer* pBuffers, + const VkDeviceSize* pOffsets, + const VkDeviceSize* pSizes, + const VkDeviceSize* pStrides) { TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); struct tu_cs cs; @@ -1577,7 +1592,9 @@ tu_CmdBindVertexBuffers(VkCommandBuffer commandBuffer, struct tu_buffer *buf = tu_buffer_from_handle(pBuffers[i]); cmd->state.vb[firstBinding + i].base = tu_buffer_iova(buf) + pOffsets[i]; - cmd->state.vb[firstBinding + i].size = buf->size - pOffsets[i]; + cmd->state.vb[firstBinding + i].size = pSizes ? 
pSizes[i] : (buf->size - pOffsets[i]); + if (pStrides) + cmd->state.vb[firstBinding + i].stride = pStrides[i]; } for (uint32_t i = 0; i < MAX_VBS; i++) { @@ -1588,6 +1605,16 @@ tu_CmdBindVertexBuffers(VkCommandBuffer commandBuffer, } cmd->state.dirty |= TU_CMD_DIRTY_VERTEX_BUFFERS; + + if (pStrides) { + cmd->state.dynamic_state[TU_DYNAMIC_STATE_VB_STRIDE].iova = + tu_cs_draw_state(&cmd->sub_cs, &cs, 2 * MAX_VBS).iova; + + for (uint32_t i = 0; i < MAX_VBS; i++) + tu_cs_emit_regs(&cs, A6XX_VFD_FETCH_STRIDE(i, cmd->state.vb[i].stride)); + + cmd->state.dirty |= TU_CMD_DIRTY_VB_STRIDE; + } } void @@ -1985,29 +2012,17 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer, uint32_t mask = ~pipeline->dynamic_state_mask & BITFIELD_MASK(TU_DYNAMIC_STATE_COUNT); uint32_t i; - tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * (7 + util_bitcount(mask))); + tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * (6 + util_bitcount(mask))); tu_cs_emit_draw_state(cs, TU_DRAW_STATE_PROGRAM, pipeline->program.state); tu_cs_emit_draw_state(cs, TU_DRAW_STATE_PROGRAM_BINNING, pipeline->program.binning_state); tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VI, pipeline->vi.state); tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VI_BINNING, pipeline->vi.binning_state); tu_cs_emit_draw_state(cs, TU_DRAW_STATE_RAST, pipeline->rast_state); - tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DS, pipeline->ds_state); tu_cs_emit_draw_state(cs, TU_DRAW_STATE_BLEND, pipeline->blend_state); + for_each_bit(i, mask) tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DYNAMIC + i, pipeline->dynamic_state[i]); - /* dynamic linewidth state depends pipeline state's gras_su_cntl - * so the dynamic state ib must be updated when pipeline changes - */ - if (pipeline->dynamic_state_mask & BIT(VK_DYNAMIC_STATE_LINE_WIDTH)) { - struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_LINE_WIDTH, 2); - - cmd->state.dynamic_gras_su_cntl &= A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK; - cmd->state.dynamic_gras_su_cntl |= pipeline->gras_su_cntl; - - tu_cs_emit_regs(&cs, 
A6XX_GRAS_SU_CNTL(.dword = cmd->state.dynamic_gras_su_cntl)); - } - /* the vertex_buffers draw state always contains all the currently * bound vertex buffers. update its size to only emit the vbs which * are actually used by the pipeline @@ -2018,6 +2033,38 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer, cmd->state.vertex_buffers.size = pipeline->num_vbs * 4; cmd->state.dirty |= TU_CMD_DIRTY_VERTEX_BUFFERS; } + + if ((pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_VB_STRIDE)) && + cmd->state.dynamic_state[TU_DYNAMIC_STATE_VB_STRIDE].size != pipeline->num_vbs * 2) { + cmd->state.dynamic_state[TU_DYNAMIC_STATE_VB_STRIDE].size = pipeline->num_vbs * 2; + cmd->state.dirty |= TU_CMD_DIRTY_VB_STRIDE; + } + +#define UPDATE_REG(X, Y) { \ + /* note: would be better to have pipeline bits already masked */ \ + uint32_t pipeline_bits = pipeline->X & pipeline->X##_mask; \ + if ((cmd->state.X & pipeline->X##_mask) != pipeline_bits) { \ + cmd->state.X &= ~pipeline->X##_mask; \ + cmd->state.X |= pipeline_bits; \ + cmd->state.dirty |= TU_CMD_DIRTY_##Y; \ + } \ + if (!(pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_##Y))) \ + cmd->state.dirty &= ~TU_CMD_DIRTY_##Y; \ +} + + /* these registers can have bits set from both pipeline and dynamic state + * this updates the bits set by the pipeline + * if the pipeline doesn't use a dynamic state for the register, then + * the relevant dirty bit is cleared to avoid overriding the non-dynamic + * state with a dynamic state the next draw. 
+ */ + UPDATE_REG(gras_su_cntl, GRAS_SU_CNTL); + UPDATE_REG(rb_depth_cntl, RB_DEPTH_CNTL); + UPDATE_REG(rb_stencil_cntl, RB_STENCIL_CNTL); +#undef UPDATE_REG + + if (pipeline->rb_depth_cntl_disable) + cmd->state.dirty |= TU_CMD_DIRTY_RB_DEPTH_CNTL; } void @@ -2056,12 +2103,11 @@ void tu_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth) { TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); - struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_LINE_WIDTH, 2); - cmd->state.dynamic_gras_su_cntl &= ~A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK; - cmd->state.dynamic_gras_su_cntl |= A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(lineWidth / 2.0f); + cmd->state.gras_su_cntl &= ~A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK; + cmd->state.gras_su_cntl |= A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(lineWidth / 2.0f); - tu_cs_emit_regs(&cs, A6XX_GRAS_SU_CNTL(.dword = cmd->state.dynamic_gras_su_cntl)); + cmd->state.dirty |= TU_CMD_DIRTY_GRAS_SU_CNTL; } void @@ -2160,6 +2206,178 @@ tu_CmdSetSampleLocationsEXT(VkCommandBuffer commandBuffer, tu6_emit_sample_locations(&cs, pSampleLocationsInfo); } +void +tu_CmdSetCullModeEXT(VkCommandBuffer commandBuffer, VkCullModeFlags cullMode) +{ + TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + + cmd->state.gras_su_cntl &= + ~(A6XX_GRAS_SU_CNTL_CULL_FRONT | A6XX_GRAS_SU_CNTL_CULL_BACK); + + if (cullMode & VK_CULL_MODE_FRONT_BIT) + cmd->state.gras_su_cntl |= A6XX_GRAS_SU_CNTL_CULL_FRONT; + if (cullMode & VK_CULL_MODE_BACK_BIT) + cmd->state.gras_su_cntl |= A6XX_GRAS_SU_CNTL_CULL_BACK; + + cmd->state.dirty |= TU_CMD_DIRTY_GRAS_SU_CNTL; +} + +void +tu_CmdSetFrontFaceEXT(VkCommandBuffer commandBuffer, VkFrontFace frontFace) +{ + TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + + cmd->state.gras_su_cntl &= ~A6XX_GRAS_SU_CNTL_FRONT_CW; + + if (frontFace == VK_FRONT_FACE_CLOCKWISE) + cmd->state.gras_su_cntl |= A6XX_GRAS_SU_CNTL_FRONT_CW; + + cmd->state.dirty |= TU_CMD_DIRTY_GRAS_SU_CNTL; +} + +void +tu_CmdSetPrimitiveTopologyEXT(VkCommandBuffer commandBuffer, + 
VkPrimitiveTopology primitiveTopology) +{ + TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + + cmd->state.primtype = tu6_primtype(primitiveTopology); +} + +void +tu_CmdSetViewportWithCountEXT(VkCommandBuffer commandBuffer, + uint32_t viewportCount, + const VkViewport* pViewports) +{ + tu_CmdSetViewport(commandBuffer, 0, viewportCount, pViewports); +} + +void +tu_CmdSetScissorWithCountEXT(VkCommandBuffer commandBuffer, + uint32_t scissorCount, + const VkRect2D* pScissors) +{ + tu_CmdSetScissor(commandBuffer, 0, scissorCount, pScissors); +} + +void +tu_CmdSetDepthTestEnableEXT(VkCommandBuffer commandBuffer, + VkBool32 depthTestEnable) +{ + TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + + cmd->state.rb_depth_cntl &= ~A6XX_RB_DEPTH_CNTL_Z_ENABLE; + + if (depthTestEnable) + cmd->state.rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_ENABLE; + + cmd->state.dirty |= TU_CMD_DIRTY_RB_DEPTH_CNTL; +} + +void +tu_CmdSetDepthWriteEnableEXT(VkCommandBuffer commandBuffer, + VkBool32 depthWriteEnable) +{ + TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + + cmd->state.rb_depth_cntl &= ~A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE; + + if (depthWriteEnable) + cmd->state.rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE; + + cmd->state.dirty |= TU_CMD_DIRTY_RB_DEPTH_CNTL; +} + +void +tu_CmdSetDepthCompareOpEXT(VkCommandBuffer commandBuffer, + VkCompareOp depthCompareOp) +{ + TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + + cmd->state.rb_depth_cntl &= ~A6XX_RB_DEPTH_CNTL_ZFUNC__MASK; + + cmd->state.rb_depth_cntl |= + A6XX_RB_DEPTH_CNTL_ZFUNC(tu6_compare_func(depthCompareOp)); + + cmd->state.dirty |= TU_CMD_DIRTY_RB_DEPTH_CNTL; +} + +void +tu_CmdSetDepthBoundsTestEnableEXT(VkCommandBuffer commandBuffer, + VkBool32 depthBoundsTestEnable) +{ + TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + + cmd->state.rb_depth_cntl &= ~A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE; + + if (depthBoundsTestEnable) + cmd->state.rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE; + + cmd->state.dirty |= 
TU_CMD_DIRTY_RB_DEPTH_CNTL; +} + +void +tu_CmdSetStencilTestEnableEXT(VkCommandBuffer commandBuffer, + VkBool32 stencilTestEnable) +{ + TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + + cmd->state.rb_stencil_cntl &= ~( + A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE | + A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF | + A6XX_RB_STENCIL_CONTROL_STENCIL_READ); + + if (stencilTestEnable) { + cmd->state.rb_stencil_cntl |= + A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE | + A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF | + A6XX_RB_STENCIL_CONTROL_STENCIL_READ; + } + + cmd->state.dirty |= TU_CMD_DIRTY_RB_STENCIL_CNTL; +} + +void +tu_CmdSetStencilOpEXT(VkCommandBuffer commandBuffer, + VkStencilFaceFlags faceMask, + VkStencilOp failOp, + VkStencilOp passOp, + VkStencilOp depthFailOp, + VkCompareOp compareOp) +{ + TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + + if (faceMask & VK_STENCIL_FACE_FRONT_BIT) { + cmd->state.rb_stencil_cntl &= ~( + A6XX_RB_STENCIL_CONTROL_FUNC__MASK | + A6XX_RB_STENCIL_CONTROL_FAIL__MASK | + A6XX_RB_STENCIL_CONTROL_ZPASS__MASK | + A6XX_RB_STENCIL_CONTROL_ZFAIL__MASK); + + cmd->state.rb_stencil_cntl |= + A6XX_RB_STENCIL_CONTROL_FUNC(tu6_compare_func(compareOp)) | + A6XX_RB_STENCIL_CONTROL_FAIL(tu6_stencil_op(failOp)) | + A6XX_RB_STENCIL_CONTROL_ZPASS(tu6_stencil_op(passOp)) | + A6XX_RB_STENCIL_CONTROL_ZFAIL(tu6_stencil_op(depthFailOp)); + } + + if (faceMask & VK_STENCIL_FACE_BACK_BIT) { + cmd->state.rb_stencil_cntl &= ~( + A6XX_RB_STENCIL_CONTROL_FUNC_BF__MASK | + A6XX_RB_STENCIL_CONTROL_FAIL_BF__MASK | + A6XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK | + A6XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK); + + cmd->state.rb_stencil_cntl |= + A6XX_RB_STENCIL_CONTROL_FUNC_BF(tu6_compare_func(compareOp)) | + A6XX_RB_STENCIL_CONTROL_FAIL_BF(tu6_stencil_op(failOp)) | + A6XX_RB_STENCIL_CONTROL_ZPASS_BF(tu6_stencil_op(passOp)) | + A6XX_RB_STENCIL_CONTROL_ZFAIL_BF(tu6_stencil_op(depthFailOp)); + } + + cmd->state.dirty |= TU_CMD_DIRTY_RB_STENCIL_CNTL; +} + static void 
tu_flush_for_access(struct tu_cache_state *cache, enum tu_cmd_access_mask src_mask, @@ -2902,6 +3120,30 @@ tu6_draw_common(struct tu_cmd_buffer *cmd, .tess_upper_left_domain_origin = pipeline->tess.upper_left_domain_origin)); + if (cmd->state.dirty & TU_CMD_DIRTY_GRAS_SU_CNTL) { + struct tu_cs cs = tu_cmd_dynamic_state(cmd, TU_DYNAMIC_STATE_GRAS_SU_CNTL, 2); + tu_cs_emit_regs(&cs, A6XX_GRAS_SU_CNTL(.dword = cmd->state.gras_su_cntl)); + } + + if (cmd->state.dirty & TU_CMD_DIRTY_RB_DEPTH_CNTL) { + struct tu_cs cs = tu_cmd_dynamic_state(cmd, TU_DYNAMIC_STATE_RB_DEPTH_CNTL, 2); + uint32_t rb_depth_cntl = cmd->state.rb_depth_cntl; + + if ((rb_depth_cntl & A6XX_RB_DEPTH_CNTL_Z_ENABLE) || + (rb_depth_cntl & A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE)) + rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE; + + if (pipeline->rb_depth_cntl_disable) + rb_depth_cntl = 0; + + tu_cs_emit_regs(&cs, A6XX_RB_DEPTH_CNTL(.dword = rb_depth_cntl)); + } + + if (cmd->state.dirty & TU_CMD_DIRTY_RB_STENCIL_CNTL) { + struct tu_cs cs = tu_cmd_dynamic_state(cmd, TU_DYNAMIC_STATE_RB_STENCIL_CNTL, 2); + tu_cs_emit_regs(&cs, A6XX_RB_STENCIL_CONTROL(.dword = cmd->state.rb_stencil_cntl)); + } + if (cmd->state.dirty & TU_CMD_DIRTY_SHADER_CONSTS) { cmd->state.shader_const[MESA_SHADER_VERTEX] = tu6_emit_consts(cmd, pipeline, descriptors_state, MESA_SHADER_VERTEX); @@ -2958,7 +3200,6 @@ tu6_draw_common(struct tu_cmd_buffer *cmd, tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VI, pipeline->vi.state); tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VI_BINNING, pipeline->vi.binning_state); tu_cs_emit_draw_state(cs, TU_DRAW_STATE_RAST, pipeline->rast_state); - tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DS, pipeline->ds_state); tu_cs_emit_draw_state(cs, TU_DRAW_STATE_BLEND, pipeline->blend_state); tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VS_CONST, cmd->state.shader_const[MESA_SHADER_VERTEX]); tu_cs_emit_draw_state(cs, TU_DRAW_STATE_HS_CONST, cmd->state.shader_const[MESA_SHADER_TESS_CTRL]); @@ -2980,6 +3221,7 @@ tu6_draw_common(struct 
tu_cmd_buffer *cmd, /* emit draw states that were just updated * note we eventually don't want to have to emit anything here */ + bool emit_binding_stride = false; uint32_t draw_state_count = has_tess + ((cmd->state.dirty & TU_CMD_DIRTY_SHADER_CONSTS) ? 5 : 0) + @@ -2987,6 +3229,12 @@ tu6_draw_common(struct tu_cmd_buffer *cmd, ((cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS) ? 1 : 0) + 1; /* vs_params */ + if ((cmd->state.dirty & TU_CMD_DIRTY_VB_STRIDE) && + !(pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_VB_STRIDE))) { + emit_binding_stride = true; + draw_state_count += 1; + } + tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * draw_state_count); /* We may need to re-emit tess consts if the current draw call is @@ -3004,6 +3252,10 @@ tu6_draw_common(struct tu_cmd_buffer *cmd, tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DESC_SETS_LOAD, pipeline->load_state); if (cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS) tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VB, cmd->state.vertex_buffers); + if (emit_binding_stride) { + tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DYNAMIC + TU_DYNAMIC_STATE_VB_STRIDE, + cmd->state.dynamic_state[TU_DYNAMIC_STATE_VB_STRIDE]); + } tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VS_PARAMS, cmd->state.vs_params); } @@ -3021,8 +3273,13 @@ static uint32_t tu_draw_initiator(struct tu_cmd_buffer *cmd, enum pc_di_src_sel src_sel) { const struct tu_pipeline *pipeline = cmd->state.pipeline; + enum pc_di_primtype primtype = pipeline->ia.primtype; + + if (pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY)) + primtype = cmd->state.primtype; + uint32_t initiator = - CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(pipeline->ia.primtype) | + CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(primtype) | CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(src_sel) | CP_DRAW_INDX_OFFSET_0_INDEX_SIZE(cmd->state.index_size) | CP_DRAW_INDX_OFFSET_0_VIS_CULL(USE_VISIBILITY); diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c index 9578153ec55..51b32a812d9 100644 --- 
a/src/freedreno/vulkan/tu_device.c +++ b/src/freedreno/vulkan/tu_device.c @@ -619,6 +619,11 @@ tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice, features->hostQueryReset = true; break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT: { + VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *features = (void *)ext; + features->extendedDynamicState = true; + break; + } default: break; } diff --git a/src/freedreno/vulkan/tu_extensions.py b/src/freedreno/vulkan/tu_extensions.py index c95167e4cbb..ef874430e66 100644 --- a/src/freedreno/vulkan/tu_extensions.py +++ b/src/freedreno/vulkan/tu_extensions.py @@ -95,6 +95,7 @@ EXTENSIONS = [ Extension('VK_KHR_multiview', 1, True), Extension('VK_EXT_host_query_reset', 1, True), Extension('VK_EXT_shader_viewport_index_layer', 1, True), + Extension('VK_EXT_extended_dynamic_state', 1, True), ] MAX_API_VERSION = VkVersion(MAX_API_VERSION) diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c index fe87c279b9c..b1913777ab9 100644 --- a/src/freedreno/vulkan/tu_pipeline.c +++ b/src/freedreno/vulkan/tu_pipeline.c @@ -1482,7 +1482,8 @@ tu6_emit_program(struct tu_cs *cs, } static void -tu6_emit_vertex_input(struct tu_cs *cs, +tu6_emit_vertex_input(struct tu_pipeline *pipeline, + struct tu_cs *cs, const struct ir3_shader_variant *vs, const VkPipelineVertexInputStateCreateInfo *info) { @@ -1494,8 +1495,10 @@ tu6_emit_vertex_input(struct tu_cs *cs, const VkVertexInputBindingDescription *binding = &info->pVertexBindingDescriptions[i]; - tu_cs_emit_regs(cs, - A6XX_VFD_FETCH_STRIDE(binding->binding, binding->stride)); + if (!(pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_VB_STRIDE))) { + tu_cs_emit_regs(cs, + A6XX_VFD_FETCH_STRIDE(binding->binding, binding->stride)); + } if (binding->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE) binding_instanced |= 1 << binding->binding; @@ -1717,7 +1720,8 @@ tu6_gras_su_cntl(const VkPipelineRasterizationStateCreateInfo *rast_info, if 
(rast_info->frontFace == VK_FRONT_FACE_CLOCKWISE) gras_su_cntl |= A6XX_GRAS_SU_CNTL_FRONT_CW; - /* don't set A6XX_GRAS_SU_CNTL_LINEHALFWIDTH */ + gras_su_cntl |= + A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(rast_info->lineWidth / 2.0f); if (rast_info->depthBiasEnable) gras_su_cntl |= A6XX_GRAS_SU_CNTL_POLY_OFFSET; @@ -1746,58 +1750,6 @@ tu6_emit_depth_bias(struct tu_cs *cs, tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(clamp).value); } -static void -tu6_emit_depth_control(struct tu_cs *cs, - const VkPipelineDepthStencilStateCreateInfo *ds_info, - const VkPipelineRasterizationStateCreateInfo *rast_info) -{ - uint32_t rb_depth_cntl = 0; - if (ds_info->depthTestEnable) { - rb_depth_cntl |= - A6XX_RB_DEPTH_CNTL_Z_ENABLE | - A6XX_RB_DEPTH_CNTL_ZFUNC(tu6_compare_func(ds_info->depthCompareOp)) | - A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE; /* TODO: don't set for ALWAYS/NEVER */ - - if (rast_info->depthClampEnable) - rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_CLAMP_ENABLE; - - if (ds_info->depthWriteEnable) - rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE; - } - - if (ds_info->depthBoundsTestEnable) - rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE | A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE; - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_DEPTH_CNTL, 1); - tu_cs_emit(cs, rb_depth_cntl); -} - -static void -tu6_emit_stencil_control(struct tu_cs *cs, - const VkPipelineDepthStencilStateCreateInfo *ds_info) -{ - uint32_t rb_stencil_control = 0; - if (ds_info->stencilTestEnable) { - const VkStencilOpState *front = &ds_info->front; - const VkStencilOpState *back = &ds_info->back; - rb_stencil_control |= - A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE | - A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF | - A6XX_RB_STENCIL_CONTROL_STENCIL_READ | - A6XX_RB_STENCIL_CONTROL_FUNC(tu6_compare_func(front->compareOp)) | - A6XX_RB_STENCIL_CONTROL_FAIL(tu6_stencil_op(front->failOp)) | - A6XX_RB_STENCIL_CONTROL_ZPASS(tu6_stencil_op(front->passOp)) | - A6XX_RB_STENCIL_CONTROL_ZFAIL(tu6_stencil_op(front->depthFailOp)) | - 
A6XX_RB_STENCIL_CONTROL_FUNC_BF(tu6_compare_func(back->compareOp)) | - A6XX_RB_STENCIL_CONTROL_FAIL_BF(tu6_stencil_op(back->failOp)) | - A6XX_RB_STENCIL_CONTROL_ZPASS_BF(tu6_stencil_op(back->passOp)) | - A6XX_RB_STENCIL_CONTROL_ZFAIL_BF(tu6_stencil_op(back->depthFailOp)); - } - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCIL_CONTROL, 1); - tu_cs_emit(cs, rb_stencil_control); -} - static uint32_t tu6_rb_mrt_blend_control(const VkPipelineColorBlendAttachmentState *att, bool has_alpha) @@ -2131,15 +2083,72 @@ tu_pipeline_builder_parse_dynamic(struct tu_pipeline_builder *builder, if (!dynamic_info) return; + pipeline->gras_su_cntl_mask = ~0u; + pipeline->rb_depth_cntl_mask = ~0u; + pipeline->rb_stencil_cntl_mask = ~0u; + for (uint32_t i = 0; i < dynamic_info->dynamicStateCount; i++) { VkDynamicState state = dynamic_info->pDynamicStates[i]; switch (state) { case VK_DYNAMIC_STATE_VIEWPORT ... VK_DYNAMIC_STATE_STENCIL_REFERENCE: + if (state == VK_DYNAMIC_STATE_LINE_WIDTH) + pipeline->gras_su_cntl_mask &= ~A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK; pipeline->dynamic_state_mask |= BIT(state); break; case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT: pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_SAMPLE_LOCATIONS); break; + case VK_DYNAMIC_STATE_CULL_MODE_EXT: + pipeline->gras_su_cntl_mask &= + ~(A6XX_GRAS_SU_CNTL_CULL_BACK | A6XX_GRAS_SU_CNTL_CULL_FRONT); + pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_GRAS_SU_CNTL); + break; + case VK_DYNAMIC_STATE_FRONT_FACE_EXT: + pipeline->gras_su_cntl_mask &= ~A6XX_GRAS_SU_CNTL_FRONT_CW; + pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_GRAS_SU_CNTL); + break; + case VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT: + pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY); + break; + case VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT: + pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_VB_STRIDE); + break; + case VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT_EXT: + pipeline->dynamic_state_mask |= BIT(VK_DYNAMIC_STATE_VIEWPORT); 
+ break; + case VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT_EXT: + pipeline->dynamic_state_mask |= BIT(VK_DYNAMIC_STATE_SCISSOR); + break; + case VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT: + pipeline->rb_depth_cntl_mask &= + ~(A6XX_RB_DEPTH_CNTL_Z_ENABLE | A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE); + pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RB_DEPTH_CNTL); + break; + case VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT: + pipeline->rb_depth_cntl_mask &= ~A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE; + pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RB_DEPTH_CNTL); + break; + case VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT: + pipeline->rb_depth_cntl_mask &= ~A6XX_RB_DEPTH_CNTL_ZFUNC__MASK; + pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RB_DEPTH_CNTL); + break; + case VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT: + pipeline->rb_depth_cntl_mask &= + ~(A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE | A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE); + pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RB_DEPTH_CNTL); + break; + case VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT: + pipeline->rb_stencil_cntl_mask &= ~(A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE | + A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF | + A6XX_RB_STENCIL_CONTROL_STENCIL_READ); + pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RB_STENCIL_CNTL); + break; + case VK_DYNAMIC_STATE_STENCIL_OP_EXT: + pipeline->rb_stencil_cntl_mask &= A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE | + A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF | + A6XX_RB_STENCIL_CONTROL_STENCIL_READ; + pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RB_STENCIL_CNTL); + break; default: assert(!"unsupported dynamic state"); break; @@ -2203,13 +2212,13 @@ tu_pipeline_builder_parse_vertex_input(struct tu_pipeline_builder *builder, struct tu_cs vi_cs; tu_cs_begin_sub_stream(&pipeline->cs, MAX_VERTEX_ATTRIBS * 7 + 2, &vi_cs); - tu6_emit_vertex_input(&vi_cs, vs, vi_info); + tu6_emit_vertex_input(pipeline, &vi_cs, vs, vi_info); pipeline->vi.state = tu_cs_end_draw_state(&pipeline->cs, &vi_cs); if 
(bs) { tu_cs_begin_sub_stream(&pipeline->cs, MAX_VERTEX_ATTRIBS * 7 + 2, &vi_cs); - tu6_emit_vertex_input(&vi_cs, bs, vi_info); + tu6_emit_vertex_input(pipeline, &vi_cs, bs, vi_info); pipeline->vi.binning_state = tu_cs_end_draw_state(&pipeline->cs, &vi_cs); } @@ -2249,6 +2258,8 @@ tu_pipeline_builder_parse_tessellation(struct tu_pipeline_builder *builder, if (!tess_info) return; + assert(!(pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY))); + assert(pipeline->ia.primtype == DI_PT_PATCHES0); assert(tess_info->patchControlPoints <= 32); pipeline->ia.primtype += tess_info->patchControlPoints; @@ -2332,11 +2343,8 @@ tu_pipeline_builder_parse_rasterization(struct tu_pipeline_builder *builder, pipeline->gras_su_cntl = tu6_gras_su_cntl(rast_info, builder->samples, builder->multiview_mask != 0); - if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_LINE_WIDTH, 2)) { - pipeline->gras_su_cntl |= - A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(rast_info->lineWidth / 2.0f); + if (tu_pipeline_static_state(pipeline, &cs, TU_DYNAMIC_STATE_GRAS_SU_CNTL, 2)) tu_cs_emit_regs(&cs, A6XX_GRAS_SU_CNTL(.dword = pipeline->gras_su_cntl)); - } if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_DEPTH_BIAS, 4)) { tu6_emit_depth_bias(&cs, rast_info->depthBiasConstantFactor, @@ -2357,26 +2365,79 @@ tu_pipeline_builder_parse_depth_stencil(struct tu_pipeline_builder *builder, * the pipeline has rasterization disabled or if the subpass of the * render pass the pipeline is created against does not use a * depth/stencil attachment. - * - * Disable both depth and stencil tests if there is no ds attachment, - * Disable depth test if ds attachment is S8_UINT, since S8_UINT defines - * only the separate stencil attachment */ - static const VkPipelineDepthStencilStateCreateInfo dummy_ds_info; const VkPipelineDepthStencilStateCreateInfo *ds_info = - builder->depth_attachment_format != VK_FORMAT_UNDEFINED - ? 
builder->create_info->pDepthStencilState - : &dummy_ds_info; - const VkPipelineDepthStencilStateCreateInfo *ds_info_depth = - builder->depth_attachment_format != VK_FORMAT_S8_UINT - ? ds_info : &dummy_ds_info; - + builder->create_info->pDepthStencilState; + const VkPipelineRasterizationStateCreateInfo *rast_info = + builder->create_info->pRasterizationState; + uint32_t rb_depth_cntl = 0, rb_stencil_cntl = 0; struct tu_cs cs; - pipeline->ds_state = tu_cs_draw_state(&pipeline->cs, &cs, 4); - tu6_emit_depth_control(&cs, ds_info_depth, - builder->create_info->pRasterizationState); - tu6_emit_stencil_control(&cs, ds_info); + if (builder->depth_attachment_format != VK_FORMAT_UNDEFINED && + builder->depth_attachment_format != VK_FORMAT_S8_UINT) { + if (ds_info->depthTestEnable) { + rb_depth_cntl |= + A6XX_RB_DEPTH_CNTL_Z_ENABLE | + A6XX_RB_DEPTH_CNTL_ZFUNC(tu6_compare_func(ds_info->depthCompareOp)) | + A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE; /* TODO: don't set for ALWAYS/NEVER */ + + if (rast_info->depthClampEnable) + rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_CLAMP_ENABLE; + + if (ds_info->depthWriteEnable) + rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE; + } + + if (ds_info->depthBoundsTestEnable) + rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE | A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE; + } else { + /* if RB_DEPTH_CNTL is set dynamically, we need to make sure it is set + * to 0 when this pipeline is used, as enabling depth test when there + * is no depth attachment is a problem (at least for the S8_UINT case) + */ + if (pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_RB_DEPTH_CNTL)) + pipeline->rb_depth_cntl_disable = true; + } + + if (builder->depth_attachment_format != VK_FORMAT_UNDEFINED) { + const VkStencilOpState *front = &ds_info->front; + const VkStencilOpState *back = &ds_info->back; + + rb_stencil_cntl |= + A6XX_RB_STENCIL_CONTROL_FUNC(tu6_compare_func(front->compareOp)) | + A6XX_RB_STENCIL_CONTROL_FAIL(tu6_stencil_op(front->failOp)) | + 
A6XX_RB_STENCIL_CONTROL_ZPASS(tu6_stencil_op(front->passOp)) | + A6XX_RB_STENCIL_CONTROL_ZFAIL(tu6_stencil_op(front->depthFailOp)) | + A6XX_RB_STENCIL_CONTROL_FUNC_BF(tu6_compare_func(back->compareOp)) | + A6XX_RB_STENCIL_CONTROL_FAIL_BF(tu6_stencil_op(back->failOp)) | + A6XX_RB_STENCIL_CONTROL_ZPASS_BF(tu6_stencil_op(back->passOp)) | + A6XX_RB_STENCIL_CONTROL_ZFAIL_BF(tu6_stencil_op(back->depthFailOp)); + + if (ds_info->stencilTestEnable) { + rb_stencil_cntl |= + A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE | + A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF | + A6XX_RB_STENCIL_CONTROL_STENCIL_READ; + } + } + + if (tu_pipeline_static_state(pipeline, &cs, TU_DYNAMIC_STATE_RB_DEPTH_CNTL, 2)) { + tu_cs_emit_pkt4(&cs, REG_A6XX_RB_DEPTH_CNTL, 1); + tu_cs_emit(&cs, rb_depth_cntl); + } else { + pipeline->rb_depth_cntl = rb_depth_cntl; + } + + if (tu_pipeline_static_state(pipeline, &cs, TU_DYNAMIC_STATE_RB_STENCIL_CNTL, 2)) { + tu_cs_emit_pkt4(&cs, REG_A6XX_RB_STENCIL_CONTROL, 1); + tu_cs_emit(&cs, rb_stencil_cntl); + } else { + pipeline->rb_stencil_cntl = rb_stencil_cntl; + } + + /* the remaining draw states aren't used if there is no d/s, leave them empty */ + if (builder->depth_attachment_format == VK_FORMAT_UNDEFINED) + return; if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_DEPTH_BOUNDS, 3)) { tu_cs_emit_regs(&cs, diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h index 61da0ee33ae..1a78c9a7fda 100644 --- a/src/freedreno/vulkan/tu_private.h +++ b/src/freedreno/vulkan/tu_private.h @@ -460,7 +460,14 @@ enum tu_dynamic_state { /* re-use VK_DYNAMIC_STATE_ enums for non-extended dynamic states */ TU_DYNAMIC_STATE_SAMPLE_LOCATIONS = VK_DYNAMIC_STATE_STENCIL_REFERENCE + 1, + TU_DYNAMIC_STATE_RB_DEPTH_CNTL, + TU_DYNAMIC_STATE_RB_STENCIL_CNTL, + TU_DYNAMIC_STATE_VB_STRIDE, TU_DYNAMIC_STATE_COUNT, + /* no associated draw state: */ + TU_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY = TU_DYNAMIC_STATE_COUNT, + /* re-use the line width enum as it uses 
GRAS_SU_CNTL: */ + TU_DYNAMIC_STATE_GRAS_SU_CNTL = VK_DYNAMIC_STATE_LINE_WIDTH, }; enum tu_draw_state_group_id @@ -472,7 +479,6 @@ enum tu_draw_state_group_id TU_DRAW_STATE_VI, TU_DRAW_STATE_VI_BINNING, TU_DRAW_STATE_RAST, - TU_DRAW_STATE_DS, TU_DRAW_STATE_BLEND, TU_DRAW_STATE_VS_CONST, TU_DRAW_STATE_HS_CONST, @@ -681,12 +687,18 @@ struct tu_descriptor_state enum tu_cmd_dirty_bits { - TU_CMD_DIRTY_VERTEX_BUFFERS = 1 << 2, - TU_CMD_DIRTY_DESC_SETS_LOAD = 1 << 3, - TU_CMD_DIRTY_COMPUTE_DESC_SETS_LOAD = 1 << 4, - TU_CMD_DIRTY_SHADER_CONSTS = 1 << 5, + TU_CMD_DIRTY_VERTEX_BUFFERS = BIT(0), + TU_CMD_DIRTY_VB_STRIDE = BIT(1), + TU_CMD_DIRTY_GRAS_SU_CNTL = BIT(2), + TU_CMD_DIRTY_RB_DEPTH_CNTL = BIT(3), + TU_CMD_DIRTY_RB_STENCIL_CNTL = BIT(4), + TU_CMD_DIRTY_DESC_SETS_LOAD = BIT(5), + TU_CMD_DIRTY_COMPUTE_DESC_SETS_LOAD = BIT(6), + TU_CMD_DIRTY_SHADER_CONSTS = BIT(7), /* all draw states were disabled and need to be re-enabled: */ - TU_CMD_DIRTY_DRAW_STATE = 1 << 7, + TU_CMD_DIRTY_DRAW_STATE = BIT(8) + + }; /* There are only three cache domains we have to care about: the CCU, or @@ -852,6 +864,7 @@ struct tu_cmd_state struct { uint64_t base; uint32_t size; + uint32_t stride; } vb[MAX_VBS]; VkViewport viewport[MAX_VIEWPORTS]; VkRect2D scissor[MAX_SCISSORS]; @@ -861,7 +874,9 @@ struct tu_cmd_state uint32_t dynamic_stencil_mask; uint32_t dynamic_stencil_wrmask; uint32_t dynamic_stencil_ref; - uint32_t dynamic_gras_su_cntl; + + uint32_t gras_su_cntl, rb_depth_cntl, rb_stencil_cntl; + enum pc_di_primtype primtype; /* saved states to re-emit in TU_CMD_DIRTY_DRAW_STATE case */ struct tu_draw_state dynamic_state[TU_DYNAMIC_STATE_COUNT]; @@ -1062,11 +1077,15 @@ struct tu_pipeline uint32_t dynamic_state_mask; struct tu_draw_state dynamic_state[TU_DYNAMIC_STATE_COUNT]; - /* gras_su_cntl without line width, used for dynamic line width state */ - uint32_t gras_su_cntl; + /* for dynamic states which use the same register: */ + uint32_t gras_su_cntl, gras_su_cntl_mask; + uint32_t 
rb_depth_cntl, rb_depth_cntl_mask; + uint32_t rb_stencil_cntl, rb_stencil_cntl_mask; + + bool rb_depth_cntl_disable; /* draw states for the pipeline */ - struct tu_draw_state load_state, rast_state, ds_state, blend_state; + struct tu_draw_state load_state, rast_state, blend_state; /* for vertex buffers state */ uint32_t num_vbs;