diff --git a/src/freedreno/ci/freedreno-a618-fails.txt b/src/freedreno/ci/freedreno-a618-fails.txt
index 175d752b7f7..57f079acb34 100644
--- a/src/freedreno/ci/freedreno-a618-fails.txt
+++ b/src/freedreno/ci/freedreno-a618-fails.txt
@@ -362,7 +362,6 @@ wayland-dEQP-EGL.functional.wide_color.window_fp16_default_colorspace,Fail
 SRGBReadWritePixels,Fail
 
 # New CTS failures in 1.3.5.0
-dEQP-VK.pipeline.fast_linked_library.misc.interpolate_at_sample_no_sample_shading,Crash
 dEQP-VK.transform_feedback.simple.lines_or_triangles_line_strip_1,Fail
 dEQP-VK.transform_feedback.simple.lines_or_triangles_line_strip_3,Fail
 dEQP-VK.transform_feedback.simple.lines_or_triangles_triangle_strip_1,Fail
diff --git a/src/freedreno/ci/freedreno-a630-fails.txt b/src/freedreno/ci/freedreno-a630-fails.txt
index 37641551e05..3bc8a814b22 100644
--- a/src/freedreno/ci/freedreno-a630-fails.txt
+++ b/src/freedreno/ci/freedreno-a630-fails.txt
@@ -371,7 +371,6 @@ SRGBReadWritePixels,Fail
 spec@!opengl 1.1@line-smooth-stipple,Fail
 
 # New CTS failures in 1.3.5.0
-dEQP-VK.pipeline.fast_linked_library.misc.interpolate_at_sample_no_sample_shading,Crash
 dEQP-VK.transform_feedback.simple.lines_or_triangles_line_strip_1,Fail
 dEQP-VK.transform_feedback.simple.lines_or_triangles_line_strip_3,Fail
 dEQP-VK.transform_feedback.simple.lines_or_triangles_triangle_strip_1,Fail
diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc
index 51cda50842a..c56609a0cbd 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.cc
+++ b/src/freedreno/vulkan/tu_cmd_buffer.cc
@@ -636,32 +636,29 @@ tu6_update_msaa(struct tu_cmd_buffer *cmd)
    struct tu_cs cs;
 
    cmd->state.msaa = tu_cs_draw_state(&cmd->sub_cs, &cs, 9);
-   tu6_emit_msaa(&cs, cmd->state.samples, cmd->state.msaa_disable);
+   tu6_emit_msaa(&cs, cmd->vk.dynamic_graphics_state.ms.rasterization_samples,
+                 cmd->state.msaa_disable);
    if (!(cmd->state.dirty & TU_CMD_DIRTY_DRAW_STATE)) {
       tu_cs_emit_pkt7(&cmd->draw_cs, CP_SET_DRAW_STATE, 3);
       tu_cs_emit_draw_state(&cmd->draw_cs, TU_DRAW_STATE_MSAA, cmd->state.msaa);
    }
 }
 
-static void
-tu6_update_msaa_samples(struct tu_cmd_buffer *cmd, VkSampleCountFlagBits samples)
-{
-   if (cmd->state.samples != samples) {
-      cmd->state.samples = samples;
-      cmd->state.dirty |= TU_CMD_DIRTY_FS_PARAMS;
-      tu6_update_msaa(cmd);
-   }
-}
-
 static void
 tu6_update_msaa_disable(struct tu_cmd_buffer *cmd)
 {
+   VkPrimitiveTopology topology =
+      (VkPrimitiveTopology)cmd->vk.dynamic_graphics_state.ia.primitive_topology;
    bool is_line =
-      tu6_primtype_line(cmd->state.primtype) ||
-      (tu6_primtype_patches(cmd->state.primtype) &&
+      topology == VK_PRIMITIVE_TOPOLOGY_LINE_LIST ||
+      topology == VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY ||
+      topology == VK_PRIMITIVE_TOPOLOGY_LINE_STRIP ||
+      topology == VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY ||
+      (topology == VK_PRIMITIVE_TOPOLOGY_PATCH_LIST &&
        cmd->state.pipeline &&
       cmd->state.pipeline->base.tess.patch_type == IR3_TESS_ISOLINES);
-   bool msaa_disable = is_line && cmd->state.line_mode == BRESENHAM;
+   bool msaa_disable = is_line &&
+      cmd->vk.dynamic_graphics_state.rs.line.mode == VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT;
 
    if (cmd->state.msaa_disable != msaa_disable) {
       cmd->state.msaa_disable = msaa_disable;
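[Annotation] tu6_update_msaa_samples() disappears because the sample count now lives in the vk_dynamic_graphics_state embedded in the command buffer, and the Bresenham check reads rs.line.mode from the same place. A minimal sketch of the consumption pattern the common runtime expects (the helper name is hypothetical; BITSET_TEST and the MESA_VK_DYNAMIC_* enums come from util/bitset.h and vk_graphics_state.h, which this patch already uses):

   /* Values in cmd->vk.dynamic_graphics_state are always current; the dirty
    * bitset only records what changed since the driver last consumed it. */
   static bool
   tu_line_mode_is_bresenham(struct tu_cmd_buffer *cmd)
   {
      const struct vk_dynamic_graphics_state *dyn = &cmd->vk.dynamic_graphics_state;
      if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_MODE)) {
         /* hardware state depending on the line mode must be re-emitted */
      }
      return dyn->rs.line.mode == VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT;
   }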
@@ -1488,11 +1485,8 @@ tu_emit_renderpass_begin(struct tu_cmd_buffer *cmd,
    * be correctly added to the per-renderpass patchpoint list, even if they
    * are the same as before.
    */
-   if (cmd->state.pass->has_fdm) {
-      cmd->state.dirty |=
-         TU_CMD_DIRTY_VIEWPORTS | TU_CMD_DIRTY_SCISSORS |
-         TU_CMD_DIRTY_FS_PARAMS;
-   }
+   if (cmd->state.pass->has_fdm)
+      cmd->state.dirty |= TU_CMD_DIRTY_FDM;
 }
 
 static void
@@ -1736,11 +1730,8 @@ tu_cmd_render_tiles(struct tu_cmd_buffer *cmd,
    * via the patchpoints, so we need to re-emit them if they are reused for a
    * later render pass.
    */
-   if (cmd->state.pass->has_fdm) {
-      cmd->state.dirty |=
-         TU_CMD_DIRTY_VIEWPORTS | TU_CMD_DIRTY_SCISSORS |
-         TU_CMD_DIRTY_FS_PARAMS;
-   }
+   if (cmd->state.pass->has_fdm)
+      cmd->state.dirty |= TU_CMD_DIRTY_FDM;
 
    /* tu6_render_tile has cloned these tracepoints for each tile */
    if (!u_trace_iterator_equal(cmd->trace_renderpass_start, cmd->trace_renderpass_end))
@@ -1967,8 +1958,10 @@ tu_cmd_buffer_begin(struct tu_cmd_buffer *cmd_buffer,
    vk_command_buffer_begin(&cmd_buffer->vk, pBeginInfo);
 
    memset(&cmd_buffer->state, 0, sizeof(cmd_buffer->state));
+   cmd_buffer->vk.dynamic_graphics_state = vk_default_dynamic_graphics_state;
+   cmd_buffer->vk.dynamic_graphics_state.vi = &cmd_buffer->state.vi;
+   cmd_buffer->vk.dynamic_graphics_state.ms.sample_locations = &cmd_buffer->state.sl;
    cmd_buffer->state.index_size = 0xff; /* dirty restart index */
-   cmd_buffer->state.line_mode = RECTANGULAR;
    cmd_buffer->state.gmem_layout = TU_GMEM_LAYOUT_COUNT; /* dirty value */
 
    tu_cache_init(&cmd_buffer->state.cache);
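[Annotation] vk_default_dynamic_graphics_state only covers the state the common struct embeds by value; vertex input and sample locations are pointer-backed, which is why tu_cmd_buffer_begin() points them at per-command-buffer storage before first use. An illustrative sketch of the assumed backing fields (the field names state.vi and state.sl come from this patch; the surrounding layout is a guess):

   /* Illustrative: storage tu_cmd_state is assumed to provide for the two
    * pointer-backed dynamic-state groups. */
   struct vk_vertex_input_state vi;      /* -> vk.dynamic_graphics_state.vi */
   struct vk_sample_locations_state sl;  /* -> vk.dynamic_graphics_state.ms.sample_locations */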
@@ -2047,6 +2040,10 @@ tu_BeginCommandBuffer(VkCommandBuffer commandBuffer,
          cmd_buffer->state.subpass =
             &cmd_buffer->state.pass->subpasses[pBeginInfo->pInheritanceInfo->subpass];
       }
 
+      tu_fill_render_pass_state(&cmd_buffer->state.vk_rp,
+                                cmd_buffer->state.pass,
+                                cmd_buffer->state.subpass);
+      cmd_buffer->state.dirty |= TU_CMD_DIRTY_SUBPASS;
 
       cmd_buffer->patchpoints_ctx = ralloc_parent(NULL);
@@ -2067,19 +2064,6 @@ tu_BeginCommandBuffer(VkCommandBuffer commandBuffer,
    return VK_SUCCESS;
 }
 
-static void
-tu6_emit_vertex_strides(struct tu_cmd_buffer *cmd, unsigned num_vbs)
-{
-   struct tu_cs cs;
-   cmd->state.dynamic_state[TU_DYNAMIC_STATE_VB_STRIDE].iova =
-      tu_cs_draw_state(&cmd->sub_cs, &cs, 2 * num_vbs).iova;
-
-   for (uint32_t i = 0; i < num_vbs; i++)
-      tu_cs_emit_regs(&cs, A6XX_VFD_FETCH_STRIDE(i, cmd->state.vb[i].stride));
-
-   cmd->state.dirty |= TU_CMD_DIRTY_VB_STRIDE;
-}
-
 static struct tu_cs
 tu_cmd_dynamic_state(struct tu_cmd_buffer *cmd, uint32_t id, uint32_t size)
 {
@@ -2117,55 +2101,6 @@ tu_cmd_end_dynamic_state(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
    tu_cs_emit_draw_state(&cmd->draw_cs, TU_DRAW_STATE_DYNAMIC + id, cmd->state.dynamic_state[id]);
 }
 
-static void
-tu_update_num_vbs(struct tu_cmd_buffer *cmd, unsigned num_vbs)
-{
-   /* the vertex_buffers draw state always contains all the currently
-    * bound vertex buffers. update its size to only emit the vbs which
-    * are actually used by the pipeline
-    * note there is a HW optimization which makes it so the draw state
-    * is not re-executed completely when only the size changes
-    */
-   if (cmd->state.vertex_buffers.size != num_vbs * 4) {
-      cmd->state.vertex_buffers.size = num_vbs * 4;
-      cmd->state.dirty |= TU_CMD_DIRTY_VERTEX_BUFFERS;
-   }
-
-   if (cmd->state.dynamic_state[TU_DYNAMIC_STATE_VB_STRIDE].size != num_vbs * 2) {
-      cmd->state.dynamic_state[TU_DYNAMIC_STATE_VB_STRIDE].size = num_vbs * 2;
-      cmd->state.dirty |= TU_CMD_DIRTY_VB_STRIDE;
-   }
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetVertexInputEXT(VkCommandBuffer commandBuffer,
-                        uint32_t vertexBindingDescriptionCount,
-                        const VkVertexInputBindingDescription2EXT *pVertexBindingDescriptions,
-                        uint32_t vertexAttributeDescriptionCount,
-                        const VkVertexInputAttributeDescription2EXT *pVertexAttributeDescriptions)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-   struct tu_cs cs;
-
-   unsigned num_vbs = 0;
-   for (unsigned i = 0; i < vertexBindingDescriptionCount; i++) {
-      const VkVertexInputBindingDescription2EXT *binding =
-         &pVertexBindingDescriptions[i];
-      num_vbs = MAX2(num_vbs, binding->binding + 1);
-      cmd->state.vb[binding->binding].stride = binding->stride;
-   }
-
-   tu6_emit_vertex_strides(cmd, num_vbs);
-   tu_update_num_vbs(cmd, num_vbs);
-
-   tu_cs_begin_sub_stream(&cmd->sub_cs, TU6_EMIT_VERTEX_INPUT_MAX_DWORDS, &cs);
-   tu6_emit_vertex_input(&cs, vertexBindingDescriptionCount,
-                         pVertexBindingDescriptions,
-                         vertexAttributeDescriptionCount,
-                         pVertexAttributeDescriptions);
-   tu_cmd_end_dynamic_state(cmd, &cs, TU_DYNAMIC_STATE_VERTEX_INPUT);
-}
-
 VKAPI_ATTR void VKAPI_CALL
 tu_CmdBindVertexBuffers2EXT(VkCommandBuffer commandBuffer,
                             uint32_t firstBinding,
@@ -2181,6 +2116,11 @@ tu_CmdBindVertexBuffers2EXT(VkCommandBuffer commandBuffer,
    cmd->state.max_vbs_bound = MAX2(
       cmd->state.max_vbs_bound, firstBinding + bindingCount);
 
+   if (pStrides) {
+      vk_cmd_set_vertex_binding_strides(&cmd->vk, firstBinding, bindingCount,
+                                        pStrides);
+   }
+
    cmd->state.vertex_buffers.iova =
       tu_cs_draw_state(&cmd->sub_cs, &cs, 4 * cmd->state.max_vbs_bound).iova;
 
@@ -2193,9 +2133,6 @@ tu_CmdBindVertexBuffers2EXT(VkCommandBuffer commandBuffer,
          cmd->state.vb[firstBinding + i].base = buf->iova + pOffsets[i];
          cmd->state.vb[firstBinding + i].size = pSizes ? pSizes[i] : (buf->vk.size - pOffsets[i]);
       }
-
-      if (pStrides)
-         cmd->state.vb[firstBinding + i].stride = pStrides[i];
    }
 
    for (uint32_t i = 0; i < cmd->state.max_vbs_bound; i++) {
@@ -2205,9 +2142,6 @@ tu_CmdBindVertexBuffers2EXT(VkCommandBuffer commandBuffer,
    }
 
    cmd->state.dirty |= TU_CMD_DIRTY_VERTEX_BUFFERS;
-
-   if (pStrides)
-      tu6_emit_vertex_strides(cmd, cmd->state.max_vbs_bound);
 }
 
 VKAPI_ATTR void VKAPI_CALL
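[Annotation] After this hunk the driver no longer mirrors strides in cmd->state.vb[]; vk_cmd_set_vertex_binding_strides() records them in the common dynamic state, where the VFD_FETCH_STRIDE emission can pick them up later. A sketch of the read-back side (hypothetical helper; vi_binding_strides is the field the common runtime is assumed to populate, and vi_bindings_valid is used later in this patch):

   /* Sketch: where a stride set through vkCmdBindVertexBuffers2 ends up. */
   static uint32_t
   tu_binding_stride(struct tu_cmd_buffer *cmd, uint32_t binding)
   {
      assert(cmd->vk.dynamic_graphics_state.vi_bindings_valid & BIT(binding));
      return cmd->vk.dynamic_graphics_state.vi_binding_strides[binding];
   }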
@@ -2850,10 +2784,13 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
    cmd->state.pipeline = tu_pipeline_to_graphics(pipeline);
    cmd->state.dirty |= TU_CMD_DIRTY_DESC_SETS | TU_CMD_DIRTY_SHADER_CONSTS |
-                      TU_CMD_DIRTY_LRZ | TU_CMD_DIRTY_VS_PARAMS |
-                      TU_CMD_DIRTY_FS_PARAMS;
+                      TU_CMD_DIRTY_VS_PARAMS | TU_CMD_DIRTY_LRZ |
+                      TU_CMD_DIRTY_PIPELINE;
 
-   if (pipeline->output.feedback_loop_may_involve_textures &&
+   vk_cmd_set_dynamic_graphics_state(&cmd->vk,
+                                     &cmd->state.pipeline->dynamic_state);
+
+   if (cmd->state.pipeline->feedback_loop_may_involve_textures &&
       !cmd->state.rp.disable_gmem) {
       /* VK_EXT_attachment_feedback_loop_layout allows feedback loop to involve
        * not only input attachments but also sampled images or image resources.
@@ -2878,8 +2815,8 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
 
    if (pipeline->prim_order.sysmem_single_prim_mode &&
       !cmd->state.rp.sysmem_single_prim_mode) {
-      if (pipeline->output.subpass_feedback_loop_color ||
-          pipeline->output.subpass_feedback_loop_ds) {
+      if (cmd->state.pipeline->feedback_loop_color ||
+          cmd->state.pipeline->feedback_loop_ds) {
         perf_debug(cmd->device, "single_prim_mode due to feedback loop");
      } else {
         perf_debug(cmd->device, "single_prim_mode due to rast order access");
@@ -2887,13 +2824,19 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
       cmd->state.rp.sysmem_single_prim_mode = true;
    }
 
+   if (pipeline->lrz.blend_valid)
+      cmd->state.blend_reads_dest = pipeline->lrz.lrz_status & TU_LRZ_READS_DEST;
+
+   if (pipeline->bandwidth.valid)
+      cmd->state.bandwidth = pipeline->bandwidth;
+
    struct tu_cs *cs = &cmd->draw_cs;
 
    /* note: this also avoids emitting draw states before renderpass clears,
    * which may use the 3D clear path (for MSAA cases)
    */
    if (!(cmd->state.dirty & TU_CMD_DIRTY_DRAW_STATE)) {
-      uint32_t mask = ~pipeline->dynamic_state_mask & BITFIELD_MASK(TU_DYNAMIC_STATE_COUNT);
+      uint32_t mask = pipeline->set_state_mask;
 
       tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * (5 + util_bitcount(mask)));
       tu_cs_emit_draw_state(cs, TU_DRAW_STATE_PROGRAM_CONFIG, pipeline->program.config_state);
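[Annotation] On the packet sizing: tu_cs_emit_draw_state() writes three dwords per draw-state group (consistent with the "CP_SET_DRAW_STATE, 3" single-group packet in tu6_update_msaa above), and five groups are emitted unconditionally here (only TU_DRAW_STATE_PROGRAM_CONFIG is visible in this hunk), so the payload is 3 * (5 + util_bitcount(mask)) dwords; e.g. a pipeline that statically owns 12 dynamic-state groups gives 3 * (5 + 12) = 51 dwords. Note that set_state_mask replaces the old inverted dynamic_state_mask: it directly names the groups the pipeline emits itself rather than the ones left dynamic.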
@@ -2908,168 +2851,13 @@
    if (pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) {
       cmd->state.rp.has_tess = true;
-
-      if (!(pipeline->dynamic_state_mask &
-            BIT(TU_DYNAMIC_STATE_PATCH_CONTROL_POINTS))) {
-         cmd->state.patch_control_points = pipeline->tess.patch_control_points;
-         cmd->state.dirty &= ~TU_CMD_DIRTY_PATCH_CONTROL_POINTS;
-      } else {
-         cmd->state.dirty |= TU_CMD_DIRTY_PATCH_CONTROL_POINTS;
-      }
-   }
-
-   if (!(pipeline->dynamic_state_mask &
-         BIT(TU_DYNAMIC_STATE_LINE_MODE)))
-      cmd->state.line_mode = pipeline->rast.line_mode;
-
-   if (!(pipeline->dynamic_state_mask &
-         BIT(TU_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY)))
-      cmd->state.primtype = pipeline->ia.primtype;
-
-   if (!(pipeline->dynamic_state_mask &
-         BIT(TU_DYNAMIC_STATE_POLYGON_MODE)))
-      cmd->state.polygon_mode = pipeline->rast.polygon_mode;
-
-   if (!(pipeline->dynamic_state_mask &
-         BIT(TU_DYNAMIC_STATE_TESS_DOMAIN_ORIGIN)))
-      cmd->state.tess_upper_left_domain_origin =
-         pipeline->tess.upper_left_domain_origin;
-
-   if (!(pipeline->dynamic_state_mask &
-         BIT(TU_DYNAMIC_STATE_PROVOKING_VTX)))
-      cmd->state.provoking_vertex_last = pipeline->rast.provoking_vertex_last;
-
-   tu6_update_msaa_disable(cmd);
-
-   if (!(pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_MSAA_SAMPLES)))
-      tu6_update_msaa_samples(cmd, pipeline->output.samples);
-
-   if ((pipeline->dynamic_state_mask & BIT(VK_DYNAMIC_STATE_VIEWPORT)) &&
-       !(pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_VIEWPORT_RANGE)) &&
-       (pipeline->viewport.z_negative_one_to_one != cmd->state.z_negative_one_to_one)) {
-      cmd->state.z_negative_one_to_one = pipeline->viewport.z_negative_one_to_one;
-      cmd->state.dirty |= TU_CMD_DIRTY_VIEWPORTS;
-   }
-
-   if (pipeline->viewport.set_dynamic_vp_to_static) {
-      memcpy(cmd->state.viewport, pipeline->viewport.viewports,
-             pipeline->viewport.num_viewports *
-             sizeof(pipeline->viewport.viewports[0]));
-
-      cmd->state.viewport_count = pipeline->viewport.num_viewports;
-      cmd->state.dirty |= TU_CMD_DIRTY_VIEWPORTS;
-   }
-
-   if (pipeline->viewport.set_dynamic_scissor_to_static) {
-      memcpy(cmd->state.scissor, pipeline->viewport.scissors,
-             pipeline->viewport.num_viewports *
-             sizeof(pipeline->viewport.scissors[0]));
-
-      cmd->state.scissor_count = pipeline->viewport.num_scissors;
-      cmd->state.dirty |= TU_CMD_DIRTY_SCISSORS;
-   }
-
-   if ((pipeline->dynamic_state_mask & BIT(VK_DYNAMIC_STATE_VIEWPORT)) &&
-       !(pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_VIEWPORT_COUNT)) &&
-       cmd->state.viewport_count != pipeline->viewport.num_viewports) {
-      cmd->state.viewport_count = pipeline->viewport.num_viewports;
-      cmd->state.dirty |= TU_CMD_DIRTY_VIEWPORTS;
-   }
-
-   if ((pipeline->dynamic_state_mask & BIT(VK_DYNAMIC_STATE_SCISSOR)) &&
-       !(pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_SCISSOR_COUNT)) &&
-       cmd->state.scissor_count != pipeline->viewport.num_scissors) {
-      cmd->state.scissor_count = pipeline->viewport.num_scissors;
-      cmd->state.dirty |= TU_CMD_DIRTY_SCISSORS;
    }
 
    if (pipeline->viewport.per_view_viewport != cmd->state.per_view_viewport) {
       cmd->state.per_view_viewport = pipeline->viewport.per_view_viewport;
-      if (pipeline->dynamic_state_mask & BIT(VK_DYNAMIC_STATE_VIEWPORT))
-         cmd->state.dirty |= TU_CMD_DIRTY_VIEWPORTS;
-      if (pipeline->dynamic_state_mask & BIT(VK_DYNAMIC_STATE_SCISSOR))
-         cmd->state.dirty |= TU_CMD_DIRTY_SCISSORS;
+      cmd->state.dirty |= TU_CMD_DIRTY_PER_VIEW_VIEWPORT;
    }
 
-   if (!(pipeline->dynamic_state_mask & BIT(VK_DYNAMIC_STATE_VIEWPORT)))
-      cmd->state.dirty &= ~TU_CMD_DIRTY_VIEWPORTS;
-
-   if (!(pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_VERTEX_INPUT)))
-      tu_update_num_vbs(cmd, pipeline->vi.num_vbs);
-
-#define UPDATE_REG(group, X, Y) {                                         \
-   /* note: would be better to have pipeline bits already masked */       \
-   uint32_t pipeline_bits = pipeline->group.X & pipeline->group.X##_mask; \
-   if ((cmd->state.X & pipeline->group.X##_mask) != pipeline_bits) {      \
-      cmd->state.X &= ~pipeline->group.X##_mask;                          \
-      cmd->state.X |= pipeline_bits;                                      \
-      cmd->state.dirty |= TU_CMD_DIRTY_##Y;                               \
-   }                                                                      \
-   if (!(pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_##Y)))       \
-      cmd->state.dirty &= ~TU_CMD_DIRTY_##Y;                              \
-}
-
-   /* these registers can have bits set from both pipeline and dynamic state
-    * this updates the bits set by the pipeline
-    * if the pipeline doesn't use a dynamic state for the register, then
-    * the relevant dirty bit is cleared to avoid overriding the non-dynamic
-    * state with a dynamic state the next draw.
-    */
-   UPDATE_REG(rast, gras_su_cntl, RAST);
-   UPDATE_REG(rast, gras_cl_cntl, RAST);
-   UPDATE_REG(rast_ds, rb_depth_cntl, DS);
-   UPDATE_REG(ds, rb_stencil_cntl, DS);
-   UPDATE_REG(rast, pc_raster_cntl, PC_RASTER_CNTL);
-   UPDATE_REG(rast, vpc_unknown_9107, PC_RASTER_CNTL);
-   UPDATE_REG(blend, sp_blend_cntl, BLEND);
-   UPDATE_REG(blend, rb_blend_cntl, BLEND);
-
-   for (unsigned i = 0; i < pipeline->blend.num_rts; i++) {
-      if ((cmd->state.rb_mrt_control[i] & pipeline->blend.rb_mrt_control_mask) !=
-          pipeline->blend.rb_mrt_control[i]) {
-         cmd->state.rb_mrt_control[i] &= ~pipeline->blend.rb_mrt_control_mask;
-         cmd->state.rb_mrt_control[i] |= pipeline->blend.rb_mrt_control[i];
-         cmd->state.dirty |= TU_CMD_DIRTY_BLEND;
-      }
-
-      if (!(pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_BLEND_EQUATION)) &&
-          cmd->state.rb_mrt_blend_control[i] != pipeline->blend.rb_mrt_blend_control[i]) {
-         cmd->state.rb_mrt_blend_control[i] = pipeline->blend.rb_mrt_blend_control[i];
-         cmd->state.dirty |= TU_CMD_DIRTY_BLEND;
-      }
-   }
-#undef UPDATE_REG
-
-   if (cmd->state.pipeline_color_write_enable != pipeline->blend.color_write_enable) {
-      cmd->state.pipeline_color_write_enable = pipeline->blend.color_write_enable;
-      cmd->state.dirty |= TU_CMD_DIRTY_BLEND;
-   }
-   if (!(pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_BLEND_ENABLE)) &&
-       cmd->state.blend_enable != pipeline->blend.blend_enable) {
-      cmd->state.blend_enable = pipeline->blend.blend_enable;
-      cmd->state.dirty |= TU_CMD_DIRTY_BLEND;
-   }
-   if (!(pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_LOGIC_OP_ENABLE)) &&
-       cmd->state.logic_op_enabled != pipeline->blend.logic_op_enabled) {
-      cmd->state.logic_op_enabled = pipeline->blend.logic_op_enabled;
-      cmd->state.dirty |= TU_CMD_DIRTY_BLEND;
-   }
-   if (!(pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_LOGIC_OP)) &&
-       cmd->state.rb_mrt_control_rop != pipeline->blend.rb_mrt_control_rop) {
-      cmd->state.rb_mrt_control_rop = pipeline->blend.rb_mrt_control_rop;
-      cmd->state.rop_reads_dst = pipeline->blend.rop_reads_dst;
-      cmd->state.dirty |= TU_CMD_DIRTY_BLEND;
-   }
-   if (cmd->state.dynamic_state[TU_DYNAMIC_STATE_BLEND].size != pipeline->blend.num_rts * 3 + 4) {
-      cmd->state.dirty |= TU_CMD_DIRTY_BLEND;
-   }
-   if (!(pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_BLEND))) {
-      cmd->state.dirty &= ~TU_CMD_DIRTY_BLEND;
-   }
-
-   if (pipeline->output.rb_depth_cntl_disable)
-      cmd->state.dirty |= TU_CMD_DIRTY_DS;
-
    if (pipeline->active_stages & MESA_SHADER_TESS_CTRL) {
       if (!cmd->state.tess_params.valid ||
           cmd->state.tess_params.output_upper_left !=
@@ -3088,683 +2876,6 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
    }
 }
 
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetViewport(VkCommandBuffer commandBuffer,
-                  uint32_t firstViewport,
-                  uint32_t viewportCount,
-                  const VkViewport *pViewports)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-
-   memcpy(&cmd->state.viewport[firstViewport], pViewports, viewportCount * sizeof(*pViewports));
-
-   /* With VK_EXT_depth_clip_control we have to take into account
-    * negativeOneToOne property of the pipeline, so the viewport calculations
-    * are deferred until it is known.
-    */
-   cmd->state.dirty |= TU_CMD_DIRTY_VIEWPORTS;
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetScissor(VkCommandBuffer commandBuffer,
-                 uint32_t firstScissor,
-                 uint32_t scissorCount,
-                 const VkRect2D *pScissors)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-
-   memcpy(&cmd->state.scissor[firstScissor], pScissors, scissorCount * sizeof(*pScissors));
-
-   cmd->state.dirty |= TU_CMD_DIRTY_SCISSORS;
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-
-   cmd->state.gras_su_cntl &= ~A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK;
-   cmd->state.gras_su_cntl |= A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(lineWidth / 2.0f);
-
-   cmd->state.dirty |= TU_CMD_DIRTY_RAST;
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetDepthBias(VkCommandBuffer commandBuffer,
-                   float depthBiasConstantFactor,
-                   float depthBiasClamp,
-                   float depthBiasSlopeFactor)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-   struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_DEPTH_BIAS, 4);
-
-   tu6_emit_depth_bias(&cs, depthBiasConstantFactor, depthBiasClamp, depthBiasSlopeFactor);
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetBlendConstants(VkCommandBuffer commandBuffer,
-                        const float blendConstants[4])
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-   struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_BLEND_CONSTANTS, 5);
-
-   tu_cs_emit_pkt4(&cs, REG_A6XX_RB_BLEND_RED_F32, 4);
-   tu_cs_emit_array(&cs, (const uint32_t *) blendConstants, 4);
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetDepthBounds(VkCommandBuffer commandBuffer,
-                     float minDepthBounds,
-                     float maxDepthBounds)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-   struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_DEPTH_BOUNDS, 3);
-
-   tu_cs_emit_regs(&cs,
-                   A6XX_RB_Z_BOUNDS_MIN(minDepthBounds),
-                   A6XX_RB_Z_BOUNDS_MAX(maxDepthBounds));
-}
-
-void
-update_stencil_mask(uint32_t *value, VkStencilFaceFlags face, uint32_t mask)
-{
-   if (face & VK_STENCIL_FACE_FRONT_BIT)
-      *value = (*value & 0xff00) | (mask & 0xff);
-   if (face & VK_STENCIL_FACE_BACK_BIT)
-      *value = (*value & 0xff) | (mask & 0xff) << 8;
-}
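[Annotation] For reference, a worked example of the packing the removed update_stencil_mask() helper implemented (front mask in bits 0-7, back mask in bits 8-15 of one register value):

   uint32_t v = 0;
   update_stencil_mask(&v, VK_STENCIL_FACE_FRONT_BIT, 0xaa);      /* v == 0x00aa */
   update_stencil_mask(&v, VK_STENCIL_FACE_BACK_BIT, 0x33);       /* v == 0x33aa */
   update_stencil_mask(&v, VK_STENCIL_FACE_FRONT_AND_BACK, 0xff); /* v == 0xffff */

The same split is what the common vk_depth_stencil_state now tracks as separate front/back fields, so this hand-rolled packing becomes unnecessary.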
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer,
-                            VkStencilFaceFlags faceMask,
-                            uint32_t compareMask)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-   struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, 2);
-
-   update_stencil_mask(&cmd->state.dynamic_stencil_mask, faceMask, compareMask);
-
-   tu_cs_emit_regs(&cs, A6XX_RB_STENCILMASK(.dword = cmd->state.dynamic_stencil_mask));
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer,
-                          VkStencilFaceFlags faceMask,
-                          uint32_t writeMask)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-   struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, 2);
-
-   update_stencil_mask(&cmd->state.dynamic_stencil_wrmask, faceMask, writeMask);
-
-   tu_cs_emit_regs(&cs, A6XX_RB_STENCILWRMASK(.dword = cmd->state.dynamic_stencil_wrmask));
-
-   cmd->state.dirty |= TU_CMD_DIRTY_LRZ;
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetStencilReference(VkCommandBuffer commandBuffer,
-                          VkStencilFaceFlags faceMask,
-                          uint32_t reference)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-   struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_STENCIL_REFERENCE, 2);
-
-   update_stencil_mask(&cmd->state.dynamic_stencil_ref, faceMask, reference);
-
-   tu_cs_emit_regs(&cs,
                    A6XX_RB_STENCILREF(.dword = cmd->state.dynamic_stencil_ref));
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetSampleLocationsEXT(VkCommandBuffer commandBuffer,
-                            const VkSampleLocationsInfoEXT* pSampleLocationsInfo)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-   struct tu_cs cs = tu_cmd_dynamic_state(cmd, TU_DYNAMIC_STATE_SAMPLE_LOCATIONS, 6);
-
-   assert(pSampleLocationsInfo);
-
-   tu6_emit_sample_locations(&cs, pSampleLocationsInfo);
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetSampleLocationsEnableEXT(VkCommandBuffer commandBuffer,
-                                  VkBool32 sampleLocationsEnable)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-
-   struct tu_cs cs = tu_cmd_dynamic_state(cmd, TU_DYNAMIC_STATE_SAMPLE_LOCATIONS_ENABLE, 6);
-
-   tu6_emit_sample_locations_enable(&cs, sampleLocationsEnable);
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetCullModeEXT(VkCommandBuffer commandBuffer, VkCullModeFlags cullMode)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-
-   cmd->state.gras_su_cntl &=
-      ~(A6XX_GRAS_SU_CNTL_CULL_FRONT | A6XX_GRAS_SU_CNTL_CULL_BACK);
-
-   if (cullMode & VK_CULL_MODE_FRONT_BIT)
-      cmd->state.gras_su_cntl |= A6XX_GRAS_SU_CNTL_CULL_FRONT;
-   if (cullMode & VK_CULL_MODE_BACK_BIT)
-      cmd->state.gras_su_cntl |= A6XX_GRAS_SU_CNTL_CULL_BACK;
-
-   cmd->state.dirty |= TU_CMD_DIRTY_RAST;
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetFrontFaceEXT(VkCommandBuffer commandBuffer, VkFrontFace frontFace)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-
-   cmd->state.gras_su_cntl &= ~A6XX_GRAS_SU_CNTL_FRONT_CW;
-
-   if (frontFace == VK_FRONT_FACE_CLOCKWISE)
-      cmd->state.gras_su_cntl |= A6XX_GRAS_SU_CNTL_FRONT_CW;
-
-   cmd->state.dirty |= TU_CMD_DIRTY_RAST;
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetPrimitiveTopologyEXT(VkCommandBuffer commandBuffer,
-                              VkPrimitiveTopology primitiveTopology)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-
-   cmd->state.primtype = tu6_primtype(primitiveTopology);
-   tu6_update_msaa_disable(cmd);
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetViewportWithCountEXT(VkCommandBuffer commandBuffer,
-                              uint32_t viewportCount,
-                              const VkViewport* pViewports)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-
-   memcpy(cmd->state.viewport, pViewports, viewportCount * sizeof(*pViewports));
-   cmd->state.viewport_count = viewportCount;
-
-   cmd->state.dirty |= TU_CMD_DIRTY_VIEWPORTS;
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetScissorWithCountEXT(VkCommandBuffer commandBuffer,
-                             uint32_t scissorCount,
-                             const VkRect2D* pScissors)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-
-   memcpy(cmd->state.scissor, pScissors, scissorCount * sizeof(*pScissors));
-   cmd->state.scissor_count = scissorCount;
-
-   cmd->state.dirty |= TU_CMD_DIRTY_SCISSORS;
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetDepthTestEnableEXT(VkCommandBuffer commandBuffer,
-                            VkBool32 depthTestEnable)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-
-   cmd->state.rb_depth_cntl &= ~A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE;
-
-   if (depthTestEnable)
-      cmd->state.rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE;
-
-   cmd->state.dirty |= TU_CMD_DIRTY_DS;
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetDepthWriteEnableEXT(VkCommandBuffer commandBuffer,
-                             VkBool32 depthWriteEnable)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-
-   cmd->state.rb_depth_cntl &= ~A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE;
-
-   if (depthWriteEnable)
-      cmd->state.rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE;
-
-   cmd->state.dirty |= TU_CMD_DIRTY_DS;
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetDepthCompareOpEXT(VkCommandBuffer commandBuffer,
-                           VkCompareOp depthCompareOp)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-
-   cmd->state.rb_depth_cntl &= ~A6XX_RB_DEPTH_CNTL_ZFUNC__MASK;
-
-   cmd->state.rb_depth_cntl |=
-      A6XX_RB_DEPTH_CNTL_ZFUNC(tu6_compare_func(depthCompareOp));
-
-   cmd->state.dirty |= TU_CMD_DIRTY_DS;
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetDepthBoundsTestEnableEXT(VkCommandBuffer commandBuffer,
-                                  VkBool32 depthBoundsTestEnable)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-
-   cmd->state.rb_depth_cntl &= ~A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE;
-
-   if (depthBoundsTestEnable)
-      cmd->state.rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE;
-
-   cmd->state.dirty |= TU_CMD_DIRTY_DS;
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetStencilTestEnableEXT(VkCommandBuffer commandBuffer,
-                              VkBool32 stencilTestEnable)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-
-   cmd->state.rb_stencil_cntl &= ~(
-      A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
-      A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
-      A6XX_RB_STENCIL_CONTROL_STENCIL_READ);
-
-   if (stencilTestEnable) {
-      cmd->state.rb_stencil_cntl |=
-         A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
-         A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
-         A6XX_RB_STENCIL_CONTROL_STENCIL_READ;
-   }
-
-   cmd->state.dirty |= TU_CMD_DIRTY_DS;
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetStencilOpEXT(VkCommandBuffer commandBuffer,
-                      VkStencilFaceFlags faceMask,
-                      VkStencilOp failOp,
-                      VkStencilOp passOp,
-                      VkStencilOp depthFailOp,
-                      VkCompareOp compareOp)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-
-   if (faceMask & VK_STENCIL_FACE_FRONT_BIT) {
-      cmd->state.rb_stencil_cntl &= ~(
-         A6XX_RB_STENCIL_CONTROL_FUNC__MASK |
-         A6XX_RB_STENCIL_CONTROL_FAIL__MASK |
-         A6XX_RB_STENCIL_CONTROL_ZPASS__MASK |
-         A6XX_RB_STENCIL_CONTROL_ZFAIL__MASK);
-
-      cmd->state.rb_stencil_cntl |=
-         A6XX_RB_STENCIL_CONTROL_FUNC(tu6_compare_func(compareOp)) |
-         A6XX_RB_STENCIL_CONTROL_FAIL(tu6_stencil_op(failOp)) |
-         A6XX_RB_STENCIL_CONTROL_ZPASS(tu6_stencil_op(passOp)) |
-         A6XX_RB_STENCIL_CONTROL_ZFAIL(tu6_stencil_op(depthFailOp));
-   }
-
-   if (faceMask & VK_STENCIL_FACE_BACK_BIT) {
-      cmd->state.rb_stencil_cntl &= ~(
-         A6XX_RB_STENCIL_CONTROL_FUNC_BF__MASK |
-         A6XX_RB_STENCIL_CONTROL_FAIL_BF__MASK |
-         A6XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK |
-         A6XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK);
-
-      cmd->state.rb_stencil_cntl |=
-         A6XX_RB_STENCIL_CONTROL_FUNC_BF(tu6_compare_func(compareOp)) |
-         A6XX_RB_STENCIL_CONTROL_FAIL_BF(tu6_stencil_op(failOp)) |
-         A6XX_RB_STENCIL_CONTROL_ZPASS_BF(tu6_stencil_op(passOp)) |
-         A6XX_RB_STENCIL_CONTROL_ZFAIL_BF(tu6_stencil_op(depthFailOp));
-   }
-
-   cmd->state.dirty |= TU_CMD_DIRTY_DS;
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetDepthBiasEnableEXT(VkCommandBuffer commandBuffer,
-                            VkBool32 depthBiasEnable)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-
-   cmd->state.gras_su_cntl &= ~A6XX_GRAS_SU_CNTL_POLY_OFFSET;
-   if (depthBiasEnable)
-      cmd->state.gras_su_cntl |= A6XX_GRAS_SU_CNTL_POLY_OFFSET;
-
-   cmd->state.dirty |= TU_CMD_DIRTY_RAST;
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetPrimitiveRestartEnableEXT(VkCommandBuffer commandBuffer,
-                                   VkBool32 primitiveRestartEnable)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-
-   cmd->state.primitive_restart_enable = primitiveRestartEnable;
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetRasterizerDiscardEnableEXT(VkCommandBuffer commandBuffer,
-                                    VkBool32 rasterizerDiscardEnable)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-
-   cmd->state.pc_raster_cntl &= ~A6XX_PC_RASTER_CNTL_DISCARD;
-   cmd->state.vpc_unknown_9107 &= ~A6XX_VPC_UNKNOWN_9107_RASTER_DISCARD;
-   if (rasterizerDiscardEnable) {
-      cmd->state.pc_raster_cntl |= A6XX_PC_RASTER_CNTL_DISCARD;
-      cmd->state.vpc_unknown_9107 |= A6XX_VPC_UNKNOWN_9107_RASTER_DISCARD;
-   }
-
-   cmd->state.dirty |= TU_CMD_DIRTY_PC_RASTER_CNTL;
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetLogicOpEXT(VkCommandBuffer commandBuffer,
-                    VkLogicOp logicOp)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-
-   cmd->state.rb_mrt_control_rop =
-      tu6_rb_mrt_control_rop(logicOp, &cmd->state.rop_reads_dst);
-
-   cmd->state.dirty |= TU_CMD_DIRTY_BLEND;
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetLogicOpEnableEXT(VkCommandBuffer commandBuffer,
-                          VkBool32 logicOpEnable)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-
-   cmd->state.logic_op_enabled = logicOpEnable;
-
-   cmd->state.dirty |= TU_CMD_DIRTY_BLEND;
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetPatchControlPointsEXT(VkCommandBuffer commandBuffer,
-                               uint32_t patchControlPoints)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-
-   cmd->state.patch_control_points = patchControlPoints;
-
-   cmd->state.dirty |= TU_CMD_DIRTY_PATCH_CONTROL_POINTS;
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetLineStippleEXT(VkCommandBuffer commandBuffer,
-                        uint32_t lineStippleFactor,
-                        uint16_t lineStipplePattern)
-{
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetColorWriteEnableEXT(VkCommandBuffer commandBuffer, uint32_t attachmentCount,
-                             const VkBool32 *pColorWriteEnables)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-   uint32_t color_write_enable = 0;
-
-   for (unsigned i = 0; i < attachmentCount; i++) {
-      if (pColorWriteEnables[i])
-         color_write_enable |= BIT(i);
-   }
-
-   cmd->state.color_write_enable = color_write_enable;
-   cmd->state.dirty |= TU_CMD_DIRTY_BLEND;
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetPolygonModeEXT(VkCommandBuffer commandBuffer,
-                        VkPolygonMode polygonMode)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-   cmd->state.polygon_mode = tu6_polygon_mode(polygonMode);
-   cmd->state.dirty |= TU_CMD_DIRTY_RAST;
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetTessellationDomainOriginEXT(VkCommandBuffer commandBuffer,
-                                     VkTessellationDomainOrigin domainOrigin)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-   cmd->state.tess_upper_left_domain_origin =
-      domainOrigin == VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT;
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetDepthClipEnableEXT(VkCommandBuffer commandBuffer,
-                            VkBool32 depthClipEnable)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-   cmd->state.gras_cl_cntl =
-      (cmd->state.gras_cl_cntl & ~(A6XX_GRAS_CL_CNTL_ZNEAR_CLIP_DISABLE |
-                                   A6XX_GRAS_CL_CNTL_ZFAR_CLIP_DISABLE)) |
-      COND(!depthClipEnable,
-           A6XX_GRAS_CL_CNTL_ZNEAR_CLIP_DISABLE |
-           A6XX_GRAS_CL_CNTL_ZFAR_CLIP_DISABLE);
-   cmd->state.dirty |= TU_CMD_DIRTY_RAST;
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetDepthClampEnableEXT(VkCommandBuffer commandBuffer,
-                             VkBool32 depthClampEnable)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-   cmd->state.gras_cl_cntl =
-      (cmd->state.gras_cl_cntl & ~A6XX_GRAS_CL_CNTL_Z_CLAMP_ENABLE) |
-      COND(depthClampEnable, A6XX_GRAS_CL_CNTL_Z_CLAMP_ENABLE);
-   cmd->state.rb_depth_cntl =
-      (cmd->state.rb_depth_cntl & ~A6XX_RB_DEPTH_CNTL_Z_CLAMP_ENABLE) |
-      COND(depthClampEnable, A6XX_RB_DEPTH_CNTL_Z_CLAMP_ENABLE);
-   cmd->state.dirty |= TU_CMD_DIRTY_RAST | TU_CMD_DIRTY_DS;
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetSampleMaskEXT(VkCommandBuffer commandBuffer,
-                       VkSampleCountFlagBits samples,
-                       const VkSampleMask *pSampleMask)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-
-   cmd->state.rb_blend_cntl =
-      (cmd->state.rb_blend_cntl & ~A6XX_RB_BLEND_CNTL_SAMPLE_MASK__MASK) |
-      A6XX_RB_BLEND_CNTL_SAMPLE_MASK(*pSampleMask & 0xffff);
-
-   cmd->state.dirty |= TU_CMD_DIRTY_BLEND;
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetRasterizationSamplesEXT(VkCommandBuffer commandBuffer,
-                                 VkSampleCountFlagBits rasterizationSamples)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-
-   tu6_update_msaa_samples(cmd, rasterizationSamples);
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetAlphaToCoverageEnableEXT(VkCommandBuffer commandBuffer,
-                                  VkBool32 alphaToCoverageEnable)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-
-   cmd->state.alpha_to_coverage = alphaToCoverageEnable;
-   cmd->state.rb_blend_cntl =
-      (cmd->state.rb_blend_cntl & ~A6XX_RB_BLEND_CNTL_ALPHA_TO_COVERAGE) |
-      COND(alphaToCoverageEnable, A6XX_RB_BLEND_CNTL_ALPHA_TO_COVERAGE);
-   cmd->state.sp_blend_cntl =
-      (cmd->state.sp_blend_cntl & ~A6XX_RB_BLEND_CNTL_ALPHA_TO_COVERAGE) |
-      COND(alphaToCoverageEnable, A6XX_RB_BLEND_CNTL_ALPHA_TO_COVERAGE);
-
-   cmd->state.dirty |= TU_CMD_DIRTY_BLEND;
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetAlphaToOneEnableEXT(VkCommandBuffer commandBuffer,
-                             VkBool32 alphaToOneEnable)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-
-   cmd->state.rb_blend_cntl =
-      (cmd->state.rb_blend_cntl & ~A6XX_RB_BLEND_CNTL_ALPHA_TO_ONE) |
-      COND(alphaToOneEnable, A6XX_RB_BLEND_CNTL_ALPHA_TO_ONE);
-
-   cmd->state.dirty |= TU_CMD_DIRTY_BLEND;
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetDepthClipNegativeOneToOneEXT(VkCommandBuffer commandBuffer,
-                                      VkBool32 negativeOneToOne)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-
-   cmd->state.gras_cl_cntl =
-      (cmd->state.gras_cl_cntl & ~A6XX_GRAS_CL_CNTL_ZERO_GB_SCALE_Z) |
-      COND(!negativeOneToOne, A6XX_GRAS_CL_CNTL_ZERO_GB_SCALE_Z);
-   cmd->state.z_negative_one_to_one = negativeOneToOne;
-
-   cmd->state.dirty |= TU_CMD_DIRTY_RAST | TU_CMD_DIRTY_VIEWPORTS;
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetRasterizationStreamEXT(VkCommandBuffer commandBuffer,
-                                uint32_t rasterizationStream)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-
-   cmd->state.pc_raster_cntl =
-      (cmd->state.pc_raster_cntl & ~A6XX_PC_RASTER_CNTL_STREAM__MASK) |
-      A6XX_PC_RASTER_CNTL_STREAM(rasterizationStream);
-
-   cmd->state.dirty |= TU_CMD_DIRTY_PC_RASTER_CNTL;
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetLineRasterizationModeEXT(VkCommandBuffer commandBuffer,
-                                  VkLineRasterizationModeEXT lineRasterizationMode)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-
-   cmd->state.line_mode = lineRasterizationMode ==
-      VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT ? BRESENHAM : RECTANGULAR;
-
-   tu6_update_msaa_disable(cmd);
-
-   cmd->state.gras_su_cntl =
-      (cmd->state.gras_su_cntl & ~A6XX_GRAS_SU_CNTL_LINE_MODE__MASK) |
-      A6XX_GRAS_SU_CNTL_LINE_MODE(cmd->state.line_mode);
-
-   cmd->state.dirty |= TU_CMD_DIRTY_RAST;
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetProvokingVertexModeEXT(VkCommandBuffer commandBuffer,
-                                VkProvokingVertexModeEXT provokingVertexMode)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-
-   cmd->state.provoking_vertex_last =
-      provokingVertexMode == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT;
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetColorBlendEnableEXT(VkCommandBuffer commandBuffer,
-                             uint32_t firstAttachment,
-                             uint32_t attachmentCount,
-                             const VkBool32 *pColorBlendEnables)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-
-   for (unsigned i = 0; i < attachmentCount; i++) {
-      unsigned att = i + firstAttachment;
-      cmd->state.blend_enable =
-         (cmd->state.blend_enable & ~BIT(att)) |
-         COND(pColorBlendEnables[i], BIT(att));
-      const uint32_t blend_enable =
-         A6XX_RB_MRT_CONTROL_BLEND | A6XX_RB_MRT_CONTROL_BLEND2;
-      cmd->state.rb_mrt_control[i] =
-         (cmd->state.rb_mrt_control[i] & ~blend_enable) |
-         COND(pColorBlendEnables[i], blend_enable);
-   }
-
-   cmd->state.dirty |= TU_CMD_DIRTY_BLEND;
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetColorBlendEquationEXT(VkCommandBuffer commandBuffer,
-                               uint32_t firstAttachment,
-                               uint32_t attachmentCount,
-                               const VkColorBlendEquationEXT *pColorBlendEquation)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-
-   for (unsigned i = 0; i < attachmentCount; i++) {
-      unsigned att = i + firstAttachment;
-      const VkColorBlendEquationEXT *equation = &pColorBlendEquation[i];
-
-      const enum a3xx_rb_blend_opcode color_op = tu6_blend_op(equation->colorBlendOp);
-      const enum adreno_rb_blend_factor src_color_factor =
-         tu6_blend_factor(equation->srcColorBlendFactor);
-      const enum adreno_rb_blend_factor dst_color_factor =
-         tu6_blend_factor(equation->dstColorBlendFactor);
-      const enum a3xx_rb_blend_opcode alpha_op = tu6_blend_op(equation->alphaBlendOp);
-      const enum adreno_rb_blend_factor src_alpha_factor =
-         tu6_blend_factor(equation->srcAlphaBlendFactor);
-      const enum adreno_rb_blend_factor dst_alpha_factor =
-         tu6_blend_factor(equation->dstAlphaBlendFactor);
-
-      cmd->state.rb_mrt_blend_control[att] = A6XX_RB_MRT_BLEND_CONTROL(0,
-         .rgb_src_factor = src_color_factor,
-         .rgb_blend_opcode = color_op,
-         .rgb_dest_factor = dst_color_factor,
-         .alpha_src_factor = src_alpha_factor,
-         .alpha_blend_opcode = alpha_op,
-         .alpha_dest_factor = dst_alpha_factor).value;
-
-      /* Dual-src blend can only be on attachment 0, so we don't need to worry
-       * about OR'ing together the state from multiple attachments and can set
-       * DUAL_COLOR_IN_ENABLE right here.
-       */
-      if (att == 0) {
-         bool dual_src_blend =
-            tu_blend_factor_is_dual_src(equation->srcColorBlendFactor) ||
-            tu_blend_factor_is_dual_src(equation->dstColorBlendFactor) ||
-            tu_blend_factor_is_dual_src(equation->srcAlphaBlendFactor) ||
-            tu_blend_factor_is_dual_src(equation->dstAlphaBlendFactor);
-         cmd->state.sp_blend_cntl =
-            (cmd->state.sp_blend_cntl & ~A6XX_SP_BLEND_CNTL_DUAL_COLOR_IN_ENABLE) |
-            COND(dual_src_blend, A6XX_SP_BLEND_CNTL_DUAL_COLOR_IN_ENABLE);
-         cmd->state.rb_blend_cntl =
-            (cmd->state.rb_blend_cntl & ~A6XX_RB_BLEND_CNTL_DUAL_COLOR_IN_ENABLE) |
-            COND(dual_src_blend, A6XX_RB_BLEND_CNTL_DUAL_COLOR_IN_ENABLE);
-      }
-   }
-
-   cmd->state.dirty |= TU_CMD_DIRTY_BLEND;
-}
-
-VKAPI_ATTR void VKAPI_CALL
-tu_CmdSetColorWriteMaskEXT(VkCommandBuffer commandBuffer,
-                           uint32_t firstAttachment,
-                           uint32_t attachmentCount,
-                           const VkColorComponentFlags *pColorWriteMasks)
-{
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-
-   for (unsigned i = 0; i < attachmentCount; i++) {
-      unsigned att = i + firstAttachment;
-      cmd->state.rb_mrt_control[att] =
-         (cmd->state.rb_mrt_control[att] &
-          ~A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK) |
-         A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE(pColorWriteMasks[i]);
-   }
-
-   cmd->state.dirty |= TU_CMD_DIRTY_BLEND;
-}
-
 static void
 tu_flush_for_access(struct tu_cache_state *cache,
                     enum tu_cmd_access_mask src_mask,
@@ -4520,13 +3631,14 @@ tu_subpass_barrier(struct tu_cmd_buffer *cmd_buffer,
 static void
 tu_emit_subpass_begin(struct tu_cmd_buffer *cmd)
 {
+   tu_fill_render_pass_state(&cmd->state.vk_rp, cmd->state.pass, cmd->state.subpass);
    tu6_emit_zs(cmd, cmd->state.subpass, &cmd->draw_cs);
    tu6_emit_mrt(cmd, cmd->state.subpass, &cmd->draw_cs);
-   if (cmd->state.subpass->samples != 0)
-      tu6_update_msaa_samples(cmd, cmd->state.subpass->samples);
    tu6_emit_render_cntl(cmd, cmd->state.subpass, &cmd->draw_cs, false);
 
    tu_set_input_attachments(cmd, cmd->state.subpass);
+
+   cmd->state.dirty |= TU_CMD_DIRTY_SUBPASS;
 }
 
 VKAPI_ATTR void VKAPI_CALL
@@ -4998,8 +4110,9 @@ tu6_emit_consts(struct tu_cmd_buffer *cmd,
 static void
 tu6_update_simplified_stencil_state(struct tu_cmd_buffer *cmd)
 {
-   bool stencil_test_enable =
-      cmd->state.rb_stencil_cntl & A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE;
+   const struct vk_depth_stencil_state *ds =
+      &cmd->vk.dynamic_graphics_state.ds;
+   bool stencil_test_enable = ds->stencil.test_enable;
 
    if (!stencil_test_enable) {
       cmd->state.stencil_front_write = false;
@@ -5007,28 +4120,15 @@ tu6_update_simplified_stencil_state(struct tu_cmd_buffer *cmd)
       return;
    }
 
-   bool stencil_front_writemask =
-      (cmd->state.pipeline->base.dynamic_state_mask & BIT(VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) ?
-      (cmd->state.dynamic_stencil_wrmask & 0xff) :
-      (cmd->state.pipeline->base.ds.stencil_wrmask & 0xff);
+   bool stencil_front_writemask = ds->stencil.front.write_mask;
+   bool stencil_back_writemask = ds->stencil.back.write_mask;
 
-   bool stencil_back_writemask =
-      (cmd->state.pipeline->base.dynamic_state_mask & BIT(VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) ?
-      ((cmd->state.dynamic_stencil_wrmask & 0xff00) >> 8) :
-      (cmd->state.pipeline->base.ds.stencil_wrmask & 0xff00) >> 8;
-
-   VkStencilOp front_fail_op = (VkStencilOp)
-      ((cmd->state.rb_stencil_cntl & A6XX_RB_STENCIL_CONTROL_FAIL__MASK) >> A6XX_RB_STENCIL_CONTROL_FAIL__SHIFT);
-   VkStencilOp front_pass_op = (VkStencilOp)
-      ((cmd->state.rb_stencil_cntl & A6XX_RB_STENCIL_CONTROL_ZPASS__MASK) >> A6XX_RB_STENCIL_CONTROL_ZPASS__SHIFT);
-   VkStencilOp front_depth_fail_op = (VkStencilOp)
-      ((cmd->state.rb_stencil_cntl & A6XX_RB_STENCIL_CONTROL_ZFAIL__MASK) >> A6XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT);
-   VkStencilOp back_fail_op = (VkStencilOp)
-      ((cmd->state.rb_stencil_cntl & A6XX_RB_STENCIL_CONTROL_FAIL_BF__MASK) >> A6XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT);
-   VkStencilOp back_pass_op = (VkStencilOp)
-      ((cmd->state.rb_stencil_cntl & A6XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK) >> A6XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT);
-   VkStencilOp back_depth_fail_op = (VkStencilOp)
-      ((cmd->state.rb_stencil_cntl & A6XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK) >> A6XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT);
+   VkStencilOp front_fail_op = (VkStencilOp)ds->stencil.front.op.fail;
+   VkStencilOp front_pass_op = (VkStencilOp)ds->stencil.front.op.pass;
+   VkStencilOp front_depth_fail_op = (VkStencilOp)ds->stencil.front.op.depth_fail;
+   VkStencilOp back_fail_op = (VkStencilOp)ds->stencil.back.op.fail;
+   VkStencilOp back_pass_op = (VkStencilOp)ds->stencil.back.op.pass;
+   VkStencilOp back_depth_fail_op = (VkStencilOp)ds->stencil.back.op.depth_fail;
 
    bool stencil_front_op_writes =
       front_pass_op != VK_STENCIL_OP_KEEP ||
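[Annotation] The rest of tu6_update_simplified_stencil_state() is cut off by the diff context. From the visible half, the per-face reduction it feeds is, as a sketch: a face can write stencil only when the test is enabled, its write mask is non-zero, and at least one of its three ops is not KEEP (variable names taken from the function; the exact tail is an assumption):

   bool stencil_front_op_writes = front_pass_op != VK_STENCIL_OP_KEEP ||
                                  front_fail_op != VK_STENCIL_OP_KEEP ||
                                  front_depth_fail_op != VK_STENCIL_OP_KEEP;
   cmd->state.stencil_front_write = stencil_test_enable &&
                                    stencil_front_writemask &&
                                    stencil_front_op_writes;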
+      /* alpha-to-coverage can behave like a discard. */
+      cmd->vk.dynamic_graphics_state.ms.alpha_to_coverage_enable;
    if ((force_late_z && !cmd->state.pipeline->base.lrz.fs.force_early_z) ||
       !depth_test_enable)
      zmode = A6XX_LATE_Z;
@@ -5103,193 +4200,6 @@ tu6_build_depth_plane_z_mode(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
    tu_cs_emit(cs, A6XX_RB_DEPTH_PLANE_CNTL_Z_MODE(zmode));
 }
 
-static void
-tu6_emit_blend(struct tu_cs *cs, struct tu_cmd_buffer *cmd)
-{
-   struct tu_pipeline *pipeline = &cmd->state.pipeline->base;
-   uint32_t color_write_enable = cmd->state.pipeline_color_write_enable;
-
-   if (pipeline->dynamic_state_mask &
-       BIT(TU_DYNAMIC_STATE_COLOR_WRITE_ENABLE))
-      color_write_enable &= cmd->state.color_write_enable;
-
-   unsigned num_rts = pipeline->blend.num_rts;
-   if (num_rts == 0 &&
-       (pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_ALPHA_TO_COVERAGE)) &&
-       cmd->state.alpha_to_coverage) {
-      num_rts = 1;
-   }
-
-   for (unsigned i = 0; i < num_rts; i++) {
-      tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_CONTROL(i), 2);
-      if (color_write_enable & BIT(i)) {
-         tu_cs_emit(cs, cmd->state.rb_mrt_control[i] |
-                        (cmd->state.logic_op_enabled ?
-                         cmd->state.rb_mrt_control_rop : 0));
-         tu_cs_emit(cs, cmd->state.rb_mrt_blend_control[i]);
-      } else {
-         tu_cs_emit(cs, 0);
-         tu_cs_emit(cs, 0);
-      }
-   }
-
-   uint32_t blend_enable_mask = color_write_enable;
-   if (!(cmd->state.logic_op_enabled && cmd->state.rop_reads_dst))
-      blend_enable_mask &= cmd->state.blend_enable;
-
-   tu_cs_emit_regs(cs, A6XX_SP_FS_OUTPUT_CNTL1(.mrt = num_rts));
-   tu_cs_emit_regs(cs, A6XX_RB_FS_OUTPUT_CNTL1(.mrt = num_rts));
-   tu_cs_emit_pkt4(cs, REG_A6XX_SP_BLEND_CNTL, 1);
-   tu_cs_emit(cs, cmd->state.sp_blend_cntl |
-                  (A6XX_SP_BLEND_CNTL_ENABLE_BLEND(blend_enable_mask) &
-                   ~pipeline->blend.sp_blend_cntl_mask));
-
-   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLEND_CNTL, 1);
-   tu_cs_emit(cs, cmd->state.rb_blend_cntl |
-                  (A6XX_RB_BLEND_CNTL_ENABLE_BLEND(blend_enable_mask) &
-                   ~pipeline->blend.rb_blend_cntl_mask));
-}
-
-struct apply_viewport_state {
-   VkViewport viewports[MAX_VIEWPORTS];
-   unsigned num_viewports;
-   bool z_negative_one_to_one;
-   bool share_scale;
-};
-
-/* It's a hardware restriction that the window offset (i.e. bin.offset) must
- * be the same for all views. This means that GMEM coordinates cannot be a
- * simple scaling of framebuffer coordinates, because this would require us to
- * scale the window offset and the scale may be different per view. Instead we
- * have to apply a per-bin offset to the GMEM coordinate transform to make
- * sure that the window offset maps to itself. Specifically we need an offset
- * o to the transform:
- *
- * x' = s * x + o
- *
- * so that when we plug in the bin start b_s:
- *
- * b_s = s * b_s + o
- *
- * and we get:
- *
- * o = b_s - s * b_s
- *
- * We use this form exactly, because we know the bin offset is a multiple of
- * the frag area so s * b_s is an integer and we can compute an exact result
- * easily.
- */
-
-static VkOffset2D
-fdm_per_bin_offset(VkExtent2D frag_area, VkRect2D bin)
-{
-   assert(bin.offset.x % frag_area.width == 0);
-   assert(bin.offset.y % frag_area.height == 0);
-
-   return (VkOffset2D) {
-      bin.offset.x - bin.offset.x / frag_area.width,
-      bin.offset.y - bin.offset.y / frag_area.height
-   };
-}
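[Annotation] A worked example of the removed helper's math (which survives as tu_fdm_per_bin_offset(), per a later hunk): with frag_area = 2x2 and bin.offset = (128, 96), s = 1/2 and o = (128 - 128/2, 96 - 96/2) = (64, 48). Plugging the bin start back into the transform gives x' = 0.5 * 128 + 64 = 128, so the window offset maps to itself exactly as the derivation requires, and the integer division is exact because the bin offset is a multiple of the fragment area.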
-
-static void
-fdm_apply_viewports(struct tu_cs *cs, void *data, VkRect2D bin, unsigned views,
-                    VkExtent2D *frag_areas)
-{
-   VkViewport viewports[MAX_VIEWPORTS];
-   const struct apply_viewport_state *state =
-      (const struct apply_viewport_state *)data;
-
-   for (unsigned i = 0; i < state->num_viewports; i++) {
-      /* Note: If we're using shared scaling, the scale should already be the
-       * same across all views, we can pick any view. However the number
-       * of viewports and number of views is not guaranteed the same, so we
-       * need to pick the 0'th view which always exists to be safe.
-       *
-       * Conversly, if we're not using shared scaling then the rasterizer in
-       * the original pipeline is using only the first viewport, so we need to
-       * replicate it across all viewports.
-       */
-      VkExtent2D frag_area = state->share_scale ? frag_areas[0] : frag_areas[i];
-      VkViewport viewport =
-         state->share_scale ? state->viewports[i] : state->viewports[0];
-      if (frag_area.width == 1 && frag_area.height == 1) {
-         viewports[i] = viewport;
-         continue;
-      }
-
-      float scale_x = (float) 1.0f / frag_area.width;
-      float scale_y = (float) 1.0f / frag_area.height;
-
-      viewports[i].minDepth = viewport.minDepth;
-      viewports[i].maxDepth = viewport.maxDepth;
-      viewports[i].width = viewport.width * scale_x;
-      viewports[i].height = viewport.height * scale_y;
-
-      VkOffset2D offset = fdm_per_bin_offset(frag_area, bin);
-
-      viewports[i].x = scale_x * viewport.x + offset.x;
-      viewports[i].y = scale_y * viewport.y + offset.y;
-   }
-
-   tu6_emit_viewport(cs, viewports, state->num_viewports, state->z_negative_one_to_one);
-}
-
-struct apply_scissor_state {
-   VkRect2D scissors[MAX_VIEWPORTS];
-   unsigned num_scissors;
-   bool share_scale;
-};
-
-static void
-fdm_apply_scissors(struct tu_cs *cs, void *data, VkRect2D bin, unsigned views,
-                   VkExtent2D *frag_areas)
-{
-   VkRect2D scissors[MAX_VIEWPORTS];
-   const struct apply_scissor_state *state =
-      (const struct apply_scissor_state *)data;
-
-   for (unsigned i = 0; i < state->num_scissors; i++) {
-      VkExtent2D frag_area = state->share_scale ? frag_areas[0] : frag_areas[i];
-      VkRect2D scissor =
-         state->share_scale ? state->scissors[i] : state->scissors[0];
-      if (frag_area.width == 1 && frag_area.height == 1) {
-         scissors[i] = scissor;
-         continue;
-      }
-
-      /* Transform the scissor following the viewport. It's unclear how this
-       * is supposed to handle cases where the scissor isn't aligned to the
-       * fragment area, but we round outwards to always render partial
-       * fragments if the scissor size equals the framebuffer size and it
-       * isn't aligned to the fragment area.
-       */
-      VkOffset2D offset = fdm_per_bin_offset(frag_area, bin);
-      VkOffset2D min = {
-         scissor.offset.x / frag_area.width + offset.x,
-         scissor.offset.y / frag_area.width + offset.y,
-      };
-      VkOffset2D max = {
-         DIV_ROUND_UP(scissor.offset.x + scissor.extent.width, frag_area.width) + offset.x,
-         DIV_ROUND_UP(scissor.offset.y + scissor.extent.height, frag_area.height) + offset.y,
-      };
-
-      /* Intersect scissor with the scaled bin, this essentially replaces the
-       * window scissor.
-       */
-      uint32_t scaled_width = bin.extent.width / frag_area.width;
-      uint32_t scaled_height = bin.extent.height / frag_area.height;
-      scissors[i].offset.x = MAX2(min.x, bin.offset.x);
-      scissors[i].offset.y = MAX2(min.y, bin.offset.y);
-      scissors[i].extent.width =
-         MIN2(max.x, bin.offset.x + scaled_width) - scissors[i].offset.x;
-      scissors[i].extent.height =
-         MIN2(max.y, bin.offset.y + scaled_height) - scissors[i].offset.y;
-   }
-
-   tu6_emit_scissor(cs, scissors, state->num_scissors);
-}
-
 static uint32_t
 fs_params_offset(struct tu_cmd_buffer *cmd)
 {
@@ -5331,7 +4241,7 @@ fdm_apply_fs_params(struct tu_cs *cs, void *data, VkRect2D bin, unsigned views,
    for (unsigned i = 0; i < num_consts; i++) {
       assert(i < views);
       VkExtent2D area = frag_areas[i];
-      VkOffset2D offset = fdm_per_bin_offset(area, bin);
+      VkOffset2D offset = tu_fdm_per_bin_offset(area, bin);
 
       tu_cs_emit(cs, area.width);
       tu_cs_emit(cs, area.height);
@@ -5350,11 +4260,11 @@ tu6_emit_fs_params(struct tu_cmd_buffer *cmd)
       return;
    }
 
-   struct tu_pipeline *pipeline = &cmd->state.pipeline->base;
+   struct tu_graphics_pipeline *pipeline = cmd->state.pipeline;
 
    unsigned num_units = fs_params_size(cmd);
 
-   if (pipeline->fs.fragment_density_map)
+   if (pipeline->has_fdm)
       tu_cs_set_writeable(&cmd->sub_cs, true);
 
    struct tu_cs cs;
@@ -5375,7 +4285,8 @@ tu6_emit_fs_params(struct tu_cmd_buffer *cmd)
       tu_cs_emit(&cs, 0);
 
       STATIC_ASSERT(IR3_DP_FS_FRAG_INVOCATION_COUNT == IR3_DP_FS_DYNAMIC);
-      tu_cs_emit(&cs, pipeline->program.per_samp ? cmd->state.samples : 1);
+      tu_cs_emit(&cs, pipeline->base.program.per_samp ?
+                 cmd->vk.dynamic_graphics_state.ms.rasterization_samples : 1);
       tu_cs_emit(&cs, 0);
       tu_cs_emit(&cs, 0);
       tu_cs_emit(&cs, 0);
@@ -5383,7 +4294,7 @@ tu6_emit_fs_params(struct tu_cmd_buffer *cmd)
       STATIC_ASSERT(IR3_DP_FS_FRAG_SIZE == IR3_DP_FS_DYNAMIC + 4);
       STATIC_ASSERT(IR3_DP_FS_FRAG_OFFSET == IR3_DP_FS_DYNAMIC + 6);
       if (num_units > 1) {
-         if (pipeline->fs.fragment_density_map) {
+         if (pipeline->has_fdm) {
            struct apply_fs_params_state state = {
               .num_consts = num_units - 1,
            };
@@ -5401,7 +4312,7 @@ tu6_emit_fs_params(struct tu_cmd_buffer *cmd)
 
    cmd->state.fs_params = tu_cs_end_draw_state(&cmd->sub_cs, &cs);
 
-   if (pipeline->fs.fragment_density_map)
+   if (pipeline->has_fdm)
       tu_cs_set_writeable(&cmd->sub_cs, false);
 }
 
@@ -5414,35 +4325,45 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
 {
    const struct tu_pipeline *pipeline = &cmd->state.pipeline->base;
    struct tu_render_pass_state *rp = &cmd->state.rp;
+
+   /* Emit state first, because it's needed for bandwidth calculations */
+   uint32_t dynamic_draw_state_dirty = 0;
+   if (!BITSET_IS_EMPTY(cmd->vk.dynamic_graphics_state.dirty) ||
+       (cmd->state.dirty & ~TU_CMD_DIRTY_COMPUTE_DESC_SETS)) {
+      dynamic_draw_state_dirty = tu_emit_draw_state(cmd);
+   }
 
    /* Fill draw stats for autotuner */
    rp->drawcall_count++;
 
    rp->drawcall_bandwidth_per_sample_sum +=
-      pipeline->output.color_bandwidth_per_sample;
+      cmd->state.bandwidth.color_bandwidth_per_sample;
 
    /* add depth memory bandwidth cost */
-   const uint32_t depth_bandwidth = pipeline->output.depth_cpp_per_sample;
-   if (cmd->state.rb_depth_cntl & A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE)
+   const uint32_t depth_bandwidth = cmd->state.bandwidth.depth_cpp_per_sample;
+   if (cmd->vk.dynamic_graphics_state.ds.depth.write_enable)
       rp->drawcall_bandwidth_per_sample_sum += depth_bandwidth;
-   if (cmd->state.rb_depth_cntl & A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE)
+   if (cmd->vk.dynamic_graphics_state.ds.depth.test_enable)
      rp->drawcall_bandwidth_per_sample_sum += depth_bandwidth;
 
    /* add stencil memory bandwidth cost */
-   const uint32_t stencil_bandwidth = pipeline->output.stencil_cpp_per_sample;
-   if (cmd->state.rb_stencil_cntl & A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE)
+   const uint32_t stencil_bandwidth =
+      cmd->state.bandwidth.stencil_cpp_per_sample;
+   if (cmd->vk.dynamic_graphics_state.ds.stencil.test_enable)
      rp->drawcall_bandwidth_per_sample_sum += stencil_bandwidth * 2;
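[Annotation] A worked example of the autotuner accounting above, with illustrative cpp values for a D24S8 attachment (depth_cpp_per_sample = 4, stencil_cpp_per_sample = 1): a draw with depth test and write enabled plus stencil testing adds color_bandwidth + 4 (test) + 4 (write) + 1 * 2 (stencil, counted once for read and once for write) per sample to the running sum. The only behavioral change here is where the inputs come from: the enables are read from the common ds state rather than from cached rb_depth_cntl/rb_stencil_cntl register values.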
 
    tu_emit_cache_flush_renderpass(cmd);
 
-   bool primitive_restart_enabled = pipeline->ia.primitive_restart;
-   if (pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE))
-      primitive_restart_enabled = cmd->state.primitive_restart_enable;
+   bool primitive_restart_enabled =
+      cmd->vk.dynamic_graphics_state.ia.primitive_restart_enable;
 
    bool primitive_restart = primitive_restart_enabled && indexed;
-   bool provoking_vtx_last = cmd->state.provoking_vertex_last;
+   bool provoking_vtx_last =
+      cmd->vk.dynamic_graphics_state.rs.provoking_vertex ==
+      VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT;
 
    bool tess_upper_left_domain_origin =
-      cmd->state.tess_upper_left_domain_origin;
+      (VkTessellationDomainOrigin)cmd->vk.dynamic_graphics_state.ts.domain_origin ==
+      VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT;
 
    struct tu_primitive_params* prim_params = &cmd->state.last_prim_params;
 
@@ -5473,11 +4394,27 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
 
    /* Early exit if there is nothing to emit, saves CPU cycles */
    uint32_t dirty = cmd->state.dirty;
-   if (!(dirty & ~TU_CMD_DIRTY_COMPUTE_DESC_SETS))
+   if (!dynamic_draw_state_dirty && !(dirty & ~TU_CMD_DIRTY_COMPUTE_DESC_SETS))
      return VK_SUCCESS;
 
    bool dirty_lrz =
-      dirty & (TU_CMD_DIRTY_LRZ | TU_CMD_DIRTY_DS | TU_CMD_DIRTY_BLEND);
+      (dirty & TU_CMD_DIRTY_LRZ) ||
+      BITSET_TEST(cmd->vk.dynamic_graphics_state.dirty,
+                  MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE) ||
+      BITSET_TEST(cmd->vk.dynamic_graphics_state.dirty,
+                  MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE) ||
+      BITSET_TEST(cmd->vk.dynamic_graphics_state.dirty,
+                  MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_ENABLE) ||
+      BITSET_TEST(cmd->vk.dynamic_graphics_state.dirty,
+                  MESA_VK_DYNAMIC_DS_DEPTH_COMPARE_OP) ||
+      BITSET_TEST(cmd->vk.dynamic_graphics_state.dirty,
+                  MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE) ||
+      BITSET_TEST(cmd->vk.dynamic_graphics_state.dirty,
+                  MESA_VK_DYNAMIC_DS_STENCIL_OP) ||
+      BITSET_TEST(cmd->vk.dynamic_graphics_state.dirty,
+                  MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK) ||
+      BITSET_TEST(cmd->vk.dynamic_graphics_state.dirty,
+                  MESA_VK_DYNAMIC_MS_ALPHA_TO_COVERAGE_ENABLE);
 
    if (dirty_lrz) {
       struct tu_cs cs;
@@ -5490,126 +4427,41 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
       tu6_build_depth_plane_z_mode(cmd, &cs);
    }
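[Annotation] The dirty_lrz expression tests eight MESA_VK_DYNAMIC_* bits one by one. A possible condensation (not in this patch) keeps the list in a table, assuming only the enum and the util/bitset.h/ARRAY_SIZE helpers already used here:

   static bool
   any_lrz_state_dirty(const struct vk_dynamic_graphics_state *dyn)
   {
      static const enum mesa_vk_dynamic_graphics_state lrz_states[] = {
         MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE,
         MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE,
         MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_ENABLE,
         MESA_VK_DYNAMIC_DS_DEPTH_COMPARE_OP,
         MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE,
         MESA_VK_DYNAMIC_DS_STENCIL_OP,
         MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK,
         MESA_VK_DYNAMIC_MS_ALPHA_TO_COVERAGE_ENABLE,
      };
      for (unsigned i = 0; i < ARRAY_SIZE(lrz_states); i++) {
         if (BITSET_TEST(dyn->dirty, lrz_states[i]))
            return true;
      }
      return false;
   }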
- */ - if (pipeline->rast.override_depth_clip) { - gras_cl_cntl = - (gras_cl_cntl & ~(A6XX_GRAS_CL_CNTL_ZFAR_CLIP_DISABLE | - A6XX_GRAS_CL_CNTL_ZNEAR_CLIP_DISABLE)) | - COND(gras_cl_cntl & A6XX_GRAS_CL_CNTL_Z_CLAMP_ENABLE, - A6XX_GRAS_CL_CNTL_ZFAR_CLIP_DISABLE | - A6XX_GRAS_CL_CNTL_ZNEAR_CLIP_DISABLE); - } - tu6_emit_rast(&cs, cmd->state.gras_su_cntl, - gras_cl_cntl, cmd->state.polygon_mode); - } - - if (dirty & TU_CMD_DIRTY_DS) { - struct tu_cs cs = tu_cmd_dynamic_state(cmd, TU_DYNAMIC_STATE_DS, 4); - uint32_t rb_depth_cntl = cmd->state.rb_depth_cntl; - - if ((rb_depth_cntl & A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE) || - (rb_depth_cntl & A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE)) - rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_READ_ENABLE; - - if ((rb_depth_cntl & A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE) && - !(rb_depth_cntl & A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE)) - tu6_apply_depth_bounds_workaround(cmd->device, &rb_depth_cntl); - - if (pipeline->output.rb_depth_cntl_disable) - rb_depth_cntl = 0; - - tu_cs_emit_regs(&cs, A6XX_RB_DEPTH_CNTL(.dword = rb_depth_cntl)); - tu_cs_emit_regs(&cs, A6XX_RB_STENCIL_CONTROL(.dword = cmd->state.rb_stencil_cntl)); + if (BITSET_TEST(cmd->vk.dynamic_graphics_state.dirty, + MESA_VK_DYNAMIC_VI_BINDINGS_VALID)) { + cmd->state.vertex_buffers.size = + util_last_bit(cmd->vk.dynamic_graphics_state.vi_bindings_valid) * 4; + cmd->state.dirty |= TU_CMD_DIRTY_VERTEX_BUFFERS; } if (dirty & TU_CMD_DIRTY_SHADER_CONSTS) cmd->state.shader_const = tu6_emit_consts(cmd, pipeline, false); - if (dirty & TU_CMD_DIRTY_VIEWPORTS) { - if (pipeline->fs.fragment_density_map) { - unsigned num_views = MAX2(cmd->state.pass->num_views, 1); - unsigned num_viewports = cmd->state.per_view_viewport ? - num_views : cmd->state.viewport_count; - struct apply_viewport_state state = { - .num_viewports = num_viewports, - .z_negative_one_to_one = cmd->state.z_negative_one_to_one, - .share_scale = !cmd->state.per_view_viewport, - }; - memcpy(&state.viewports, cmd->state.viewport, sizeof(state.viewports)); - tu_cs_set_writeable(&cmd->sub_cs, true); - struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_VIEWPORT, 8 + 10 * num_viewports); - tu_cs_set_writeable(&cmd->sub_cs, false); - tu_create_fdm_bin_patchpoint(cmd, &cs, 8 + 10 * num_viewports, - fdm_apply_viewports, state); - cmd->state.rp.shared_viewport |= !cmd->state.per_view_viewport; - } else { - struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_VIEWPORT, 8 + 10 * cmd->state.viewport_count); - tu6_emit_viewport(&cs, cmd->state.viewport, cmd->state.viewport_count, - cmd->state.z_negative_one_to_one); - } - } - - if (dirty & TU_CMD_DIRTY_SCISSORS) { - if (pipeline->fs.fragment_density_map) { - unsigned num_views = MAX2(cmd->state.pass->num_views, 1); - unsigned num_scissors = cmd->state.per_view_viewport ? 
- num_views : cmd->state.scissor_count; - struct apply_scissor_state state = { - .num_scissors = num_scissors, - .share_scale = !cmd->state.per_view_viewport, - }; - memcpy(&state.scissors, cmd->state.scissor, sizeof(state.scissors)); - tu_cs_set_writeable(&cmd->sub_cs, true); - struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_SCISSOR, 1 + 2 * num_scissors); - tu_cs_set_writeable(&cmd->sub_cs, false); - tu_create_fdm_bin_patchpoint(cmd, &cs, 1 + 2 * num_scissors, - fdm_apply_scissors, state); - cmd->state.rp.shared_viewport |= !cmd->state.per_view_viewport; - } else { - struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_SCISSOR, 1 + 2 * cmd->state.scissor_count); - tu6_emit_scissor(&cs, cmd->state.scissor, cmd->state.scissor_count); - } - } - - if (dirty & TU_CMD_DIRTY_BLEND) { - struct tu_cs cs = tu_cmd_dynamic_state(cmd, TU_DYNAMIC_STATE_BLEND, - 8 + 3 * pipeline->blend.num_rts); - tu6_emit_blend(&cs, cmd); - } - - if (dirty & TU_CMD_DIRTY_PATCH_CONTROL_POINTS) { - bool tess = pipeline->active_stages & - VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; - uint32_t state_size = TU6_EMIT_PATCH_CONTROL_POINTS_DWORDS( - pipeline->program.hs_param_dwords); - struct tu_cs cs = tu_cmd_dynamic_state( - cmd, TU_DYNAMIC_STATE_PATCH_CONTROL_POINTS, tess ? state_size : 0); - tu6_emit_patch_control_points(&cs, &cmd->state.pipeline->base, - cmd->state.patch_control_points); - } - if (dirty & TU_CMD_DIRTY_DESC_SETS) tu6_emit_descriptor_sets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS); - if (dirty & TU_CMD_DIRTY_FS_PARAMS) + if (BITSET_TEST(cmd->vk.dynamic_graphics_state.dirty, + MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES) || + BITSET_TEST(cmd->vk.dynamic_graphics_state.dirty, + MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY) || + BITSET_TEST(cmd->vk.dynamic_graphics_state.dirty, + MESA_VK_DYNAMIC_RS_LINE_MODE) || + (cmd->state.dirty & TU_CMD_DIRTY_PIPELINE)) { + tu6_update_msaa_disable(cmd); + } + + if (BITSET_TEST(cmd->vk.dynamic_graphics_state.dirty, + MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES)) { + tu6_update_msaa(cmd); + } + + bool dirty_fs_params = false; + if (BITSET_TEST(cmd->vk.dynamic_graphics_state.dirty, + MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES) || + (cmd->state.dirty & (TU_CMD_DIRTY_PIPELINE | TU_CMD_DIRTY_FDM))) { tu6_emit_fs_params(cmd); + dirty_fs_params = true; + } /* for the first draw in a renderpass, re-emit all the draw states * @@ -5640,43 +4492,21 @@ tu6_draw_common(struct tu_cmd_buffer *cmd, for (uint32_t i = 0; i < ARRAY_SIZE(cmd->state.dynamic_state); i++) { tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DYNAMIC + i, - ((pipeline->dynamic_state_mask & BIT(i)) ? - cmd->state.dynamic_state[i] : - pipeline->dynamic_state[i])); + ((pipeline->set_state_mask & BIT(i)) ? + pipeline->dynamic_state[i] : + cmd->state.dynamic_state[i])); } } else { - /* emit draw states that were just updated - * note we eventually don't want to have to emit anything here - */ - bool emit_binding_stride = false, emit_blend = false, - emit_patch_control_points = false; + /* emit draw states that were just updated */ uint32_t draw_state_count = + util_bitcount(dynamic_draw_state_dirty) + ((dirty & TU_CMD_DIRTY_SHADER_CONSTS) ? 1 : 0) + ((dirty & TU_CMD_DIRTY_DESC_SETS) ? 1 : 0) + ((dirty & TU_CMD_DIRTY_VERTEX_BUFFERS) ? 1 : 0) + ((dirty & TU_CMD_DIRTY_VS_PARAMS) ? 1 : 0) + - ((dirty & TU_CMD_DIRTY_FS_PARAMS) ? 1 : 0) + + (dirty_fs_params ? 1 : 0) + (dirty_lrz ? 
1 : 0); - if ((dirty & TU_CMD_DIRTY_VB_STRIDE) && - (pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_VB_STRIDE))) { - emit_binding_stride = true; - draw_state_count += 1; - } - - if ((dirty & TU_CMD_DIRTY_BLEND) && - (pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_BLEND))) { - emit_blend = true; - draw_state_count += 1; - } - - if ((dirty & TU_CMD_DIRTY_PATCH_CONTROL_POINTS) && - (pipeline->dynamic_state_mask & - BIT(TU_DYNAMIC_STATE_PATCH_CONTROL_POINTS))) { - emit_patch_control_points = true; - draw_state_count += 1; - } - if (draw_state_count > 0) tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * draw_state_count); @@ -5688,23 +4518,14 @@ tu6_draw_common(struct tu_cmd_buffer *cmd, } if (dirty & TU_CMD_DIRTY_VERTEX_BUFFERS) tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VB, cmd->state.vertex_buffers); - if (emit_binding_stride) { - tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DYNAMIC + TU_DYNAMIC_STATE_VB_STRIDE, - cmd->state.dynamic_state[TU_DYNAMIC_STATE_VB_STRIDE]); - } - if (emit_blend) { - tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DYNAMIC + TU_DYNAMIC_STATE_BLEND, - cmd->state.dynamic_state[TU_DYNAMIC_STATE_BLEND]); - } - if (emit_patch_control_points) { - tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DYNAMIC + TU_DYNAMIC_STATE_PATCH_CONTROL_POINTS, - cmd->state.dynamic_state[TU_DYNAMIC_STATE_PATCH_CONTROL_POINTS]); + u_foreach_bit (i, dynamic_draw_state_dirty) { + tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DYNAMIC + i, + cmd->state.dynamic_state[i]); } if (dirty & TU_CMD_DIRTY_VS_PARAMS) tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VS_PARAMS, cmd->state.vs_params); - if (dirty & TU_CMD_DIRTY_FS_PARAMS) + if (dirty_fs_params) tu_cs_emit_draw_state(cs, TU_DRAW_STATE_FS_PARAMS, cmd->state.fs_params); - if (dirty_lrz) { tu_cs_emit_draw_state(cs, TU_DRAW_STATE_LRZ_AND_DEPTH_PLANE, cmd->state.lrz_and_depth_plane_state); } @@ -5717,6 +4538,7 @@ tu6_draw_common(struct tu_cmd_buffer *cmd, * compute-related state. 
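    * For example, after this mask-off only TU_CMD_DIRTY_COMPUTE_DESC_SETS can
    * survive a draw, so a later vkCmdDispatch in the same command buffer still
    * sees its descriptor sets as dirty and re-emits them, while every graphics
    * bit is consumed by the draw states emitted above.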
*/ cmd->state.dirty &= TU_CMD_DIRTY_COMPUTE_DESC_SETS; + BITSET_ZERO(cmd->vk.dynamic_graphics_state.dirty); return VK_SUCCESS; } @@ -5724,10 +4546,12 @@ static uint32_t tu_draw_initiator(struct tu_cmd_buffer *cmd, enum pc_di_src_sel src_sel) { const struct tu_pipeline *pipeline = &cmd->state.pipeline->base; - enum pc_di_primtype primtype = cmd->state.primtype; + enum pc_di_primtype primtype = + tu6_primtype((VkPrimitiveTopology)cmd->vk.dynamic_graphics_state.ia.primitive_topology); if (primtype == DI_PT_PATCHES0) - primtype = (enum pc_di_primtype) (primtype + cmd->state.patch_control_points); + primtype = (enum pc_di_primtype) (primtype + + cmd->vk.dynamic_graphics_state.ts.patch_control_points); uint32_t initiator = CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(primtype) | diff --git a/src/freedreno/vulkan/tu_cmd_buffer.h b/src/freedreno/vulkan/tu_cmd_buffer.h index 1b181dffbbd..954222817dc 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.h +++ b/src/freedreno/vulkan/tu_cmd_buffer.h @@ -55,23 +55,18 @@ struct tu_descriptor_state enum tu_cmd_dirty_bits { TU_CMD_DIRTY_VERTEX_BUFFERS = BIT(0), - TU_CMD_DIRTY_VB_STRIDE = BIT(1), - TU_CMD_DIRTY_RAST = BIT(2), - TU_CMD_DIRTY_DS = BIT(3), - TU_CMD_DIRTY_DESC_SETS = BIT(4), - TU_CMD_DIRTY_COMPUTE_DESC_SETS = BIT(5), - TU_CMD_DIRTY_SHADER_CONSTS = BIT(6), - TU_CMD_DIRTY_LRZ = BIT(7), - TU_CMD_DIRTY_VS_PARAMS = BIT(8), - TU_CMD_DIRTY_FS_PARAMS = BIT(9), - TU_CMD_DIRTY_PC_RASTER_CNTL = BIT(10), - TU_CMD_DIRTY_VIEWPORTS = BIT(11), - TU_CMD_DIRTY_SCISSORS = BIT(12), - TU_CMD_DIRTY_BLEND = BIT(13), - TU_CMD_DIRTY_PATCH_CONTROL_POINTS = BIT(14), - TU_CMD_DIRTY_TESS_PARAMS = BIT(15), + TU_CMD_DIRTY_DESC_SETS = BIT(1), + TU_CMD_DIRTY_COMPUTE_DESC_SETS = BIT(2), + TU_CMD_DIRTY_SHADER_CONSTS = BIT(3), + TU_CMD_DIRTY_LRZ = BIT(4), + TU_CMD_DIRTY_VS_PARAMS = BIT(5), + TU_CMD_DIRTY_TESS_PARAMS = BIT(6), + TU_CMD_DIRTY_SUBPASS = BIT(7), + TU_CMD_DIRTY_FDM = BIT(8), + TU_CMD_DIRTY_PER_VIEW_VIEWPORT = BIT(9), + TU_CMD_DIRTY_PIPELINE = BIT(10), /* all draw states were disabled and need to be re-enabled: */ - TU_CMD_DIRTY_DRAW_STATE = BIT(16) + TU_CMD_DIRTY_DRAW_STATE = BIT(11) }; /* There are only three cache domains we have to care about: the CCU, or @@ -423,45 +418,25 @@ struct tu_cmd_state struct tu_render_pass_state rp; - /* Vertex buffers, viewports, and scissors + struct vk_render_pass_state vk_rp; + struct vk_vertex_input_state vi; + struct vk_sample_locations_state sl; + + struct tu_bandwidth bandwidth; + + /* Vertex buffers * the states for these can be updated partially, so we need to save these * to be able to emit a complete draw state */ struct { uint64_t base; uint32_t size; - uint32_t stride; } vb[MAX_VBS]; uint32_t max_vbs_bound; - VkViewport viewport[MAX_VIEWPORTS]; - VkRect2D scissor[MAX_SCISSORS]; - uint32_t viewport_count, scissor_count; bool per_view_viewport; - - /* for dynamic states that can't be emitted directly */ - uint32_t dynamic_stencil_mask; - uint32_t dynamic_stencil_wrmask; - uint32_t dynamic_stencil_ref; - bool stencil_front_write; - bool stencil_back_write; - - uint32_t gras_su_cntl, gras_cl_cntl, rb_depth_cntl, rb_stencil_cntl; - uint32_t pc_raster_cntl, vpc_unknown_9107; - enum a6xx_polygon_mode polygon_mode; - uint32_t rb_mrt_control[MAX_RTS], rb_mrt_blend_control[MAX_RTS]; - uint32_t rb_mrt_control_rop; - uint32_t rb_blend_cntl, sp_blend_cntl; - uint32_t pipeline_color_write_enable, blend_enable; - uint32_t color_write_enable; - bool logic_op_enabled; - bool rop_reads_dst; - bool alpha_to_coverage; - enum pc_di_primtype primtype; - bool 
primitive_restart_enable;
-   bool tess_upper_left_domain_origin;
-   bool provoking_vertex_last;
+   bool pipeline_has_fdm;
 
    /* saved states to re-emit in TU_CMD_DIRTY_DRAW_STATE case */
    struct tu_draw_state dynamic_state[TU_DYNAMIC_STATE_COUNT];
@@ -527,12 +502,10 @@ struct tu_cmd_state
    bool tessfactor_addr_set;
    bool predication_active;
-   enum a5xx_line_mode line_mode;
-   VkSampleCountFlagBits samples;
    bool msaa_disable;
-   bool z_negative_one_to_one;
-
-   unsigned patch_control_points;
+   bool blend_reads_dest;
+   bool stencil_front_write;
+   bool stencil_back_write;
 
    /* VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT and
     * VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT are allowed to run simultaneously,
diff --git a/src/freedreno/vulkan/tu_lrz.cc b/src/freedreno/vulkan/tu_lrz.cc
index c672bd43320..c8ce6c3eaab 100644
--- a/src/freedreno/vulkan/tu_lrz.cc
+++ b/src/freedreno/vulkan/tu_lrz.cc
@@ -560,13 +560,11 @@ tu6_calculate_lrz_state(struct tu_cmd_buffer *cmd,
                         const uint32_t a)
 {
    struct tu_pipeline *pipeline = &cmd->state.pipeline->base;
-   bool z_test_enable = (bool) (cmd->state.rb_depth_cntl & A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE);
-   bool z_write_enable = (bool) (cmd->state.rb_depth_cntl & A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE);
-   bool z_bounds_enable = (bool) (cmd->state.rb_depth_cntl & A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE);
+   bool z_test_enable = cmd->vk.dynamic_graphics_state.ds.depth.test_enable;
+   bool z_write_enable = cmd->vk.dynamic_graphics_state.ds.depth.write_enable;
+   bool z_bounds_enable = cmd->vk.dynamic_graphics_state.ds.depth.bounds_test.enable;
    VkCompareOp depth_compare_op =
-      (VkCompareOp) ((cmd->state.rb_depth_cntl &
-                      A6XX_RB_DEPTH_CNTL_ZFUNC__MASK) >>
-                     A6XX_RB_DEPTH_CNTL_ZFUNC__SHIFT);
+      cmd->vk.dynamic_graphics_state.ds.depth.compare_op;
 
    struct A6XX_GRAS_LRZ_CNTL gras_lrz_cntl = { 0 };
 
@@ -599,63 +597,7 @@ tu6_calculate_lrz_state(struct tu_cmd_buffer *cmd,
    /* See comment in tu_pipeline about disabling LRZ write for blending.
*/ - bool reads_dest = !!(pipeline->lrz.lrz_status & TU_LRZ_READS_DEST); - if (gras_lrz_cntl.lrz_write && pipeline->dynamic_state_mask & - (BIT(TU_DYNAMIC_STATE_LOGIC_OP) | - BIT(TU_DYNAMIC_STATE_BLEND_ENABLE))) { - if (cmd->state.logic_op_enabled && cmd->state.rop_reads_dst) { - perf_debug(cmd->device, "disabling lrz write due to dynamic logic op"); - gras_lrz_cntl.lrz_write = false; - reads_dest = true; - } - - if (cmd->state.blend_enable) { - perf_debug(cmd->device, "disabling lrz write due to dynamic blend"); - gras_lrz_cntl.lrz_write = false; - reads_dest = true; - } - } - - if ((pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_BLEND))) { - for (unsigned i = 0; i < cmd->state.subpass->color_count; i++) { - unsigned a = cmd->state.subpass->color_attachments[i].attachment; - if (a == VK_ATTACHMENT_UNUSED) - continue; - - VkFormat format = cmd->state.pass->attachments[a].format; - unsigned mask = MASK(vk_format_get_nr_components(format)); - uint32_t enabled_mask = (cmd->state.rb_mrt_control[i] & - A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK) >> - A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT; - if ((enabled_mask & mask) != mask) { - if (gras_lrz_cntl.lrz_write) { - perf_debug(cmd->device, - "disabling lrz write due to dynamic color write " - "mask (%x/%x)", - enabled_mask, mask); - } - gras_lrz_cntl.lrz_write = false; - reads_dest = true; - break; - } - } - } - - if ((pipeline->dynamic_state_mask & - BIT(TU_DYNAMIC_STATE_COLOR_WRITE_ENABLE)) && - (cmd->state.color_write_enable & - MASK(cmd->state.subpass->color_count)) != - MASK(pipeline->blend.num_rts)) { - if (gras_lrz_cntl.lrz_write) { - perf_debug( - cmd->device, - "disabling lrz write due to dynamic color write enables (%x/%x)", - cmd->state.color_write_enable, - MASK(pipeline->blend.num_rts)); - } - gras_lrz_cntl.lrz_write = false; - reads_dest = true; - } + bool reads_dest = cmd->state.blend_reads_dest; /* LRZ is disabled until it is cleared, which means that one "wrong" * depth test or shader could disable LRZ until depth buffer is cleared. 
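The dynamic-state special cases removed above are collapsed into one cached flag, cmd->state.blend_reads_dest (declared in the tu_cmd_buffer.h hunk earlier in this patch). As a minimal sketch, assuming a hypothetical helper name and call site, such a flag could be derived from the common vk_color_blend_state as below; the patch computes the real value in its blend-state handling, which is outside this hunk.

static void
tu_update_blend_reads_dest(struct tu_cmd_buffer *cmd,
                           const struct vk_color_blend_state *cb)
{
   /* A logic op that reads the destination forces a destination read for
    * every attachment it applies to.
    */
   bool reads_dest = cb->logic_op_enable &&
                     tu_logic_op_reads_dst((VkLogicOp)cb->logic_op);

   for (unsigned i = 0; i < cb->attachment_count; i++) {
      /* Blending, or a write mask that preserves some components, also
       * reads the destination and therefore disables LRZ writes, matching
       * the removed per-register checks above.
       */
      if (cb->attachments[i].blend_enable ||
          (cb->attachments[i].write_mask & 0xf) != 0xf) {
         reads_dest = true;
         break;
      }
   }

   cmd->state.blend_reads_dest = reads_dest;
}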
@@ -761,13 +703,13 @@ tu6_calculate_lrz_state(struct tu_cmd_buffer *cmd,
    cmd->state.lrz.prev_direction = lrz_direction;
 
    /* Invalidate LRZ and disable write if stencil test is enabled */
-   bool stencil_test_enable = cmd->state.rb_stencil_cntl & A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE;
+   bool stencil_test_enable = cmd->vk.dynamic_graphics_state.ds.stencil.test_enable;
    if (!disable_lrz && stencil_test_enable) {
       VkCompareOp stencil_front_compare_op = (VkCompareOp)
-         ((cmd->state.rb_stencil_cntl & A6XX_RB_STENCIL_CONTROL_FUNC__MASK) >> A6XX_RB_STENCIL_CONTROL_FUNC__SHIFT);
+         cmd->vk.dynamic_graphics_state.ds.stencil.front.op.compare;
 
       VkCompareOp stencil_back_compare_op = (VkCompareOp)
-         ((cmd->state.rb_stencil_cntl & A6XX_RB_STENCIL_CONTROL_FUNC_BF__MASK) >> A6XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT);
+         cmd->vk.dynamic_graphics_state.ds.stencil.back.op.compare;
 
       bool lrz_allowed = true;
       lrz_allowed = lrz_allowed && tu6_stencil_op_lrz_allowed(
diff --git a/src/freedreno/vulkan/tu_pipeline.cc b/src/freedreno/vulkan/tu_pipeline.cc
index d5d4a0a41f2..3612f7a9fd6 100644
--- a/src/freedreno/vulkan/tu_pipeline.cc
+++ b/src/freedreno/vulkan/tu_pipeline.cc
@@ -263,17 +252,6 @@ struct tu_pipeline_builder
    bool rasterizer_discard;
    /* these states are affected by rasterizer_discard */
-   bool use_color_attachments;
-   bool attachment_state_valid;
-   VkFormat color_attachment_formats[MAX_RTS];
-   VkFormat depth_attachment_format;
-   uint32_t multiview_mask;
-
-   bool subpass_raster_order_attachment_access;
-   bool subpass_feedback_loop_color;
-   bool subpass_feedback_loop_ds;
-   bool feedback_loop_may_involve_textures;
-   bool fragment_density_map;
    uint8_t unscaled_input_fragcoord;
 
    /* Each library defines at least one piece of state in
@@ -293,6 +282,11 @@ struct tu_pipeline_builder
    /* The stages we are compiling now.
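    * (For a complete pipeline this is every stage supplied in pStages; for a
    * graphics pipeline library it is assumed to be just the stages owned by
    * that library, e.g. only VK_SHADER_STAGE_FRAGMENT_BIT for a
    * fragment-shader library.)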
*/ VkShaderStageFlags active_stages; + + bool fragment_density_map; + + struct vk_graphics_pipeline_all_state all_state; + struct vk_graphics_pipeline_state graphics_state; }; static bool @@ -310,17 +304,13 @@ tu_logic_op_reads_dst(VkLogicOp op) } static bool -tu_blend_state_is_dual_src(const VkPipelineColorBlendStateCreateInfo *info) +tu_blend_state_is_dual_src(const struct vk_color_blend_state *cb) { - if (!info) - return false; - - for (unsigned i = 0; i < info->attachmentCount; i++) { - const VkPipelineColorBlendAttachmentState *blend = &info->pAttachments[i]; - if (tu_blend_factor_is_dual_src(blend->srcColorBlendFactor) || - tu_blend_factor_is_dual_src(blend->dstColorBlendFactor) || - tu_blend_factor_is_dual_src(blend->srcAlphaBlendFactor) || - tu_blend_factor_is_dual_src(blend->dstAlphaBlendFactor)) + for (unsigned i = 0; i < cb->attachment_count; i++) { + if (tu_blend_factor_is_dual_src((VkBlendFactor)cb->attachments[i].src_color_blend_factor) || + tu_blend_factor_is_dual_src((VkBlendFactor)cb->attachments[i].dst_color_blend_factor) || + tu_blend_factor_is_dual_src((VkBlendFactor)cb->attachments[i].src_alpha_blend_factor) || + tu_blend_factor_is_dual_src((VkBlendFactor)cb->attachments[i].dst_alpha_blend_factor)) return true; } @@ -1631,6 +1621,12 @@ tu6_emit_fs_outputs(struct tu_cs *cs, A6XX_RB_RENDER_COMPONENTS(.dword = fs_render_components)); if (pipeline) { + if (fs->has_kill) { + pipeline->lrz.lrz_status |= TU_LRZ_FORCE_DISABLE_WRITE; + } + if (fs->no_earlyz || fs->writes_pos) { + pipeline->lrz.lrz_status = TU_LRZ_FORCE_DISABLE_LRZ; + } pipeline->lrz.fs.has_kill = fs->has_kill; pipeline->lrz.fs.early_fragment_tests = fs->fs.early_fragment_tests; @@ -1678,10 +1674,25 @@ tu_get_tess_iova(struct tu_device *dev, *tess_param_iova = dev->tess_bo->iova + TU_TESS_FACTOR_SIZE; } +static const enum mesa_vk_dynamic_graphics_state tu_patch_control_points_state[] = { + MESA_VK_DYNAMIC_TS_PATCH_CONTROL_POINTS, +}; + +static unsigned +tu6_patch_control_points_size(struct tu_device *dev, + const struct tu_pipeline *pipeline, + uint32_t patch_control_points) +{ +#define EMIT_CONST_DWORDS(const_dwords) (4 + const_dwords) + return EMIT_CONST_DWORDS(4) + + EMIT_CONST_DWORDS(pipeline->program.hs_param_dwords) + 2 + 2 + 2; +#undef EMIT_CONST_DWORDS +} + void tu6_emit_patch_control_points(struct tu_cs *cs, const struct tu_pipeline *pipeline, - unsigned patch_control_points) + uint32_t patch_control_points) { if (!(pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)) return; @@ -1868,8 +1879,8 @@ tu6_emit_program(struct tu_cs *cs, tu6_emit_dynamic_offset(cs, xs, builder); } - uint32_t multiview_views = util_logbase2(pipeline->rast.multiview_mask) + 1; - uint32_t multiview_cntl = pipeline->rast.multiview_mask ? + uint32_t multiview_views = util_logbase2(builder->graphics_state.rp->view_mask) + 1; + uint32_t multiview_cntl = builder->graphics_state.rp->view_mask ? 
A6XX_PC_MULTIVIEW_CNTL_ENABLE | A6XX_PC_MULTIVIEW_CNTL_VIEWS(multiview_views) | COND(!multi_pos_output, A6XX_PC_MULTIVIEW_CNTL_DISABLEMULTIPOS) @@ -1894,7 +1905,7 @@ tu6_emit_program(struct tu_cs *cs, if (multiview_cntl && builder->device->physical_device->info->a6xx.supports_multiview_mask) { tu_cs_emit_pkt4(cs, REG_A6XX_PC_MULTIVIEW_MASK, 1); - tu_cs_emit(cs, pipeline->rast.multiview_mask); + tu_cs_emit(cs, builder->graphics_state.rp->view_mask); } tu_cs_emit_pkt4(cs, REG_A6XX_SP_HS_WAVE_INPUT_SIZE, 1); @@ -1920,448 +1931,6 @@ tu6_emit_program(struct tu_cs *cs, } } -void -tu6_emit_vertex_input(struct tu_cs *cs, - uint32_t binding_count, - const VkVertexInputBindingDescription2EXT *bindings, - uint32_t unsorted_attr_count, - const VkVertexInputAttributeDescription2EXT *unsorted_attrs) -{ - uint32_t binding_instanced = 0; /* bitmask of instanced bindings */ - uint32_t step_rate[MAX_VBS]; - - for (uint32_t i = 0; i < binding_count; i++) { - const VkVertexInputBindingDescription2EXT *binding = &bindings[i]; - - if (binding->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE) - binding_instanced |= 1u << binding->binding; - - step_rate[binding->binding] = binding->divisor; - } - - const VkVertexInputAttributeDescription2EXT *attrs[MAX_VERTEX_ATTRIBS] = { }; - unsigned attr_count = 0; - for (uint32_t i = 0; i < unsorted_attr_count; i++) { - const VkVertexInputAttributeDescription2EXT *attr = &unsorted_attrs[i]; - attrs[attr->location] = attr; - attr_count = MAX2(attr_count, attr->location + 1); - } - - if (attr_count != 0) - tu_cs_emit_pkt4(cs, REG_A6XX_VFD_DECODE_INSTR(0), attr_count * 2); - - for (uint32_t loc = 0; loc < attr_count; loc++) { - const VkVertexInputAttributeDescription2EXT *attr = attrs[loc]; - - if (attr) { - const struct tu_native_format format = tu6_format_vtx( - tu_vk_format_to_pipe_format(attr->format)); - tu_cs_emit(cs, A6XX_VFD_DECODE_INSTR(0, - .idx = attr->binding, - .offset = attr->offset, - .instanced = binding_instanced & (1 << attr->binding), - .format = format.fmt, - .swap = format.swap, - .unk30 = 1, - ._float = !vk_format_is_int(attr->format)).value); - tu_cs_emit(cs, A6XX_VFD_DECODE_STEP_RATE(0, step_rate[attr->binding]).value); - } else { - tu_cs_emit(cs, 0); - tu_cs_emit(cs, 0); - } - } -} - -void -tu6_emit_viewport(struct tu_cs *cs, const VkViewport *viewports, uint32_t num_viewport, - bool z_negative_one_to_one) -{ - VkExtent2D guardband = {511, 511}; - - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CL_VPORT_XOFFSET(0), num_viewport * 6); - for (uint32_t i = 0; i < num_viewport; i++) { - const VkViewport *viewport = &viewports[i]; - float offsets[3]; - float scales[3]; - scales[0] = viewport->width / 2.0f; - scales[1] = viewport->height / 2.0f; - if (z_negative_one_to_one) { - scales[2] = 0.5 * (viewport->maxDepth - viewport->minDepth); - } else { - scales[2] = viewport->maxDepth - viewport->minDepth; - } - - offsets[0] = viewport->x + scales[0]; - offsets[1] = viewport->y + scales[1]; - if (z_negative_one_to_one) { - offsets[2] = 0.5 * (viewport->minDepth + viewport->maxDepth); - } else { - offsets[2] = viewport->minDepth; - } - - for (uint32_t j = 0; j < 3; j++) { - tu_cs_emit(cs, fui(offsets[j])); - tu_cs_emit(cs, fui(scales[j])); - } - - guardband.width = - MIN2(guardband.width, fd_calc_guardband(offsets[0], scales[0], false)); - guardband.height = - MIN2(guardband.height, fd_calc_guardband(offsets[1], scales[1], false)); - } - - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL(0), num_viewport * 2); - for (uint32_t i = 0; i < num_viewport; i++) { - const 
VkViewport *viewport = &viewports[i]; - VkOffset2D min; - VkOffset2D max; - min.x = (int32_t) viewport->x; - max.x = (int32_t) ceilf(viewport->x + viewport->width); - if (viewport->height >= 0.0f) { - min.y = (int32_t) viewport->y; - max.y = (int32_t) ceilf(viewport->y + viewport->height); - } else { - min.y = (int32_t)(viewport->y + viewport->height); - max.y = (int32_t) ceilf(viewport->y); - } - /* the spec allows viewport->height to be 0.0f */ - if (min.y == max.y) - max.y++; - /* allow viewport->width = 0.0f for un-initialized viewports: */ - if (min.x == max.x) - max.x++; - - min.x = MAX2(min.x, 0); - min.y = MAX2(min.y, 0); - max.x = MAX2(max.x, 1); - max.y = MAX2(max.y, 1); - - assert(min.x < max.x); - assert(min.y < max.y); - - tu_cs_emit(cs, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_X(min.x) | - A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_Y(min.y)); - tu_cs_emit(cs, A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_X(max.x - 1) | - A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_Y(max.y - 1)); - } - - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CL_Z_CLAMP(0), num_viewport * 2); - for (uint32_t i = 0; i < num_viewport; i++) { - const VkViewport *viewport = &viewports[i]; - tu_cs_emit(cs, fui(MIN2(viewport->minDepth, viewport->maxDepth))); - tu_cs_emit(cs, fui(MAX2(viewport->minDepth, viewport->maxDepth))); - } - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ, 1); - tu_cs_emit(cs, A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ(guardband.width) | - A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT(guardband.height)); - - /* TODO: what to do about this and multi viewport ? */ - float z_clamp_min = num_viewport ? MIN2(viewports[0].minDepth, viewports[0].maxDepth) : 0; - float z_clamp_max = num_viewport ? MAX2(viewports[0].minDepth, viewports[0].maxDepth) : 0; - - tu_cs_emit_regs(cs, - A6XX_RB_Z_CLAMP_MIN(z_clamp_min), - A6XX_RB_Z_CLAMP_MAX(z_clamp_max)); -} - -void -tu6_emit_scissor(struct tu_cs *cs, const VkRect2D *scissors, uint32_t scissor_count) -{ - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_SCREEN_SCISSOR_TL(0), scissor_count * 2); - - for (uint32_t i = 0; i < scissor_count; i++) { - const VkRect2D *scissor = &scissors[i]; - - uint32_t min_x = scissor->offset.x; - uint32_t min_y = scissor->offset.y; - uint32_t max_x = min_x + scissor->extent.width - 1; - uint32_t max_y = min_y + scissor->extent.height - 1; - - if (!scissor->extent.width || !scissor->extent.height) { - min_x = min_y = 1; - max_x = max_y = 0; - } else { - /* avoid overflow */ - uint32_t scissor_max = BITFIELD_MASK(15); - min_x = MIN2(scissor_max, min_x); - min_y = MIN2(scissor_max, min_y); - max_x = MIN2(scissor_max, max_x); - max_y = MIN2(scissor_max, max_y); - } - - tu_cs_emit(cs, A6XX_GRAS_SC_SCREEN_SCISSOR_TL_X(min_x) | - A6XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(min_y)); - tu_cs_emit(cs, A6XX_GRAS_SC_SCREEN_SCISSOR_BR_X(max_x) | - A6XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(max_y)); - } -} - -void -tu6_emit_sample_locations_enable(struct tu_cs *cs, bool enable) -{ - uint32_t sample_config = - COND(enable, A6XX_RB_SAMPLE_CONFIG_LOCATION_ENABLE); - - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SAMPLE_CONFIG, 1); - tu_cs_emit(cs, sample_config); - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_SAMPLE_CONFIG, 1); - tu_cs_emit(cs, sample_config); - - tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_SAMPLE_CONFIG, 1); - tu_cs_emit(cs, sample_config); -} - -void -tu6_emit_sample_locations(struct tu_cs *cs, const VkSampleLocationsInfoEXT *samp_loc) -{ - assert(samp_loc->sampleLocationsPerPixel == samp_loc->sampleLocationsCount); - assert(samp_loc->sampleLocationGridSize.width == 1); - assert(samp_loc->sampleLocationGridSize.height == 1); - - uint32_t 
sample_locations = 0; - for (uint32_t i = 0; i < samp_loc->sampleLocationsCount; i++) { - /* From VkSampleLocationEXT: - * - * The values specified in a VkSampleLocationEXT structure are always - * clamped to the implementation-dependent sample location coordinate - * range - * [sampleLocationCoordinateRange[0],sampleLocationCoordinateRange[1]] - */ - float x = CLAMP(samp_loc->pSampleLocations[i].x, SAMPLE_LOCATION_MIN, - SAMPLE_LOCATION_MAX); - float y = CLAMP(samp_loc->pSampleLocations[i].y, SAMPLE_LOCATION_MIN, - SAMPLE_LOCATION_MAX); - - sample_locations |= - (A6XX_RB_SAMPLE_LOCATION_0_SAMPLE_0_X(x) | - A6XX_RB_SAMPLE_LOCATION_0_SAMPLE_0_Y(y)) << i*8; - } - - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SAMPLE_LOCATION_0, 1); - tu_cs_emit(cs, sample_locations); - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_SAMPLE_LOCATION_0, 1); - tu_cs_emit(cs, sample_locations); - - tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_SAMPLE_LOCATION_0, 1); - tu_cs_emit(cs, sample_locations); -} - -static uint32_t -tu6_gras_su_cntl(const VkPipelineRasterizationStateCreateInfo *rast_info, - enum a5xx_line_mode line_mode, - bool multiview) -{ - uint32_t gras_su_cntl = 0; - - if (rast_info->cullMode & VK_CULL_MODE_FRONT_BIT) - gras_su_cntl |= A6XX_GRAS_SU_CNTL_CULL_FRONT; - if (rast_info->cullMode & VK_CULL_MODE_BACK_BIT) - gras_su_cntl |= A6XX_GRAS_SU_CNTL_CULL_BACK; - - if (rast_info->frontFace == VK_FRONT_FACE_CLOCKWISE) - gras_su_cntl |= A6XX_GRAS_SU_CNTL_FRONT_CW; - - gras_su_cntl |= - A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(rast_info->lineWidth / 2.0f); - - if (rast_info->depthBiasEnable) - gras_su_cntl |= A6XX_GRAS_SU_CNTL_POLY_OFFSET; - - gras_su_cntl |= A6XX_GRAS_SU_CNTL_LINE_MODE(line_mode); - - if (multiview) { - gras_su_cntl |= - A6XX_GRAS_SU_CNTL_MULTIVIEW_ENABLE | - A6XX_GRAS_SU_CNTL_RENDERTARGETINDEXINCR; - } - - return gras_su_cntl; -} - -void -tu6_emit_depth_bias(struct tu_cs *cs, - float constant_factor, - float clamp, - float slope_factor) -{ - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_POLY_OFFSET_SCALE, 3); - tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_SCALE(slope_factor).value); - tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_OFFSET(constant_factor).value); - tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(clamp).value); -} - -static uint32_t -tu6_rb_mrt_blend_control(const VkPipelineColorBlendAttachmentState *att) -{ - const enum a3xx_rb_blend_opcode color_op = tu6_blend_op(att->colorBlendOp); - const enum adreno_rb_blend_factor src_color_factor = - tu6_blend_factor(att->srcColorBlendFactor); - const enum adreno_rb_blend_factor dst_color_factor = - tu6_blend_factor(att->dstColorBlendFactor); - const enum a3xx_rb_blend_opcode alpha_op = tu6_blend_op(att->alphaBlendOp); - const enum adreno_rb_blend_factor src_alpha_factor = - tu6_blend_factor(att->srcAlphaBlendFactor); - const enum adreno_rb_blend_factor dst_alpha_factor = - tu6_blend_factor(att->dstAlphaBlendFactor); - - return A6XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(src_color_factor) | - A6XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(color_op) | - A6XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(dst_color_factor) | - A6XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(src_alpha_factor) | - A6XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(alpha_op) | - A6XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(dst_alpha_factor); -} - -static uint32_t -tu6_rb_mrt_control(const VkPipelineColorBlendAttachmentState *att) -{ - uint32_t rb_mrt_control = - A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE(att->colorWriteMask); - - rb_mrt_control |= COND(att->blendEnable, - A6XX_RB_MRT_CONTROL_BLEND | - A6XX_RB_MRT_CONTROL_BLEND2); - - return 
rb_mrt_control; -} - -uint32_t -tu6_rb_mrt_control_rop(VkLogicOp op, bool *rop_reads_dst) -{ - *rop_reads_dst = tu_logic_op_reads_dst(op); - return A6XX_RB_MRT_CONTROL_ROP_ENABLE | - A6XX_RB_MRT_CONTROL_ROP_CODE(tu6_rop(op)); -} - -static void -tu6_emit_rb_mrt_controls(struct tu_pipeline *pipeline, - const VkPipelineColorBlendStateCreateInfo *blend_info, - const VkFormat attachment_formats[MAX_RTS], - bool *rop_reads_dst, - uint32_t *color_bandwidth_per_sample) -{ - const VkPipelineColorWriteCreateInfoEXT *color_info = - vk_find_struct_const(blend_info->pNext, - PIPELINE_COLOR_WRITE_CREATE_INFO_EXT); - - /* The static state is ignored if it's dynamic. In that case assume - * everything is enabled and then the appropriate registers will be zero'd - * dynamically. - */ - if (pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_COLOR_WRITE_ENABLE)) - color_info = NULL; - - *rop_reads_dst = false; - *color_bandwidth_per_sample = 0; - - pipeline->blend.rb_mrt_control_rop = - tu6_rb_mrt_control_rop(blend_info->logicOp, rop_reads_dst); - pipeline->blend.logic_op_enabled = blend_info->logicOpEnable; - - uint32_t total_bpp = 0; - pipeline->blend.num_rts = blend_info->attachmentCount; - for (uint32_t i = 0; i < blend_info->attachmentCount; i++) { - const VkPipelineColorBlendAttachmentState *att = - &blend_info->pAttachments[i]; - const VkFormat format = attachment_formats[i]; - - uint32_t rb_mrt_control = 0; - uint32_t rb_mrt_blend_control = 0; - if (format != VK_FORMAT_UNDEFINED && - (!color_info || color_info->pColorWriteEnables[i])) { - const uint64_t blend_att_states = - BIT(TU_DYNAMIC_STATE_COLOR_WRITE_MASK) | - BIT(TU_DYNAMIC_STATE_BLEND_ENABLE) | - BIT(TU_DYNAMIC_STATE_BLEND_EQUATION); - if ((pipeline->dynamic_state_mask & blend_att_states) != blend_att_states) { - rb_mrt_control = tu6_rb_mrt_control(att); - rb_mrt_blend_control = tu6_rb_mrt_blend_control(att); - } - - /* calculate bpp based on format and write mask */ - uint32_t write_bpp = 0; - if ((pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_COLOR_WRITE_MASK)) || - att->colorWriteMask == 0xf) { - write_bpp = vk_format_get_blocksizebits(format); - } else { - const enum pipe_format pipe_format = vk_format_to_pipe_format(format); - for (uint32_t i = 0; i < 4; i++) { - if (att->colorWriteMask & (1 << i)) { - write_bpp += util_format_get_component_bits(pipe_format, - UTIL_FORMAT_COLORSPACE_RGB, i); - } - } - } - total_bpp += write_bpp; - - pipeline->blend.color_write_enable |= BIT(i); - - if (!(pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_BLEND_ENABLE))) { - if (att->blendEnable) - pipeline->blend.blend_enable |= BIT(i); - - if (att->blendEnable || (blend_info->logicOpEnable && *rop_reads_dst)) { - total_bpp += write_bpp; - } - } - } - - pipeline->blend.rb_mrt_control[i] = rb_mrt_control & pipeline->blend.rb_mrt_control_mask; - pipeline->blend.rb_mrt_blend_control[i] = rb_mrt_blend_control; - } - - *color_bandwidth_per_sample = total_bpp / 8; -} - -static void -tu6_emit_blend_control(struct tu_pipeline *pipeline, - uint32_t blend_enable_mask, - bool dual_src_blend, - const VkPipelineMultisampleStateCreateInfo *msaa_info) -{ - const uint32_t sample_mask = - msaa_info->pSampleMask ? 
(*msaa_info->pSampleMask & 0xffff) - : 0xffff; - - pipeline->blend.sp_blend_cntl = - A6XX_SP_BLEND_CNTL(.enable_blend = blend_enable_mask, - .unk8 = true, - .dual_color_in_enable = dual_src_blend, - .alpha_to_coverage = - msaa_info->alphaToCoverageEnable, ).value & - pipeline->blend.sp_blend_cntl_mask; - - /* set A6XX_RB_BLEND_CNTL_INDEPENDENT_BLEND only when enabled? */ - pipeline->blend.rb_blend_cntl = - A6XX_RB_BLEND_CNTL(.enable_blend = blend_enable_mask, - .independent_blend = true, - .dual_color_in_enable = dual_src_blend, - .alpha_to_coverage = msaa_info->alphaToCoverageEnable, - .alpha_to_one = msaa_info->alphaToOneEnable, - .sample_mask = sample_mask,).value & - pipeline->blend.rb_blend_cntl_mask; -} - -static void -tu6_emit_blend(struct tu_cs *cs, - struct tu_pipeline *pipeline) -{ - tu_cs_emit_regs(cs, A6XX_SP_FS_OUTPUT_CNTL1(.mrt = pipeline->blend.num_rts)); - tu_cs_emit_regs(cs, A6XX_RB_FS_OUTPUT_CNTL1(.mrt = pipeline->blend.num_rts)); - tu_cs_emit_regs(cs, A6XX_SP_BLEND_CNTL(.dword = pipeline->blend.sp_blend_cntl)); - tu_cs_emit_regs(cs, A6XX_RB_BLEND_CNTL(.dword = pipeline->blend.rb_blend_cntl)); - - for (unsigned i = 0; i < pipeline->blend.num_rts; i++) { - tu_cs_emit_regs(cs, - A6XX_RB_MRT_CONTROL(i, .dword = pipeline->blend.rb_mrt_control[i] | - (pipeline->blend.logic_op_enabled ? - pipeline->blend.rb_mrt_control_rop : 0)), - A6XX_RB_MRT_BLEND_CONTROL(i, .dword = pipeline->blend.rb_mrt_blend_control[i])); - } -} - static VkResult tu_setup_pvtmem(struct tu_device *dev, struct tu_pipeline *pipeline, @@ -2466,6 +2035,8 @@ set_combined_state(struct tu_pipeline_builder *builder, return true; } +#define TU6_EMIT_VERTEX_INPUT_MAX_DWORDS (MAX_VERTEX_ATTRIBS * 2 + 1) + static VkResult tu_pipeline_allocate_cs(struct tu_device *dev, struct tu_pipeline *pipeline, @@ -3088,7 +2659,6 @@ tu_nir_cache_insert(struct vk_pipeline_cache *cache, return container_of(object, struct tu_nir_shaders, base); } - static VkResult tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder, struct tu_pipeline *pipeline) @@ -3175,17 +2745,18 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder, if (builder->state & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) { - keys[MESA_SHADER_VERTEX].multiview_mask = builder->multiview_mask; + keys[MESA_SHADER_VERTEX].multiview_mask = + builder->graphics_state.rp->view_mask; } if (builder->state & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) { - keys[MESA_SHADER_FRAGMENT].multiview_mask = builder->multiview_mask; + keys[MESA_SHADER_FRAGMENT].multiview_mask = + builder->graphics_state.rp->view_mask; keys[MESA_SHADER_FRAGMENT].force_sample_interp = ir3_key.sample_shading; keys[MESA_SHADER_FRAGMENT].fragment_density_map = builder->fragment_density_map; keys[MESA_SHADER_FRAGMENT].unscaled_input_fragcoord = builder->unscaled_input_fragcoord; - pipeline->fs.fragment_density_map = builder->fragment_density_map; } unsigned char pipeline_sha1[20]; @@ -3585,264 +3156,6 @@ fail: return result; } -static void -tu_pipeline_builder_parse_dynamic(struct tu_pipeline_builder *builder, - struct tu_pipeline *pipeline) -{ - const VkPipelineDynamicStateCreateInfo *dynamic_info = - builder->create_info->pDynamicState; - - pipeline->rast.gras_su_cntl_mask = ~0u; - pipeline->rast.gras_cl_cntl_mask = ~0u; - pipeline->rast.rb_depth_cntl_mask = ~0u; - pipeline->rast.pc_raster_cntl_mask = ~0u; - pipeline->rast.vpc_unknown_9107_mask = ~0u; - pipeline->ds.rb_depth_cntl_mask = ~0u; - pipeline->ds.rb_stencil_cntl_mask = ~0u; - 
pipeline->blend.sp_blend_cntl_mask = ~0u; - pipeline->blend.rb_blend_cntl_mask = ~0u; - pipeline->blend.rb_mrt_control_mask = ~0u; - - if (!dynamic_info) - return; - - bool dynamic_depth_clip = false, dynamic_depth_clamp = false; - bool dynamic_viewport = false, dynamic_viewport_range = false; - - for (uint32_t i = 0; i < dynamic_info->dynamicStateCount; i++) { - VkDynamicState state = dynamic_info->pDynamicStates[i]; - switch (state) { - case VK_DYNAMIC_STATE_VIEWPORT ... VK_DYNAMIC_STATE_STENCIL_REFERENCE: - if (state == VK_DYNAMIC_STATE_LINE_WIDTH) - pipeline->rast.gras_su_cntl_mask &= ~A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK; - pipeline->dynamic_state_mask |= BIT(state); - if (state == VK_DYNAMIC_STATE_VIEWPORT) - dynamic_viewport = true; - break; - case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT: - pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_SAMPLE_LOCATIONS); - break; - case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_ENABLE_EXT: - pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_SAMPLE_LOCATIONS_ENABLE); - break; - case VK_DYNAMIC_STATE_CULL_MODE: - pipeline->rast.gras_su_cntl_mask &= - ~(A6XX_GRAS_SU_CNTL_CULL_BACK | A6XX_GRAS_SU_CNTL_CULL_FRONT); - pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RAST); - break; - case VK_DYNAMIC_STATE_FRONT_FACE: - pipeline->rast.gras_su_cntl_mask &= ~A6XX_GRAS_SU_CNTL_FRONT_CW; - pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RAST); - break; - case VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY: - pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY); - break; - case VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE: - pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_VB_STRIDE); - break; - case VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT: - pipeline->dynamic_state_mask |= - BIT(VK_DYNAMIC_STATE_VIEWPORT) | - BIT(TU_DYNAMIC_STATE_VIEWPORT_COUNT); - dynamic_viewport = true; - break; - case VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT: - pipeline->dynamic_state_mask |= - BIT(VK_DYNAMIC_STATE_SCISSOR) | - BIT(TU_DYNAMIC_STATE_SCISSOR_COUNT); - break; - case VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE: - pipeline->ds.rb_depth_cntl_mask &= - ~(A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE | A6XX_RB_DEPTH_CNTL_Z_READ_ENABLE); - pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_DS); - break; - case VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE: - pipeline->ds.rb_depth_cntl_mask &= ~A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE; - pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_DS); - break; - case VK_DYNAMIC_STATE_DEPTH_COMPARE_OP: - pipeline->ds.rb_depth_cntl_mask &= ~A6XX_RB_DEPTH_CNTL_ZFUNC__MASK; - pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_DS); - break; - case VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE: - pipeline->ds.rb_depth_cntl_mask &= - ~(A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE | A6XX_RB_DEPTH_CNTL_Z_READ_ENABLE); - pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_DS); - break; - case VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE: - pipeline->ds.rb_stencil_cntl_mask &= ~(A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE | - A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF | - A6XX_RB_STENCIL_CONTROL_STENCIL_READ); - pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_DS); - break; - case VK_DYNAMIC_STATE_STENCIL_OP: - pipeline->ds.rb_stencil_cntl_mask &= ~(A6XX_RB_STENCIL_CONTROL_FUNC__MASK | - A6XX_RB_STENCIL_CONTROL_FAIL__MASK | - A6XX_RB_STENCIL_CONTROL_ZPASS__MASK | - A6XX_RB_STENCIL_CONTROL_ZFAIL__MASK | - A6XX_RB_STENCIL_CONTROL_FUNC_BF__MASK | - A6XX_RB_STENCIL_CONTROL_FAIL_BF__MASK | - A6XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK | - A6XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK); - 
pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_DS); - break; - case VK_DYNAMIC_STATE_DEPTH_BIAS_ENABLE: - pipeline->rast.gras_su_cntl_mask &= ~A6XX_GRAS_SU_CNTL_POLY_OFFSET; - pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RAST); - break; - case VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE: - pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE); - break; - case VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE: - pipeline->rast.pc_raster_cntl_mask &= ~A6XX_PC_RASTER_CNTL_DISCARD; - pipeline->rast.vpc_unknown_9107_mask &= ~A6XX_VPC_UNKNOWN_9107_RASTER_DISCARD; - pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_PC_RASTER_CNTL); - break; - case VK_DYNAMIC_STATE_LOGIC_OP_EXT: - pipeline->blend.sp_blend_cntl_mask &= ~A6XX_SP_BLEND_CNTL_ENABLE_BLEND__MASK; - pipeline->blend.rb_blend_cntl_mask &= ~A6XX_RB_BLEND_CNTL_ENABLE_BLEND__MASK; - pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_BLEND); - pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_LOGIC_OP); - break; - case VK_DYNAMIC_STATE_LOGIC_OP_ENABLE_EXT: - pipeline->blend.sp_blend_cntl_mask &= ~A6XX_SP_BLEND_CNTL_ENABLE_BLEND__MASK; - pipeline->blend.rb_blend_cntl_mask &= ~A6XX_RB_BLEND_CNTL_ENABLE_BLEND__MASK; - pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_BLEND); - pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_LOGIC_OP_ENABLE); - break; - case VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT: - pipeline->blend.sp_blend_cntl_mask &= ~A6XX_SP_BLEND_CNTL_ENABLE_BLEND__MASK; - pipeline->blend.rb_blend_cntl_mask &= ~A6XX_RB_BLEND_CNTL_ENABLE_BLEND__MASK; - pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_BLEND); - - /* Dynamic color write enable doesn't directly change any of the - * registers, but it causes us to make some of the registers 0, so we - * set this dynamic state instead of making the register dynamic. 
- */ - pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_COLOR_WRITE_ENABLE); - break; - case VK_DYNAMIC_STATE_VERTEX_INPUT_EXT: - pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_VERTEX_INPUT) | - BIT(TU_DYNAMIC_STATE_VB_STRIDE); - break; - case VK_DYNAMIC_STATE_LINE_STIPPLE_EXT: - break; - case VK_DYNAMIC_STATE_PATCH_CONTROL_POINTS_EXT: - pipeline->dynamic_state_mask |= - BIT(TU_DYNAMIC_STATE_PATCH_CONTROL_POINTS); - break; - case VK_DYNAMIC_STATE_POLYGON_MODE_EXT: - pipeline->dynamic_state_mask |= - BIT(TU_DYNAMIC_STATE_RAST) | - BIT(TU_DYNAMIC_STATE_POLYGON_MODE); - break; - case VK_DYNAMIC_STATE_TESSELLATION_DOMAIN_ORIGIN_EXT: - pipeline->dynamic_state_mask |= - BIT(TU_DYNAMIC_STATE_TESS_DOMAIN_ORIGIN); - break; - case VK_DYNAMIC_STATE_DEPTH_CLIP_ENABLE_EXT: - pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RAST); - pipeline->rast.gras_cl_cntl_mask &= - ~(A6XX_GRAS_CL_CNTL_ZNEAR_CLIP_DISABLE | - A6XX_GRAS_CL_CNTL_ZFAR_CLIP_DISABLE); - dynamic_depth_clip = true; - break; - case VK_DYNAMIC_STATE_DEPTH_CLAMP_ENABLE_EXT: - pipeline->dynamic_state_mask |= - BIT(TU_DYNAMIC_STATE_RAST) | - BIT(TU_DYNAMIC_STATE_DS); - pipeline->rast.gras_cl_cntl_mask &= - ~A6XX_GRAS_CL_CNTL_Z_CLAMP_ENABLE; - pipeline->rast.rb_depth_cntl_mask &= - ~A6XX_RB_DEPTH_CNTL_Z_CLAMP_ENABLE; - dynamic_depth_clamp = true; - break; - case VK_DYNAMIC_STATE_SAMPLE_MASK_EXT: - pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_BLEND); - pipeline->blend.rb_blend_cntl_mask &= - ~A6XX_RB_BLEND_CNTL_SAMPLE_MASK__MASK; - break; - case VK_DYNAMIC_STATE_RASTERIZATION_SAMPLES_EXT: - pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_MSAA_SAMPLES); - break; - case VK_DYNAMIC_STATE_ALPHA_TO_COVERAGE_ENABLE_EXT: - pipeline->dynamic_state_mask |= - BIT(TU_DYNAMIC_STATE_ALPHA_TO_COVERAGE) | - BIT(TU_DYNAMIC_STATE_BLEND); - pipeline->blend.rb_blend_cntl_mask &= - ~A6XX_RB_BLEND_CNTL_ALPHA_TO_COVERAGE; - pipeline->blend.sp_blend_cntl_mask &= - ~A6XX_SP_BLEND_CNTL_ALPHA_TO_COVERAGE; - break; - case VK_DYNAMIC_STATE_ALPHA_TO_ONE_ENABLE_EXT: - pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_BLEND); - pipeline->blend.rb_blend_cntl_mask &= - ~A6XX_RB_BLEND_CNTL_ALPHA_TO_ONE; - break; - case VK_DYNAMIC_STATE_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE_EXT: - pipeline->dynamic_state_mask |= - BIT(VK_DYNAMIC_STATE_VIEWPORT) | - BIT(TU_DYNAMIC_STATE_RAST) | - BIT(TU_DYNAMIC_STATE_VIEWPORT_RANGE); - pipeline->rast.gras_cl_cntl_mask &= - ~A6XX_GRAS_CL_CNTL_ZERO_GB_SCALE_Z; - dynamic_viewport_range = true; - break; - case VK_DYNAMIC_STATE_RASTERIZATION_STREAM_EXT: - pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_PC_RASTER_CNTL); - pipeline->rast.pc_raster_cntl_mask &= - ~A6XX_PC_RASTER_CNTL_STREAM__MASK; - break; - case VK_DYNAMIC_STATE_LINE_RASTERIZATION_MODE_EXT: - pipeline->dynamic_state_mask |= - BIT(TU_DYNAMIC_STATE_RAST) | - BIT(TU_DYNAMIC_STATE_LINE_MODE); - pipeline->rast.gras_su_cntl_mask &= - ~A6XX_GRAS_SU_CNTL_LINE_MODE__MASK; - break; - case VK_DYNAMIC_STATE_PROVOKING_VERTEX_MODE_EXT: - pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_PROVOKING_VTX); - break; - case VK_DYNAMIC_STATE_COLOR_BLEND_ENABLE_EXT: - pipeline->dynamic_state_mask |= - BIT(TU_DYNAMIC_STATE_BLEND) | - BIT(TU_DYNAMIC_STATE_BLEND_ENABLE); - pipeline->blend.rb_mrt_control_mask &= - ~(A6XX_RB_MRT_CONTROL_BLEND | A6XX_RB_MRT_CONTROL_BLEND2); - pipeline->blend.sp_blend_cntl_mask &= ~A6XX_SP_BLEND_CNTL_ENABLE_BLEND__MASK; - pipeline->blend.rb_blend_cntl_mask &= ~A6XX_RB_BLEND_CNTL_ENABLE_BLEND__MASK; - break; - case 
VK_DYNAMIC_STATE_COLOR_BLEND_EQUATION_EXT: - pipeline->dynamic_state_mask |= - BIT(TU_DYNAMIC_STATE_BLEND) | - BIT(TU_DYNAMIC_STATE_BLEND_EQUATION); - pipeline->blend.sp_blend_cntl_mask &= ~A6XX_SP_BLEND_CNTL_DUAL_COLOR_IN_ENABLE; - pipeline->blend.rb_blend_cntl_mask &= ~A6XX_RB_BLEND_CNTL_DUAL_COLOR_IN_ENABLE; - break; - case VK_DYNAMIC_STATE_COLOR_WRITE_MASK_EXT: - pipeline->dynamic_state_mask |= - BIT(TU_DYNAMIC_STATE_BLEND) | - BIT(TU_DYNAMIC_STATE_COLOR_WRITE_MASK); - pipeline->blend.rb_mrt_control_mask &= ~A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK; - break; - default: - assert(!"unsupported dynamic state"); - break; - } - } - - pipeline->rast.override_depth_clip = - dynamic_depth_clamp && !dynamic_depth_clip; - - /* If the viewport range is dynamic, the viewport may need to be adjusted - * dynamically so we can't emit it up-front, but we need to copy the state - * viewport state to the dynamic state as if we had called CmdSetViewport() - * when binding the pipeline. - */ - pipeline->viewport.set_dynamic_vp_to_static = - dynamic_viewport_range && !dynamic_viewport; -} - static void tu_pipeline_builder_parse_libraries(struct tu_pipeline_builder *builder, struct tu_pipeline *pipeline) @@ -3870,75 +3183,31 @@ tu_pipeline_builder_parse_libraries(struct tu_pipeline_builder *builder, if (pipeline->type == TU_PIPELINE_GRAPHICS_LIB) tu_pipeline_to_graphics_lib(pipeline)->state |= library->state; - uint64_t library_dynamic_state = 0; if (library->state & VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT) { - pipeline->vi = library->base.vi; - pipeline->ia = library->base.ia; - library_dynamic_state |= - BIT(TU_DYNAMIC_STATE_VERTEX_INPUT) | - BIT(TU_DYNAMIC_STATE_VB_STRIDE) | - BIT(TU_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY) | - BIT(TU_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE); pipeline->shared_consts = library->base.shared_consts; } if (library->state & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) { pipeline->tess = library->base.tess; - pipeline->rast = library->base.rast; - pipeline->viewport = library->base.viewport; - library_dynamic_state |= - BIT(VK_DYNAMIC_STATE_VIEWPORT) | - BIT(VK_DYNAMIC_STATE_SCISSOR) | - BIT(TU_DYNAMIC_STATE_RAST) | - BIT(VK_DYNAMIC_STATE_DEPTH_BIAS) | - BIT(TU_DYNAMIC_STATE_PC_RASTER_CNTL) | - BIT(TU_DYNAMIC_STATE_PATCH_CONTROL_POINTS) | - BIT(TU_DYNAMIC_STATE_POLYGON_MODE) | - BIT(TU_DYNAMIC_STATE_TESS_DOMAIN_ORIGIN) | - BIT(TU_DYNAMIC_STATE_VIEWPORT_RANGE) | - BIT(TU_DYNAMIC_STATE_LINE_MODE) | - BIT(TU_DYNAMIC_STATE_PROVOKING_VTX) | - BIT(TU_DYNAMIC_STATE_VIEWPORT_COUNT) | - BIT(TU_DYNAMIC_STATE_SCISSOR_COUNT); } if (library->state & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) { pipeline->ds = library->base.ds; - pipeline->fs = library->base.fs; pipeline->lrz.fs = library->base.lrz.fs; pipeline->lrz.lrz_status |= library->base.lrz.lrz_status; pipeline->lrz.force_late_z |= library->base.lrz.force_late_z; - library_dynamic_state |= - BIT(VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK) | - BIT(VK_DYNAMIC_STATE_STENCIL_WRITE_MASK) | - BIT(VK_DYNAMIC_STATE_STENCIL_REFERENCE) | - BIT(TU_DYNAMIC_STATE_DS) | - BIT(VK_DYNAMIC_STATE_DEPTH_BOUNDS); pipeline->shared_consts = library->base.shared_consts; } if (library->state & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT) { - pipeline->blend = library->base.blend; pipeline->output = library->base.output; pipeline->lrz.lrz_status |= library->base.lrz.lrz_status; pipeline->lrz.force_late_z |= library->base.lrz.force_late_z; pipeline->prim_order = library->base.prim_order; - 
library_dynamic_state |=
-         BIT(VK_DYNAMIC_STATE_BLEND_CONSTANTS) |
-         BIT(TU_DYNAMIC_STATE_SAMPLE_LOCATIONS) |
-         BIT(TU_DYNAMIC_STATE_SAMPLE_LOCATIONS_ENABLE) |
-         BIT(TU_DYNAMIC_STATE_BLEND) |
-         BIT(TU_DYNAMIC_STATE_BLEND_ENABLE) |
-         BIT(TU_DYNAMIC_STATE_BLEND_EQUATION) |
-         BIT(TU_DYNAMIC_STATE_LOGIC_OP) |
-         BIT(TU_DYNAMIC_STATE_LOGIC_OP_ENABLE) |
-         BIT(TU_DYNAMIC_STATE_COLOR_WRITE_ENABLE) |
-         BIT(TU_DYNAMIC_STATE_MSAA_SAMPLES) |
-         BIT(TU_DYNAMIC_STATE_ALPHA_TO_COVERAGE);
    }
 
    if ((library->state &
@@ -3948,23 +3217,9 @@ tu_pipeline_builder_parse_libraries(struct tu_pipeline_builder *builder,
       pipeline->prim_order = library->base.prim_order;
    }
 
-   if ((library->state &
-        VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) &&
-       (library->state &
-        VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT) &&
-       (library->state &
-        VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT)) {
-      pipeline->rast_ds = library->base.rast_ds;
-   }
-
-   pipeline->dynamic_state_mask =
-      (pipeline->dynamic_state_mask & ~library_dynamic_state) |
-      (library->base.dynamic_state_mask & library_dynamic_state);
-
-   u_foreach_bit (i, library_dynamic_state & ~library->base.dynamic_state_mask) {
-      if (i >= TU_DYNAMIC_STATE_COUNT)
-         break;
+   pipeline->set_state_mask |= library->base.set_state_mask;
+   u_foreach_bit (i, library->base.set_state_mask) {
       pipeline->dynamic_state[i] = library->base.dynamic_state[i];
    }
@@ -3972,6 +3227,9 @@ tu_pipeline_builder_parse_libraries(struct tu_pipeline_builder *builder,
       pipeline->program = library->base.program;
       pipeline->load_state = library->base.load_state;
    }
+
+   vk_graphics_pipeline_state_merge(&builder->graphics_state,
+                                    &library->graphics_state);
 }
}
@@ -4029,19 +3287,6 @@ tu_pipeline_set_linkage(struct tu_program_descriptor_linkage *link,
   link->constlen = v->constlen;
}
-static bool
-tu_pipeline_static_state(struct tu_pipeline *pipeline, struct tu_cs *cs,
-                         uint32_t id, uint32_t size)
-{
-   assert(id < ARRAY_SIZE(pipeline->dynamic_state));
-
-   if (pipeline->dynamic_state_mask & BIT(id))
-      return false;
-
-   pipeline->dynamic_state[id] = tu_cs_draw_state(&pipeline->cs, cs, size);
-   return true;
-}
-
static void
tu_pipeline_builder_parse_shader_stages(struct tu_pipeline_builder *builder,
                                        struct tu_pipeline *pipeline)
@@ -4097,17 +3342,6 @@ tu_pipeline_builder_parse_shader_stages(struct tu_pipeline_builder *builder,
   pipeline->program.hs_param_dwords = MIN2((hs_constlen - hs_base) * 4, 8);
 
-   uint32_t state_size = TU6_EMIT_PATCH_CONTROL_POINTS_DWORDS(
-      pipeline->program.hs_param_dwords);
-
-   struct tu_cs cs;
-   if (tu_pipeline_static_state(pipeline, &cs,
-                                TU_DYNAMIC_STATE_PATCH_CONTROL_POINTS,
-                                state_size)) {
-      tu6_emit_patch_control_points(&cs, pipeline,
-                                    pipeline->tess.patch_control_points);
-   }
-
   /* In SPIR-V generated from GLSL, the tessellation primitive params
    * are specified in the tess eval shader, but in SPIR-V generated from
    * HLSL, they are specified in the tess control shader.
*/ @@ -4154,186 +3388,799 @@ tu_pipeline_builder_parse_shader_stages(struct tu_pipeline_builder *builder, else last_shader = vs; - pipeline->program.writes_viewport = last_shader->writes_viewport; + pipeline->program.per_view_viewport = + !last_shader->writes_viewport && + builder->fragment_density_map && + builder->device->physical_device->info->a6xx.has_per_view_viewport; +} + +static const enum mesa_vk_dynamic_graphics_state tu_vertex_input_state[] = { + MESA_VK_DYNAMIC_VI, +}; + +static unsigned +tu6_vertex_input_size(struct tu_device *dev, + const struct vk_vertex_input_state *vi) +{ + return 1 + 2 * util_last_bit(vi->attributes_valid); } static void -tu_pipeline_builder_parse_vertex_input(struct tu_pipeline_builder *builder, - struct tu_pipeline *pipeline) +tu6_emit_vertex_input(struct tu_cs *cs, + const struct vk_vertex_input_state *vi) { - if (pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_VERTEX_INPUT)) - return; + unsigned attr_count = util_last_bit(vi->attributes_valid); + if (attr_count != 0) + tu_cs_emit_pkt4(cs, REG_A6XX_VFD_DECODE_INSTR(0), attr_count * 2); - const VkPipelineVertexInputStateCreateInfo *vi_info = - builder->create_info->pVertexInputState; + for (uint32_t loc = 0; loc < attr_count; loc++) { + const struct vk_vertex_attribute_state *attr = &vi->attributes[loc]; - struct tu_cs cs; - if (tu_pipeline_static_state(pipeline, &cs, TU_DYNAMIC_STATE_VB_STRIDE, - 2 * vi_info->vertexBindingDescriptionCount)) { - for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) { - const VkVertexInputBindingDescription *binding = - &vi_info->pVertexBindingDescriptions[i]; + if (vi->attributes_valid & (1u << loc)) { + const struct vk_vertex_binding_state *binding = + &vi->bindings[attr->binding]; - tu_cs_emit_regs(&cs, - A6XX_VFD_FETCH_STRIDE(binding->binding, binding->stride)); + enum pipe_format pipe_format = vk_format_to_pipe_format(attr->format); + const struct tu_native_format format = tu6_format_vtx(pipe_format); + tu_cs_emit(cs, A6XX_VFD_DECODE_INSTR(0, + .idx = attr->binding, + .offset = attr->offset, + .instanced = binding->input_rate == VK_VERTEX_INPUT_RATE_INSTANCE, + .format = format.fmt, + .swap = format.swap, + .unk30 = 1, + ._float = !util_format_is_pure_integer(pipe_format)).value); + tu_cs_emit(cs, A6XX_VFD_DECODE_STEP_RATE(0, binding->divisor).value); + } else { + tu_cs_emit(cs, 0); + tu_cs_emit(cs, 0); } } +} - VkVertexInputBindingDescription2EXT bindings[MAX_VBS]; - VkVertexInputAttributeDescription2EXT attrs[MAX_VERTEX_ATTRIBS]; +static const enum mesa_vk_dynamic_graphics_state tu_vertex_stride_state[] = { + MESA_VK_DYNAMIC_VI_BINDINGS_VALID, + MESA_VK_DYNAMIC_VI_BINDING_STRIDES, +}; - for (unsigned i = 0; i < vi_info->vertexBindingDescriptionCount; i++) { - const VkVertexInputBindingDescription *binding = - &vi_info->pVertexBindingDescriptions[i]; - bindings[i] = (VkVertexInputBindingDescription2EXT) { - .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_BINDING_DESCRIPTION_2_EXT, - .pNext = NULL, - .binding = binding->binding, - .stride = binding->stride, - .inputRate = binding->inputRate, - .divisor = 1, - }; +static unsigned +tu6_vertex_stride_size(struct tu_device *dev, + const struct vk_vertex_input_state *vi) +{ + return 1 + 2 * util_last_bit(vi->bindings_valid); +} - /* Bindings may contain holes */ - pipeline->vi.num_vbs = MAX2(pipeline->vi.num_vbs, binding->binding + 1); - } - - const VkPipelineVertexInputDivisorStateCreateInfoEXT *div_state = - vk_find_struct_const(vi_info->pNext, PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT); - if 
(div_state) { - for (uint32_t i = 0; i < div_state->vertexBindingDivisorCount; i++) { - const VkVertexInputBindingDivisorDescriptionEXT *desc = - &div_state->pVertexBindingDivisors[i]; - bindings[desc->binding].divisor = desc->divisor; +static void +tu6_emit_vertex_stride(struct tu_cs *cs, const struct vk_vertex_input_state *vi) +{ + if (vi->bindings_valid) { + unsigned bindings_count = util_last_bit(vi->bindings_valid); + tu_cs_emit_pkt7(cs, CP_CONTEXT_REG_BUNCH, 2 * bindings_count); + for (unsigned i = 0; i < bindings_count; i++) { + tu_cs_emit(cs, REG_A6XX_VFD_FETCH_STRIDE(i)); + tu_cs_emit(cs, vi->bindings[i].stride); } } +} - for (unsigned i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) { - const VkVertexInputAttributeDescription *attr = - &vi_info->pVertexAttributeDescriptions[i]; - attrs[i] = (VkVertexInputAttributeDescription2EXT) { - .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT, - .pNext = NULL, - .location = attr->location, - .binding = attr->binding, - .format = attr->format, - .offset = attr->offset, - }; - } - - tu_cs_begin_sub_stream(&pipeline->cs, - TU6_EMIT_VERTEX_INPUT_MAX_DWORDS, &cs); - tu6_emit_vertex_input(&cs, - vi_info->vertexBindingDescriptionCount, bindings, - vi_info->vertexAttributeDescriptionCount, attrs); - pipeline->dynamic_state[TU_DYNAMIC_STATE_VERTEX_INPUT] = - tu_cs_end_draw_state(&pipeline->cs, &cs); +static unsigned +tu6_vertex_stride_size_dyn(struct tu_device *dev, + const uint16_t *vi_binding_stride, + uint32_t bindings_valid) +{ + return 1 + 2 * util_last_bit(bindings_valid); } static void -tu_pipeline_builder_parse_input_assembly(struct tu_pipeline_builder *builder, - struct tu_pipeline *pipeline) +tu6_emit_vertex_stride_dyn(struct tu_cs *cs, const uint16_t *vi_binding_stride, + uint32_t bindings_valid) { - const VkPipelineInputAssemblyStateCreateInfo *ia_info = - builder->create_info->pInputAssemblyState; + if (bindings_valid) { + unsigned bindings_count = util_last_bit(bindings_valid); + tu_cs_emit_pkt7(cs, CP_CONTEXT_REG_BUNCH, 2 * bindings_count); + for (unsigned i = 0; i < bindings_count; i++) { + tu_cs_emit(cs, REG_A6XX_VFD_FETCH_STRIDE(i)); + tu_cs_emit(cs, vi_binding_stride[i]); + } + } +} - pipeline->ia.primtype = tu6_primtype(ia_info->topology); - pipeline->ia.primitive_restart = ia_info->primitiveRestartEnable; +static const enum mesa_vk_dynamic_graphics_state tu_viewport_state[] = { + MESA_VK_DYNAMIC_VP_VIEWPORTS, + MESA_VK_DYNAMIC_VP_VIEWPORT_COUNT, + MESA_VK_DYNAMIC_VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE, +}; + +static unsigned +tu6_viewport_size(struct tu_device *dev, const struct vk_viewport_state *vp) +{ + return 1 + vp->viewport_count * 6 + 1 + vp->viewport_count * 2 + + 1 + vp->viewport_count * 2 + 5; } static void -tu_pipeline_builder_parse_tessellation(struct tu_pipeline_builder *builder, - struct tu_pipeline *pipeline) +tu6_emit_viewport(struct tu_cs *cs, const struct vk_viewport_state *vp) { - if (!(builder->active_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) || - !(builder->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)) - return; + VkExtent2D guardband = {511, 511}; - const VkPipelineTessellationStateCreateInfo *tess_info = - builder->create_info->pTessellationState; + tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CL_VPORT_XOFFSET(0), vp->viewport_count * 6); + for (uint32_t i = 0; i < vp->viewport_count; i++) { + const VkViewport *viewport = &vp->viewports[i]; + float offsets[3]; + float scales[3]; + scales[0] = viewport->width / 2.0f; + scales[1] = viewport->height / 2.0f; + if 
(vp->depth_clip_negative_one_to_one) { + scales[2] = 0.5 * (viewport->maxDepth - viewport->minDepth); + } else { + scales[2] = viewport->maxDepth - viewport->minDepth; + } - if (!(pipeline->dynamic_state_mask & - BIT(TU_DYNAMIC_STATE_PATCH_CONTROL_POINTS))) { - assert(tess_info->patchControlPoints <= 32); - pipeline->tess.patch_control_points = tess_info->patchControlPoints; + offsets[0] = viewport->x + scales[0]; + offsets[1] = viewport->y + scales[1]; + if (vp->depth_clip_negative_one_to_one) { + offsets[2] = 0.5 * (viewport->minDepth + viewport->maxDepth); + } else { + offsets[2] = viewport->minDepth; + } + + for (uint32_t j = 0; j < 3; j++) { + tu_cs_emit(cs, fui(offsets[j])); + tu_cs_emit(cs, fui(scales[j])); + } + + guardband.width = + MIN2(guardband.width, fd_calc_guardband(offsets[0], scales[0], false)); + guardband.height = + MIN2(guardband.height, fd_calc_guardband(offsets[1], scales[1], false)); } - const VkPipelineTessellationDomainOriginStateCreateInfo *domain_info = - vk_find_struct_const(tess_info->pNext, PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO); - pipeline->tess.upper_left_domain_origin = !domain_info || - domain_info->domainOrigin == VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT; + tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL(0), vp->viewport_count * 2); + for (uint32_t i = 0; i < vp->viewport_count; i++) { + const VkViewport *viewport = &vp->viewports[i]; + VkOffset2D min; + VkOffset2D max; + min.x = (int32_t) viewport->x; + max.x = (int32_t) ceilf(viewport->x + viewport->width); + if (viewport->height >= 0.0f) { + min.y = (int32_t) viewport->y; + max.y = (int32_t) ceilf(viewport->y + viewport->height); + } else { + min.y = (int32_t)(viewport->y + viewport->height); + max.y = (int32_t) ceilf(viewport->y); + } + /* the spec allows viewport->height to be 0.0f */ + if (min.y == max.y) + max.y++; + /* allow viewport->width = 0.0f for un-initialized viewports: */ + if (min.x == max.x) + max.x++; + + min.x = MAX2(min.x, 0); + min.y = MAX2(min.y, 0); + max.x = MAX2(max.x, 1); + max.y = MAX2(max.y, 1); + + assert(min.x < max.x); + assert(min.y < max.y); + + tu_cs_emit(cs, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_X(min.x) | + A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_Y(min.y)); + tu_cs_emit(cs, A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_X(max.x - 1) | + A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_Y(max.y - 1)); + } + + tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CL_Z_CLAMP(0), vp->viewport_count * 2); + for (uint32_t i = 0; i < vp->viewport_count; i++) { + const VkViewport *viewport = &vp->viewports[i]; + tu_cs_emit(cs, fui(MIN2(viewport->minDepth, viewport->maxDepth))); + tu_cs_emit(cs, fui(MAX2(viewport->minDepth, viewport->maxDepth))); + } + tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ, 1); + tu_cs_emit(cs, A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ(guardband.width) | + A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT(guardband.height)); + + /* TODO: what to do about this and multi viewport ? */ + float z_clamp_min = vp->viewport_count ? MIN2(vp->viewports[0].minDepth, vp->viewports[0].maxDepth) : 0; + float z_clamp_max = vp->viewport_count ? MAX2(vp->viewports[0].minDepth, vp->viewports[0].maxDepth) : 0; + + tu_cs_emit_regs(cs, + A6XX_RB_Z_CLAMP_MIN(z_clamp_min), + A6XX_RB_Z_CLAMP_MAX(z_clamp_max)); +} + +struct apply_viewport_state { + struct vk_viewport_state vp; + bool share_scale; +}; + +/* It's a hardware restriction that the window offset (i.e. bin.offset) must + * be the same for all views. 
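+ * (The window offset register is programmed once per bin, while with FDM
+ * each view may have its own fragment area and hence its own scale.)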
This means that GMEM coordinates cannot be a + * simple scaling of framebuffer coordinates, because this would require us to + * scale the window offset and the scale may be different per view. Instead we + * have to apply a per-bin offset to the GMEM coordinate transform to make + * sure that the window offset maps to itself. Specifically we need an offset + * o to the transform: + * + * x' = s * x + o + * + * so that when we plug in the bin start b_s: + * + * b_s = s * b_s + o + * + * and we get: + * + * o = b_s - s * b_s + * + * We use this form exactly, because we know the bin offset is a multiple of + * the frag area so s * b_s is an integer and we can compute an exact result + * easily. + */ + +VkOffset2D +tu_fdm_per_bin_offset(VkExtent2D frag_area, VkRect2D bin) +{ + assert(bin.offset.x % frag_area.width == 0); + assert(bin.offset.y % frag_area.height == 0); + + return (VkOffset2D) { + bin.offset.x - bin.offset.x / frag_area.width, + bin.offset.y - bin.offset.y / frag_area.height + }; } static void -tu_pipeline_builder_parse_viewport(struct tu_pipeline_builder *builder, - struct tu_pipeline *pipeline) +fdm_apply_viewports(struct tu_cs *cs, void *data, VkRect2D bin, unsigned views, + VkExtent2D *frag_areas) { - /* The spec says: - * - * pViewportState is a pointer to an instance of the - * VkPipelineViewportStateCreateInfo structure, and is ignored if the - * pipeline has rasterization disabled." - * - * We leave the relevant registers stale in that case. - */ - if (builder->rasterizer_discard) - return; + const struct apply_viewport_state *state = + (const struct apply_viewport_state *)data; - const VkPipelineViewportStateCreateInfo *vp_info = - builder->create_info->pViewportState; - const VkPipelineViewportDepthClipControlCreateInfoEXT *depth_clip_info = - vk_find_struct_const(vp_info->pNext, PIPELINE_VIEWPORT_DEPTH_CLIP_CONTROL_CREATE_INFO_EXT); - pipeline->viewport.z_negative_one_to_one = depth_clip_info ? depth_clip_info->negativeOneToOne : false; + struct vk_viewport_state vp = state->vp; - struct tu_cs cs; - - if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_VIEWPORT, 8 + 10 * vp_info->viewportCount)) { - tu6_emit_viewport(&cs, vp_info->pViewports, vp_info->viewportCount, pipeline->viewport.z_negative_one_to_one); - } - - /* We have to save the static viewports if set_dynamic_vp_to_static is set, - * but it may also be set later during pipeline linking if viewports are - * static state becuase FDM also enables set_dynamic_vp_to_static but in a - * different pipeline stage. Therefore we also have to save them if the - * viewport state is static, even though we emit them above. - */ - if (pipeline->viewport.set_dynamic_vp_to_static || - !(pipeline->dynamic_state_mask & BIT(VK_DYNAMIC_STATE_VIEWPORT))) { - memcpy(pipeline->viewport.viewports, vp_info->pViewports, - vp_info->viewportCount * sizeof(*vp_info->pViewports)); - } - - pipeline->viewport.num_viewports = vp_info->viewportCount; - - assert(!pipeline->viewport.set_dynamic_scissor_to_static); - if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_SCISSOR, 1 + 2 * vp_info->scissorCount)) { - tu6_emit_scissor(&cs, vp_info->pScissors, vp_info->scissorCount); - - /* Similarly to the above we need to save off the static scissors if - * they were originally static, but nothing sets - * set_dynamic_scissor_to_static except FDM. + for (unsigned i = 0; i < state->vp.viewport_count; i++) { + /* Note: If we're using shared scaling, the scale should already be the + * same across all views, we can pick any view. 
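+       * (Note that frag_areas is indexed by view, while the viewport array
+       * is indexed by viewport.)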
However the number
+       * of viewports and number of views is not guaranteed the same, so we
+       * need to pick the 0'th view which always exists to be safe.
+       *
+       * Conversely, if we're not using shared scaling then the rasterizer in
+       * the original pipeline is using only the first viewport, so we need to
+       * replicate it across all viewports.
+       */
+      VkExtent2D frag_area = state->share_scale ? frag_areas[0] : frag_areas[i];
+      VkViewport viewport =
+         state->share_scale ? state->vp.viewports[i] : state->vp.viewports[0];
+      if (frag_area.width == 1 && frag_area.height == 1) {
+         vp.viewports[i] = viewport;
+         continue;
+      }
+
+      float scale_x = 1.0f / frag_area.width;
+      float scale_y = 1.0f / frag_area.height;
+
+      vp.viewports[i].minDepth = viewport.minDepth;
+      vp.viewports[i].maxDepth = viewport.maxDepth;
+      vp.viewports[i].width = viewport.width * scale_x;
+      vp.viewports[i].height = viewport.height * scale_y;
+
+      VkOffset2D offset = tu_fdm_per_bin_offset(frag_area, bin);
+
+      vp.viewports[i].x = scale_x * viewport.x + offset.x;
+      vp.viewports[i].y = scale_y * viewport.y + offset.y;
+   }
+
+   tu6_emit_viewport(cs, &vp);
+}
+
+static void
+tu6_emit_viewport_fdm(struct tu_cs *cs, struct tu_cmd_buffer *cmd,
+                      const struct vk_viewport_state *vp)
+{
+   unsigned num_views = MAX2(cmd->state.pass->num_views, 1);
+   struct apply_viewport_state state = {
+      .vp = *vp,
+      .share_scale = !cmd->state.pipeline->base.program.per_view_viewport,
+   };
+   if (!state.share_scale)
+      state.vp.viewport_count = num_views;
+   unsigned size = tu6_viewport_size(cmd->device, &state.vp);
+   tu_cs_begin_sub_stream(&cmd->sub_cs, size, cs);
+   tu_create_fdm_bin_patchpoint(cmd, cs, size, fdm_apply_viewports, state);
+}
+
+static const enum mesa_vk_dynamic_graphics_state tu_scissor_state[] = {
+   MESA_VK_DYNAMIC_VP_SCISSORS,
+   MESA_VK_DYNAMIC_VP_SCISSOR_COUNT,
+};
+
+static unsigned
+tu6_scissor_size(struct tu_device *dev, const struct vk_viewport_state *vp)
+{
+   return 1 + vp->scissor_count * 2;
+}
+
+void
+tu6_emit_scissor(struct tu_cs *cs, const struct vk_viewport_state *vp)
+{
+   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_SCREEN_SCISSOR_TL(0), vp->scissor_count * 2);
+
+   for (uint32_t i = 0; i < vp->scissor_count; i++) {
+      const VkRect2D *scissor = &vp->scissors[i];
+
+      uint32_t min_x = scissor->offset.x;
+      uint32_t min_y = scissor->offset.y;
+      uint32_t max_x = min_x + scissor->extent.width - 1;
+      uint32_t max_y = min_y + scissor->extent.height - 1;
+
+      if (!scissor->extent.width || !scissor->extent.height) {
+         min_x = min_y = 1;
+         max_x = max_y = 0;
+      } else {
+         /* avoid overflow */
+         uint32_t scissor_max = BITFIELD_MASK(15);
+         min_x = MIN2(scissor_max, min_x);
+         min_y = MIN2(scissor_max, min_y);
+         max_x = MIN2(scissor_max, max_x);
+         max_y = MIN2(scissor_max, max_y);
+      }
+
+      tu_cs_emit(cs, A6XX_GRAS_SC_SCREEN_SCISSOR_TL_X(min_x) |
+                     A6XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(min_y));
+      tu_cs_emit(cs, A6XX_GRAS_SC_SCREEN_SCISSOR_BR_X(max_x) |
+                     A6XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(max_y));
+   }
+}
+
+static void
+fdm_apply_scissors(struct tu_cs *cs, void *data, VkRect2D bin, unsigned views,
+                   VkExtent2D *frag_areas)
+{
+   const struct apply_viewport_state *state =
+      (const struct apply_viewport_state *)data;
+
+   struct vk_viewport_state vp = state->vp;
+
+   for (unsigned i = 0; i < vp.scissor_count; i++) {
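+      /* Pick the fragment area and base scissor the same way
+       * fdm_apply_viewports picks the viewport above: the shared area (view 0
+       * always exists) when scaling is shared, otherwise the per-view area
+       * with scissor 0 replicated across views.
+       */
+      VkExtent2D frag_area = state->share_scale ? 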
frag_areas[0] : frag_areas[i];
+      VkRect2D scissor =
+         state->share_scale ? state->vp.scissors[i] : state->vp.scissors[0];
+      if (frag_area.width == 1 && frag_area.height == 1) {
+         vp.scissors[i] = scissor;
+         continue;
+      }
+
+      /* Transform the scissor following the viewport. It's unclear how this
+       * is supposed to handle cases where the scissor isn't aligned to the
+       * fragment area, but we round outwards to always render partial
+       * fragments if the scissor size equals the framebuffer size and it
+       * isn't aligned to the fragment area.
+       */
+      VkOffset2D offset = tu_fdm_per_bin_offset(frag_area, bin);
+      VkOffset2D min = {
+         scissor.offset.x / frag_area.width + offset.x,
+         scissor.offset.y / frag_area.height + offset.y,
+      };
+      VkOffset2D max = {
+         DIV_ROUND_UP(scissor.offset.x + scissor.extent.width, frag_area.width) + offset.x,
+         DIV_ROUND_UP(scissor.offset.y + scissor.extent.height, frag_area.height) + offset.y,
+      };
+
+      /* Intersect scissor with the scaled bin; this essentially replaces the
+       * window scissor.
+       */
+      uint32_t scaled_width = bin.extent.width / frag_area.width;
+      uint32_t scaled_height = bin.extent.height / frag_area.height;
+      vp.scissors[i].offset.x = MAX2(min.x, bin.offset.x);
+      vp.scissors[i].offset.y = MAX2(min.y, bin.offset.y);
+      vp.scissors[i].extent.width =
+         MIN2(max.x, bin.offset.x + scaled_width) - vp.scissors[i].offset.x;
+      vp.scissors[i].extent.height =
+         MIN2(max.y, bin.offset.y + scaled_height) - vp.scissors[i].offset.y;
+   }
+
+   tu6_emit_scissor(cs, &vp);
+}
+
+static void
+tu6_emit_scissor_fdm(struct tu_cs *cs, struct tu_cmd_buffer *cmd,
+                     const struct vk_viewport_state *vp)
+{
+   unsigned num_views = MAX2(cmd->state.pass->num_views, 1);
+   struct apply_viewport_state state = {
+      .vp = *vp,
+      .share_scale = !cmd->state.pipeline->base.program.per_view_viewport,
+   };
+   if (!state.share_scale)
+      state.vp.scissor_count = num_views;
+   unsigned size = tu6_scissor_size(cmd->device, &state.vp);
+   tu_cs_begin_sub_stream(&cmd->sub_cs, size, cs);
+   tu_create_fdm_bin_patchpoint(cmd, cs, size, fdm_apply_scissors, state);
+}
+
+static const enum mesa_vk_dynamic_graphics_state tu_sample_locations_enable_state[] = {
+   MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS_ENABLE,
+};
+
+static unsigned
+tu6_sample_locations_enable_size(struct tu_device *dev, bool enable)
+{
+   return 6;
+}
+
+void
+tu6_emit_sample_locations_enable(struct tu_cs *cs, bool enable)
+{
+   uint32_t sample_config =
+      COND(enable, A6XX_RB_SAMPLE_CONFIG_LOCATION_ENABLE);
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SAMPLE_CONFIG, 1);
+   tu_cs_emit(cs, sample_config);
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_RB_SAMPLE_CONFIG, 1);
+   tu_cs_emit(cs, sample_config);
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_SAMPLE_CONFIG, 1);
+   tu_cs_emit(cs, sample_config);
+}
+
+static const enum mesa_vk_dynamic_graphics_state tu_sample_locations_state[] = {
+   MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS,
+};
+
+static unsigned
+tu6_sample_locations_size(struct tu_device *dev,
+                          const struct vk_sample_locations_state *samp_loc)
+{
+   return 6;
+}
+
+void
+tu6_emit_sample_locations(struct tu_cs *cs, const struct vk_sample_locations_state *samp_loc)
+{
+   /* Return if it hasn't been set yet in the dynamic case or the struct is
+    * NULL in the static case (because sample locations aren't enabled)
+    */
+   if (!samp_loc || samp_loc->grid_size.width == 0)
+      return;
+
+   assert(samp_loc->grid_size.width == 1);
+   assert(samp_loc->grid_size.height == 1);
+
+   uint32_t sample_locations = 0;
+   for (uint32_t i = 0; i < samp_loc->per_pixel; i++) {
+      /* From VkSampleLocationEXT:
+       *
+       * 
The values specified in a VkSampleLocationEXT structure are always + * clamped to the implementation-dependent sample location coordinate + * range + * [sampleLocationCoordinateRange[0],sampleLocationCoordinateRange[1]] + */ + float x = CLAMP(samp_loc->locations[i].x, SAMPLE_LOCATION_MIN, + SAMPLE_LOCATION_MAX); + float y = CLAMP(samp_loc->locations[i].y, SAMPLE_LOCATION_MIN, + SAMPLE_LOCATION_MAX); + + sample_locations |= + (A6XX_RB_SAMPLE_LOCATION_0_SAMPLE_0_X(x) | + A6XX_RB_SAMPLE_LOCATION_0_SAMPLE_0_Y(y)) << i*8; + } + + tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SAMPLE_LOCATION_0, 1); + tu_cs_emit(cs, sample_locations); + + tu_cs_emit_pkt4(cs, REG_A6XX_RB_SAMPLE_LOCATION_0, 1); + tu_cs_emit(cs, sample_locations); + + tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_SAMPLE_LOCATION_0, 1); + tu_cs_emit(cs, sample_locations); +} + +static const enum mesa_vk_dynamic_graphics_state tu_depth_bias_state[] = { + MESA_VK_DYNAMIC_RS_DEPTH_BIAS_FACTORS, +}; + +static unsigned +tu6_depth_bias_size(struct tu_device *dev, + const struct vk_rasterization_state *rs) +{ + return 4; +} + +void +tu6_emit_depth_bias(struct tu_cs *cs, const struct vk_rasterization_state *rs) +{ + tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_POLY_OFFSET_SCALE, 3); + tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_SCALE(rs->depth_bias.slope).value); + tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_OFFSET(rs->depth_bias.constant).value); + tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(rs->depth_bias.clamp).value); +} + +static const enum mesa_vk_dynamic_graphics_state tu_bandwidth_state[] = { + MESA_VK_DYNAMIC_CB_LOGIC_OP_ENABLE, + MESA_VK_DYNAMIC_CB_LOGIC_OP, + MESA_VK_DYNAMIC_CB_ATTACHMENT_COUNT, + MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES, + MESA_VK_DYNAMIC_CB_BLEND_ENABLES, + MESA_VK_DYNAMIC_CB_WRITE_MASKS, +}; + +static void +tu_calc_bandwidth(struct tu_bandwidth *bandwidth, + const struct vk_color_blend_state *cb, + const struct vk_render_pass_state *rp) +{ + bool rop_reads_dst = cb->logic_op_enable && tu_logic_op_reads_dst((VkLogicOp)cb->logic_op); + + uint32_t total_bpp = 0; + for (unsigned i = 0; i < cb->attachment_count; i++) { + const struct vk_color_blend_attachment_state *att = &cb->attachments[i]; + if (!(cb->color_write_enables & (1u << i))) + continue; + + const VkFormat format = rp->color_attachment_formats[i]; + + uint32_t write_bpp = 0; + if (att->write_mask == 0xf) { + write_bpp = vk_format_get_blocksizebits(format); + } else { + const enum pipe_format pipe_format = vk_format_to_pipe_format(format); + for (uint32_t i = 0; i < 4; i++) { + if (att->write_mask & (1 << i)) { + write_bpp += util_format_get_component_bits(pipe_format, + UTIL_FORMAT_COLORSPACE_RGB, i); + } + } + } + total_bpp += write_bpp; + + if (rop_reads_dst || att->blend_enable) { + total_bpp += write_bpp; + } + } + + bandwidth->color_bandwidth_per_sample = total_bpp / 8; + + if (rp->attachment_aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { + bandwidth->depth_cpp_per_sample = util_format_get_component_bits( + vk_format_to_pipe_format(rp->depth_attachment_format), + UTIL_FORMAT_COLORSPACE_ZS, 0) / 8; + } + + if (rp->attachment_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { + bandwidth->stencil_cpp_per_sample = util_format_get_component_bits( + vk_format_to_pipe_format(rp->stencil_attachment_format), + UTIL_FORMAT_COLORSPACE_ZS, 1) / 8; + } +} + +/* Return true if the blend state reads the color attachments. 
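+ * Disabled color writes and partial write masks count as reads here too,
+ * since the preserved destination channels still depend on earlier draws.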
*/ +static bool +tu6_calc_blend_lrz(const struct vk_color_blend_state *cb, + const struct vk_render_pass_state *rp) +{ + if (cb->logic_op_enable && tu_logic_op_reads_dst((VkLogicOp)cb->logic_op)) + return true; + + for (unsigned i = 0; i < cb->attachment_count; i++) { + if (rp->color_attachment_formats[i] == VK_FORMAT_UNDEFINED) + continue; + + const struct vk_color_blend_attachment_state *att = &cb->attachments[i]; + if (att->blend_enable) + return true; + if (!(cb->color_write_enables & (1u << i))) + return true; + unsigned mask = + MASK(vk_format_get_nr_components(rp->color_attachment_formats[i])); + if ((att->write_mask & mask) != mask) + return true; + } + + return false; +} + +static const enum mesa_vk_dynamic_graphics_state tu_blend_lrz_state[] = { + MESA_VK_DYNAMIC_CB_LOGIC_OP_ENABLE, + MESA_VK_DYNAMIC_CB_LOGIC_OP, + MESA_VK_DYNAMIC_CB_ATTACHMENT_COUNT, + MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES, + MESA_VK_DYNAMIC_CB_BLEND_ENABLES, + MESA_VK_DYNAMIC_CB_WRITE_MASKS, +}; + +static void +tu_emit_blend_lrz(struct tu_lrz_pipeline *lrz, + const struct vk_color_blend_state *cb, + const struct vk_render_pass_state *rp) +{ + if (tu6_calc_blend_lrz(cb, rp)) + lrz->lrz_status |= TU_LRZ_FORCE_DISABLE_WRITE | TU_LRZ_READS_DEST; + lrz->blend_valid = true; +} + +static const enum mesa_vk_dynamic_graphics_state tu_blend_state[] = { + MESA_VK_DYNAMIC_CB_LOGIC_OP_ENABLE, + MESA_VK_DYNAMIC_CB_LOGIC_OP, + MESA_VK_DYNAMIC_CB_ATTACHMENT_COUNT, + MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES, + MESA_VK_DYNAMIC_CB_BLEND_ENABLES, + MESA_VK_DYNAMIC_CB_BLEND_EQUATIONS, + MESA_VK_DYNAMIC_CB_WRITE_MASKS, + MESA_VK_DYNAMIC_MS_ALPHA_TO_COVERAGE_ENABLE, + MESA_VK_DYNAMIC_MS_ALPHA_TO_ONE_ENABLE, + MESA_VK_DYNAMIC_MS_SAMPLE_MASK, +}; + +static unsigned +tu6_blend_size(struct tu_device *dev, + const struct vk_color_blend_state *cb, + bool alpha_to_coverage_enable, + bool alpha_to_one_enable, + uint32_t sample_mask) +{ + unsigned num_rts = alpha_to_coverage_enable ? + MAX2(cb->attachment_count, 1) : cb->attachment_count; + return 8 + 3 * num_rts; +} + +static void +tu6_emit_blend(struct tu_cs *cs, + const struct vk_color_blend_state *cb, + bool alpha_to_coverage_enable, + bool alpha_to_one_enable, + uint32_t sample_mask) +{ + bool rop_reads_dst = cb->logic_op_enable && tu_logic_op_reads_dst((VkLogicOp)cb->logic_op); + enum a3xx_rop_code rop = tu6_rop((VkLogicOp)cb->logic_op); + + uint32_t blend_enable_mask = 0; + for (unsigned i = 0; i < cb->attachment_count; i++) { + const struct vk_color_blend_attachment_state *att = &cb->attachments[i]; + if (!(cb->color_write_enables & (1u << i))) + continue; + + if (rop_reads_dst || att->blend_enable) { + blend_enable_mask |= 1u << i; + } + } + + /* This will emit a dummy RB_MRT_*_CONTROL below if alpha-to-coverage is + * enabled but there are no color attachments, in addition to changing + * *_FS_OUTPUT_CNTL1. + */ + unsigned num_rts = alpha_to_coverage_enable ? + MAX2(cb->attachment_count, 1) : cb->attachment_count; + + bool dual_src_blend = tu_blend_state_is_dual_src(cb); + + tu_cs_emit_regs(cs, A6XX_SP_FS_OUTPUT_CNTL1(.mrt = num_rts)); + tu_cs_emit_regs(cs, A6XX_RB_FS_OUTPUT_CNTL1(.mrt = num_rts)); + tu_cs_emit_regs(cs, A6XX_SP_BLEND_CNTL(.enable_blend = blend_enable_mask, + .unk8 = true, + .dual_color_in_enable = + dual_src_blend, + .alpha_to_coverage = + alpha_to_coverage_enable)); + /* set A6XX_RB_BLEND_CNTL_INDEPENDENT_BLEND only when enabled? 
*/ + tu_cs_emit_regs(cs, A6XX_RB_BLEND_CNTL(.enable_blend = blend_enable_mask, + .independent_blend = true, + .dual_color_in_enable = + dual_src_blend, + .alpha_to_coverage = + alpha_to_coverage_enable, + .alpha_to_one = alpha_to_one_enable, + .sample_mask = sample_mask)); + + for (unsigned i = 0; i < num_rts; i++) { + const struct vk_color_blend_attachment_state *att = &cb->attachments[i]; + if ((cb->color_write_enables & (1u << i)) && i < cb->attachment_count) { + const enum a3xx_rb_blend_opcode color_op = tu6_blend_op(att->color_blend_op); + const enum adreno_rb_blend_factor src_color_factor = + tu6_blend_factor((VkBlendFactor)att->src_color_blend_factor); + const enum adreno_rb_blend_factor dst_color_factor = + tu6_blend_factor((VkBlendFactor)att->dst_color_blend_factor); + const enum a3xx_rb_blend_opcode alpha_op = + tu6_blend_op(att->alpha_blend_op); + const enum adreno_rb_blend_factor src_alpha_factor = + tu6_blend_factor((VkBlendFactor)att->src_alpha_blend_factor); + const enum adreno_rb_blend_factor dst_alpha_factor = + tu6_blend_factor((VkBlendFactor)att->dst_alpha_blend_factor); + + tu_cs_emit_regs(cs, + A6XX_RB_MRT_CONTROL(i, + .blend = att->blend_enable, + .blend2 = att->blend_enable, + .rop_enable = cb->logic_op_enable, + .rop_code = rop, + .component_enable = att->write_mask), + A6XX_RB_MRT_BLEND_CONTROL(i, + .rgb_src_factor = src_color_factor, + .rgb_blend_opcode = color_op, + .rgb_dest_factor = dst_color_factor, + .alpha_src_factor = src_alpha_factor, + .alpha_blend_opcode = alpha_op, + .alpha_dest_factor = dst_alpha_factor)); + } else { + tu_cs_emit_regs(cs, + A6XX_RB_MRT_CONTROL(i,), + A6XX_RB_MRT_BLEND_CONTROL(i,)); + } + } +} + +static const enum mesa_vk_dynamic_graphics_state tu_blend_constants_state[] = { + MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS, +}; + +static unsigned +tu6_blend_constants_size(struct tu_device *dev, + const struct vk_color_blend_state *cb) +{ + return 5; +} + +static void +tu6_emit_blend_constants(struct tu_cs *cs, const struct vk_color_blend_state *cb) +{ + tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLEND_RED_F32, 4); + tu_cs_emit_array(cs, (const uint32_t *) cb->blend_constants, 4); +} + +static const enum mesa_vk_dynamic_graphics_state tu_rast_state[] = { + MESA_VK_DYNAMIC_RS_DEPTH_CLAMP_ENABLE, + MESA_VK_DYNAMIC_RS_DEPTH_CLIP_ENABLE, + MESA_VK_DYNAMIC_RS_POLYGON_MODE, + MESA_VK_DYNAMIC_RS_CULL_MODE, + MESA_VK_DYNAMIC_RS_FRONT_FACE, + MESA_VK_DYNAMIC_RS_DEPTH_BIAS_ENABLE, + MESA_VK_DYNAMIC_RS_LINE_MODE, + MESA_VK_DYNAMIC_VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE, +}; + uint32_t -tu6_rast_size(struct tu_device *dev) +tu6_rast_size(struct tu_device *dev, + const struct vk_rasterization_state *rs, + const struct vk_viewport_state *vp, + bool multiview, + bool per_view_viewport) { return 11 + (dev->physical_device->info->a6xx.has_shading_rate ? 8 : 0); } void tu6_emit_rast(struct tu_cs *cs, - uint32_t gras_su_cntl, - uint32_t gras_cl_cntl, - enum a6xx_polygon_mode polygon_mode) + const struct vk_rasterization_state *rs, + const struct vk_viewport_state *vp, + bool multiview, + bool per_view_viewport) { - tu_cs_emit_regs(cs, A6XX_GRAS_SU_CNTL(.dword = gras_su_cntl)); - tu_cs_emit_regs(cs, A6XX_GRAS_CL_CNTL(.dword = gras_cl_cntl)); + enum a5xx_line_mode line_mode = + rs->line.mode == VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT ? 
+ BRESENHAM : RECTANGULAR; + tu_cs_emit_regs(cs, + A6XX_GRAS_SU_CNTL( + .cull_front = rs->cull_mode & VK_CULL_MODE_FRONT_BIT, + .cull_back = rs->cull_mode & VK_CULL_MODE_BACK_BIT, + .front_cw = rs->front_face == VK_FRONT_FACE_CLOCKWISE, + .linehalfwidth = rs->line.width / 2.0f, + .poly_offset = rs->depth_bias.enable, + .line_mode = line_mode, + .multiview_enable = multiview, + .rendertargetindexincr = multiview, + .viewportindexincr = multiview && per_view_viewport)); + + bool depth_clip_enable = vk_rasterization_state_depth_clip_enable(rs); + + tu_cs_emit_regs(cs, + A6XX_GRAS_CL_CNTL( + .znear_clip_disable = !depth_clip_enable, + .zfar_clip_disable = !depth_clip_enable, + .z_clamp_enable = rs->depth_clamp_enable, + .zero_gb_scale_z = vp->depth_clip_negative_one_to_one ? 0 : 1, + .vp_clip_code_ignore = 1));; + + enum a6xx_polygon_mode polygon_mode = tu6_polygon_mode(rs->polygon_mode); tu_cs_emit_regs(cs, A6XX_VPC_POLYGON_MODE(polygon_mode)); @@ -4354,193 +4201,556 @@ tu6_emit_rast(struct tu_cs *cs, } } -static void -tu_pipeline_builder_parse_rasterization(struct tu_pipeline_builder *builder, - struct tu_pipeline *pipeline) +static const enum mesa_vk_dynamic_graphics_state tu_pc_raster_cntl_state[] = { + MESA_VK_DYNAMIC_RS_RASTERIZER_DISCARD_ENABLE, + MESA_VK_DYNAMIC_RS_RASTERIZATION_STREAM, +}; + +static unsigned +tu6_pc_raster_cntl_size(struct tu_device *dev, + const struct vk_rasterization_state *rs) { - const VkPipelineRasterizationStateCreateInfo *rast_info = - builder->create_info->pRasterizationState; - - pipeline->rast.polygon_mode = tu6_polygon_mode(rast_info->polygonMode); - - bool depth_clip_disable = rast_info->depthClampEnable; - - const VkPipelineRasterizationDepthClipStateCreateInfoEXT *depth_clip_state = - vk_find_struct_const(rast_info, PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT); - if (depth_clip_state) - depth_clip_disable = !depth_clip_state->depthClipEnable; - - pipeline->rast.rb_depth_cntl = - COND(rast_info->depthClampEnable, A6XX_RB_DEPTH_CNTL_Z_CLAMP_ENABLE); - - pipeline->rast.line_mode = RECTANGULAR; - - const VkPipelineRasterizationLineStateCreateInfoEXT *rast_line_state = - vk_find_struct_const(rast_info->pNext, - PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT); - - if (rast_line_state && - rast_line_state->lineRasterizationMode == VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT) { - pipeline->rast.line_mode = BRESENHAM; - } - - pipeline->rast.gras_cl_cntl = - A6XX_GRAS_CL_CNTL( - .znear_clip_disable = depth_clip_disable, - .zfar_clip_disable = depth_clip_disable, - .z_clamp_enable = rast_info->depthClampEnable, - .zero_gb_scale_z = pipeline->viewport.z_negative_one_to_one ? 0 : 1, - .vp_clip_code_ignore = 1).value; - - const VkPipelineRasterizationStateStreamCreateInfoEXT *stream_info = - vk_find_struct_const(rast_info->pNext, - PIPELINE_RASTERIZATION_STATE_STREAM_CREATE_INFO_EXT); - unsigned stream = stream_info ? 
stream_info->rasterizationStream : 0; - - pipeline->rast.pc_raster_cntl = A6XX_PC_RASTER_CNTL_STREAM(stream); - pipeline->rast.vpc_unknown_9107 = 0; - if (rast_info->rasterizerDiscardEnable) { - pipeline->rast.pc_raster_cntl |= A6XX_PC_RASTER_CNTL_DISCARD; - pipeline->rast.vpc_unknown_9107 |= A6XX_VPC_UNKNOWN_9107_RASTER_DISCARD; - } - - pipeline->rast.gras_su_cntl = - tu6_gras_su_cntl(rast_info, pipeline->rast.line_mode, builder->multiview_mask != 0); - - struct tu_cs cs; - - if (tu_pipeline_static_state(pipeline, &cs, TU_DYNAMIC_STATE_PC_RASTER_CNTL, 4)) { - tu_cs_emit_regs(&cs, A6XX_PC_RASTER_CNTL(.dword = pipeline->rast.pc_raster_cntl)); - tu_cs_emit_regs(&cs, A6XX_VPC_UNKNOWN_9107(.dword = pipeline->rast.vpc_unknown_9107)); - } - - if (tu_pipeline_static_state(pipeline, &cs, TU_DYNAMIC_STATE_RAST, - tu6_rast_size(builder->device))) { - tu6_emit_rast(&cs, pipeline->rast.gras_su_cntl, - pipeline->rast.gras_cl_cntl, - pipeline->rast.polygon_mode); - } - - if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_DEPTH_BIAS, 4)) { - tu6_emit_depth_bias(&cs, rast_info->depthBiasConstantFactor, - rast_info->depthBiasClamp, - rast_info->depthBiasSlopeFactor); - } - - const struct VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *provoking_vtx_state = - vk_find_struct_const(rast_info->pNext, PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT); - pipeline->rast.provoking_vertex_last = provoking_vtx_state && - provoking_vtx_state->provokingVertexMode == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT; - - pipeline->rast.multiview_mask = builder->multiview_mask; + return 4; } static void -tu_pipeline_builder_parse_depth_stencil(struct tu_pipeline_builder *builder, - struct tu_pipeline *pipeline) +tu6_emit_pc_raster_cntl(struct tu_cs *cs, + const struct vk_rasterization_state *rs) { - /* The spec says: - * - * pDepthStencilState is a pointer to an instance of the - * VkPipelineDepthStencilStateCreateInfo structure, and is ignored if - * the pipeline has rasterization disabled or if the subpass of the - * render pass the pipeline is created against does not use a - * depth/stencil attachment. 
- */ - const VkPipelineDepthStencilStateCreateInfo *ds_info = - builder->create_info->pDepthStencilState; - uint32_t rb_depth_cntl = 0, rb_stencil_cntl = 0; - struct tu_cs cs; + tu_cs_emit_regs(cs, A6XX_PC_RASTER_CNTL( + .stream = rs->rasterization_stream, + .discard = rs->rasterizer_discard_enable)); + tu_cs_emit_regs(cs, A6XX_VPC_UNKNOWN_9107( + .raster_discard = rs->rasterizer_discard_enable)); +} - if (!builder->attachment_state_valid || - (builder->depth_attachment_format != VK_FORMAT_UNDEFINED && - builder->depth_attachment_format != VK_FORMAT_S8_UINT)) { - if (ds_info->depthTestEnable) { - rb_depth_cntl |= - A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE | - A6XX_RB_DEPTH_CNTL_ZFUNC(tu6_compare_func(ds_info->depthCompareOp)) | - A6XX_RB_DEPTH_CNTL_Z_READ_ENABLE; /* TODO: don't set for ALWAYS/NEVER */ +static const enum mesa_vk_dynamic_graphics_state tu_ds_state[] = { + MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE, + MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE, + MESA_VK_DYNAMIC_DS_DEPTH_COMPARE_OP, + MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_ENABLE, + MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE, + MESA_VK_DYNAMIC_DS_STENCIL_OP, + MESA_VK_DYNAMIC_RS_DEPTH_CLAMP_ENABLE, +}; - if (ds_info->depthWriteEnable) - rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE; +static unsigned +tu6_ds_size(struct tu_device *dev, + const struct vk_depth_stencil_state *ds, + const struct vk_render_pass_state *rp, + const struct vk_rasterization_state *rs) +{ + return 4; +} + +static void +tu6_emit_ds(struct tu_cs *cs, + const struct vk_depth_stencil_state *ds, + const struct vk_render_pass_state *rp, + const struct vk_rasterization_state *rs) +{ + tu_cs_emit_regs(cs, A6XX_RB_STENCIL_CONTROL( + .stencil_enable = ds->stencil.test_enable, + .stencil_enable_bf = ds->stencil.test_enable, + .stencil_read = ds->stencil.test_enable, + .func = tu6_compare_func((VkCompareOp)ds->stencil.front.op.compare), + .fail = tu6_stencil_op((VkStencilOp)ds->stencil.front.op.fail), + .zpass = tu6_stencil_op((VkStencilOp)ds->stencil.front.op.pass), + .zfail = tu6_stencil_op((VkStencilOp)ds->stencil.front.op.depth_fail), + .func_bf = tu6_compare_func((VkCompareOp)ds->stencil.back.op.compare), + .fail_bf = tu6_stencil_op((VkStencilOp)ds->stencil.back.op.fail), + .zpass_bf = tu6_stencil_op((VkStencilOp)ds->stencil.back.op.pass), + .zfail_bf = tu6_stencil_op((VkStencilOp)ds->stencil.back.op.depth_fail))); + + if (rp->attachment_aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { + bool depth_test = ds->depth.test_enable; + enum adreno_compare_func zfunc = tu6_compare_func(ds->depth.compare_op); + + /* On some GPUs it is necessary to enable z test for depth bounds test + * when UBWC is enabled. Otherwise, the GPU would hang. FUNC_ALWAYS is + * required to pass z test. 
Relevant tests: + * dEQP-VK.pipeline.extended_dynamic_state.two_draws_dynamic.depth_bounds_test_disable + * dEQP-VK.dynamic_state.ds_state.depth_bounds_1 + */ + if (ds->depth.bounds_test.enable && + !ds->depth.test_enable && + cs->device->physical_device->info->a6xx.depth_bounds_require_depth_test_quirk) { + depth_test = true; + zfunc = FUNC_ALWAYS; } - if (ds_info->depthBoundsTestEnable) - rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE | A6XX_RB_DEPTH_CNTL_Z_READ_ENABLE; + tu_cs_emit_regs(cs, A6XX_RB_DEPTH_CNTL( + .z_test_enable = depth_test, + .z_write_enable = ds->depth.test_enable && ds->depth.write_enable, + .zfunc = zfunc, + .z_clamp_enable = rs->depth_clamp_enable, + /* TODO don't set for ALWAYS/NEVER */ + .z_read_enable = ds->depth.test_enable || ds->depth.bounds_test.enable, + .z_bounds_enable = ds->depth.bounds_test.enable)); + } else { + tu_cs_emit_regs(cs, A6XX_RB_DEPTH_CNTL()); + } +} - if (ds_info->depthBoundsTestEnable && !ds_info->depthTestEnable) - tu6_apply_depth_bounds_workaround(builder->device, &rb_depth_cntl); +static const enum mesa_vk_dynamic_graphics_state tu_depth_bounds_state[] = { + MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_BOUNDS, +}; + +static unsigned +tu6_depth_bounds_size(struct tu_device *dev, + const struct vk_depth_stencil_state *ds) +{ + return 3; +} + +static void +tu6_emit_depth_bounds(struct tu_cs *cs, + const struct vk_depth_stencil_state *ds) +{ + tu_cs_emit_regs(cs, + A6XX_RB_Z_BOUNDS_MIN(ds->depth.bounds_test.min), + A6XX_RB_Z_BOUNDS_MAX(ds->depth.bounds_test.max)); +} + +static const enum mesa_vk_dynamic_graphics_state tu_stencil_compare_mask_state[] = { + MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK, +}; + +static unsigned +tu6_stencil_compare_mask_size(struct tu_device *dev, + const struct vk_depth_stencil_state *ds) +{ + return 2; +} + +static void +tu6_emit_stencil_compare_mask(struct tu_cs *cs, + const struct vk_depth_stencil_state *ds) +{ + tu_cs_emit_regs(cs, A6XX_RB_STENCILMASK( + .mask = ds->stencil.front.compare_mask, + .bfmask = ds->stencil.back.compare_mask)); +} + +static const enum mesa_vk_dynamic_graphics_state tu_stencil_write_mask_state[] = { + MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK, +}; + +static unsigned +tu6_stencil_write_mask_size(struct tu_device *dev, + const struct vk_depth_stencil_state *ds) +{ + return 2; +} + +static void +tu6_emit_stencil_write_mask(struct tu_cs *cs, + const struct vk_depth_stencil_state *ds) +{ + tu_cs_emit_regs(cs, A6XX_RB_STENCILWRMASK( + .wrmask = ds->stencil.front.write_mask, + .bfwrmask = ds->stencil.back.write_mask)); +} + +static const enum mesa_vk_dynamic_graphics_state tu_stencil_reference_state[] = { + MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE, +}; + +static unsigned +tu6_stencil_reference_size(struct tu_device *dev, + const struct vk_depth_stencil_state *ds) +{ + return 2; +} + +static void +tu6_emit_stencil_reference(struct tu_cs *cs, + const struct vk_depth_stencil_state *ds) +{ + tu_cs_emit_regs(cs, A6XX_RB_STENCILREF( + .ref = ds->stencil.front.reference, + .bfref = ds->stencil.back.reference)); +} + +static inline bool +emit_pipeline_state(BITSET_WORD *keep, BITSET_WORD *remove, + BITSET_WORD *pipeline_set, + const enum mesa_vk_dynamic_graphics_state *state_array, + unsigned num_states, bool extra_cond, + struct tu_pipeline_builder *builder) +{ + BITSET_DECLARE(state, MESA_VK_DYNAMIC_GRAPHICS_STATE_ENUM_MAX) = {}; + + /* Unrolling this loop should produce a constant value once the function is + * inlined, because state_array and num_states are a per-draw-state + * constant, but GCC seems to need 
a little encouragement. clang does a
+    * little better but still needs a pragma when there are a large number of
+    * states.
+    */
+#if defined(__clang__)
+#pragma clang loop unroll(full)
+#elif defined(__GNUC__) && __GNUC__ >= 8
+#pragma GCC unroll MESA_VK_DYNAMIC_GRAPHICS_STATE_ENUM_MAX
+#endif
+   for (unsigned i = 0; i < num_states; i++) {
+      BITSET_SET(state, state_array[i]);
+   }

-   if (!builder->attachment_state_valid ||
-       builder->depth_attachment_format != VK_FORMAT_UNDEFINED) {
-      const VkStencilOpState *front = &ds_info->front;
-      const VkStencilOpState *back = &ds_info->back;

+   /* If all of the state is set, then after we emit it we can tentatively
+    * remove it from the states to set for the pipeline by making it dynamic.
+    * If we can't emit it, though, we need to keep around the partial state so
+    * that we can emit it later, even if another draw state consumes it. That
+    * is, we have to cancel any tentative removal.
+    */
+   BITSET_DECLARE(temp, MESA_VK_DYNAMIC_GRAPHICS_STATE_ENUM_MAX);
+   memcpy(temp, pipeline_set, sizeof(temp));
+   BITSET_AND(temp, temp, state);
+   if (!BITSET_EQUAL(temp, state) || !extra_cond) {
+      __bitset_or(keep, keep, temp, ARRAY_SIZE(temp));
+      return false;
+   }
+   __bitset_or(remove, remove, state, ARRAY_SIZE(state));
+   return true;
+}

-      rb_stencil_cntl |=
-         A6XX_RB_STENCIL_CONTROL_FUNC(tu6_compare_func(front->compareOp)) |
-         A6XX_RB_STENCIL_CONTROL_FAIL(tu6_stencil_op(front->failOp)) |
-         A6XX_RB_STENCIL_CONTROL_ZPASS(tu6_stencil_op(front->passOp)) |
-         A6XX_RB_STENCIL_CONTROL_ZFAIL(tu6_stencil_op(front->depthFailOp)) |
-         A6XX_RB_STENCIL_CONTROL_FUNC_BF(tu6_compare_func(back->compareOp)) |
-         A6XX_RB_STENCIL_CONTROL_FAIL_BF(tu6_stencil_op(back->failOp)) |
-         A6XX_RB_STENCIL_CONTROL_ZPASS_BF(tu6_stencil_op(back->passOp)) |
-         A6XX_RB_STENCIL_CONTROL_ZFAIL_BF(tu6_stencil_op(back->depthFailOp));

+static void
+tu_pipeline_builder_emit_state(struct tu_pipeline_builder *builder,
+                               struct tu_pipeline *pipeline)
+{
+   struct tu_cs cs;
+   BITSET_DECLARE(keep, MESA_VK_DYNAMIC_GRAPHICS_STATE_ENUM_MAX) = {};
+   BITSET_DECLARE(remove, MESA_VK_DYNAMIC_GRAPHICS_STATE_ENUM_MAX) = {};
+   BITSET_DECLARE(pipeline_set, MESA_VK_DYNAMIC_GRAPHICS_STATE_ENUM_MAX) = {};

-      if (ds_info->stencilTestEnable) {
-         rb_stencil_cntl |=
-            A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
-            A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
-            A6XX_RB_STENCIL_CONTROL_STENCIL_READ;

+   vk_graphics_pipeline_get_state(&builder->graphics_state, pipeline_set);
+
+#define EMIT_STATE(name, extra_cond) \
+   emit_pipeline_state(keep, remove, pipeline_set, tu_##name##_state, \
+                       ARRAY_SIZE(tu_##name##_state), extra_cond, builder)
+
+#define DRAW_STATE_COND(name, id, extra_cond, ...) \
+   if (EMIT_STATE(name, extra_cond)) { \
+      unsigned size = tu6_##name##_size(builder->device, __VA_ARGS__); \
+      if (size > 0) { \
+         tu_cs_begin_sub_stream(&pipeline->cs, size, &cs); \
+         tu6_emit_##name(&cs, __VA_ARGS__); \
+         pipeline->dynamic_state[id] = \
+            tu_cs_end_draw_state(&pipeline->cs, &cs); \
+      } \
+      pipeline->set_state_mask |= (1u << id); \
+   }
+#define DRAW_STATE(name, id, ...) DRAW_STATE_COND(name, id, true, __VA_ARGS__)
+
+   DRAW_STATE(vertex_input, TU_DYNAMIC_STATE_VERTEX_INPUT,
+              builder->graphics_state.vi);
+   DRAW_STATE(vertex_stride, TU_DYNAMIC_STATE_VB_STRIDE,
+              builder->graphics_state.vi);
+   /* If (a) per-view viewport is used or (b) we don't know yet, then we need
+    * to set viewport and scissor state dynamically. 
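+    * ("We don't know yet" happens with graphics pipeline libraries, where
+    * the shader state that decides per_view_viewport may come from a
+    * different library.)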
+ */ + bool no_per_view_viewport = pipeline_contains_all_shader_state(pipeline) && + !pipeline->program.per_view_viewport; + DRAW_STATE_COND(viewport, VK_DYNAMIC_STATE_VIEWPORT, no_per_view_viewport, + builder->graphics_state.vp); + DRAW_STATE_COND(scissor, VK_DYNAMIC_STATE_SCISSOR, no_per_view_viewport, + builder->graphics_state.vp); + DRAW_STATE(sample_locations_enable, + TU_DYNAMIC_STATE_SAMPLE_LOCATIONS_ENABLE, + builder->graphics_state.ms->sample_locations_enable); + DRAW_STATE(sample_locations, + TU_DYNAMIC_STATE_SAMPLE_LOCATIONS, + builder->graphics_state.ms->sample_locations); + DRAW_STATE(depth_bias, VK_DYNAMIC_STATE_DEPTH_BIAS, + builder->graphics_state.rs); + bool attachments_valid = + builder->graphics_state.rp && + !(builder->graphics_state.rp->attachment_aspects & + VK_IMAGE_ASPECT_METADATA_BIT); + struct vk_color_blend_state dummy_cb = {}; + const struct vk_color_blend_state *cb = builder->graphics_state.cb; + if (attachments_valid && + !(builder->graphics_state.rp->attachment_aspects & + VK_IMAGE_ASPECT_COLOR_BIT)) { + /* If there are no color attachments, then the original blend state may + * be NULL and the common code sanitizes it to always be NULL. In this + * case we want to emit an empty blend/bandwidth/etc. rather than + * letting it be dynamic (and potentially garbage). + */ + cb = &dummy_cb; + BITSET_SET(pipeline_set, MESA_VK_DYNAMIC_CB_LOGIC_OP_ENABLE); + BITSET_SET(pipeline_set, MESA_VK_DYNAMIC_CB_LOGIC_OP); + BITSET_SET(pipeline_set, MESA_VK_DYNAMIC_CB_ATTACHMENT_COUNT); + BITSET_SET(pipeline_set, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES); + BITSET_SET(pipeline_set, MESA_VK_DYNAMIC_CB_BLEND_ENABLES); + BITSET_SET(pipeline_set, MESA_VK_DYNAMIC_CB_BLEND_EQUATIONS); + BITSET_SET(pipeline_set, MESA_VK_DYNAMIC_CB_WRITE_MASKS); + BITSET_SET(pipeline_set, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS); + } + DRAW_STATE(blend, TU_DYNAMIC_STATE_BLEND, cb, + builder->graphics_state.ms->alpha_to_coverage_enable, + builder->graphics_state.ms->alpha_to_one_enable, + builder->graphics_state.ms->sample_mask); + if (EMIT_STATE(blend_lrz, attachments_valid)) + tu_emit_blend_lrz(&pipeline->lrz, cb, + builder->graphics_state.rp); + if (EMIT_STATE(bandwidth, attachments_valid)) + tu_calc_bandwidth(&pipeline->bandwidth, cb, + builder->graphics_state.rp); + DRAW_STATE(blend_constants, VK_DYNAMIC_STATE_BLEND_CONSTANTS, cb); + if (attachments_valid && + !(builder->graphics_state.rp->attachment_aspects & + VK_IMAGE_ASPECT_COLOR_BIT)) { + /* Don't actually make anything dynamic as that may mean a partially-set + * state group where the group is NULL which angers common code. 
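+       * The empty states emitted from dummy_cb above stay bound instead.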
+       */
+      BITSET_CLEAR(remove, MESA_VK_DYNAMIC_CB_LOGIC_OP_ENABLE);
+      BITSET_CLEAR(remove, MESA_VK_DYNAMIC_CB_LOGIC_OP);
+      BITSET_CLEAR(remove, MESA_VK_DYNAMIC_CB_ATTACHMENT_COUNT);
+      BITSET_CLEAR(remove, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES);
+      BITSET_CLEAR(remove, MESA_VK_DYNAMIC_CB_BLEND_ENABLES);
+      BITSET_CLEAR(remove, MESA_VK_DYNAMIC_CB_BLEND_EQUATIONS);
+      BITSET_CLEAR(remove, MESA_VK_DYNAMIC_CB_WRITE_MASKS);
+      BITSET_CLEAR(remove, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS);
+   }
+   DRAW_STATE_COND(rast, TU_DYNAMIC_STATE_RAST,
+                   pipeline_contains_all_shader_state(pipeline),
+                   builder->graphics_state.rs,
+                   builder->graphics_state.vp,
+                   builder->graphics_state.rp->view_mask != 0,
+                   pipeline->program.per_view_viewport);
+   DRAW_STATE(pc_raster_cntl, TU_DYNAMIC_STATE_PC_RASTER_CNTL,
+              builder->graphics_state.rs);
+   DRAW_STATE_COND(ds, TU_DYNAMIC_STATE_DS,
+                   attachments_valid,
+                   builder->graphics_state.ds,
+                   builder->graphics_state.rp,
+                   builder->graphics_state.rs);
+   DRAW_STATE(depth_bounds, VK_DYNAMIC_STATE_DEPTH_BOUNDS,
+              builder->graphics_state.ds);
+   DRAW_STATE(stencil_compare_mask, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
+              builder->graphics_state.ds);
+   DRAW_STATE(stencil_write_mask, VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
+              builder->graphics_state.ds);
+   DRAW_STATE(stencil_reference, VK_DYNAMIC_STATE_STENCIL_REFERENCE,
+              builder->graphics_state.ds);
+   DRAW_STATE_COND(patch_control_points,
+                   TU_DYNAMIC_STATE_PATCH_CONTROL_POINTS,
+                   pipeline_contains_all_shader_state(pipeline),
+                   pipeline,
+                   builder->graphics_state.ts->patch_control_points);
+#undef DRAW_STATE
+#undef DRAW_STATE_COND
+#undef EMIT_STATE
+
+   /* LRZ always needs depth/stencil state at draw time */
+   BITSET_SET(keep, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE);
+   BITSET_SET(keep, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE);
+   BITSET_SET(keep, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_ENABLE);
+   BITSET_SET(keep, MESA_VK_DYNAMIC_DS_DEPTH_COMPARE_OP);
+   BITSET_SET(keep, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE);
+   BITSET_SET(keep, MESA_VK_DYNAMIC_DS_STENCIL_OP);
+   BITSET_SET(keep, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK);
+   BITSET_SET(keep, MESA_VK_DYNAMIC_MS_ALPHA_TO_COVERAGE_ENABLE);
+
+   /* MSAA needs line mode */
+   BITSET_SET(keep, MESA_VK_DYNAMIC_RS_LINE_MODE);
+
+   /* The patch control points are part of the draw */
+   BITSET_SET(keep, MESA_VK_DYNAMIC_TS_PATCH_CONTROL_POINTS);
+
+   /* Vertex buffer state needs to know the max valid binding */
+   BITSET_SET(keep, MESA_VK_DYNAMIC_VI_BINDINGS_VALID);
+
+   /* Remove state which has been emitted and we no longer need to set when
+    * binding the pipeline by making it "dynamic".
+    */
+   BITSET_ANDNOT(remove, remove, keep);
+   BITSET_OR(builder->graphics_state.dynamic, builder->graphics_state.dynamic,
+             remove);
+}
+
+static inline bool
+emit_draw_state(const struct vk_dynamic_graphics_state *dynamic_state,
+                const enum mesa_vk_dynamic_graphics_state *state_array,
+                unsigned num_states)
+{
+   BITSET_DECLARE(state, MESA_VK_DYNAMIC_GRAPHICS_STATE_ENUM_MAX) = {};
+
+   /* Unrolling this loop should produce a constant value once the function is
+    * inlined, because state_array and num_states are a per-draw-state
+    * constant, but GCC seems to need a little encouragement. clang does a
+    * little better but still needs a pragma when there are a large number of
+    * states. 
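+    * (This mirrors the comment on emit_pipeline_state above; the same
+    * unrolling trick is used for both helpers.)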
+    */
+#if defined(__clang__)
+#pragma clang loop unroll(full)
+#elif defined(__GNUC__) && __GNUC__ >= 8
+#pragma GCC unroll MESA_VK_DYNAMIC_GRAPHICS_STATE_ENUM_MAX
+#endif
+   for (unsigned i = 0; i < num_states; i++) {
+      BITSET_SET(state, state_array[i]);
+   }
+
+   BITSET_DECLARE(temp, MESA_VK_DYNAMIC_GRAPHICS_STATE_ENUM_MAX);
+   BITSET_AND(temp, state, dynamic_state->dirty);
+   return !BITSET_IS_EMPTY(temp);
+}
+
+uint32_t
+tu_emit_draw_state(struct tu_cmd_buffer *cmd)
+{
+   struct tu_cs cs;
+   uint32_t dirty_draw_states = 0;
+
+#define EMIT_STATE(name) \
+   emit_draw_state(&cmd->vk.dynamic_graphics_state, tu_##name##_state, \
+                   ARRAY_SIZE(tu_##name##_state))
+#define DRAW_STATE_COND(name, id, extra_cond, ...) \
+   if ((EMIT_STATE(name) || extra_cond) && \
+       !(cmd->state.pipeline->base.set_state_mask & (1u << id))) { \
+      unsigned size = tu6_##name##_size(cmd->device, __VA_ARGS__); \
+      if (size > 0) { \
+         tu_cs_begin_sub_stream(&cmd->sub_cs, size, &cs); \
+         tu6_emit_##name(&cs, __VA_ARGS__); \
+         cmd->state.dynamic_state[id] = \
+            tu_cs_end_draw_state(&cmd->sub_cs, &cs); \
+      } else { \
+         cmd->state.dynamic_state[id] = {}; \
+      } \
+      dirty_draw_states |= (1u << id); \
+   }
+#define DRAW_STATE_FDM(name, id, ...) \
+   if ((EMIT_STATE(name) || (cmd->state.dirty & TU_CMD_DIRTY_FDM)) && \
+       !(cmd->state.pipeline->base.set_state_mask & (1u << id))) { \
+      if (cmd->state.pipeline_has_fdm) { \
+         tu_cs_set_writeable(&cmd->sub_cs, true); \
+         tu6_emit_##name##_fdm(&cs, cmd, __VA_ARGS__); \
+         tu_cs_set_writeable(&cmd->sub_cs, false); \
+         cmd->state.dynamic_state[id] = \
+            tu_cs_end_draw_state(&cmd->sub_cs, &cs); \
+      } else { \
+         unsigned size = tu6_##name##_size(cmd->device, __VA_ARGS__); \
+         if (size > 0) { \
+            tu_cs_begin_sub_stream(&cmd->sub_cs, size, &cs); \
+            tu6_emit_##name(&cs, __VA_ARGS__); \
+            cmd->state.dynamic_state[id] = \
+               tu_cs_end_draw_state(&cmd->sub_cs, &cs); \
+         } else { \
+            cmd->state.dynamic_state[id] = {}; \
+         } \
+      } \
+      dirty_draw_states |= (1u << id); \
+   }
+#define DRAW_STATE(name, id, ...) DRAW_STATE_COND(name, id, false, __VA_ARGS__)
+
+   DRAW_STATE(vertex_input, TU_DYNAMIC_STATE_VERTEX_INPUT,
+              cmd->vk.dynamic_graphics_state.vi);
+
+   /* Vertex input stride is special because it's part of the vertex input in
+    * the pipeline but a separate array when it's dynamic state, so we have to
+    * use two separate functions. 
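+    * (The #defines below temporarily redirect the DRAW_STATE expansion to
+    * the *_dyn variants.)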
+ */ +#define tu6_emit_vertex_stride tu6_emit_vertex_stride_dyn +#define tu6_vertex_stride_size tu6_vertex_stride_size_dyn + + DRAW_STATE(vertex_stride, TU_DYNAMIC_STATE_VB_STRIDE, + cmd->vk.dynamic_graphics_state.vi_binding_strides, + cmd->vk.dynamic_graphics_state.vi_bindings_valid); + +#undef tu6_emit_vertex_stride +#undef tu6_vertex_stride_size + + DRAW_STATE_FDM(viewport, VK_DYNAMIC_STATE_VIEWPORT, + &cmd->vk.dynamic_graphics_state.vp); + DRAW_STATE_FDM(scissor, VK_DYNAMIC_STATE_SCISSOR, + &cmd->vk.dynamic_graphics_state.vp); + DRAW_STATE(sample_locations_enable, + TU_DYNAMIC_STATE_SAMPLE_LOCATIONS_ENABLE, + cmd->vk.dynamic_graphics_state.ms.sample_locations_enable); + DRAW_STATE(sample_locations, + TU_DYNAMIC_STATE_SAMPLE_LOCATIONS, + cmd->vk.dynamic_graphics_state.ms.sample_locations); + DRAW_STATE(depth_bias, VK_DYNAMIC_STATE_DEPTH_BIAS, + &cmd->vk.dynamic_graphics_state.rs); + DRAW_STATE(blend, TU_DYNAMIC_STATE_BLEND, + &cmd->vk.dynamic_graphics_state.cb, + cmd->vk.dynamic_graphics_state.ms.alpha_to_coverage_enable, + cmd->vk.dynamic_graphics_state.ms.alpha_to_one_enable, + cmd->vk.dynamic_graphics_state.ms.sample_mask); + if (EMIT_STATE(blend_lrz) || + ((cmd->state.dirty & TU_CMD_DIRTY_SUBPASS) && + !cmd->state.pipeline->base.lrz.blend_valid)) { + bool blend_reads_dest = tu6_calc_blend_lrz(&cmd->vk.dynamic_graphics_state.cb, + &cmd->state.vk_rp); + if (blend_reads_dest != cmd->state.blend_reads_dest) { + cmd->state.blend_reads_dest = blend_reads_dest; + cmd->state.dirty |= TU_CMD_DIRTY_LRZ; } + } + if (EMIT_STATE(bandwidth) || + ((cmd->state.dirty & TU_CMD_DIRTY_SUBPASS) && + !cmd->state.pipeline->base.bandwidth.valid)) + tu_calc_bandwidth(&cmd->state.bandwidth, &cmd->vk.dynamic_graphics_state.cb, + &cmd->state.vk_rp); + DRAW_STATE(blend_constants, VK_DYNAMIC_STATE_BLEND_CONSTANTS, + &cmd->vk.dynamic_graphics_state.cb); + DRAW_STATE_COND(rast, TU_DYNAMIC_STATE_RAST, + cmd->state.dirty & (TU_CMD_DIRTY_SUBPASS | + TU_CMD_DIRTY_PER_VIEW_VIEWPORT), + &cmd->vk.dynamic_graphics_state.rs, + &cmd->vk.dynamic_graphics_state.vp, + cmd->state.vk_rp.view_mask != 0, + cmd->state.per_view_viewport); + DRAW_STATE(pc_raster_cntl, TU_DYNAMIC_STATE_PC_RASTER_CNTL, + &cmd->vk.dynamic_graphics_state.rs); + DRAW_STATE_COND(ds, TU_DYNAMIC_STATE_DS, + cmd->state.dirty & TU_CMD_DIRTY_SUBPASS, + &cmd->vk.dynamic_graphics_state.ds, + &cmd->state.vk_rp, + &cmd->vk.dynamic_graphics_state.rs); + DRAW_STATE(depth_bounds, VK_DYNAMIC_STATE_DEPTH_BOUNDS, + &cmd->vk.dynamic_graphics_state.ds); + DRAW_STATE(stencil_compare_mask, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, + &cmd->vk.dynamic_graphics_state.ds); + DRAW_STATE(stencil_write_mask, VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, + &cmd->vk.dynamic_graphics_state.ds); + DRAW_STATE(stencil_reference, VK_DYNAMIC_STATE_STENCIL_REFERENCE, + &cmd->vk.dynamic_graphics_state.ds); + DRAW_STATE_COND(patch_control_points, + TU_DYNAMIC_STATE_PATCH_CONTROL_POINTS, + cmd->state.dirty & TU_CMD_DIRTY_PIPELINE, + &cmd->state.pipeline->base, + cmd->vk.dynamic_graphics_state.ts.patch_control_points); +#undef DRAW_STATE +#undef DRAW_STATE_COND +#undef EMIT_STATE + return dirty_draw_states; +} + +static void +tu_pipeline_builder_parse_depth_stencil( + struct tu_pipeline_builder *builder, struct tu_pipeline *pipeline) +{ + const VkPipelineDepthStencilStateCreateInfo *ds_info = + builder->create_info->pDepthStencilState; + + if ((builder->graphics_state.rp->attachment_aspects & + VK_IMAGE_ASPECT_METADATA_BIT) || + (builder->graphics_state.rp->attachment_aspects & + 
VK_IMAGE_ASPECT_DEPTH_BIT)) { pipeline->ds.raster_order_attachment_access = ds_info->flags & (VK_PIPELINE_DEPTH_STENCIL_STATE_CREATE_RASTERIZATION_ORDER_ATTACHMENT_DEPTH_ACCESS_BIT_ARM | VK_PIPELINE_DEPTH_STENCIL_STATE_CREATE_RASTERIZATION_ORDER_ATTACHMENT_STENCIL_ACCESS_BIT_ARM); - - pipeline->ds.write_enable = - ds_info->depthWriteEnable || ds_info->stencilTestEnable; - } - - pipeline->ds.rb_depth_cntl = rb_depth_cntl; - pipeline->ds.rb_stencil_cntl = rb_stencil_cntl; - - /* the remaining draw states arent used if there is no d/s, leave them empty */ - if (builder->depth_attachment_format == VK_FORMAT_UNDEFINED && - builder->attachment_state_valid) - return; - - if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_DEPTH_BOUNDS, 3)) { - tu_cs_emit_regs(&cs, - A6XX_RB_Z_BOUNDS_MIN(ds_info->minDepthBounds), - A6XX_RB_Z_BOUNDS_MAX(ds_info->maxDepthBounds)); - } - - if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, 2)) { - tu_cs_emit_regs(&cs, A6XX_RB_STENCILMASK(.mask = ds_info->front.compareMask & 0xff, - .bfmask = ds_info->back.compareMask & 0xff)); - } - - if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, 2)) { - update_stencil_mask(&pipeline->ds.stencil_wrmask, VK_STENCIL_FACE_FRONT_BIT, ds_info->front.writeMask); - update_stencil_mask(&pipeline->ds.stencil_wrmask, VK_STENCIL_FACE_BACK_BIT, ds_info->back.writeMask); - tu_cs_emit_regs(&cs, A6XX_RB_STENCILWRMASK(.dword = pipeline->ds.stencil_wrmask)); - } - - if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_STENCIL_REFERENCE, 2)) { - tu_cs_emit_regs(&cs, A6XX_RB_STENCILREF(.ref = ds_info->front.reference & 0xff, - .bfref = ds_info->back.reference & 0xff)); - } - - if (builder->variants[MESA_SHADER_FRAGMENT]) { - const struct ir3_shader_variant *fs = builder->variants[MESA_SHADER_FRAGMENT]; - if (fs->has_kill) { - pipeline->lrz.lrz_status |= TU_LRZ_FORCE_DISABLE_WRITE; - } - if (fs->no_earlyz || fs->writes_pos) { - pipeline->lrz.lrz_status = TU_LRZ_FORCE_DISABLE_LRZ; - } } /* FDM isn't compatible with LRZ, because the LRZ image uses the original @@ -4552,67 +4762,6 @@ tu_pipeline_builder_parse_depth_stencil(struct tu_pipeline_builder *builder, pipeline->lrz.lrz_status = TU_LRZ_FORCE_DISABLE_LRZ; } -static void -tu_pipeline_builder_parse_rast_ds(struct tu_pipeline_builder *builder, - struct tu_pipeline *pipeline) -{ - if (builder->rasterizer_discard) - return; - - pipeline->rast_ds.rb_depth_cntl = - pipeline->rast.rb_depth_cntl | pipeline->ds.rb_depth_cntl; - pipeline->rast_ds.rb_depth_cntl_mask = - pipeline->rast.rb_depth_cntl_mask & pipeline->ds.rb_depth_cntl_mask; - - struct tu_cs cs; - if (tu_pipeline_static_state(pipeline, &cs, TU_DYNAMIC_STATE_DS, 4)) { - tu_cs_emit_pkt4(&cs, REG_A6XX_RB_STENCIL_CONTROL, 1); - tu_cs_emit(&cs, pipeline->ds.rb_stencil_cntl); - - tu_cs_emit_pkt4(&cs, REG_A6XX_RB_DEPTH_CNTL, 1); - if (pipeline->output.rb_depth_cntl_disable) - tu_cs_emit(&cs, 0); - else - tu_cs_emit(&cs, pipeline->rast_ds.rb_depth_cntl); - } - - /* With FDM we have to overwrite the viewport and scissor so they have to - * be set dynamically. This can only be done once we know the output state - * and whether viewport/scissor is dynamic. We also have to figure out - * whether we can use per-view viewports to and enable that if true. 
- */ - if (pipeline->fs.fragment_density_map) { - if (!(pipeline->dynamic_state_mask & BIT(VK_DYNAMIC_STATE_VIEWPORT))) { - pipeline->viewport.set_dynamic_vp_to_static = true; - pipeline->dynamic_state_mask |= BIT(VK_DYNAMIC_STATE_VIEWPORT); - } - - if (!(pipeline->dynamic_state_mask & BIT(VK_DYNAMIC_STATE_SCISSOR))) { - pipeline->viewport.set_dynamic_scissor_to_static = true; - pipeline->dynamic_state_mask |= BIT(VK_DYNAMIC_STATE_SCISSOR); - } - - /* We can use per-view viewports if the last geometry stage doesn't - * write its own viewport. - */ - pipeline->viewport.per_view_viewport = - !pipeline->program.writes_viewport && - builder->device->physical_device->info->a6xx.has_per_view_viewport; - - /* Fixup GRAS_SU_CNTL and re-emit rast state if necessary. */ - if (pipeline->viewport.per_view_viewport) { - pipeline->rast.gras_su_cntl |= A6XX_GRAS_SU_CNTL_VIEWPORTINDEXINCR; - - if (tu_pipeline_static_state(pipeline, &cs, TU_DYNAMIC_STATE_RAST, - tu6_rast_size(builder->device))) { - tu6_emit_rast(&cs, pipeline->rast.gras_su_cntl, - pipeline->rast.gras_cl_cntl, - pipeline->rast.polygon_mode); - } - } - } -} - static void tu_pipeline_builder_parse_multisample_and_color_blend( struct tu_pipeline_builder *builder, struct tu_pipeline *pipeline) @@ -4634,155 +4783,24 @@ tu_pipeline_builder_parse_multisample_and_color_blend( * We leave the relevant registers stale when rasterization is disabled. */ if (builder->rasterizer_discard) { - pipeline->output.samples = VK_SAMPLE_COUNT_1_BIT; return; } - pipeline->output.feedback_loop_may_involve_textures = - builder->feedback_loop_may_involve_textures; - static const VkPipelineColorBlendStateCreateInfo dummy_blend_info = {}; - const VkPipelineMultisampleStateCreateInfo *msaa_info = - builder->create_info->pMultisampleState; - pipeline->output.samples = msaa_info->rasterizationSamples; const VkPipelineColorBlendStateCreateInfo *blend_info = - builder->use_color_attachments ? builder->create_info->pColorBlendState - : &dummy_blend_info; + (builder->graphics_state.rp->attachment_aspects & + VK_IMAGE_ASPECT_COLOR_BIT) ? 
builder->create_info->pColorBlendState : + &dummy_blend_info; - bool alpha_to_coverage = - !(pipeline->dynamic_state_mask & - BIT(TU_DYNAMIC_STATE_ALPHA_TO_COVERAGE)) && - msaa_info->alphaToCoverageEnable; + pipeline->lrz.force_late_z |= + builder->graphics_state.rp->depth_attachment_format == VK_FORMAT_S8_UINT; - bool no_earlyz = builder->depth_attachment_format == VK_FORMAT_S8_UINT || - /* alpha to coverage can behave like a discard */ - alpha_to_coverage; - pipeline->lrz.force_late_z |= no_earlyz; - - pipeline->output.subpass_feedback_loop_color = - builder->subpass_feedback_loop_color; - pipeline->output.subpass_feedback_loop_ds = - builder->subpass_feedback_loop_ds; - - if (builder->use_color_attachments) { - pipeline->blend.raster_order_attachment_access = + if (builder->graphics_state.rp->attachment_aspects & VK_IMAGE_ASPECT_COLOR_BIT) { + pipeline->output.raster_order_attachment_access = blend_info->flags & VK_PIPELINE_COLOR_BLEND_STATE_CREATE_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_BIT_ARM; } - - const enum pipe_format ds_pipe_format = - vk_format_to_pipe_format(builder->depth_attachment_format); - - if (builder->depth_attachment_format != VK_FORMAT_UNDEFINED && - builder->depth_attachment_format != VK_FORMAT_S8_UINT) { - pipeline->output.depth_cpp_per_sample = util_format_get_component_bits( - ds_pipe_format, UTIL_FORMAT_COLORSPACE_ZS, 0) / 8; - } else { - /* We need to make sure RB_DEPTH_CNTL is set to 0 when this pipeline is - * used, regardless of whether it's linked with a fragment shader - * pipeline that has an enabled depth test or if RB_DEPTH_CNTL is set - * dynamically. - */ - pipeline->output.rb_depth_cntl_disable = true; - } - - if (builder->depth_attachment_format != VK_FORMAT_UNDEFINED) { - pipeline->output.stencil_cpp_per_sample = util_format_get_component_bits( - ds_pipe_format, UTIL_FORMAT_COLORSPACE_ZS, 1) / 8; - } - - struct tu_cs cs; - tu6_emit_rb_mrt_controls(pipeline, blend_info, - builder->color_attachment_formats, - &pipeline->blend.rop_reads_dst, - &pipeline->output.color_bandwidth_per_sample); - - if (alpha_to_coverage && pipeline->blend.num_rts == 0) { - /* In addition to changing the *_OUTPUT_CNTL1 registers, this will also - * make sure we disable memory writes for MRT0 rather than using - * whatever setting was leftover. - */ - pipeline->blend.num_rts = 1; - } - - uint32_t blend_enable_mask = - (pipeline->blend.logic_op_enabled && pipeline->blend.rop_reads_dst) ? - pipeline->blend.color_write_enable : - pipeline->blend.blend_enable; - tu6_emit_blend_control(pipeline, blend_enable_mask, - !(pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_BLEND_EQUATION)) && - tu_blend_state_is_dual_src(blend_info), msaa_info); - - if (tu_pipeline_static_state(pipeline, &cs, TU_DYNAMIC_STATE_BLEND, - pipeline->blend.num_rts * 3 + 8)) { - tu6_emit_blend(&cs, pipeline); - assert(cs.cur == cs.end); /* validate draw state size */ - } - - /* Disable LRZ writes when blend or logic op that reads the destination is - * enabled, since the resulting pixel value from the blend-draw depends on - * an earlier draw, which LRZ in the draw pass could early-reject if the - * previous blend-enabled draw wrote LRZ. - * - * TODO: We need to disable LRZ writes only for the binning pass. - * Therefore, we need to emit it in a separate draw state. We keep - * it disabled for sysmem path as well for the moment. 
-    */
-   if (blend_enable_mask &&
-       !(pipeline->dynamic_state_mask &
-         (BIT(TU_DYNAMIC_STATE_LOGIC_OP) |
-          BIT(TU_DYNAMIC_STATE_BLEND_ENABLE))))
-      pipeline->lrz.lrz_status |= TU_LRZ_FORCE_DISABLE_WRITE | TU_LRZ_READS_DEST;
-
-   if (!(pipeline->dynamic_state_mask &
-         BIT(TU_DYNAMIC_STATE_COLOR_WRITE_ENABLE)) &&
-       (pipeline->blend.color_write_enable & MASK(pipeline->blend.num_rts)) !=
-       MASK(pipeline->blend.num_rts))
-      pipeline->lrz.lrz_status |= TU_LRZ_FORCE_DISABLE_WRITE | TU_LRZ_READS_DEST;
-
-   if (!(pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_BLEND))) {
-      for (int i = 0; i < blend_info->attachmentCount; i++) {
-         VkPipelineColorBlendAttachmentState blendAttachment = blend_info->pAttachments[i];
-         /* From the PoV of LRZ, having masked color channels is
-          * the same as having blend enabled, in that the draw will
-          * care about the fragments from an earlier draw.
-          */
-         VkFormat format = builder->color_attachment_formats[i];
-         unsigned mask = MASK(vk_format_get_nr_components(format));
-         if (format != VK_FORMAT_UNDEFINED &&
-             (blendAttachment.colorWriteMask & mask) != mask) {
-            pipeline->lrz.lrz_status |= TU_LRZ_FORCE_DISABLE_WRITE | TU_LRZ_READS_DEST;
-         }
-      }
-   }
-
-   if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_BLEND_CONSTANTS, 5)) {
-      tu_cs_emit_pkt4(&cs, REG_A6XX_RB_BLEND_RED_F32, 4);
-      tu_cs_emit_array(&cs, (const uint32_t *) blend_info->blendConstants, 4);
-   }
-
-   const struct VkPipelineSampleLocationsStateCreateInfoEXT *sample_locations =
-      vk_find_struct_const(msaa_info->pNext, PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT);
-   const VkSampleLocationsInfoEXT *samp_loc = NULL;
-
-   if (sample_locations)
-      samp_loc = &sample_locations->sampleLocationsInfo;
-
-   bool samp_loc_enable = sample_locations &&
-      sample_locations->sampleLocationsEnable;
-
-   if (samp_loc &&
-       ((pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_SAMPLE_LOCATIONS_ENABLE)) ||
-        samp_loc_enable) &&
-       tu_pipeline_static_state(pipeline, &cs, TU_DYNAMIC_STATE_SAMPLE_LOCATIONS, 6)) {
-      tu6_emit_sample_locations(&cs, samp_loc);
-   }
-
-   if (tu_pipeline_static_state(pipeline, &cs,
-                                TU_DYNAMIC_STATE_SAMPLE_LOCATIONS_ENABLE, 6)) {
-      tu6_emit_sample_locations_enable(&cs, samp_loc_enable);
-   }
 }
 
 static void
@@ -4793,7 +4811,7 @@ tu_pipeline_builder_parse_rasterization_order(
       return;
 
    bool raster_order_attachment_access =
-      pipeline->blend.raster_order_attachment_access ||
+      pipeline->output.raster_order_attachment_access ||
       pipeline->ds.raster_order_attachment_access ||
       TU_DEBUG(RAST_ORDER);
@@ -4823,9 +4841,9 @@ tu_pipeline_builder_parse_rasterization_order(
     * setting the SINGLE_PRIM_MODE field to the same value that the blob does
     * for advanced_blend in sysmem mode if a feedback loop is detected.
    */
-   if (pipeline->output.subpass_feedback_loop_color ||
-       (pipeline->output.subpass_feedback_loop_ds &&
-        pipeline->ds.write_enable)) {
+   if (builder->graphics_state.rp->pipeline_flags &
+       (VK_PIPELINE_CREATE_COLOR_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT |
+        VK_PIPELINE_CREATE_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT)) {
       sysmem_prim_mode = FLUSH_PER_OVERLAP_AND_OVERWRITE;
       pipeline->prim_order.sysmem_single_prim_mode = true;
    }
@@ -4872,6 +4890,8 @@ tu_pipeline_finish(struct tu_pipeline *pipeline,
          if (library->layouts[i])
            vk_descriptor_set_layout_unref(&dev->vk, &library->layouts[i]->vk);
       }
+
+      vk_free2(&dev->vk.alloc, alloc, library->state_data);
    }
 
    ralloc_free(pipeline->executables_mem_ctx);
@@ -4923,7 +4943,6 @@ tu_pipeline_builder_build(struct tu_pipeline_builder *builder,
    (*pipeline)->executables_mem_ctx = ralloc_context(NULL);
    util_dynarray_init(&(*pipeline)->executables, (*pipeline)->executables_mem_ctx);
 
-   tu_pipeline_builder_parse_dynamic(builder, *pipeline);
    tu_pipeline_builder_parse_libraries(builder, *pipeline);
 
    VkShaderStageFlags stages = 0;
@@ -4957,16 +4976,6 @@ tu_pipeline_builder_build(struct tu_pipeline_builder *builder,
                            &builder->layout, builder, NULL);
 
-   /* This has to come before emitting the program so that
-    * pipeline->tess.patch_control_points and pipeline->rast.multiview_mask
-    * are always set.
-    */
-   if (builder->state &
-       VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) {
-      tu_pipeline_builder_parse_tessellation(builder, *pipeline);
-      (*pipeline)->rast.multiview_mask = builder->multiview_mask;
-   }
-
    if (set_combined_state(builder, *pipeline,
                           VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT |
                           VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT)) {
@@ -5014,18 +5023,6 @@ tu_pipeline_builder_build(struct tu_pipeline_builder *builder,
       tu6_emit_load_state(*pipeline, &builder->layout);
    }
 
-   if (builder->state &
-       VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT) {
-      tu_pipeline_builder_parse_vertex_input(builder, *pipeline);
-      tu_pipeline_builder_parse_input_assembly(builder, *pipeline);
-   }
-
-   if (builder->state &
-       VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) {
-      tu_pipeline_builder_parse_viewport(builder, *pipeline);
-      tu_pipeline_builder_parse_rasterization(builder, *pipeline);
-   }
-
    if (builder->state & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) {
       tu_pipeline_builder_parse_depth_stencil(builder, *pipeline);
    }
@@ -5041,11 +5038,37 @@ tu_pipeline_builder_build(struct tu_pipeline_builder *builder,
       tu_pipeline_builder_parse_rasterization_order(builder, *pipeline);
    }
 
-   if (set_combined_state(builder, *pipeline,
-                          VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT |
-                          VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT |
-                          VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT)) {
-      tu_pipeline_builder_parse_rast_ds(builder, *pipeline);
+   tu_pipeline_builder_emit_state(builder, *pipeline);
+
+   if ((*pipeline)->type == TU_PIPELINE_GRAPHICS_LIB) {
+      struct tu_graphics_lib_pipeline *library =
+         tu_pipeline_to_graphics_lib(*pipeline);
+      result = vk_graphics_pipeline_state_copy(&builder->device->vk,
+                                               &library->graphics_state,
+                                               &builder->graphics_state,
+                                               builder->alloc,
+                                               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT,
+                                               &library->state_data);
+      if (result != VK_SUCCESS) {
+         tu_pipeline_finish(*pipeline, builder->device, builder->alloc);
+         return result;
+      }
+   } else {
+      struct tu_graphics_pipeline *gfx_pipeline =
+         tu_pipeline_to_graphics(*pipeline);
+      vk_dynamic_graphics_state_fill(&gfx_pipeline->dynamic_state,
+                                     &builder->graphics_state);
+      gfx_pipeline->feedback_loop_color =
+         (builder->graphics_state.rp->pipeline_flags &
+          VK_PIPELINE_CREATE_COLOR_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT);
+      gfx_pipeline->feedback_loop_ds =
+         (builder->graphics_state.rp->pipeline_flags &
+          VK_PIPELINE_CREATE_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT);
+      gfx_pipeline->feedback_loop_may_involve_textures =
+         (gfx_pipeline->feedback_loop_color ||
+          gfx_pipeline->feedback_loop_ds) &&
+         !builder->graphics_state.rp->feedback_loop_input_only;
+      gfx_pipeline->has_fdm = builder->fragment_density_map;
    }
 
    return VK_SUCCESS;
@@ -5060,6 +5083,43 @@ tu_pipeline_builder_finish(struct tu_pipeline_builder *builder)
    ralloc_free(builder->mem_ctx);
 }
 
+void
+tu_fill_render_pass_state(struct vk_render_pass_state *rp,
+                          const struct tu_render_pass *pass,
+                          const struct tu_subpass *subpass)
+{
+   rp->view_mask = subpass->multiview_mask;
+   rp->color_attachment_count = subpass->color_count;
+   rp->pipeline_flags = 0;
+
+   const uint32_t a = subpass->depth_stencil_attachment.attachment;
+   rp->depth_attachment_format = VK_FORMAT_UNDEFINED;
+   rp->stencil_attachment_format = VK_FORMAT_UNDEFINED;
+   rp->attachment_aspects = 0;
+   if (a != VK_ATTACHMENT_UNUSED) {
+      VkFormat ds_format = pass->attachments[a].format;
+      if (vk_format_has_depth(ds_format)) {
+         rp->depth_attachment_format = ds_format;
+         rp->attachment_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
+      }
+      if (vk_format_has_stencil(ds_format)) {
+         rp->stencil_attachment_format = ds_format;
+         rp->attachment_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
+      }
+   }
+
+   for (uint32_t i = 0; i < subpass->color_count; i++) {
+      const uint32_t a = subpass->color_attachments[i].attachment;
+      if (a == VK_ATTACHMENT_UNUSED) {
+         rp->color_attachment_formats[i] = VK_FORMAT_UNDEFINED;
+         continue;
+      }
+
+      rp->color_attachment_formats[i] = pass->attachments[a].format;
+      rp->attachment_aspects |= VK_IMAGE_ASPECT_COLOR_BIT;
+   }
+}
+
 static void
 tu_pipeline_builder_init_graphics(
    struct tu_pipeline_builder *builder,
@@ -5127,138 +5187,97 @@ tu_pipeline_builder_init_graphics(
       builder->create_info->pRasterizationState->rasterizerDiscardEnable &&
       !rasterizer_discard_dynamic;
 
-   VkPipelineCreateFlags rendering_flags = builder->create_info->flags;
+   struct vk_render_pass_state rp_state = {
+      .render_pass = builder->create_info->renderPass,
+      .subpass = builder->create_info->subpass,
+   };
+   const struct vk_render_pass_state *driver_rp = NULL;
 
-   if (builder->state &
-       (VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT |
-        VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT |
-        VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT)) {
-      const VkPipelineRenderingCreateInfo *rendering_info =
-         vk_find_struct_const(create_info->pNext, PIPELINE_RENDERING_CREATE_INFO);
+   builder->unscaled_input_fragcoord = 0;
 
-      if (TU_DEBUG(DYNAMIC) && !rendering_info)
-         rendering_info = vk_get_pipeline_rendering_create_info(create_info);
+   /* Extract information we need from the turnip renderpass. This will be
+    * filled out automatically if the app is using dynamic rendering or
+    * renderpasses are emulated.
+    */
+   if (!TU_DEBUG(DYNAMIC) &&
+       (builder->state &
+        (VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT |
+         VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT |
+         VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT)) &&
+       builder->create_info->renderPass) {
+      const struct tu_render_pass *pass =
+         tu_render_pass_from_handle(create_info->renderPass);
+      const struct tu_subpass *subpass =
+         &pass->subpasses[create_info->subpass];
 
-      /* Get multiview_mask, which is only used for shaders */
-      if (builder->state &
-          (VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT |
-           VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT)) {
-         if (rendering_info) {
-            builder->multiview_mask = rendering_info->viewMask;
-         } else {
-            const struct tu_render_pass *pass =
-               tu_render_pass_from_handle(create_info->renderPass);
-            const struct tu_subpass *subpass =
-               &pass->subpasses[create_info->subpass];
-            builder->multiview_mask = subpass->multiview_mask;
-         }
+      rp_state = (struct vk_render_pass_state) {
+         .render_pass = builder->create_info->renderPass,
+         .subpass = builder->create_info->subpass,
+      };
+
+      tu_fill_render_pass_state(&rp_state, pass, subpass);
+
+      rp_state.feedback_loop_input_only = true;
+
+      for (unsigned i = 0; i < subpass->input_count; i++) {
+         /* Input attachments stored in GMEM must be loaded with unscaled
+          * FragCoord.
+          */
+         if (subpass->input_attachments[i].patch_input_gmem)
+            builder->unscaled_input_fragcoord |= 1u << i;
       }
 
-      /* Get the attachment state. This is valid:
-       *
-       * - With classic renderpasses, when either fragment shader or fragment
-       *   output interface state is being compiled. This includes when we
-       *   emulate classic renderpasses with dynamic rendering with the debug
-       *   flag.
-       * - With dynamic rendering (renderPass is NULL) only when compiling the
-       *   output interface state.
-       *
-       * We only actually need this for the fragment output interface state,
-       * but the spec also requires us to skip parsing depth/stencil state
-       * when the attachment state is defined *and* no depth/stencil
-       * attachment is not used, so we have to parse it for fragment shader
-       * state when possible. Life is pain.
+      /* Feedback loop flags can come from either the user (in which case they
+       * may involve textures) or from the driver (in which case they don't).
        */
-      if (((builder->state &
-            VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT) ||
-           ((builder->state &
-             VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) &&
-            builder->create_info->renderPass)) &&
-          rendering_info) {
-         builder->subpass_raster_order_attachment_access = false;
-         builder->subpass_feedback_loop_ds = false;
-         builder->subpass_feedback_loop_color = false;
-         builder->unscaled_input_fragcoord = 0;
-
-         rendering_flags = vk_get_pipeline_rendering_flags(builder->create_info);
-
-         if (!builder->rasterizer_discard) {
-            builder->depth_attachment_format =
-               rendering_info->depthAttachmentFormat == VK_FORMAT_UNDEFINED ?
-               rendering_info->stencilAttachmentFormat :
-               rendering_info->depthAttachmentFormat;
-
-            for (unsigned i = 0; i < rendering_info->colorAttachmentCount; i++) {
-               builder->color_attachment_formats[i] =
-                  rendering_info->pColorAttachmentFormats[i];
-               if (builder->color_attachment_formats[i] != VK_FORMAT_UNDEFINED) {
-                  builder->use_color_attachments = true;
-               }
-            }
-         }
-
-         builder->attachment_state_valid = true;
-      } else if ((builder->state &
-                  (VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT |
-                   VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT)) &&
-                 create_info->renderPass != VK_NULL_HANDLE) {
-         const struct tu_render_pass *pass =
-            tu_render_pass_from_handle(create_info->renderPass);
-         const struct tu_subpass *subpass =
-            &pass->subpasses[create_info->subpass];
-
-         builder->subpass_raster_order_attachment_access =
-            subpass->raster_order_attachment_access;
-         builder->subpass_feedback_loop_color = subpass->feedback_loop_color;
-         builder->subpass_feedback_loop_ds = subpass->feedback_loop_ds;
-         if (pass->fragment_density_map.attachment != VK_ATTACHMENT_UNUSED)
-            rendering_flags |=
-               VK_PIPELINE_CREATE_RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT;
-
-         builder->unscaled_input_fragcoord = 0;
-         for (unsigned i = 0; i < subpass->input_count; i++) {
-            /* Input attachments stored in GMEM must be loaded with unscaled
-             * FragCoord.
-             */
-            if (subpass->input_attachments[i].patch_input_gmem)
-               builder->unscaled_input_fragcoord |= 1u << i;
-         }
-
-         if (!builder->rasterizer_discard) {
-            const uint32_t a = subpass->depth_stencil_attachment.attachment;
-            builder->depth_attachment_format = (a != VK_ATTACHMENT_UNUSED) ?
-               pass->attachments[a].format : VK_FORMAT_UNDEFINED;
-
-            assert(subpass->color_count == 0 ||
-                   !create_info->pColorBlendState ||
-                   subpass->color_count == create_info->pColorBlendState->attachmentCount);
-            for (uint32_t i = 0; i < subpass->color_count; i++) {
-               const uint32_t a = subpass->color_attachments[i].attachment;
-               if (a == VK_ATTACHMENT_UNUSED)
-                  continue;
-
-               builder->color_attachment_formats[i] = pass->attachments[a].format;
-               builder->use_color_attachments = true;
-            }
-         }
-
-         builder->attachment_state_valid = true;
+      VkPipelineCreateFlags feedback_flags = builder->create_info->flags &
+         (VK_PIPELINE_CREATE_COLOR_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT |
+          VK_PIPELINE_CREATE_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT);
+      if (feedback_flags) {
+         rp_state.feedback_loop_input_only = false;
+         rp_state.pipeline_flags |= feedback_flags;
       }
+
+      if (subpass->feedback_loop_color) {
+         rp_state.pipeline_flags |=
+            VK_PIPELINE_CREATE_COLOR_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT;
+      }
+
+      if (subpass->feedback_loop_ds) {
+         rp_state.pipeline_flags |=
+            VK_PIPELINE_CREATE_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT;
+      }
+
+      if (pass->fragment_density_map.attachment != VK_ATTACHMENT_UNUSED) {
+         rp_state.pipeline_flags |=
+            VK_PIPELINE_CREATE_RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT;
+      }
+
+      builder->unscaled_input_fragcoord = 0;
+      for (unsigned i = 0; i < subpass->input_count; i++) {
+         /* Input attachments stored in GMEM must be loaded with unscaled
+          * FragCoord.
+          */
+         if (subpass->input_attachments[i].patch_input_gmem)
+            builder->unscaled_input_fragcoord |= 1u << i;
+      }
+
+      driver_rp = &rp_state;
    }
 
-   if (rendering_flags & VK_PIPELINE_CREATE_COLOR_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT) {
-      builder->subpass_feedback_loop_color = true;
-      builder->feedback_loop_may_involve_textures = true;
-   }
+   vk_graphics_pipeline_state_fill(&dev->vk,
+                                   &builder->graphics_state,
+                                   builder->create_info,
+                                   driver_rp,
+                                   &builder->all_state,
+                                   NULL, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT,
+                                   NULL);
 
-   if (rendering_flags & VK_PIPELINE_CREATE_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT) {
-      builder->subpass_feedback_loop_ds = true;
-      builder->feedback_loop_may_involve_textures = true;
+   if (builder->graphics_state.rp) {
+      builder->fragment_density_map = (builder->graphics_state.rp->pipeline_flags &
+         VK_PIPELINE_CREATE_RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT) ||
+         TU_DEBUG(FDM);
    }
-
-   builder->fragment_density_map = (rendering_flags &
-      VK_PIPELINE_CREATE_RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT) ||
-      TU_DEBUG(FDM);
 }
 
 static VkResult
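[Editor's note -- illustration, not part of the patch] The tu_pipeline.cc hunks above swap turnip's hand-rolled attachment bookkeeping (use_color_attachments, depth_attachment_format, the subpass_feedback_loop_* booleans) for the shared vk_render_pass_state / vk_graphics_pipeline_state machinery. A minimal sketch of the resulting flow follows; it only assumes the builder fields visible in the diff (create_info, graphics_state, all_state), and the helper name example_fill_graphics_state is hypothetical:

/* Sketch only: mirrors the tail of tu_pipeline_builder_init_graphics(). */
static void
example_fill_graphics_state(struct tu_device *dev,
                            struct tu_pipeline_builder *builder)
{
   struct vk_render_pass_state rp_state = {0};
   const struct vk_render_pass_state *driver_rp = NULL;

   if (builder->create_info->renderPass != VK_NULL_HANDLE) {
      /* Emulated render pass: derive formats, aspects and feedback-loop
       * flags from the turnip objects, as tu_fill_render_pass_state()
       * does in the patch. */
      const struct tu_render_pass *pass =
         tu_render_pass_from_handle(builder->create_info->renderPass);
      const struct tu_subpass *subpass =
         &pass->subpasses[builder->create_info->subpass];

      tu_fill_render_pass_state(&rp_state, pass, subpass);
      driver_rp = &rp_state;
   }

   /* When driver_rp is NULL (dynamic rendering), the common runtime pulls
    * the same data out of VkPipelineRenderingCreateInfo instead. */
   vk_graphics_pipeline_state_fill(&dev->vk, &builder->graphics_state,
                                   builder->create_info, driver_rp,
                                   &builder->all_state, NULL,
                                   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT, NULL);
}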
diff --git a/src/freedreno/vulkan/tu_pipeline.h b/src/freedreno/vulkan/tu_pipeline.h
index 90cd25443bc..d860ef78b09 100644
--- a/src/freedreno/vulkan/tu_pipeline.h
+++ b/src/freedreno/vulkan/tu_pipeline.h
@@ -29,25 +29,6 @@ enum tu_dynamic_state
    TU_DYNAMIC_STATE_VERTEX_INPUT,
    TU_DYNAMIC_STATE_PATCH_CONTROL_POINTS,
    TU_DYNAMIC_STATE_COUNT,
-   /* no associated draw state: */
-   TU_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY = TU_DYNAMIC_STATE_COUNT,
-   TU_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE,
-   TU_DYNAMIC_STATE_LOGIC_OP,
-   TU_DYNAMIC_STATE_LOGIC_OP_ENABLE,
-   TU_DYNAMIC_STATE_COLOR_WRITE_ENABLE,
-   TU_DYNAMIC_STATE_POLYGON_MODE,
-   TU_DYNAMIC_STATE_TESS_DOMAIN_ORIGIN,
-   TU_DYNAMIC_STATE_MSAA_SAMPLES,
-   TU_DYNAMIC_STATE_ALPHA_TO_COVERAGE,
-   TU_DYNAMIC_STATE_DEPTH_CLIP_RANGE,
-   TU_DYNAMIC_STATE_VIEWPORT_RANGE,
-   TU_DYNAMIC_STATE_LINE_MODE,
-   TU_DYNAMIC_STATE_PROVOKING_VTX,
-   TU_DYNAMIC_STATE_BLEND_ENABLE,
-   TU_DYNAMIC_STATE_BLEND_EQUATION,
-   TU_DYNAMIC_STATE_COLOR_WRITE_MASK,
-   TU_DYNAMIC_STATE_VIEWPORT_COUNT,
-   TU_DYNAMIC_STATE_SCISSOR_COUNT,
    /* re-use the line width enum as it uses GRAS_SU_CNTL: */
    TU_DYNAMIC_STATE_RAST = VK_DYNAMIC_STATE_LINE_WIDTH,
 };
@@ -65,6 +46,15 @@ struct tu_lrz_pipeline
    } fs;
 
    bool force_late_z;
+   bool blend_valid;
+};
+
+struct tu_bandwidth
+{
+   uint32_t color_bandwidth_per_sample;
+   uint32_t depth_cpp_per_sample;
+   uint32_t stencil_cpp_per_sample;
+   bool valid;
 };
 
 struct tu_compiled_shaders
@@ -141,72 +131,29 @@ struct tu_pipeline
    uint32_t active_desc_sets;
 
    /* mask of enabled dynamic states
-    * if BIT(i) is set, pipeline->dynamic_state[i] is *NOT* used
+    * if BIT(i) is set, pipeline->dynamic_state[i] is used
     */
-   uint64_t dynamic_state_mask;
+   uint32_t set_state_mask;
    struct tu_draw_state dynamic_state[TU_DYNAMIC_STATE_COUNT];
 
+   struct {
+      unsigned patch_type;
+   } tess;
+
    /* for dynamic states which use the same register: */
    struct {
-      uint32_t gras_su_cntl, gras_su_cntl_mask;
-      uint32_t gras_cl_cntl, gras_cl_cntl_mask;
-      uint32_t pc_raster_cntl, pc_raster_cntl_mask;
-      uint32_t vpc_unknown_9107, vpc_unknown_9107_mask;
-      uint32_t rb_depth_cntl, rb_depth_cntl_mask;
-      enum a5xx_line_mode line_mode;
-      enum a6xx_polygon_mode polygon_mode;
-      bool provoking_vertex_last;
-      bool override_depth_clip;
-
-      uint32_t multiview_mask;
-   } rast;
-
-   /* RB_DEPTH_CNTL state comes from both rast and depth/stencil state.
-    */
-   struct {
-      uint32_t rb_depth_cntl, rb_depth_cntl_mask;
-   } rast_ds;
+      bool per_view_viewport;
+   } viewport;
 
    struct {
-      uint32_t rb_depth_cntl, rb_depth_cntl_mask;
-      uint32_t rb_stencil_cntl, rb_stencil_cntl_mask;
-      uint32_t stencil_wrmask;
       bool raster_order_attachment_access;
-      bool write_enable;
    } ds;
 
-   /* Misc. information provided by the fragment shader stage. */
-   struct {
-      bool fragment_density_map;
-   } fs;
-
-   struct {
-      unsigned num_rts;
-      uint32_t rb_mrt_control[MAX_RTS], rb_mrt_control_mask;
-      uint32_t rb_mrt_control_rop;
-      uint32_t rb_mrt_blend_control[MAX_RTS];
-      uint32_t sp_blend_cntl, sp_blend_cntl_mask;
-      uint32_t rb_blend_cntl, rb_blend_cntl_mask;
-      uint32_t color_write_enable, blend_enable;
-      bool logic_op_enabled, rop_reads_dst;
-      bool raster_order_attachment_access;
-   } blend;
-
    /* Misc. info from the fragment output interface state that is used
     * elsewhere.
     */
    struct {
-      /* memory bandwidth cost (in bytes) for color attachments */
-      uint32_t color_bandwidth_per_sample;
-      uint32_t depth_cpp_per_sample;
-      uint32_t stencil_cpp_per_sample;
-
-      bool rb_depth_cntl_disable;
-
-      VkSampleCountFlagBits samples;
-
-      bool subpass_feedback_loop_color, subpass_feedback_loop_ds;
-      bool feedback_loop_may_involve_textures;
+      bool raster_order_attachment_access;
    } output;
 
    /* In other words - framebuffer fetch support */
@@ -219,10 +166,6 @@ struct tu_pipeline
    /* draw states for the pipeline */
    struct tu_draw_state load_state;
 
-   struct {
-      uint32_t num_vbs;
-   } vi;
-
    struct tu_push_constant_range shared_consts;
 
    struct
@@ -238,37 +181,15 @@ struct tu_pipeline
       uint32_t hs_param_dwords;
 
       uint32_t hs_vertices_out;
-      bool writes_viewport;
+      bool per_view_viewport;
       bool per_samp;
 
       enum a6xx_tess_output tess_output_upper_left, tess_output_lower_left;
       enum a6xx_tess_spacing tess_spacing;
    } program;
 
-   struct
-   {
-      enum pc_di_primtype primtype;
-      bool primitive_restart;
-   } ia;
-
-   struct
-   {
-      uint32_t patch_type;
-      uint32_t patch_control_points;
-      bool upper_left_domain_origin;
-   } tess;
-
    struct tu_lrz_pipeline lrz;
-
-   struct {
-      VkViewport viewports[MAX_VIEWPORTS];
-      VkRect2D scissors[MAX_SCISSORS];
-      unsigned num_viewports, num_scissors;
-      bool set_dynamic_vp_to_static;
-      bool set_dynamic_scissor_to_static;
-      bool z_negative_one_to_one;
-      bool per_view_viewport;
-   } viewport;
+
+   struct tu_bandwidth bandwidth;
 
    void *executables_mem_ctx;
    /* tu_pipeline_executable */
@@ -280,6 +201,11 @@ struct tu_graphics_lib_pipeline {
    VkGraphicsPipelineLibraryFlagsEXT state;
 
+   struct vk_graphics_pipeline_state graphics_state;
+
+   /* For vk_graphics_pipeline_state */
+   void *state_data;
+
    /* compiled_shaders only contains variants compiled by this pipeline, and
    * it owns them, so when it is freed they disappear. Similarly,
    * nir_shaders owns the link-time NIR. shaders points to the shaders from
@@ -305,6 +231,11 @@ struct tu_graphics_lib_pipeline {
 
 struct tu_graphics_pipeline {
    struct tu_pipeline base;
+
+   struct vk_dynamic_graphics_state dynamic_state;
+   bool feedback_loop_color, feedback_loop_ds;
+   bool feedback_loop_may_involve_textures;
+   bool has_fdm;
 };
 
 struct tu_compute_pipeline {
@@ -330,48 +261,9 @@ TU_DECL_PIPELINE_DOWNCAST(graphics, TU_PIPELINE_GRAPHICS)
 TU_DECL_PIPELINE_DOWNCAST(graphics_lib, TU_PIPELINE_GRAPHICS_LIB)
 TU_DECL_PIPELINE_DOWNCAST(compute, TU_PIPELINE_COMPUTE)
 
-void
-tu6_emit_viewport(struct tu_cs *cs, const VkViewport *viewport, uint32_t num_viewport,
-                  bool z_negative_one_to_one);
+VkOffset2D tu_fdm_per_bin_offset(VkExtent2D frag_area, VkRect2D bin);
 
-void
-tu6_emit_scissor(struct tu_cs *cs, const VkRect2D *scs, uint32_t scissor_count);
-
-void
-tu6_emit_sample_locations(struct tu_cs *cs, const VkSampleLocationsInfoEXT *samp_loc);
-
-void
-tu6_emit_sample_locations_enable(struct tu_cs *cs, bool enable);
-
-void
-tu6_emit_depth_bias(struct tu_cs *cs,
-                    float constant_factor,
-                    float clamp,
-                    float slope_factor);
-
-#define TU6_EMIT_VERTEX_INPUT_MAX_DWORDS (MAX_VERTEX_ATTRIBS * 2 + 1)
-
-void tu6_emit_vertex_input(struct tu_cs *cs,
-                           uint32_t binding_count,
-                           const VkVertexInputBindingDescription2EXT *bindings,
-                           uint32_t attr_count,
-                           const VkVertexInputAttributeDescription2EXT *attrs);
-
-#define EMIT_CONST_DWORDS(const_dwords) (4 + const_dwords)
-#define TU6_EMIT_PATCH_CONTROL_POINTS_DWORDS(hs_param_dwords) \
-   (EMIT_CONST_DWORDS(4) + EMIT_CONST_DWORDS(hs_param_dwords) + 2 + 2 + 2)
-void tu6_emit_patch_control_points(struct tu_cs *cs,
-                                   const struct tu_pipeline *pipeline,
-                                   unsigned patch_control_points);
-
-uint32_t tu6_rast_size(struct tu_device *dev);
-
-void tu6_emit_rast(struct tu_cs *cs,
-                   uint32_t gras_su_cntl,
-                   uint32_t gras_cl_cntl,
-                   enum a6xx_polygon_mode polygon_mode);
-
-uint32_t tu6_rb_mrt_control_rop(VkLogicOp op, bool *rop_reads_dst);
+uint32_t tu_emit_draw_state(struct tu_cmd_buffer *cmd);
 
 struct tu_pvtmem_config {
    uint64_t iova;
@@ -403,4 +295,9 @@ tu6_emit_vpc(struct tu_cs *cs,
 void
 tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs);
 
+void
+tu_fill_render_pass_state(struct vk_render_pass_state *rp,
+                          const struct tu_render_pass *pass,
+                          const struct tu_subpass *subpass);
+
 #endif /* TU_PIPELINE_H */
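[Editor's note -- illustration, not part of the patch] With the render-pass state centralized, the per-pipeline feedback-loop booleans that tu_graphics_pipeline now carries fall straight out of rp->pipeline_flags, as the else-branch added to tu_pipeline_builder_build() shows. A condensed sketch of that derivation; the helper name is hypothetical and the real logic lives inline in the builder:

/* Sketch only: condenses the non-library branch of
 * tu_pipeline_builder_build(). */
static void
example_set_feedback_flags(struct tu_graphics_pipeline *p,
                           const struct vk_render_pass_state *rp,
                           bool fragment_density_map)
{
   p->feedback_loop_color =
      (rp->pipeline_flags &
       VK_PIPELINE_CREATE_COLOR_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT);
   p->feedback_loop_ds =
      (rp->pipeline_flags &
       VK_PIPELINE_CREATE_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT);

   /* Loops the driver adds for input attachments are flagged
    * feedback_loop_input_only and never involve texture fetches;
    * only app-requested loops can. */
   p->feedback_loop_may_involve_textures =
      (p->feedback_loop_color || p->feedback_loop_ds) &&
      !rp->feedback_loop_input_only;

   p->has_fdm = fragment_density_map;
}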