tu: Rewrite to use common Vulkan dynamic state

This drops a significant amount of code for tracking partial state that
is now handled by the common state tracking infrastructure.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22301>
This commit is contained in:
Connor Abbott 2023-04-03 19:42:53 +02:00 committed by Marge Bot
parent 56cd6a8b64
commit 97da0a7734
7 changed files with 1701 additions and 3048 deletions

View file

@ -362,7 +362,6 @@ wayland-dEQP-EGL.functional.wide_color.window_fp16_default_colorspace,Fail
SRGBReadWritePixels,Fail
# New CTS failures in 1.3.5.0
dEQP-VK.pipeline.fast_linked_library.misc.interpolate_at_sample_no_sample_shading,Crash
dEQP-VK.transform_feedback.simple.lines_or_triangles_line_strip_1,Fail
dEQP-VK.transform_feedback.simple.lines_or_triangles_line_strip_3,Fail
dEQP-VK.transform_feedback.simple.lines_or_triangles_triangle_strip_1,Fail

View file

@ -371,7 +371,6 @@ SRGBReadWritePixels,Fail
spec@!opengl 1.1@line-smooth-stipple,Fail
# New CTS failures in 1.3.5.0
dEQP-VK.pipeline.fast_linked_library.misc.interpolate_at_sample_no_sample_shading,Crash
dEQP-VK.transform_feedback.simple.lines_or_triangles_line_strip_1,Fail
dEQP-VK.transform_feedback.simple.lines_or_triangles_line_strip_3,Fail
dEQP-VK.transform_feedback.simple.lines_or_triangles_triangle_strip_1,Fail

File diff suppressed because it is too large Load diff

View file

@ -55,23 +55,18 @@ struct tu_descriptor_state
enum tu_cmd_dirty_bits
{
TU_CMD_DIRTY_VERTEX_BUFFERS = BIT(0),
TU_CMD_DIRTY_VB_STRIDE = BIT(1),
TU_CMD_DIRTY_RAST = BIT(2),
TU_CMD_DIRTY_DS = BIT(3),
TU_CMD_DIRTY_DESC_SETS = BIT(4),
TU_CMD_DIRTY_COMPUTE_DESC_SETS = BIT(5),
TU_CMD_DIRTY_SHADER_CONSTS = BIT(6),
TU_CMD_DIRTY_LRZ = BIT(7),
TU_CMD_DIRTY_VS_PARAMS = BIT(8),
TU_CMD_DIRTY_FS_PARAMS = BIT(9),
TU_CMD_DIRTY_PC_RASTER_CNTL = BIT(10),
TU_CMD_DIRTY_VIEWPORTS = BIT(11),
TU_CMD_DIRTY_SCISSORS = BIT(12),
TU_CMD_DIRTY_BLEND = BIT(13),
TU_CMD_DIRTY_PATCH_CONTROL_POINTS = BIT(14),
TU_CMD_DIRTY_TESS_PARAMS = BIT(15),
TU_CMD_DIRTY_DESC_SETS = BIT(1),
TU_CMD_DIRTY_COMPUTE_DESC_SETS = BIT(2),
TU_CMD_DIRTY_SHADER_CONSTS = BIT(3),
TU_CMD_DIRTY_LRZ = BIT(4),
TU_CMD_DIRTY_VS_PARAMS = BIT(5),
TU_CMD_DIRTY_TESS_PARAMS = BIT(6),
TU_CMD_DIRTY_SUBPASS = BIT(7),
TU_CMD_DIRTY_FDM = BIT(8),
TU_CMD_DIRTY_PER_VIEW_VIEWPORT = BIT(9),
TU_CMD_DIRTY_PIPELINE = BIT(10),
/* all draw states were disabled and need to be re-enabled: */
TU_CMD_DIRTY_DRAW_STATE = BIT(16)
TU_CMD_DIRTY_DRAW_STATE = BIT(11)
};
/* There are only three cache domains we have to care about: the CCU, or
@ -423,45 +418,25 @@ struct tu_cmd_state
struct tu_render_pass_state rp;
/* Vertex buffers, viewports, and scissors
struct vk_render_pass_state vk_rp;
struct vk_vertex_input_state vi;
struct vk_sample_locations_state sl;
struct tu_bandwidth bandwidth;
/* Vertex buffers
* the states for these can be updated partially, so we need to save these
* to be able to emit a complete draw state
*/
struct {
uint64_t base;
uint32_t size;
uint32_t stride;
} vb[MAX_VBS];
uint32_t max_vbs_bound;
VkViewport viewport[MAX_VIEWPORTS];
VkRect2D scissor[MAX_SCISSORS];
uint32_t viewport_count, scissor_count;
bool per_view_viewport;
/* for dynamic states that can't be emitted directly */
uint32_t dynamic_stencil_mask;
uint32_t dynamic_stencil_wrmask;
uint32_t dynamic_stencil_ref;
bool stencil_front_write;
bool stencil_back_write;
uint32_t gras_su_cntl, gras_cl_cntl, rb_depth_cntl, rb_stencil_cntl;
uint32_t pc_raster_cntl, vpc_unknown_9107;
enum a6xx_polygon_mode polygon_mode;
uint32_t rb_mrt_control[MAX_RTS], rb_mrt_blend_control[MAX_RTS];
uint32_t rb_mrt_control_rop;
uint32_t rb_blend_cntl, sp_blend_cntl;
uint32_t pipeline_color_write_enable, blend_enable;
uint32_t color_write_enable;
bool logic_op_enabled;
bool rop_reads_dst;
bool alpha_to_coverage;
enum pc_di_primtype primtype;
bool primitive_restart_enable;
bool tess_upper_left_domain_origin;
bool provoking_vertex_last;
bool pipeline_has_fdm;
/* saved states to re-emit in TU_CMD_DIRTY_DRAW_STATE case */
struct tu_draw_state dynamic_state[TU_DYNAMIC_STATE_COUNT];
@ -527,12 +502,10 @@ struct tu_cmd_state
bool tessfactor_addr_set;
bool predication_active;
enum a5xx_line_mode line_mode;
VkSampleCountFlagBits samples;
bool msaa_disable;
bool z_negative_one_to_one;
unsigned patch_control_points;
bool blend_reads_dest;
bool stencil_front_write;
bool stencil_back_write;
/* VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT and
* VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT are allowed to run simultaneously,

View file

@ -560,13 +560,11 @@ tu6_calculate_lrz_state(struct tu_cmd_buffer *cmd,
const uint32_t a)
{
struct tu_pipeline *pipeline = &cmd->state.pipeline->base;
bool z_test_enable = (bool) (cmd->state.rb_depth_cntl & A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE);
bool z_write_enable = (bool) (cmd->state.rb_depth_cntl & A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE);
bool z_bounds_enable = (bool) (cmd->state.rb_depth_cntl & A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE);
bool z_test_enable = cmd->vk.dynamic_graphics_state.ds.depth.test_enable;
bool z_write_enable = cmd->vk.dynamic_graphics_state.ds.depth.write_enable;
bool z_bounds_enable = cmd->vk.dynamic_graphics_state.ds.depth.bounds_test.enable;
VkCompareOp depth_compare_op =
(VkCompareOp) ((cmd->state.rb_depth_cntl &
A6XX_RB_DEPTH_CNTL_ZFUNC__MASK) >>
A6XX_RB_DEPTH_CNTL_ZFUNC__SHIFT);
cmd->vk.dynamic_graphics_state.ds.depth.compare_op;
struct A6XX_GRAS_LRZ_CNTL gras_lrz_cntl = { 0 };
@ -599,63 +597,7 @@ tu6_calculate_lrz_state(struct tu_cmd_buffer *cmd,
/* See comment in tu_pipeline about disabling LRZ write for blending. */
bool reads_dest = !!(pipeline->lrz.lrz_status & TU_LRZ_READS_DEST);
if (gras_lrz_cntl.lrz_write && pipeline->dynamic_state_mask &
(BIT(TU_DYNAMIC_STATE_LOGIC_OP) |
BIT(TU_DYNAMIC_STATE_BLEND_ENABLE))) {
if (cmd->state.logic_op_enabled && cmd->state.rop_reads_dst) {
perf_debug(cmd->device, "disabling lrz write due to dynamic logic op");
gras_lrz_cntl.lrz_write = false;
reads_dest = true;
}
if (cmd->state.blend_enable) {
perf_debug(cmd->device, "disabling lrz write due to dynamic blend");
gras_lrz_cntl.lrz_write = false;
reads_dest = true;
}
}
if ((pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_BLEND))) {
for (unsigned i = 0; i < cmd->state.subpass->color_count; i++) {
unsigned a = cmd->state.subpass->color_attachments[i].attachment;
if (a == VK_ATTACHMENT_UNUSED)
continue;
VkFormat format = cmd->state.pass->attachments[a].format;
unsigned mask = MASK(vk_format_get_nr_components(format));
uint32_t enabled_mask = (cmd->state.rb_mrt_control[i] &
A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK) >>
A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT;
if ((enabled_mask & mask) != mask) {
if (gras_lrz_cntl.lrz_write) {
perf_debug(cmd->device,
"disabling lrz write due to dynamic color write "
"mask (%x/%x)",
enabled_mask, mask);
}
gras_lrz_cntl.lrz_write = false;
reads_dest = true;
break;
}
}
}
if ((pipeline->dynamic_state_mask &
BIT(TU_DYNAMIC_STATE_COLOR_WRITE_ENABLE)) &&
(cmd->state.color_write_enable &
MASK(cmd->state.subpass->color_count)) !=
MASK(pipeline->blend.num_rts)) {
if (gras_lrz_cntl.lrz_write) {
perf_debug(
cmd->device,
"disabling lrz write due to dynamic color write enables (%x/%x)",
cmd->state.color_write_enable,
MASK(pipeline->blend.num_rts));
}
gras_lrz_cntl.lrz_write = false;
reads_dest = true;
}
bool reads_dest = cmd->state.blend_reads_dest;
/* LRZ is disabled until it is cleared, which means that one "wrong"
* depth test or shader could disable LRZ until depth buffer is cleared.
@ -761,13 +703,13 @@ tu6_calculate_lrz_state(struct tu_cmd_buffer *cmd,
cmd->state.lrz.prev_direction = lrz_direction;
/* Invalidate LRZ and disable write if stencil test is enabled */
bool stencil_test_enable = cmd->state.rb_stencil_cntl & A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE;
bool stencil_test_enable = cmd->vk.dynamic_graphics_state.ds.stencil.test_enable;
if (!disable_lrz && stencil_test_enable) {
VkCompareOp stencil_front_compare_op = (VkCompareOp)
((cmd->state.rb_stencil_cntl & A6XX_RB_STENCIL_CONTROL_FUNC__MASK) >> A6XX_RB_STENCIL_CONTROL_FUNC__SHIFT);
cmd->vk.dynamic_graphics_state.ds.stencil.front.op.compare;
VkCompareOp stencil_back_compare_op = (VkCompareOp)
((cmd->state.rb_stencil_cntl & A6XX_RB_STENCIL_CONTROL_FUNC_BF__MASK) >> A6XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT);
cmd->vk.dynamic_graphics_state.ds.stencil.back.op.compare;
bool lrz_allowed = true;
lrz_allowed = lrz_allowed && tu6_stencil_op_lrz_allowed(

File diff suppressed because it is too large Load diff

View file

@ -29,25 +29,6 @@ enum tu_dynamic_state
TU_DYNAMIC_STATE_VERTEX_INPUT,
TU_DYNAMIC_STATE_PATCH_CONTROL_POINTS,
TU_DYNAMIC_STATE_COUNT,
/* no associated draw state: */
TU_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY = TU_DYNAMIC_STATE_COUNT,
TU_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE,
TU_DYNAMIC_STATE_LOGIC_OP,
TU_DYNAMIC_STATE_LOGIC_OP_ENABLE,
TU_DYNAMIC_STATE_COLOR_WRITE_ENABLE,
TU_DYNAMIC_STATE_POLYGON_MODE,
TU_DYNAMIC_STATE_TESS_DOMAIN_ORIGIN,
TU_DYNAMIC_STATE_MSAA_SAMPLES,
TU_DYNAMIC_STATE_ALPHA_TO_COVERAGE,
TU_DYNAMIC_STATE_DEPTH_CLIP_RANGE,
TU_DYNAMIC_STATE_VIEWPORT_RANGE,
TU_DYNAMIC_STATE_LINE_MODE,
TU_DYNAMIC_STATE_PROVOKING_VTX,
TU_DYNAMIC_STATE_BLEND_ENABLE,
TU_DYNAMIC_STATE_BLEND_EQUATION,
TU_DYNAMIC_STATE_COLOR_WRITE_MASK,
TU_DYNAMIC_STATE_VIEWPORT_COUNT,
TU_DYNAMIC_STATE_SCISSOR_COUNT,
/* re-use the line width enum as it uses GRAS_SU_CNTL: */
TU_DYNAMIC_STATE_RAST = VK_DYNAMIC_STATE_LINE_WIDTH,
};
@ -65,6 +46,15 @@ struct tu_lrz_pipeline
} fs;
bool force_late_z;
bool blend_valid;
};
/* Per-sample bandwidth figures grouped into one struct. In this diff it is
 * embedded as the `bandwidth` member of both tu_cmd_state and tu_pipeline.
 */
struct tu_bandwidth
{
/* memory bandwidth cost (in bytes) for color attachments */
uint32_t color_bandwidth_per_sample;
/* NOTE(review): "cpp" presumably means bytes per pixel of the depth and
 * stencil attachments -- confirm against the code that fills these in.
 */
uint32_t depth_cpp_per_sample;
uint32_t stencil_cpp_per_sample;
/* NOTE(review): presumably marks whether the fields above have been
 * computed; the code that sets it is not visible in this diff -- confirm.
 */
bool valid;
};
struct tu_compiled_shaders
@ -141,72 +131,29 @@ struct tu_pipeline
uint32_t active_desc_sets;
/* mask of enabled dynamic states
* if BIT(i) is set, pipeline->dynamic_state[i] is *NOT* used
* if BIT(i) is set, pipeline->dynamic_state[i] is used
*/
uint64_t dynamic_state_mask;
uint32_t set_state_mask;
struct tu_draw_state dynamic_state[TU_DYNAMIC_STATE_COUNT];
struct {
unsigned patch_type;
} tess;
/* for dynamic states which use the same register: */
struct {
uint32_t gras_su_cntl, gras_su_cntl_mask;
uint32_t gras_cl_cntl, gras_cl_cntl_mask;
uint32_t pc_raster_cntl, pc_raster_cntl_mask;
uint32_t vpc_unknown_9107, vpc_unknown_9107_mask;
uint32_t rb_depth_cntl, rb_depth_cntl_mask;
enum a5xx_line_mode line_mode;
enum a6xx_polygon_mode polygon_mode;
bool provoking_vertex_last;
bool override_depth_clip;
uint32_t multiview_mask;
} rast;
/* RB_DEPTH_CNTL state comes from both rast and depth/stencil state.
*/
struct {
uint32_t rb_depth_cntl, rb_depth_cntl_mask;
} rast_ds;
bool per_view_viewport;
} viewport;
struct {
uint32_t rb_depth_cntl, rb_depth_cntl_mask;
uint32_t rb_stencil_cntl, rb_stencil_cntl_mask;
uint32_t stencil_wrmask;
bool raster_order_attachment_access;
bool write_enable;
} ds;
/* Misc. information provided by the fragment shader stage. */
struct {
bool fragment_density_map;
} fs;
struct {
unsigned num_rts;
uint32_t rb_mrt_control[MAX_RTS], rb_mrt_control_mask;
uint32_t rb_mrt_control_rop;
uint32_t rb_mrt_blend_control[MAX_RTS];
uint32_t sp_blend_cntl, sp_blend_cntl_mask;
uint32_t rb_blend_cntl, rb_blend_cntl_mask;
uint32_t color_write_enable, blend_enable;
bool logic_op_enabled, rop_reads_dst;
bool raster_order_attachment_access;
} blend;
/* Misc. info from the fragment output interface state that is used
* elsewhere.
*/
struct {
/* memory bandwidth cost (in bytes) for color attachments */
uint32_t color_bandwidth_per_sample;
uint32_t depth_cpp_per_sample;
uint32_t stencil_cpp_per_sample;
bool rb_depth_cntl_disable;
VkSampleCountFlagBits samples;
bool subpass_feedback_loop_color, subpass_feedback_loop_ds;
bool feedback_loop_may_involve_textures;
bool raster_order_attachment_access;
} output;
/* In other words - framebuffer fetch support */
@ -219,10 +166,6 @@ struct tu_pipeline
/* draw states for the pipeline */
struct tu_draw_state load_state;
struct {
uint32_t num_vbs;
} vi;
struct tu_push_constant_range shared_consts;
struct
@ -238,37 +181,15 @@ struct tu_pipeline
uint32_t hs_param_dwords;
uint32_t hs_vertices_out;
bool writes_viewport;
bool per_view_viewport;
bool per_samp;
enum a6xx_tess_output tess_output_upper_left, tess_output_lower_left;
enum a6xx_tess_spacing tess_spacing;
} program;
struct
{
enum pc_di_primtype primtype;
bool primitive_restart;
} ia;
struct
{
uint32_t patch_type;
uint32_t patch_control_points;
bool upper_left_domain_origin;
} tess;
struct tu_lrz_pipeline lrz;
struct {
VkViewport viewports[MAX_VIEWPORTS];
VkRect2D scissors[MAX_SCISSORS];
unsigned num_viewports, num_scissors;
bool set_dynamic_vp_to_static;
bool set_dynamic_scissor_to_static;
bool z_negative_one_to_one;
bool per_view_viewport;
} viewport;
struct tu_bandwidth bandwidth;
void *executables_mem_ctx;
/* tu_pipeline_executable */
@ -280,6 +201,11 @@ struct tu_graphics_lib_pipeline {
VkGraphicsPipelineLibraryFlagsEXT state;
struct vk_graphics_pipeline_state graphics_state;
/* For vk_graphics_pipeline_state */
void *state_data;
/* compiled_shaders only contains variants compiled by this pipeline, and
* it owns them, so when it is freed they disappear. Similarly,
* nir_shaders owns the link-time NIR. shaders points to the shaders from
@ -305,6 +231,11 @@ struct tu_graphics_lib_pipeline {
struct tu_graphics_pipeline {
struct tu_pipeline base;
struct vk_dynamic_graphics_state dynamic_state;
bool feedback_loop_color, feedback_loop_ds;
bool feedback_loop_may_involve_textures;
bool has_fdm;
};
struct tu_compute_pipeline {
@ -330,48 +261,9 @@ TU_DECL_PIPELINE_DOWNCAST(graphics, TU_PIPELINE_GRAPHICS)
TU_DECL_PIPELINE_DOWNCAST(graphics_lib, TU_PIPELINE_GRAPHICS_LIB)
TU_DECL_PIPELINE_DOWNCAST(compute, TU_PIPELINE_COMPUTE)
void
tu6_emit_viewport(struct tu_cs *cs, const VkViewport *viewport, uint32_t num_viewport,
bool z_negative_one_to_one);
VkOffset2D tu_fdm_per_bin_offset(VkExtent2D frag_area, VkRect2D bin);
void
tu6_emit_scissor(struct tu_cs *cs, const VkRect2D *scs, uint32_t scissor_count);
void
tu6_emit_sample_locations(struct tu_cs *cs, const VkSampleLocationsInfoEXT *samp_loc);
void
tu6_emit_sample_locations_enable(struct tu_cs *cs, bool enable);
void
tu6_emit_depth_bias(struct tu_cs *cs,
float constant_factor,
float clamp,
float slope_factor);
#define TU6_EMIT_VERTEX_INPUT_MAX_DWORDS (MAX_VERTEX_ATTRIBS * 2 + 1)
void tu6_emit_vertex_input(struct tu_cs *cs,
uint32_t binding_count,
const VkVertexInputBindingDescription2EXT *bindings,
uint32_t attr_count,
const VkVertexInputAttributeDescription2EXT *attrs);
#define EMIT_CONST_DWORDS(const_dwords) (4 + const_dwords)
#define TU6_EMIT_PATCH_CONTROL_POINTS_DWORDS(hs_param_dwords) \
(EMIT_CONST_DWORDS(4) + EMIT_CONST_DWORDS(hs_param_dwords) + 2 + 2 + 2)
void tu6_emit_patch_control_points(struct tu_cs *cs,
const struct tu_pipeline *pipeline,
unsigned patch_control_points);
uint32_t tu6_rast_size(struct tu_device *dev);
void tu6_emit_rast(struct tu_cs *cs,
uint32_t gras_su_cntl,
uint32_t gras_cl_cntl,
enum a6xx_polygon_mode polygon_mode);
uint32_t tu6_rb_mrt_control_rop(VkLogicOp op, bool *rop_reads_dst);
uint32_t tu_emit_draw_state(struct tu_cmd_buffer *cmd);
struct tu_pvtmem_config {
uint64_t iova;
@ -403,4 +295,9 @@ tu6_emit_vpc(struct tu_cs *cs,
void
tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs);
void
tu_fill_render_pass_state(struct vk_render_pass_state *rp,
const struct tu_render_pass *pass,
const struct tu_subpass *subpass);
#endif /* TU_PIPELINE_H */