mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-21 17:20:21 +01:00
tu: Support VK_EXT_attachment_feedback_loop_dynamic_state
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23374>
This commit is contained in:
parent
833a0cf76e
commit
0e220cd45a
5 changed files with 136 additions and 40 deletions
|
|
@ -733,13 +733,25 @@ tu_cs_emit_draw_state(struct tu_cs *cs, uint32_t id, struct tu_draw_state state)
|
|||
enable_mask = CP_SET_DRAW_STATE__0_BINNING;
|
||||
break;
|
||||
case TU_DRAW_STATE_INPUT_ATTACHMENTS_GMEM:
|
||||
case TU_DRAW_STATE_PRIM_MODE_GMEM:
|
||||
enable_mask = CP_SET_DRAW_STATE__0_GMEM;
|
||||
break;
|
||||
case TU_DRAW_STATE_PRIM_MODE_GMEM:
|
||||
/* On a7xx the prim mode is the same for gmem and sysmem, and it no
|
||||
* longer depends on dynamic state, so we reuse the gmem state for
|
||||
* everything:
|
||||
*/
|
||||
if (cs->device->physical_device->info->a6xx.has_coherent_ubwc_flag_caches) {
|
||||
enable_mask = CP_SET_DRAW_STATE__0_GMEM |
|
||||
CP_SET_DRAW_STATE__0_SYSMEM |
|
||||
CP_SET_DRAW_STATE__0_BINNING;
|
||||
} else {
|
||||
enable_mask = CP_SET_DRAW_STATE__0_GMEM;
|
||||
}
|
||||
break;
|
||||
case TU_DRAW_STATE_INPUT_ATTACHMENTS_SYSMEM:
|
||||
enable_mask = CP_SET_DRAW_STATE__0_SYSMEM;
|
||||
break;
|
||||
case TU_DRAW_STATE_PRIM_MODE_SYSMEM:
|
||||
case TU_DRAW_STATE_DYNAMIC + TU_DYNAMIC_STATE_PRIM_MODE_SYSMEM:
|
||||
/* By also applying the state during binning we ensure that there
|
||||
* is no rotation applied, by previous A6XX_GRAS_SC_CNTL::rotation.
|
||||
*/
|
||||
|
|
@ -3418,7 +3430,6 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
|
|||
cmd->state.program = pipeline->program;
|
||||
|
||||
cmd->state.load_state = pipeline->load_state;
|
||||
cmd->state.prim_order_sysmem = pipeline->prim_order.state_sysmem;
|
||||
cmd->state.prim_order_gmem = pipeline->prim_order.state_gmem;
|
||||
cmd->state.pipeline_sysmem_single_prim_mode = pipeline->prim_order.sysmem_single_prim_mode;
|
||||
cmd->state.pipeline_has_tess = pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
|
||||
|
|
@ -3447,7 +3458,7 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
|
|||
if (!(cmd->state.dirty & TU_CMD_DIRTY_DRAW_STATE)) {
|
||||
uint32_t mask = pipeline->set_state_mask;
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * (11 + util_bitcount(mask)));
|
||||
tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * (10 + util_bitcount(mask)));
|
||||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_PROGRAM_CONFIG, pipeline->program.config_state);
|
||||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VS, pipeline->program.vs_state);
|
||||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VS_BINNING, pipeline->program.vs_binning_state);
|
||||
|
|
@ -3457,7 +3468,6 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
|
|||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_GS_BINNING, pipeline->program.gs_binning_state);
|
||||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_FS, pipeline->program.fs_state);
|
||||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VPC, pipeline->program.vpc_state);
|
||||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_PRIM_MODE_SYSMEM, pipeline->prim_order.state_sysmem);
|
||||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_PRIM_MODE_GMEM, pipeline->prim_order.state_gmem);
|
||||
|
||||
u_foreach_bit(i, mask)
|
||||
|
|
@ -3475,7 +3485,19 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
|
|||
|
||||
if (gfx_pipeline->feedback_loops != cmd->state.pipeline_feedback_loops) {
|
||||
cmd->state.pipeline_feedback_loops = gfx_pipeline->feedback_loops;
|
||||
cmd->state.dirty |= TU_CMD_DIRTY_LRZ;
|
||||
cmd->state.dirty |= TU_CMD_DIRTY_FEEDBACK_LOOPS | TU_CMD_DIRTY_LRZ;
|
||||
}
|
||||
|
||||
bool raster_order_attachment_access =
|
||||
pipeline->output.raster_order_attachment_access ||
|
||||
pipeline->ds.raster_order_attachment_access;
|
||||
if (!cmd->state.raster_order_attachment_access_valid ||
|
||||
raster_order_attachment_access !=
|
||||
cmd->state.raster_order_attachment_access) {
|
||||
cmd->state.raster_order_attachment_access =
|
||||
raster_order_attachment_access;
|
||||
cmd->state.dirty |= TU_CMD_DIRTY_RAST_ORDER;
|
||||
cmd->state.raster_order_attachment_access_valid = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -4974,7 +4996,9 @@ tu6_build_depth_plane_z_mode(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
|||
const struct tu_subpass *subpass = cmd->state.subpass;
|
||||
|
||||
if ((fs->variant->has_kill ||
|
||||
(cmd->state.pipeline_feedback_loops & VK_IMAGE_ASPECT_DEPTH_BIT)) &&
|
||||
(cmd->state.pipeline_feedback_loops & VK_IMAGE_ASPECT_DEPTH_BIT) ||
|
||||
(cmd->vk.dynamic_graphics_state.feedback_loops &
|
||||
VK_IMAGE_ASPECT_DEPTH_BIT)) &&
|
||||
(depth_write || stencil_write)) {
|
||||
zmode = (cmd->state.lrz.valid && cmd->state.lrz.enabled)
|
||||
? A6XX_EARLY_LRZ_LATE_Z
|
||||
|
|
@ -5230,7 +5254,9 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
|
|||
BITSET_TEST(cmd->vk.dynamic_graphics_state.dirty,
|
||||
MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK) ||
|
||||
BITSET_TEST(cmd->vk.dynamic_graphics_state.dirty,
|
||||
MESA_VK_DYNAMIC_MS_ALPHA_TO_COVERAGE_ENABLE);
|
||||
MESA_VK_DYNAMIC_MS_ALPHA_TO_COVERAGE_ENABLE) ||
|
||||
BITSET_TEST(cmd->vk.dynamic_graphics_state.dirty,
|
||||
MESA_VK_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE);
|
||||
|
||||
if (dirty_lrz) {
|
||||
struct tu_cs cs;
|
||||
|
|
@ -5245,6 +5271,17 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
|
|||
tu6_build_depth_plane_z_mode(cmd, &cs);
|
||||
}
|
||||
|
||||
if (BITSET_TEST(cmd->vk.dynamic_graphics_state.dirty,
|
||||
MESA_VK_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE)) {
|
||||
if (cmd->vk.dynamic_graphics_state.feedback_loops &&
|
||||
!cmd->state.rp.disable_gmem) {
|
||||
perf_debug(
|
||||
cmd->device,
|
||||
"Disabling gmem due to VK_EXT_attachment_feedback_loop_layout");
|
||||
cmd->state.rp.disable_gmem = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (BITSET_TEST(cmd->vk.dynamic_graphics_state.dirty,
|
||||
MESA_VK_DYNAMIC_VI_BINDINGS_VALID)) {
|
||||
cmd->state.vertex_buffers.size =
|
||||
|
|
@ -5307,7 +5344,6 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
|
|||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_GS_BINNING, program->gs_binning_state);
|
||||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_FS, program->fs_state);
|
||||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VPC, program->vpc_state);
|
||||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_PRIM_MODE_SYSMEM, cmd->state.prim_order_sysmem);
|
||||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_PRIM_MODE_GMEM, cmd->state.prim_order_gmem);
|
||||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_CONST, cmd->state.shader_const);
|
||||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DESC_SETS, cmd->state.desc_sets);
|
||||
|
|
|
|||
|
|
@ -40,7 +40,6 @@ enum tu_draw_state_group_id
|
|||
TU_DRAW_STATE_INPUT_ATTACHMENTS_SYSMEM,
|
||||
TU_DRAW_STATE_LRZ_AND_DEPTH_PLANE,
|
||||
TU_DRAW_STATE_PRIM_MODE_GMEM,
|
||||
TU_DRAW_STATE_PRIM_MODE_SYSMEM,
|
||||
|
||||
/* dynamic state related draw states */
|
||||
TU_DRAW_STATE_DYNAMIC,
|
||||
|
|
@ -71,8 +70,10 @@ enum tu_cmd_dirty_bits
|
|||
TU_CMD_DIRTY_PER_VIEW_VIEWPORT = BIT(9),
|
||||
TU_CMD_DIRTY_TES = BIT(10),
|
||||
TU_CMD_DIRTY_PROGRAM = BIT(11),
|
||||
TU_CMD_DIRTY_RAST_ORDER = BIT(12),
|
||||
TU_CMD_DIRTY_FEEDBACK_LOOPS = BIT(13),
|
||||
/* all draw states were disabled and need to be re-enabled: */
|
||||
TU_CMD_DIRTY_DRAW_STATE = BIT(12)
|
||||
TU_CMD_DIRTY_DRAW_STATE = BIT(14)
|
||||
};
|
||||
|
||||
/* There are only three cache domains we have to care about: the CCU, or
|
||||
|
|
@ -441,7 +442,7 @@ struct tu_cmd_state
|
|||
struct tu_draw_state desc_sets;
|
||||
struct tu_draw_state load_state;
|
||||
struct tu_draw_state compute_load_state;
|
||||
struct tu_draw_state prim_order_sysmem, prim_order_gmem;
|
||||
struct tu_draw_state prim_order_gmem;
|
||||
|
||||
struct tu_draw_state vs_params;
|
||||
struct tu_draw_state fs_params;
|
||||
|
|
@ -509,6 +510,8 @@ struct tu_cmd_state
|
|||
bool pipeline_has_tess;
|
||||
bool pipeline_has_gs;
|
||||
bool pipeline_disable_gmem;
|
||||
bool raster_order_attachment_access;
|
||||
bool raster_order_attachment_access_valid;
|
||||
VkImageAspectFlags pipeline_feedback_loops;
|
||||
|
||||
bool pipeline_blend_lrz, pipeline_bandwidth;
|
||||
|
|
|
|||
|
|
@ -221,6 +221,7 @@ get_device_extensions(const struct tu_physical_device *device,
|
|||
.KHR_zero_initialize_workgroup_memory = true,
|
||||
|
||||
.EXT_4444_formats = true,
|
||||
.EXT_attachment_feedback_loop_dynamic_state = true,
|
||||
.EXT_attachment_feedback_loop_layout = true,
|
||||
.EXT_border_color_swizzle = true,
|
||||
.EXT_color_write_enable = true,
|
||||
|
|
@ -484,6 +485,9 @@ tu_get_features(struct tu_physical_device *pdevice,
|
|||
features->formatA4R4G4B4 = true;
|
||||
features->formatA4B4G4R4 = true;
|
||||
|
||||
/* VK_EXT_attachment_feedback_loop_dynamic_state */
|
||||
features->attachmentFeedbackLoopDynamicState = true;
|
||||
|
||||
/* VK_EXT_attachment_feedback_loop_layout */
|
||||
features->attachmentFeedbackLoopLayout = true;
|
||||
|
||||
|
|
|
|||
|
|
@ -3218,6 +3218,54 @@ tu6_emit_rb_depth_cntl(struct tu_cs *cs,
|
|||
}
|
||||
}
|
||||
|
||||
/* Dynamic graphics states associated with the "prim_mode_sysmem" draw state.
 * The array is looked up by name through token pasting (tu_##name##_state)
 * in the EMIT_STATE()/DRAW_STATE_COND() macros, so its identifier must keep
 * the tu_<name>_state form.
 * NOTE(review): presumably a change of any state listed here causes the
 * draw state to be re-emitted -- confirm against emit_draw_state().
 */
static const enum mesa_vk_dynamic_graphics_state tu_prim_mode_sysmem_state[] = {
|
||||
MESA_VK_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE,
|
||||
};
|
||||
|
||||
/* Size callback for the "prim_mode_sysmem" draw state.
 *
 * DRAW_STATE_COND() invokes this as tu6_##name##_size<CHIP>(device, args...)
 * and only goes on to emit the state when the result is greater than zero.
 * The result is a constant 2; none of the parameters are read, they only
 * mirror the argument list that the macro forwards.
 * NOTE(review): 2 presumably is the number of dwords needed for the single
 * GRAS_SC_CNTL register write done by tu6_emit_prim_mode_sysmem() -- confirm.
 */
template <chip CHIP>
|
||||
static unsigned
|
||||
tu6_prim_mode_sysmem_size(struct tu_device *dev,
|
||||
bool raster_order_attachment_access,
|
||||
VkImageAspectFlags feedback_loops,
|
||||
bool *sysmem_single_prim_mode)
|
||||
{
|
||||
return 2;
|
||||
}
|
||||
|
||||
/* Emit GRAS_SC_CNTL for the "prim_mode_sysmem" draw state.
 *
 * cs: command stream the register write is emitted into.
 * raster_order_attachment_access: whether rasterization-order attachment
 *    access is requested; it is additionally forced on by the RAST_ORDER
 *    debug flag.
 * feedback_loops: image aspects that have an attachment feedback loop; any
 *    non-zero value selects the flushing mode.
 * sysmem_single_prim_mode: out flag, set to true when the flushing mode is
 *    selected. It is never cleared here, so a value that is already true is
 *    left as it is.
 */
template <chip CHIP>
|
||||
static void
|
||||
tu6_emit_prim_mode_sysmem(struct tu_cs *cs,
|
||||
bool raster_order_attachment_access,
|
||||
VkImageAspectFlags feedback_loops,
|
||||
bool *sysmem_single_prim_mode)
|
||||
{
|
||||
/* VK_EXT_rasterization_order_attachment_access:
|
||||
*
|
||||
* This extension allows access to framebuffer attachments when used as both
|
||||
* input and color attachments from one fragment to the next, in
|
||||
* rasterization order, without explicit synchronization.
|
||||
*/
|
||||
raster_order_attachment_access |= TU_DEBUG(RAST_ORDER);
|
||||
|
||||
/* If there is a feedback loop, then the shader can read the previous value
|
||||
* of a pixel being written out. It can also write some components and then
|
||||
* read different components without a barrier in between. This is a
|
||||
* problem in sysmem mode with UBWC, because the main buffer and flags
|
||||
* buffer can get out-of-sync if only one is flushed. We fix this by
|
||||
* setting the SINGLE_PRIM_MODE field to the same value that the blob does
|
||||
* for advanced_blend in sysmem mode if a feedback loop is detected.
|
||||
*/
|
||||
enum a6xx_single_prim_mode sysmem_prim_mode =
|
||||
(raster_order_attachment_access || feedback_loops) ?
|
||||
FLUSH_PER_OVERLAP_AND_OVERWRITE : NO_FLUSH;
|
||||
|
||||
/* Only ever raise the flag; in the NO_FLUSH case the caller's value is kept. */
if (sysmem_prim_mode == FLUSH_PER_OVERLAP_AND_OVERWRITE)
|
||||
*sysmem_single_prim_mode = true;
|
||||
|
||||
tu_cs_emit_regs(cs, A6XX_GRAS_SC_CNTL(.ccusinglecachelinesize = 2,
|
||||
.single_prim_mode = sysmem_prim_mode));
|
||||
}
|
||||
|
||||
static inline bool
|
||||
emit_pipeline_state(BITSET_WORD *keep, BITSET_WORD *remove,
|
||||
BITSET_WORD *pipeline_set,
|
||||
|
|
@ -3380,6 +3428,26 @@ tu_pipeline_builder_emit_state(struct tu_pipeline_builder *builder,
|
|||
pipeline->shaders[MESA_SHADER_TESS_EVAL],
|
||||
&pipeline->program,
|
||||
builder->graphics_state.ts->patch_control_points);
|
||||
bool has_raster_order_state = false;
|
||||
if (pipeline->type == TU_PIPELINE_GRAPHICS) {
|
||||
has_raster_order_state = true;
|
||||
} else {
|
||||
struct tu_graphics_lib_pipeline *lib =
|
||||
tu_pipeline_to_graphics_lib(pipeline);
|
||||
has_raster_order_state =
|
||||
(lib->state & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) &&
|
||||
(lib->state &
|
||||
VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT);
|
||||
}
|
||||
if (!builder->device->physical_device->info->a6xx.has_coherent_ubwc_flag_caches) {
|
||||
DRAW_STATE_COND(prim_mode_sysmem,
|
||||
TU_DYNAMIC_STATE_PRIM_MODE_SYSMEM,
|
||||
has_raster_order_state,
|
||||
pipeline->output.raster_order_attachment_access ||
|
||||
pipeline->ds.raster_order_attachment_access,
|
||||
vk_pipeline_flags_feedback_loops(builder->graphics_state.pipeline_flags),
|
||||
&pipeline->prim_order.sysmem_single_prim_mode);
|
||||
}
|
||||
#undef DRAW_STATE
|
||||
#undef DRAW_STATE_COND
|
||||
#undef EMIT_STATE
|
||||
|
|
@ -3452,7 +3520,7 @@ tu_emit_draw_state(struct tu_cmd_buffer *cmd)
|
|||
emit_draw_state(&cmd->vk.dynamic_graphics_state, tu_##name##_state, \
|
||||
ARRAY_SIZE(tu_##name##_state))
|
||||
#define DRAW_STATE_COND(name, id, extra_cond, ...) \
|
||||
if ((EMIT_STATE(name) || extra_cond) && \
|
||||
if ((EMIT_STATE(name) || (extra_cond)) && \
|
||||
!(cmd->state.pipeline_draw_states & (1u << id))) { \
|
||||
unsigned size = tu6_##name##_size<CHIP>(cmd->device, __VA_ARGS__); \
|
||||
if (size > 0) { \
|
||||
|
|
@ -3569,6 +3637,16 @@ tu_emit_draw_state(struct tu_cmd_buffer *cmd)
|
|||
cmd->state.shaders[MESA_SHADER_TESS_EVAL],
|
||||
&cmd->state.program,
|
||||
cmd->vk.dynamic_graphics_state.ts.patch_control_points);
|
||||
if (!cmd->device->physical_device->info->a6xx.has_coherent_ubwc_flag_caches) {
|
||||
DRAW_STATE_COND(prim_mode_sysmem,
|
||||
TU_DYNAMIC_STATE_PRIM_MODE_SYSMEM,
|
||||
cmd->state.dirty & (TU_CMD_DIRTY_RAST_ORDER |
|
||||
TU_CMD_DIRTY_FEEDBACK_LOOPS),
|
||||
cmd->state.raster_order_attachment_access,
|
||||
cmd->vk.dynamic_graphics_state.feedback_loops |
|
||||
cmd->state.pipeline_feedback_loops,
|
||||
&cmd->state.rp.sysmem_single_prim_mode);
|
||||
}
|
||||
#undef DRAW_STATE
|
||||
#undef DRAW_STATE_COND
|
||||
#undef EMIT_STATE
|
||||
|
|
@ -3651,7 +3729,6 @@ tu_pipeline_builder_parse_rasterization_order(
|
|||
* when implemented in the future.
|
||||
*/
|
||||
|
||||
enum a6xx_single_prim_mode sysmem_prim_mode = NO_FLUSH;
|
||||
enum a6xx_single_prim_mode gmem_prim_mode = NO_FLUSH;
|
||||
|
||||
if (raster_order_attachment_access) {
|
||||
|
|
@ -3661,27 +3738,7 @@ tu_pipeline_builder_parse_rasterization_order(
|
|||
* both input and color attachments from one fragment to the next,
|
||||
* in rasterization order, without explicit synchronization.
|
||||
*/
|
||||
if (builder->device->physical_device->info->a6xx.has_coherent_ubwc_flag_caches)
|
||||
sysmem_prim_mode = FLUSH_PER_OVERLAP;
|
||||
else
|
||||
sysmem_prim_mode = FLUSH_PER_OVERLAP_AND_OVERWRITE;
|
||||
gmem_prim_mode = FLUSH_PER_OVERLAP;
|
||||
pipeline->prim_order.sysmem_single_prim_mode = true;
|
||||
} else if (!builder->device->physical_device->info->a6xx.has_coherent_ubwc_flag_caches) {
|
||||
/* If there is a feedback loop, then the shader can read the previous value
|
||||
* of a pixel being written out. It can also write some components and then
|
||||
* read different components without a barrier in between. This is a
|
||||
* problem in sysmem mode with UBWC, because the main buffer and flags
|
||||
* buffer can get out-of-sync if only one is flushed. We fix this by
|
||||
* setting the SINGLE_PRIM_MODE field to the same value that the blob does
|
||||
* for advanced_blend in sysmem mode if a feedback loop is detected.
|
||||
*/
|
||||
if (builder->graphics_state.pipeline_flags &
|
||||
(VK_PIPELINE_CREATE_2_COLOR_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT |
|
||||
VK_PIPELINE_CREATE_2_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT)) {
|
||||
sysmem_prim_mode = FLUSH_PER_OVERLAP_AND_OVERWRITE;
|
||||
pipeline->prim_order.sysmem_single_prim_mode = true;
|
||||
}
|
||||
}
|
||||
|
||||
struct tu_cs cs;
|
||||
|
|
@ -3690,11 +3747,6 @@ tu_pipeline_builder_parse_rasterization_order(
|
|||
tu_cs_emit_write_reg(&cs, REG_A6XX_GRAS_SC_CNTL,
|
||||
A6XX_GRAS_SC_CNTL_CCUSINGLECACHELINESIZE(2) |
|
||||
A6XX_GRAS_SC_CNTL_SINGLE_PRIM_MODE(gmem_prim_mode));
|
||||
|
||||
pipeline->prim_order.state_sysmem = tu_cs_draw_state(&pipeline->cs, &cs, 2);
|
||||
tu_cs_emit_write_reg(&cs, REG_A6XX_GRAS_SC_CNTL,
|
||||
A6XX_GRAS_SC_CNTL_CCUSINGLECACHELINESIZE(2) |
|
||||
A6XX_GRAS_SC_CNTL_SINGLE_PRIM_MODE(sysmem_prim_mode));
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ enum tu_dynamic_state
|
|||
TU_DYNAMIC_STATE_BLEND,
|
||||
TU_DYNAMIC_STATE_VERTEX_INPUT,
|
||||
TU_DYNAMIC_STATE_PATCH_CONTROL_POINTS,
|
||||
TU_DYNAMIC_STATE_PRIM_MODE_SYSMEM,
|
||||
TU_DYNAMIC_STATE_COUNT,
|
||||
};
|
||||
|
||||
|
|
@ -153,7 +154,7 @@ struct tu_pipeline
|
|||
struct {
|
||||
/* If the pipeline sets SINGLE_PRIM_MODE for sysmem. */
|
||||
bool sysmem_single_prim_mode;
|
||||
struct tu_draw_state state_sysmem, state_gmem;
|
||||
struct tu_draw_state state_gmem;
|
||||
} prim_order;
|
||||
|
||||
/* draw states for the pipeline */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue