nvk: implement VK_EXT_conservative_rasterization

This change is built on top of work originally done by Benjamin Lee.

Implement conservative rasterization on GPUs that support it.  On pre-Volta
GPUs this is done through an MME macro; on Volta and later it is done through
the (newly published) SET_CONSERVATIVE_RASTER* methods.

primitiveUnderestimation and fullyCoveredFragmentShaderInputVariable will be
supported later as they require SPH and compiler work.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/9627
Signed-off-by: Arthur Huillet <ahuillet@nvidia.com>
Reviewed-by: Mary Guillemard <mary.guillemard@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28937>
This commit is contained in:
Arthur Huillet 2024-04-26 15:29:39 +02:00 committed by Marge Bot
parent 4dd97b1d72
commit 715f2f1425
4 changed files with 95 additions and 23 deletions

View file

@ -39,19 +39,21 @@ nvk_cmd_buffer_3d_cls(struct nvk_cmd_buffer *cmd)
return pdev->info.cls_eng3d;
}
void
nvk_mme_set_priv_reg(struct mme_builder *b)
{
static void
mme_set_priv_reg(struct mme_builder *b,
struct mme_value value,
struct mme_value mask,
struct mme_value reg) {
mme_mthd(b, NV9097_WAIT_FOR_IDLE);
mme_emit(b, mme_zero());
mme_mthd(b, NV9097_SET_MME_SHADOW_SCRATCH(0));
mme_emit(b, mme_zero());
mme_emit(b, mme_load(b));
mme_emit(b, mme_load(b));
mme_emit(b, value);
mme_emit(b, mask);
mme_mthd(b, NV9097_SET_FALCON04);
mme_emit(b, mme_load(b));
mme_emit(b, reg);
struct mme_value loop_cond = mme_mov(b, mme_zero());
mme_while(b, ine, loop_cond, mme_imm(1)) {
@ -61,6 +63,27 @@ nvk_mme_set_priv_reg(struct mme_builder *b)
};
}
void
nvk_mme_set_priv_reg(struct mme_builder *b)
{
struct mme_value value = mme_load(b);
struct mme_value mask = mme_load(b);
struct mme_value reg = mme_load(b);
mme_set_priv_reg(b, value, mask, reg);
}
void
nvk_mme_set_conservative_raster_state(struct mme_builder *b) {
struct mme_value new_state = mme_load(b);
struct mme_value old_state = nvk_mme_load_scratch(b, CONSERVATIVE_RASTER_STATE);
mme_if(b, ine, new_state, old_state) {
nvk_mme_store_scratch(b, CONSERVATIVE_RASTER_STATE, new_state);
mme_set_priv_reg(b, new_state, mme_imm(BITFIELD_RANGE(23, 2)), mme_imm(0x418800));
}
}
VkResult
nvk_push_draw_state_init(struct nvk_device *dev, struct nv_push *p)
{
@ -171,6 +194,12 @@ nvk_push_draw_state_init(struct nvk_device *dev, struct nv_push *p)
P_INLINE_DATA(p, BITFIELD_BIT(14));
P_INLINE_DATA(p, reg);
}
/* Set CONSERVATIVE_RASTER_STATE to an invalid value to ensure that the
 * hardware register is always written the first time conservative
 * rasterization is enabled. */
P_IMMD(p, NV9097, SET_MME_SHADOW_SCRATCH(NVK_MME_SCRATCH_CONSERVATIVE_RASTER_STATE),
~0);
P_IMMD(p, NV9097, SET_RENDER_ENABLE_C, MODE_TRUE);
@ -1497,7 +1526,7 @@ vk_to_nv9097_provoking_vertex(VkProvokingVertexModeEXT vk_mode)
static void
nvk_flush_rs_state(struct nvk_cmd_buffer *cmd)
{
struct nv_push *p = nvk_cmd_buffer_push(cmd, 40);
struct nv_push *p = nvk_cmd_buffer_push(cmd, 44);
const struct vk_dynamic_graphics_state *dyn =
&cmd->vk.dynamic_graphics_state;
@ -1650,6 +1679,31 @@ nvk_flush_rs_state(struct nvk_cmd_buffer *cmd)
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_RASTERIZATION_STREAM))
P_IMMD(p, NV9097, SET_RASTER_INPUT, dyn->rs.rasterization_stream);
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_CONSERVATIVE_MODE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_EXTRA_PRIMITIVE_OVERESTIMATION_SIZE)) {
if (dyn->rs.conservative_mode == VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT) {
P_IMMD(p, NVB197, SET_CONSERVATIVE_RASTER, ENABLE_FALSE);
} else {
uint32_t extra_overestimate =
MIN2(3, dyn->rs.extra_primitive_overestimation_size * 4);
if (nvk_cmd_buffer_3d_cls(cmd) < VOLTA_A) {
P_1INC(p, NVB197, CALL_MME_MACRO(NVK_MME_SET_CONSERVATIVE_RASTER_STATE));
P_INLINE_DATA(p, extra_overestimate << 23);
} else {
P_IMMD(p, NVC397, SET_CONSERVATIVE_RASTER_CONTROL, {
.extra_prim_bloat = extra_overestimate,
.copy_inner_to_outer =
(dyn->rs.conservative_mode == VK_CONSERVATIVE_RASTERIZATION_MODE_UNDERESTIMATE_EXT),
.triangle_snap_mode = TRIANGLE_SNAP_MODE_MODE_PRE_SNAP,
.line_and_point_snap_mode = LINE_AND_POINT_SNAP_MODE_MODE_PRE_SNAP,
.uncertainty_region_size = UNCERTAINTY_REGION_SIZE_SIZE_512,
});
}
P_IMMD(p, NVB197, SET_CONSERVATIVE_RASTER, ENABLE_TRUE);
}
}
}
static VkSampleLocationEXT

View file

@ -7,22 +7,23 @@
#include "nvk_private.h"
/* Table of MME macro builder functions, indexed by the NVK_MME_* macro
 * identifiers and sized by NVK_MME_COUNT.
 *
 * NOTE(review): every index except NVK_MME_SET_CONSERVATIVE_RASTER_STATE is
 * listed twice with the same function.  With designated initializers the
 * later entry for an index is the one that takes effect, so the table
 * contents are unaffected.  This looks like the old and new sides of a
 * whitespace-realignment hunk — confirm before dropping the duplicates.
 */
static const nvk_mme_builder_func mme_builders[NVK_MME_COUNT] = {
[NVK_MME_BIND_CBUF_DESC] = nvk_mme_bind_cbuf_desc,
[NVK_MME_CLEAR] = nvk_mme_clear,
[NVK_MME_DRAW] = nvk_mme_draw,
[NVK_MME_DRAW_INDEXED] = nvk_mme_draw_indexed,
[NVK_MME_DRAW_INDIRECT] = nvk_mme_draw_indirect,
[NVK_MME_DRAW_INDEXED_INDIRECT] = nvk_mme_draw_indexed_indirect,
[NVK_MME_DRAW_INDIRECT_COUNT] = nvk_mme_draw_indirect_count,
[NVK_MME_DRAW_INDEXED_INDIRECT_COUNT] = nvk_mme_draw_indexed_indirect_count,
[NVK_MME_ADD_CS_INVOCATIONS] = nvk_mme_add_cs_invocations,
[NVK_MME_DISPATCH_INDIRECT] = nvk_mme_dispatch_indirect,
[NVK_MME_WRITE_CS_INVOCATIONS] = nvk_mme_write_cs_invocations,
[NVK_MME_COPY_QUERIES] = nvk_mme_copy_queries,
[NVK_MME_XFB_COUNTER_LOAD] = nvk_mme_xfb_counter_load,
[NVK_MME_XFB_DRAW_INDIRECT] = nvk_mme_xfb_draw_indirect,
[NVK_MME_SET_PRIV_REG] = nvk_mme_set_priv_reg,
[NVK_MME_SET_WRITE_MASK] = nvk_mme_set_write_mask,
[NVK_MME_BIND_CBUF_DESC] = nvk_mme_bind_cbuf_desc,
[NVK_MME_CLEAR] = nvk_mme_clear,
[NVK_MME_DRAW] = nvk_mme_draw,
[NVK_MME_DRAW_INDEXED] = nvk_mme_draw_indexed,
[NVK_MME_DRAW_INDIRECT] = nvk_mme_draw_indirect,
[NVK_MME_DRAW_INDEXED_INDIRECT] = nvk_mme_draw_indexed_indirect,
[NVK_MME_DRAW_INDIRECT_COUNT] = nvk_mme_draw_indirect_count,
[NVK_MME_DRAW_INDEXED_INDIRECT_COUNT] = nvk_mme_draw_indexed_indirect_count,
[NVK_MME_ADD_CS_INVOCATIONS] = nvk_mme_add_cs_invocations,
[NVK_MME_DISPATCH_INDIRECT] = nvk_mme_dispatch_indirect,
[NVK_MME_WRITE_CS_INVOCATIONS] = nvk_mme_write_cs_invocations,
[NVK_MME_COPY_QUERIES] = nvk_mme_copy_queries,
[NVK_MME_XFB_COUNTER_LOAD] = nvk_mme_xfb_counter_load,
[NVK_MME_XFB_DRAW_INDIRECT] = nvk_mme_xfb_draw_indirect,
[NVK_MME_SET_PRIV_REG] = nvk_mme_set_priv_reg,
[NVK_MME_SET_WRITE_MASK] = nvk_mme_set_write_mask,
[NVK_MME_SET_CONSERVATIVE_RASTER_STATE] = nvk_mme_set_conservative_raster_state,
};
uint32_t *

View file

@ -26,6 +26,7 @@ enum nvk_mme {
NVK_MME_XFB_DRAW_INDIRECT,
NVK_MME_SET_PRIV_REG,
NVK_MME_SET_WRITE_MASK,
NVK_MME_SET_CONSERVATIVE_RASTER_STATE,
NVK_MME_COUNT,
};
@ -37,6 +38,9 @@ enum nvk_mme_scratch {
NVK_MME_SCRATCH_DRAW_PAD_DW,
NVK_MME_SCRATCH_DRAW_IDX,
NVK_MME_SCRATCH_VIEW_MASK,
NVK_MME_SCRATCH_WRITE_MASK_DYN,
NVK_MME_SCRATCH_WRITE_MASK_PIPELINE,
NVK_MME_SCRATCH_CONSERVATIVE_RASTER_STATE,
/* Must be at the end */
NVK_MME_NUM_SCRATCH,
@ -126,5 +130,6 @@ void nvk_mme_xfb_counter_load(struct mme_builder *b);
void nvk_mme_xfb_draw_indirect(struct mme_builder *b);
void nvk_mme_set_priv_reg(struct mme_builder *b);
void nvk_mme_set_write_mask(struct mme_builder *b);
void nvk_mme_set_conservative_raster_state(struct mme_builder *b);
#endif /* NVK_MME_H */

View file

@ -174,6 +174,7 @@ nvk_get_device_extensions(const struct nvk_instance *instance,
.EXT_buffer_device_address = true,
.EXT_calibrated_timestamps = true,
.EXT_conditional_rendering = true,
.EXT_conservative_rasterization = info->cls_eng3d >= MAXWELL_B,
.EXT_color_write_enable = true,
.EXT_custom_border_color = true,
.EXT_depth_bias_control = true,
@ -868,6 +869,17 @@ nvk_get_device_properties(const struct nvk_instance *instance,
/* VK_KHR_push_descriptor */
.maxPushDescriptors = NVK_MAX_PUSH_DESCRIPTORS,
/* VK_EXT_conservative_rasterization */
.primitiveOverestimationSize = info->cls_eng3d >= VOLTA_A ? 1.0f / 512.0f : 0.0,
.maxExtraPrimitiveOverestimationSize = 0.75,
.extraPrimitiveOverestimationSizeGranularity = 0.25,
.primitiveUnderestimation = false,
.conservativePointAndLineRasterization = true,
.degenerateLinesRasterized = info->cls_eng3d >= VOLTA_A,
.degenerateTrianglesRasterized = info->cls_eng3d >= PASCAL_A,
.fullyCoveredFragmentShaderInputVariable = false,
.conservativeRasterizationPostDepthCoverage = true,
/* VK_EXT_custom_border_color */
.maxCustomBorderColorSamplers = 4000,