tu: Support VK_EXT_conservative_rasterization on a7xx

This supports everything the blob does.

The registers also exist on later a6xx gens, but they would be much more
inconvenient to use there since they're mixed up with binning/not-binning
and compression state, and I'm not sure whether the feature works there.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33152>
This commit is contained in:
Connor Abbott 2025-01-21 22:47:10 -05:00 committed by Marge Bot
parent 2798521bda
commit 6d406eeefa
11 changed files with 73 additions and 24 deletions

View file

@ -581,7 +581,7 @@ Khronos extensions that are not part of any Vulkan version:
VK_EXT_calibrated_timestamps DONE (anv, hasvk, nvk, lvp, radv, vn, tu/a750+)
VK_EXT_color_write_enable DONE (anv, hasvk, lvp, nvk, radv, tu, v3dv, vn)
VK_EXT_conditional_rendering DONE (anv, hasvk, lvp, nvk, radv, tu, vn)
VK_EXT_conservative_rasterization DONE (anv, nvk, radv, vn)
VK_EXT_conservative_rasterization DONE (anv, nvk, radv, vn, tu/a7xx+)
VK_EXT_custom_border_color DONE (anv, hasvk, lvp, nvk, panvk, radv, tu, v3dv, vn)
VK_EXT_debug_marker DONE (radv)
VK_EXT_debug_report DONE (anv, dzn, lvp, nvk, panvk, pvr, radv, tu, v3dv)

View file

@ -5361,7 +5361,7 @@ clusters:
00000000 GRAS_SU_POLY_OFFSET_OFFSET: 0.000000
00000000 GRAS_SU_POLY_OFFSET_OFFSET_CLAMP: 0.000000
00000000 GRAS_SU_DEPTH_BUFFER_INFO: { DEPTH_FORMAT = DEPTH6_NONE }
00000000 GRAS_SU_CONSERVATIVE_RAS_CNTL: { SHIFTAMOUNT = 0 }
00000000 GRAS_SU_CONSERVATIVE_RAS_CNTL: { SHIFTAMOUNT = NO_SHIFT }
00000000 GRAS_SU_PATH_RENDERING_CNTL: { 0 }
00000000 GRAS_VS_LAYER_CNTL: { 0 }
00000000 GRAS_GS_LAYER_CNTL: { 0 }
@ -5604,7 +5604,7 @@ clusters:
00000000 GRAS_SU_POLY_OFFSET_OFFSET: 0.000000
00000000 GRAS_SU_POLY_OFFSET_OFFSET_CLAMP: 0.000000
00000000 GRAS_SU_DEPTH_BUFFER_INFO: { DEPTH_FORMAT = DEPTH6_NONE }
00000000 GRAS_SU_CONSERVATIVE_RAS_CNTL: { SHIFTAMOUNT = 0 }
00000000 GRAS_SU_CONSERVATIVE_RAS_CNTL: { SHIFTAMOUNT = NO_SHIFT }
00000000 GRAS_SU_PATH_RENDERING_CNTL: { 0 }
00000000 GRAS_VS_LAYER_CNTL: { 0 }
00000000 GRAS_GS_LAYER_CNTL: { 0 }

View file

@ -1991,7 +1991,7 @@ got cmdszdw=83
!+ 100167800 VSC_DRAW_STRM_ADDRESS: 0x100167800
!+ 00000001 UCHE_UNKNOWN_0E12: 0x1
!+ 00000004 UCHE_CLIENT_PF: { PERFSEL = 0x4 }
+ 00000000 GRAS_SU_CONSERVATIVE_RAS_CNTL: { SHIFTAMOUNT = 0 }
+ 00000000 GRAS_SU_CONSERVATIVE_RAS_CNTL: { SHIFTAMOUNT = NO_SHIFT }
+ 00000000 GRAS_UNKNOWN_80AF: FALSE
+ 00000000 GRAS_UNKNOWN_8110: 0
!+ 04f06080 GRAS_2D_BLIT_CNTL: { ROTATE = ROTATE_0 | SOLID_COLOR | COLOR_FORMAT = FMT6_16_16_16_16_UNORM | MASK = 0xf | IFMT = R2D_FLOAT32 | RASTER_MODE = TYPE_TILED }
@ -2293,7 +2293,7 @@ got cmdszdw=83
+ 100167800 VSC_DRAW_STRM_ADDRESS: 0x100167800
+ 00000001 UCHE_UNKNOWN_0E12: 0x1
+ 00000004 UCHE_CLIENT_PF: { PERFSEL = 0x4 }
+ 00000000 GRAS_SU_CONSERVATIVE_RAS_CNTL: { SHIFTAMOUNT = 0 }
+ 00000000 GRAS_SU_CONSERVATIVE_RAS_CNTL: { SHIFTAMOUNT = NO_SHIFT }
!+ 00e00000 GRAS_BIN_CONTROL: { BINW = 0 | BINH = 0 | RENDER_MODE = RENDERING_PASS | FORCE_LRZ_WRITE_DIS | BUFFERS_LOCATION = BUFFERS_IN_SYSMEM | LRZ_FEEDBACK_ZMODE_MASK = LRZ_FEEDBACK_NONE }
+ 00000000 GRAS_UNKNOWN_80AF: FALSE
+ 00000000 GRAS_SC_WINDOW_SCISSOR_TL: { X = 0 | Y = 0 }
@ -5183,7 +5183,7 @@ ESTIMATED CRASH LOCATION!
+ 100167800 VSC_DRAW_STRM_ADDRESS: 0x100167800
+ 00000001 UCHE_UNKNOWN_0E12: 0x1
+ 00000004 UCHE_CLIENT_PF: { PERFSEL = 0x4 }
+ 00000000 GRAS_SU_CONSERVATIVE_RAS_CNTL: { SHIFTAMOUNT = 0 }
+ 00000000 GRAS_SU_CONSERVATIVE_RAS_CNTL: { SHIFTAMOUNT = NO_SHIFT }
+ 00000000 GRAS_UNKNOWN_80AF: FALSE
+ 00000000 GRAS_UNKNOWN_8110: 0
!+ 04f06000 GRAS_2D_BLIT_CNTL: { ROTATE = ROTATE_0 | COLOR_FORMAT = FMT6_16_16_16_16_UNORM | MASK = 0xf | IFMT = R2D_FLOAT32 | RASTER_MODE = TYPE_TILED }
@ -17189,7 +17189,7 @@ clusters:
00000000 GRAS_SU_POLY_OFFSET_OFFSET: 0.000000
00000000 GRAS_SU_POLY_OFFSET_OFFSET_CLAMP: 0.000000
00000000 GRAS_SU_DEPTH_BUFFER_INFO: { DEPTH_FORMAT = DEPTH6_NONE }
00000000 GRAS_SU_CONSERVATIVE_RAS_CNTL: { SHIFTAMOUNT = 0 }
00000000 GRAS_SU_CONSERVATIVE_RAS_CNTL: { SHIFTAMOUNT = NO_SHIFT }
00000000 GRAS_SU_PATH_RENDERING_CNTL: { 0 }
00000000 GRAS_VS_LAYER_CNTL: { 0 }
00000000 GRAS_GS_LAYER_CNTL: { 0 }
@ -17432,7 +17432,7 @@ clusters:
00000000 GRAS_SU_POLY_OFFSET_OFFSET: 0.000000
00000000 GRAS_SU_POLY_OFFSET_OFFSET_CLAMP: 0.000000
00000000 GRAS_SU_DEPTH_BUFFER_INFO: { DEPTH_FORMAT = DEPTH6_NONE }
00000000 GRAS_SU_CONSERVATIVE_RAS_CNTL: { SHIFTAMOUNT = 0 }
00000000 GRAS_SU_CONSERVATIVE_RAS_CNTL: { SHIFTAMOUNT = NO_SHIFT }
00000000 GRAS_SU_PATH_RENDERING_CNTL: { 0 }
00000000 GRAS_VS_LAYER_CNTL: { 0 }
00000000 GRAS_GS_LAYER_CNTL: { 0 }

View file

@ -153,7 +153,7 @@ cmdstream[0]: 265 dwords
SP_UNKNOWN_B183: 0
0000000001058184: 0000: 40b18301 00000000
write GRAS_SU_CONSERVATIVE_RAS_CNTL (8099)
GRAS_SU_CONSERVATIVE_RAS_CNTL: { SHIFTAMOUNT = 0 }
GRAS_SU_CONSERVATIVE_RAS_CNTL: { SHIFTAMOUNT = NO_SHIFT }
000000000105818c: 0000: 40809901 00000000
write GRAS_SC_CNTL (80a0)
GRAS_SC_CNTL: { CCUSINGLECACHELINESIZE = 0x2 | SINGLE_PRIM_MODE = NO_FLUSH | RASTER_MODE = TYPE_TILED | RASTER_DIRECTION = LR_TB | SEQUENCED_THREAD_DISTRIBUTION = DIST_SCREEN_COORD | ROTATION = 0 }
@ -274,7 +274,7 @@ cmdstream[0]: 265 dwords
!+ 010dc800 VSC_DRAW_STRM_ADDRESS: 0x10dc800
!+ 03200000 UCHE_UNKNOWN_0E12: 0x3200000
!+ 00000004 UCHE_CLIENT_PF: { PERFSEL = 0x4 }
+ 00000000 GRAS_SU_CONSERVATIVE_RAS_CNTL: { SHIFTAMOUNT = 0 }
+ 00000000 GRAS_SU_CONSERVATIVE_RAS_CNTL: { SHIFTAMOUNT = NO_SHIFT }
!+ 00000002 GRAS_SC_CNTL: { CCUSINGLECACHELINESIZE = 0x2 | SINGLE_PRIM_MODE = NO_FLUSH | RASTER_MODE = TYPE_TILED | RASTER_DIRECTION = LR_TB | SEQUENCED_THREAD_DISTRIBUTION = DIST_SCREEN_COORD | ROTATION = 0 }
+ 00000000 GRAS_UNKNOWN_80AF: FALSE
+ 00000000 GRAS_LRZ_CNTL: { DIR = 0 }

View file

@ -141,7 +141,7 @@ cmdstream[0]: 1023 dwords
SP_UNKNOWN_B183: 0
0000000001d91164: 0000: 40b18301 00000000
write GRAS_SU_CONSERVATIVE_RAS_CNTL (8099)
GRAS_SU_CONSERVATIVE_RAS_CNTL: { SHIFTAMOUNT = 0 }
GRAS_SU_CONSERVATIVE_RAS_CNTL: { SHIFTAMOUNT = NO_SHIFT }
0000000001d9116c: 0000: 40809901 00000000
write GRAS_VS_LAYER_CNTL (809b)
GRAS_VS_LAYER_CNTL: { 0 }
@ -927,7 +927,7 @@ cmdstream[0]: 1023 dwords
+ 00000000 GRAS_SU_POLY_OFFSET_OFFSET: 0.000000
+ 00000000 GRAS_SU_POLY_OFFSET_OFFSET_CLAMP: 0.000000
+ 00000000 GRAS_SU_DEPTH_BUFFER_INFO: { DEPTH_FORMAT = DEPTH6_NONE }
+ 00000000 GRAS_SU_CONSERVATIVE_RAS_CNTL: { SHIFTAMOUNT = 0 }
+ 00000000 GRAS_SU_CONSERVATIVE_RAS_CNTL: { SHIFTAMOUNT = NO_SHIFT }
+ 00000000 GRAS_VS_LAYER_CNTL: { 0 }
!+ 00000002 GRAS_SC_CNTL: { CCUSINGLECACHELINESIZE = 0x2 | SINGLE_PRIM_MODE = NO_FLUSH | RASTER_MODE = TYPE_TILED | RASTER_DIRECTION = LR_TB | SEQUENCED_THREAD_DISTRIBUTION = DIST_SCREEN_COORD | ROTATION = 0 }
!+ 06041e11 GRAS_BIN_CONTROL: { BINW = 544 | BINH = 480 | RENDER_MODE = BINNING_PASS | BUFFERS_LOCATION = BUFFERS_IN_GMEM | LRZ_FEEDBACK_ZMODE_MASK = 0x6 }

View file

@ -2535,7 +2535,7 @@ got cmdszdw=438
!+ 0000152b CP_SCRATCH[0x2].REG: 5419
!+ 03200000 UCHE_UNKNOWN_0E12: 0x3200000
!+ 00000004 UCHE_CLIENT_PF: { PERFSEL = 0x4 }
+ 00000000 GRAS_SU_CONSERVATIVE_RAS_CNTL: { SHIFTAMOUNT = 0 }
+ 00000000 GRAS_SU_CONSERVATIVE_RAS_CNTL: { SHIFTAMOUNT = NO_SHIFT }
+ 00000000 GRAS_VS_LAYER_CNTL: { 0 }
!+ 00000002 GRAS_SC_CNTL: { CCUSINGLECACHELINESIZE = 0x2 | SINGLE_PRIM_MODE = NO_FLUSH | RASTER_MODE = TYPE_TILED | RASTER_DIRECTION = LR_TB | SEQUENCED_THREAD_DISTRIBUTION = DIST_SCREEN_COORD | ROTATION = 0 }
!+ 00c00000 GRAS_BIN_CONTROL: { BINW = 0 | BINH = 0 | RENDER_MODE = RENDERING_PASS | BUFFERS_LOCATION = BUFFERS_IN_SYSMEM | LRZ_FEEDBACK_ZMODE_MASK = LRZ_FEEDBACK_NONE }
@ -151168,7 +151168,7 @@ clusters:
00000000 GRAS_SU_POLY_OFFSET_OFFSET: 0.000000
00000000 GRAS_SU_POLY_OFFSET_OFFSET_CLAMP: 0.000000
00000000 GRAS_SU_DEPTH_BUFFER_INFO: { DEPTH_FORMAT = DEPTH6_NONE }
00000000 GRAS_SU_CONSERVATIVE_RAS_CNTL: { SHIFTAMOUNT = 0 }
00000000 GRAS_SU_CONSERVATIVE_RAS_CNTL: { SHIFTAMOUNT = NO_SHIFT }
00000000 GRAS_SU_PATH_RENDERING_CNTL: { 0 }
00000000 GRAS_VS_LAYER_CNTL: { 0 }
00000000 GRAS_GS_LAYER_CNTL: { 0 }
@ -151411,7 +151411,7 @@ clusters:
00000000 GRAS_SU_POLY_OFFSET_OFFSET: 0.000000
00000000 GRAS_SU_POLY_OFFSET_OFFSET_CLAMP: 0.000000
00000000 GRAS_SU_DEPTH_BUFFER_INFO: { DEPTH_FORMAT = DEPTH6_NONE }
00000000 GRAS_SU_CONSERVATIVE_RAS_CNTL: { SHIFTAMOUNT = 0 }
00000000 GRAS_SU_CONSERVATIVE_RAS_CNTL: { SHIFTAMOUNT = NO_SHIFT }
00000000 GRAS_SU_PATH_RENDERING_CNTL: { 0 }
00000000 GRAS_VS_LAYER_CNTL: { 0 }
00000000 GRAS_GS_LAYER_CNTL: { 0 }

View file

@ -3138,7 +3138,12 @@ to upconvert to 32b float internally?
<reg32 offset="0x8099" name="GRAS_SU_CONSERVATIVE_RAS_CNTL" usage="cmd">
<bitfield name="CONSERVATIVERASEN" pos="0" type="boolean"/>
<bitfield name="SHIFTAMOUNT" low="1" high="2"/>
<enum name="a6xx_shift_amount">
<value value="0" name="NO_SHIFT"/>
<value value="1" name="HALF_PIXEL_SHIFT"/>
<value value="2" name="FULL_PIXEL_SHIFT"/>
</enum>
<bitfield name="SHIFTAMOUNT" low="1" high="2" type="a6xx_shift_amount"/>
<bitfield name="INNERCONSERVATIVERASEN" pos="3" type="boolean"/>
<bitfield name="UNK4" low="4" high="5"/>
</reg32>

View file

@ -960,6 +960,8 @@ r3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, enum r3d_type type,
.raster_mode = TYPE_TILED,
.raster_direction = LR_TB));
tu_cs_emit_regs(cs, A7XX_GRAS_SU_RENDER_CNTL());
tu_cs_emit_regs(cs, A6XX_PC_DGEN_SU_CONSERVATIVE_RAS_CNTL());
tu_cs_emit_regs(cs, A6XX_GRAS_SU_CONSERVATIVE_RAS_CNTL());
}
tu_cs_emit_regs(cs,

View file

@ -1435,9 +1435,11 @@ tu6_init_static_regs(struct tu_device *dev, struct tu_cs *cs)
tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_B183, 0);
tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_SU_CONSERVATIVE_RAS_CNTL, 0);
tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80AF, 0);
if (CHIP == A6XX) {
tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_SU_CONSERVATIVE_RAS_CNTL, 0);
tu_cs_emit_regs(cs, A6XX_PC_DGEN_SU_CONSERVATIVE_RAS_CNTL());
tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9210, 0);
tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9211, 0);
}
@ -1479,8 +1481,6 @@ tu6_init_static_regs(struct tu_device *dev, struct tu_cs *cs)
tu_cs_emit_regs(cs, A7XX_PC_TESS_FACTOR_SIZE(TU_TESS_FACTOR_SIZE));
}
tu_cs_emit_regs(cs, A6XX_PC_DGEN_SU_CONSERVATIVE_RAS_CNTL());
/* There is an optimization to skip executing draw states for draws with no
* instances. Instead of simply skipping the draw, internally the firmware
* sets a bit in PC_DRAW_INITIATOR that seemingly skips the draw. However

View file

@ -254,6 +254,7 @@ get_device_extensions(const struct tu_physical_device *device,
.EXT_calibrated_timestamps = device->info->a7xx.has_persistent_counter,
.EXT_color_write_enable = true,
.EXT_conditional_rendering = true,
.EXT_conservative_rasterization = device->info->chip >= 7,
.EXT_custom_border_color = true,
.EXT_depth_clamp_zero_one = true,
.EXT_depth_clip_control = true,
@ -609,8 +610,10 @@ tu_get_features(struct tu_physical_device *pdevice,
features->extendedDynamicState3AlphaToOneEnable = true;
features->extendedDynamicState3DepthClipNegativeOneToOne = true;
features->extendedDynamicState3RasterizationStream = true;
features->extendedDynamicState3ConservativeRasterizationMode = false;
features->extendedDynamicState3ExtraPrimitiveOverestimationSize = false;
features->extendedDynamicState3ConservativeRasterizationMode =
pdevice->vk.supported_extensions.EXT_conservative_rasterization;
features->extendedDynamicState3ExtraPrimitiveOverestimationSize =
pdevice->vk.supported_extensions.EXT_conservative_rasterization;
features->extendedDynamicState3LineRasterizationMode = true;
features->extendedDynamicState3LineStippleEnable = false;
features->extendedDynamicState3ProvokingVertexMode = true;
@ -1122,7 +1125,7 @@ tu_get_properties(struct tu_physical_device *pdevice,
props->fragmentShadingRateWithSampleMask = true;
/* Has wrong gl_SampleMaskIn[0] values with VK_EXT_post_depth_coverage used. */
props->fragmentShadingRateWithShaderSampleMask = false;
props->fragmentShadingRateWithConservativeRasterization = false;
props->fragmentShadingRateWithConservativeRasterization = true;
props->fragmentShadingRateWithFragmentShaderInterlock = false;
props->fragmentShadingRateWithCustomSampleLocations = true;
props->fragmentShadingRateStrictMultiplyCombiner = true;
@ -1340,6 +1343,17 @@ tu_get_properties(struct tu_physical_device *pdevice,
props->maxDescriptorSetAccelerationStructures = max_descriptor_set_size;
props->maxDescriptorSetUpdateAfterBindAccelerationStructures = max_descriptor_set_size;
props->minAccelerationStructureScratchOffsetAlignment = 128;
/* VK_EXT_conservative_rasterization */
props->primitiveOverestimationSize = 0.5 + 1 / 256.;
props->maxExtraPrimitiveOverestimationSize = 0.5;
props->extraPrimitiveOverestimationSizeGranularity = 0.5;
props->primitiveUnderestimation = false;
props->conservativePointAndLineRasterization = false;
props->degenerateTrianglesRasterized = true;
props->degenerateLinesRasterized = false;
props->fullyCoveredFragmentShaderInputVariable = false;
props->conservativeRasterizationPostDepthCoverage = false;
}
static const struct vk_pipeline_cache_object_ops *const cache_import_ops[] = {

View file

@ -3093,6 +3093,8 @@ static const enum mesa_vk_dynamic_graphics_state tu_rast_state[] = {
MESA_VK_DYNAMIC_RS_RASTERIZATION_STREAM,
MESA_VK_DYNAMIC_VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE,
MESA_VK_DYNAMIC_RS_LINE_WIDTH,
MESA_VK_DYNAMIC_RS_CONSERVATIVE_MODE,
MESA_VK_DYNAMIC_RS_EXTRA_PRIMITIVE_OVERESTIMATION_SIZE,
};
template <chip CHIP>
@ -3106,7 +3108,7 @@ tu6_rast_size(struct tu_device *dev,
if (CHIP == A6XX) {
return 15 + (dev->physical_device->info->a6xx.has_legacy_pipeline_shading_rate ? 8 : 0);
} else {
return 21;
return 25;
}
}
@ -3168,10 +3170,36 @@ tu6_emit_rast(struct tu_cs *cs,
.stream = rs->rasterization_stream,
.discard = rs->rasterizer_discard_enable));
bool conservative_ras_en =
rs->conservative_mode ==
VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT;
tu_cs_emit_regs(cs, RB_RENDER_CNTL(CHIP,
.raster_mode = TYPE_TILED,
.raster_direction = LR_TB));
.raster_direction = LR_TB,
.conservativerasen = conservative_ras_en));
tu_cs_emit_regs(cs, A7XX_GRAS_SU_RENDER_CNTL());
tu_cs_emit_regs(cs,
A6XX_PC_DGEN_SU_CONSERVATIVE_RAS_CNTL(conservative_ras_en));
/* The shift amount takes one of three values, only two of which are
* conservative rasterization modes:
* - shift_amount = 0 (NO_SHIFT) - normal rasterization
* - shift_amount = 1 (HALF_PIXEL_SHIFT) - overestimate by half a pixel
* plus the rasterization grid size (1/256)
* - shift_amount = 2 (FULL_PIXEL_SHIFT) - overestimate by another half
* a pixel
*
* We expose a max of 0.5 and a granularity of 0.5, so the app should
* only give us 0 or 0.5, which correspond to HALF_PIXEL_SHIFT and
* FULL_PIXEL_SHIFT respectively. If it gives us anything else, just
* assume it meant 0.5 as the most conservative choice.
*/
enum a6xx_shift_amount shift_amount = conservative_ras_en ?
(rs->extra_primitive_overestimation_size != 0. ?
FULL_PIXEL_SHIFT : HALF_PIXEL_SHIFT) : NO_SHIFT;
tu_cs_emit_regs(cs, A6XX_GRAS_SU_CONSERVATIVE_RAS_CNTL(
.conservativerasen = conservative_ras_en,
.shiftamount = shift_amount));
}
/* move to hw ctx init? */