kk: Support VK_EXT_sample_locations

Sample locations are render pass state in Metal. In the best case
(same sample positions for all of subpass), we simply configure it
at the start and proceed as normal. For sub-optimal case (sample
positions change during subpass), we can support it by restarting
the Metal render pass with the new values.

This also interacts with the existing logic for centering sample
positions for bresenham lines. The user's custom sample positions
are prioritized, and centering applies in the default case. Some bug
fixes have also been made to prevent losing attachment contents from
render pass restarts and ensure the render pass restart happens before
other draw state is flushed.

Reviewed-by: Aitor Camacho <aitor@lunarg.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/42036>
This commit is contained in:
squidbus 2026-06-04 17:09:41 -07:00 committed by Marge Bot
parent 43b110dd62
commit d24599ee21
6 changed files with 147 additions and 61 deletions

View file

@ -683,7 +683,7 @@ Khronos extensions that are not part of any Vulkan version:
VK_EXT_rasterization_order_attachment_access DONE (lvp, tu, vn)
VK_EXT_rgba10x6_formats DONE (panvk/v11+)
VK_EXT_robustness2 DONE (anv, hasvk, hk, kk, lvp, nvk, panvk/v10+, pvr, radv, tu, v3dv, vn)
VK_EXT_sample_locations DONE (anv, hasvk, hk, lvp, nvk, radv, tu/a650+, vn)
VK_EXT_sample_locations DONE (anv, hasvk, hk, kk, lvp, nvk, radv, tu/a650+, vn)
VK_EXT_shader_atomic_float DONE (anv, hasvk, kk, lvp, nvk, panvk, radv, tu, vn)
VK_EXT_shader_atomic_float2 DONE (anv, lvp, radv, vn)
VK_EXT_shader_float8 DONE (radv/gfx12+, vn)

View file

@ -111,6 +111,11 @@ struct kk_rendering_state {
struct kk_attachment depth_att;
struct kk_attachment stencil_att;
struct kk_attachment fsr_att;
bool ms_bresenham_lines;
bool sample_locations_enable;
uint32_t sample_locations_count;
VkSampleLocationEXT sample_locations[KK_MAX_SAMPLES];
};
/* Dirty tracking bits for state not tracked by vk_dynamic_graphics_state or
@ -129,7 +134,6 @@ struct kk_graphics_state {
mtl_render_pass_descriptor *render_pass_descriptor;
bool is_depth_stencil_dynamic;
bool is_cull_front_and_back;
bool is_ms_bresenham_lines;
bool need_to_start_render_pass;
enum kk_dirty dirty;

View file

@ -759,6 +759,136 @@ set_empty_scissor(mtl_render_encoder *enc)
#define IS_SHADER_DIRTY(bit) \
(cmd->state.dirty_shaders & BITFIELD_BIT(MESA_SHADER_##bit))
static bool
kk_flush_sample_locations(struct kk_cmd_buffer *cmd)
{
struct kk_graphics_state *gfx = &cmd->state.gfx;
struct kk_rendering_state *render = &gfx->render;
struct vk_dynamic_graphics_state *dyn = &cmd->vk.dynamic_graphics_state;
bool needs_update = false;
/* Determine if the user-provided custom sample locations have changed */
if (IS_DIRTY(MS_SAMPLE_LOCATIONS_ENABLE) || IS_DIRTY(MS_SAMPLE_LOCATIONS)) {
needs_update |=
render->sample_locations_enable != dyn->ms.sample_locations_enable;
render->sample_locations_enable = dyn->ms.sample_locations_enable;
if (render->sample_locations_enable) {
struct vk_sample_locations_state *sample_locations =
dyn->ms.sample_locations;
uint32_t count = sample_locations->per_pixel *
sample_locations->grid_size.width *
sample_locations->grid_size.height;
needs_update |= render->sample_locations_count != count;
needs_update |= memcmp(render->sample_locations,
dyn->ms.sample_locations->locations,
count * sizeof(VkSampleLocationEXT)) != 0;
render->sample_locations_count = count;
typed_memcpy(render->sample_locations,
dyn->ms.sample_locations->locations, count);
}
}
/* Determine if we have switched to or from multisampled bresenham lines */
if (IS_DIRTY(IA_PRIMITIVE_TOPOLOGY) || IS_DIRTY(RS_LINE_MODE)) {
bool was_ms_bresenham_lines = render->ms_bresenham_lines;
render->ms_bresenham_lines =
u_reduced_prim(dyn->ia.primitive_topology) == MESA_PRIM_LINES &&
dyn->rs.line.mode == VK_LINE_RASTERIZATION_MODE_BRESENHAM &&
render->samples > 1;
needs_update |= was_ms_bresenham_lines != render->ms_bresenham_lines;
}
if (needs_update) {
if (render->sample_locations_enable) {
/* Prioritize user-provided sample locations */
struct mtl_sample_position sample_positions[KK_MAX_SAMPLES];
for (uint32_t i = 0; i < KK_MAX_SAMPLES; i++) {
VkSampleLocationEXT sl = render->sample_locations[i];
sample_positions[i] = (struct mtl_sample_position){
/* Metal asserts if values are out of range. Vulkan spec says
* values are clamped, and dynamic state CTS tests hit this */
.x = CLAMP(sl.x, KK_MIN_SAMPLE_LOCATION, KK_MAX_SAMPLE_LOCATION),
.y = CLAMP(sl.y, KK_MIN_SAMPLE_LOCATION, KK_MAX_SAMPLE_LOCATION),
};
}
mtl_render_pass_descriptor_set_sample_positions(
gfx->render_pass_descriptor, sample_positions,
render->sample_locations_count);
} else if (render->ms_bresenham_lines) {
/* For default sample locations with bresenham lines, set all to center
* to provide correct rasterization */
static const struct mtl_sample_position center = {0.5f, 0.5f};
static const struct mtl_sample_position center_all[KK_MAX_SAMPLES] = {
center, center, center, center, center, center, center, center};
mtl_render_pass_descriptor_set_sample_positions(
gfx->render_pass_descriptor, center_all, gfx->render.samples);
} else {
/* If custom sample locations are not needed, reset them */
mtl_render_pass_descriptor_set_sample_positions(
gfx->render_pass_descriptor, NULL, 0);
}
}
return needs_update;
}
static void
kk_force_attachment_load(struct kk_cmd_buffer *cmd)
{
struct kk_rendering_state *render = &cmd->state.gfx.render;
for (uint32_t i = 0; i < render->color_att_count; i++) {
if (render->color_att[i].iview) {
mtl_render_pass_attachment_descriptor *att =
mtl_render_pass_descriptor_get_color_attachment(
cmd->state.gfx.render_pass_descriptor, i);
mtl_render_pass_attachment_descriptor_set_load_action(
att, MTL_LOAD_ACTION_LOAD);
}
}
if (render->depth_att.iview) {
mtl_render_pass_attachment_descriptor *att =
mtl_render_pass_descriptor_get_depth_attachment(
cmd->state.gfx.render_pass_descriptor);
mtl_render_pass_attachment_descriptor_set_load_action(
att, MTL_LOAD_ACTION_LOAD);
}
if (render->stencil_att.iview) {
mtl_render_pass_attachment_descriptor *att =
mtl_render_pass_descriptor_get_stencil_attachment(
cmd->state.gfx.render_pass_descriptor);
mtl_render_pass_attachment_descriptor_set_load_action(
att, MTL_LOAD_ACTION_LOAD);
}
}
static void
kk_flush_render_pass(struct kk_cmd_buffer *cmd)
{
bool needs_restart = kk_flush_sample_locations(cmd);
/* If render pass state changes and the pass is currently active, end the
* current encoder and prepare to restart it */
bool active_render = cmd->encoder->main.last_used == KK_ENC_RENDER &&
cmd->encoder->main.encoder;
if (needs_restart && active_render) {
kk_encoder_signal_fence_and_end(cmd);
kk_cmd_buffer_dirty_all_gfx(cmd);
cmd->state.gfx.need_to_start_render_pass = true;
/* Override load action to prevent data loss between encoders.
* TODO_KOSMICKRISP: Handle store action if we stop always setting it to
* STORE. Metal allows it to be encoded later. */
kk_force_attachment_load(cmd);
}
}
static void
kk_flush_pipeline(struct kk_cmd_buffer *cmd)
{
@ -1177,6 +1307,7 @@ kk_flush_gfx_state(struct kk_cmd_buffer *cmd)
struct kk_graphics_state *gfx = &cmd->state.gfx;
struct kk_descriptor_state *desc = &gfx->descriptors;
kk_flush_render_pass(cmd);
kk_flush_pipeline(cmd);
if (desc->push_dirty)

View file

@ -272,53 +272,6 @@ kk_encoder_submit(struct kk_encoder *encoder)
mtl_command_buffer_commit(encoder->main.cmd_buffer);
}
static bool
kk_try_configure_line_ms(struct kk_cmd_buffer *cmd)
{
struct kk_graphics_state *gfx = &cmd->state.gfx;
struct vk_dynamic_graphics_state *dyn = &cmd->vk.dynamic_graphics_state;
bool was_ms_bresenham_lines = cmd->state.gfx.is_ms_bresenham_lines;
gfx->is_ms_bresenham_lines =
u_reduced_prim(dyn->ia.primitive_topology) == MESA_PRIM_LINES &&
dyn->rs.line.mode == VK_LINE_RASTERIZATION_MODE_BRESENHAM &&
gfx->render.samples > 1;
if (was_ms_bresenham_lines != gfx->is_ms_bresenham_lines) {
if (gfx->is_ms_bresenham_lines) {
/* Set all sample positions to center for bresenham lines */
static const struct mtl_sample_position center = {0.5f, 0.5f};
static const struct mtl_sample_position center_all[8] = {
center, center, center, center, center, center, center, center};
mtl_render_pass_descriptor_set_sample_positions(
gfx->render_pass_descriptor, center_all, gfx->render.samples);
} else {
/* Disable sample positions */
mtl_render_pass_descriptor_set_sample_positions(
gfx->render_pass_descriptor, NULL, 0);
}
return true;
}
return false;
}
static bool
kk_flush_render_pass(struct kk_cmd_buffer *cmd)
{
struct kk_encoder *encoder = cmd->encoder;
bool needs_restart = kk_try_configure_line_ms(cmd);
if (needs_restart && encoder->main.last_used == KK_ENC_RENDER &&
encoder->main.encoder) {
kk_encoder_signal_fence_and_end(cmd);
kk_cmd_buffer_dirty_all_gfx(cmd);
}
return needs_restart;
}
mtl_render_encoder *
kk_render_encoder(struct kk_cmd_buffer *cmd)
{
@ -326,18 +279,11 @@ kk_render_encoder(struct kk_cmd_buffer *cmd)
struct kk_graphics_state *gfx = &cmd->state.gfx;
bool start_render_pass = false;
if (gfx->need_to_start_render_pass) {
gfx->render.samples = gfx->pipeline_sample_count;
mtl_render_pass_descriptor_set_default_raster_sample_count(
cmd->state.gfx.render_pass_descriptor, gfx->render.samples);
gfx->need_to_start_render_pass = false;
start_render_pass = true;
}
start_render_pass |= kk_flush_render_pass(cmd);
if (start_render_pass) {
kk_encoder_start_render(cmd, gfx->render_pass_descriptor,
gfx->render.view_mask);
}

View file

@ -175,6 +175,7 @@ kk_get_device_extensions(const struct kk_instance *instance,
.EXT_primitive_restart_index = true,
.EXT_primitive_topology_list_restart = true,
.EXT_robustness2 = true,
.EXT_sample_locations = true,
.EXT_shader_atomic_float = true,
.EXT_shader_replicated_composites = true,
.EXT_shader_subgroup_ballot = true,
@ -403,6 +404,7 @@ kk_get_device_features(
.extendedDynamicState3DepthClampEnable = true,
.extendedDynamicState3DepthClipNegativeOneToOne = true,
.extendedDynamicState3LineRasterizationMode = true,
.extendedDynamicState3SampleLocationsEnable = true,
.extendedDynamicState3TessellationDomainOrigin = true,
/* EXT_image_2d_view_of_3d */
@ -696,7 +698,7 @@ kk_get_device_properties(const struct kk_physical_device *pdev,
/* VK_EXT_extended_dynamic_state3 */
.dynamicPrimitiveTopologyUnrestricted = false,
/* VK_EXT_external_memory_metal */
/* VK_EXT_external_memory_host */
.minImportedHostPointerAlignment = os_page_size,
/* VK_EXT_graphics_pipeline_library */
@ -775,12 +777,13 @@ kk_get_device_properties(const struct kk_physical_device *pdev,
.robustUniformBufferAccessSizeAlignment = KK_MIN_UBO_ALIGNMENT,
/* VK_EXT_sample_locations */
.sampleLocationSampleCounts = sample_counts,
/* Metal does not support sample positions for single sample */
.sampleLocationSampleCounts = sample_counts & ~VK_SAMPLE_COUNT_1_BIT,
.maxSampleLocationGridSize = (VkExtent2D){1, 1},
.sampleLocationCoordinateRange[0] = 0.0f,
.sampleLocationCoordinateRange[1] = 0.9375f,
.sampleLocationCoordinateRange[0] = KK_MIN_SAMPLE_LOCATION,
.sampleLocationCoordinateRange[1] = KK_MAX_SAMPLE_LOCATION,
.sampleLocationSubPixelBits = 4,
.variableSampleLocations = false,
.variableSampleLocations = true,
/* VK_EXT_shader_object */
.shaderBinaryVersion = 0,

View file

@ -35,6 +35,8 @@
#define KK_MAX_MULTIVIEW_VIEW_COUNT 32
#define KK_TEXTURE_BUFFER_WIDTH (1u << 14)
#define KK_MAX_OCCLUSION_QUERIES (32768)
#define KK_MIN_SAMPLE_LOCATION (0.0f)
#define KK_MAX_SAMPLE_LOCATION (0.9375f)
#define KK_SPARSE_ADDR_SPACE_SIZE (1ull << 39)