kk: Expose sampleRateShading

Reviewed-by: Arcady Goldmints-Orlov <arcady@lunarg.com>
Signed-off-by: Aitor Camacho <aitor@lunarg.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39699>
This commit is contained in:
Aitor Camacho 2026-01-29 22:26:12 +09:00 committed by Marge Bot
parent f0b038cb7f
commit cc6c200719
9 changed files with 273 additions and 43 deletions

View file

@ -49,6 +49,36 @@ info on what was updated.
Workarounds
===========
KK_WORKAROUND_7
---------------
| macOS version: 26.0.1
| Metal ticket: Not reported
| Metal ticket status:
| CTS test failure: ``dEQP-VK.renderpasses.renderpass2.depth_stencil_resolve.image_2d_*testing_stencil_samplemask``
| Comments:
Metal seems to ignore the sample_mask output for the stencil attachment in
cases where we have no color attachments, a multisample depth_stencil
attachment with at least 2 samples, and a fragment shader that writes only the
depth and a static sample_mask output.
My conclusion is that they may try to prematurely optimize by doing early
fragment testing disregarding completely the sample_mask out and applying
the value to all stencil samples.
The failing tests do something along the following lines: start a render pass
with depth_stencil cleared to 0.0f and 0 respectively, setting stencil to 1 if
the depth test passes. The sample mask output is 1 (first sample). Draw a
framebuffer-sized square at 0.5f depth. End the render pass, storing the
values. Then start a second render pass loading the previous output, this time
setting stencil to 255 if the depth test passes. The sample mask output is 2
(second sample). Draw a framebuffer-sized square at 0.5f depth.
In a similar fashion to the two workarounds listed below this one, we insert a
conditional discard at the end of the shader, discarding fragments not covered
by the coverage_mask.
| Log:
| 2026-02-05: Workaround implemented
KK_WORKAROUND_6
---------------
| macOS version: 26.0.1

View file

@ -184,11 +184,78 @@ vs_output_block(nir_shader *shader, struct nir_to_msl_ctx *ctx)
if (shader->info.clip_distance_array_size)
P_IND(ctx, "float gl_ClipDistance [[clip_distance]] [%d];",
shader->info.clip_distance_array_size);
shader->info.clip_distance_array_size);
ctx->indentlevel--;
P(ctx, "};\n");
}
static void
fs_input_interpolant(struct nir_to_msl_ctx *ctx, uint64_t location)
{
struct io_slot_info info = ctx->inputs_info[location];
bool scalarized = VARYING_SLOT_INFO[location].scalarized;
const char *type = alu_type_to_string(info.type);
const char *vector_suffix =
scalarized ? "" : vector_suffixes[info.num_components];
unsigned components = scalarized ? info.num_components : 1;
const char *interp = "";
switch (info.interpolation) {
case INTERP_MODE_SMOOTH:
interp = "interpolation::perspective";
break;
case INTERP_MODE_NOPERSPECTIVE:
interp = "interpolation::no_perspective";
break;
default:
UNREACHABLE("Only perspective and no-perspective are allowed");
}
for (int c = 0; c < components; c++) {
P_IND(ctx, "interpolant<%s%s, %s> ", type, vector_suffix, interp);
varying_slot_name(ctx, location, c);
P(ctx, " ");
varying_slot_semantic(ctx, location, c);
P(ctx, ";\n");
}
}
static void
fs_input_no_interpolant(struct nir_to_msl_ctx *ctx, uint64_t location)
{
struct io_slot_info info = ctx->inputs_info[location];
bool scalarized = VARYING_SLOT_INFO[location].scalarized;
const char *type = alu_type_to_string(info.type);
const char *vector_suffix =
scalarized ? "" : vector_suffixes[info.num_components];
unsigned components = scalarized ? info.num_components : 1;
const char *interp = "";
switch (info.interpolation) {
case INTERP_MODE_NOPERSPECTIVE:
if (info.centroid)
interp = "[[centroid_no_perspective]]";
else if (info.sample)
interp = "[[sample_no_perspective]]";
else
interp = "[[center_no_perspective]]";
break;
case INTERP_MODE_FLAT:
interp = "[[flat]]";
break;
default:
if (info.centroid)
interp = "[[centroid_perspective]]";
else if (info.sample)
interp = "[[sample_perspective]]";
break;
}
for (int c = 0; c < components; c++) {
P_IND(ctx, "%s%s ", type, vector_suffix);
varying_slot_name(ctx, location, c);
P(ctx, " ");
varying_slot_semantic(ctx, location, c);
P(ctx, " %s;\n", interp);
}
}
/* Generate the struct definition for the fragment shader input argument */
static void
fs_input_block(nir_shader *shader, struct nir_to_msl_ctx *ctx)
@ -197,38 +264,10 @@ fs_input_block(nir_shader *shader, struct nir_to_msl_ctx *ctx)
ctx->indentlevel++;
u_foreach_bit64(location, shader->info.inputs_read) {
struct io_slot_info info = ctx->inputs_info[location];
bool scalarized = VARYING_SLOT_INFO[location].scalarized;
const char *type = alu_type_to_string(info.type);
const char *vector_suffix =
scalarized ? "" : vector_suffixes[info.num_components];
const char *interp = "";
switch (info.interpolation) {
case INTERP_MODE_NOPERSPECTIVE:
if (info.centroid)
interp = "[[centroid_no_perspective]]";
else if (info.sample)
interp = "[[sample_no_perspective]]";
else
interp = "[[center_no_perspective]]";
break;
case INTERP_MODE_FLAT:
interp = "[[flat]]";
break;
default:
if (info.centroid)
interp = "[[centroid_perspective]]";
else if (info.sample)
interp = "[[sample_perspective]]";
break;
}
unsigned components = scalarized ? info.num_components : 1;
for (int c = 0; c < components; c++) {
P_IND(ctx, "%s%s ", type, vector_suffix);
varying_slot_name(ctx, location, c);
P(ctx, " ");
varying_slot_semantic(ctx, location, c);
P(ctx, " %s;\n", interp);
}
if (info.uses_interpolant)
fs_input_interpolant(ctx, location);
else
fs_input_no_interpolant(ctx, location);
}
/* Enable reading from framebuffer */
@ -302,6 +341,9 @@ msl_nir_gather_io_info(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
interp_intrin->intrinsic == nir_intrinsic_load_barycentric_centroid;
ctx->input[location].sample =
interp_intrin->intrinsic == nir_intrinsic_load_barycentric_sample;
ctx->input[location].uses_interpolant |=
interp_intrin->intrinsic == nir_intrinsic_load_barycentric_at_sample ||
interp_intrin->intrinsic == nir_intrinsic_load_barycentric_at_offset;
break;
}
case nir_intrinsic_load_input: {

View file

@ -131,8 +131,10 @@ msl_replace_load_sample_mask_in_for_static_sample_mask(
if (intr->intrinsic != nir_intrinsic_load_sample_mask_in)
return false;
nir_def *sample_mask = (nir_def *)data;
nir_def_rewrite_uses(&intr->def, sample_mask);
b->cursor = nir_after_instr(&intr->instr);
nir_def *static_sample_mask = (nir_def *)data;
nir_def *sample_mask = nir_iand(b, &intr->def, static_sample_mask);
nir_def_rewrite_uses_after(&intr->def, sample_mask);
return true;
}
@ -150,14 +152,15 @@ msl_lower_static_sample_mask(nir_shader *nir, uint32_t sample_mask)
.location = FRAG_RESULT_SAMPLE_MASK,
.num_slots = 1u,
};
nir_def *sample_mask_def = nir_imm_int(&b, sample_mask);
nir_store_output(&b, sample_mask_def, nir_imm_int(&b, 0u), .base = 0u,
.range = 1u, .write_mask = 0x1, .component = 0u,
nir_def *static_sample_mask = nir_imm_int(&b, sample_mask);
nir_store_output(&b, nir_load_sample_mask_in(&b), nir_imm_int(&b, 0u),
.base = 0u, .range = 1u, .write_mask = 0x1, .component = 0u,
.src_type = nir_type_uint32, .io_semantics = io_semantics);
BITSET_SET(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN);
return nir_shader_intrinsics_pass(
nir_shader_intrinsics_pass(
nir, msl_replace_load_sample_mask_in_for_static_sample_mask,
nir_metadata_control_flow, sample_mask_def);
nir_metadata_control_flow, static_sample_mask);
return true;
}
@ -309,6 +312,73 @@ msl_nir_fake_guard_for_discards(struct nir_shader *nir)
nir_metadata_control_flow, NULL);
}
/* Returns true if gl_SampleID is required. */
static bool
gather_fs_input_interpolant_usage(nir_builder *b, nir_intrinsic_instr *intr,
void *data)
{
if (intr->intrinsic != nir_intrinsic_load_interpolated_input)
return false;
bool *uses_interpolant = (bool *)data;
struct nir_io_semantics io = nir_intrinsic_io_semantics(intr);
nir_intrinsic_instr *interpolation = nir_src_as_intrinsic(intr->src[0]);
bool at_sample =
interpolation->intrinsic == nir_intrinsic_load_barycentric_at_sample;
uses_interpolant[io.location] |=
at_sample ||
interpolation->intrinsic == nir_intrinsic_load_barycentric_at_offset;
return at_sample;
}
static bool
lower_sample_shading(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
if (intr->intrinsic == nir_intrinsic_load_frag_coord) {
b->cursor = nir_after_instr(&intr->instr);
nir_def *offset =
nir_fadd(b, nir_load_sample_pos_from_id(b, 32u, nir_load_sample_id(b)),
nir_imm_vec2(b, -0.5f, -0.5f));
nir_def *sample_position =
nir_fadd(b, &intr->def, nir_pad_vector_imm_int(b, offset, 0u, 4u));
nir_def_rewrite_uses_after(&intr->def, sample_position);
return true;
}
if (intr->intrinsic != nir_intrinsic_load_interpolated_input)
return false;
bool *uses_interpolant = (bool *)data;
struct nir_io_semantics io = nir_intrinsic_io_semantics(intr);
nir_intrinsic_instr *interpolation = nir_src_as_intrinsic(intr->src[0]);
if (!uses_interpolant[io.location] ||
interpolation->intrinsic != nir_intrinsic_load_barycentric_sample)
return false;
b->cursor = nir_before_instr(&intr->instr);
nir_def *def = nir_load_barycentric_at_sample(
b, intr->def.bit_size, nir_load_sample_id(b),
.interp_mode = nir_intrinsic_interp_mode(interpolation));
nir_def_rewrite_uses_after(&interpolation->def, def);
nir_instr_remove(&interpolation->instr);
return false;
}
bool
msl_nir_lower_sample_shading(nir_shader *nir)
{
assert(nir->info.stage == MESA_SHADER_FRAGMENT);
bool uses_interpolant[NUM_TOTAL_VARYING_SLOTS] = {};
if (nir_shader_intrinsics_pass(nir, gather_fs_input_interpolant_usage,
nir_metadata_all, uses_interpolant))
BITSET_SET(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID);
return nir_shader_intrinsics_pass(
nir, lower_sample_shading, nir_metadata_control_flow, uses_interpolant);
}
static bool
lower_clip_distance(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{

View file

@ -15,6 +15,7 @@ struct io_slot_info {
unsigned num_components;
bool centroid;
bool sample;
bool uses_interpolant;
};
struct nir_to_msl_ctx {

View file

@ -276,6 +276,10 @@ infer_types_from_intrinsic(struct hash_table *types, nir_intrinsic_instr *instr)
set_type(types, &instr->def, ty);
break;
}
case nir_intrinsic_load_sample_pos_from_id:
set_type(types, &instr->def, TYPE_FLOAT);
set_type(types, &instr->src[0], TYPE_UINT);
break;
case nir_intrinsic_load_global_constant:
set_type(types, &instr->def, TYPE_GENERIC_DATA);
set_type(types, &instr->src[0], TYPE_UINT);
@ -356,6 +360,7 @@ infer_types_from_intrinsic(struct hash_table *types, nir_intrinsic_instr *instr)
set_type(types, &instr->def, TYPE_UINT);
break;
case nir_intrinsic_load_vulkan_descriptor:
case nir_intrinsic_load_barycentric_at_sample:
set_type(types, &instr->src[0], TYPE_UINT);
set_type(types, &instr->def, TYPE_UINT);
break;
@ -380,10 +385,12 @@ infer_types_from_intrinsic(struct hash_table *types, nir_intrinsic_instr *instr)
case nir_intrinsic_ddy_coarse:
case nir_intrinsic_ddx_fine:
case nir_intrinsic_ddy_fine:
case nir_intrinsic_load_barycentric_at_offset:
set_type(types, &instr->src[0], TYPE_FLOAT);
set_type(types, &instr->def, TYPE_FLOAT);
break;
case nir_intrinsic_load_point_coord:
case nir_intrinsic_load_sample_pos:
set_type(types, &instr->def, TYPE_FLOAT);
break;
case nir_intrinsic_load_front_face:

View file

@ -852,6 +852,34 @@ memory_modes_to_msl(struct nir_to_msl_ctx *ctx, nir_variable_mode modes)
P(ctx, "mem_flags::mem_none");
}
static void
msl_interpolant_method(struct nir_to_msl_ctx *ctx, nir_src *src)
{
nir_intrinsic_instr *interpolation_intrin = nir_src_as_intrinsic(*src);
switch (interpolation_intrin->intrinsic) {
case nir_intrinsic_load_barycentric_at_offset:
P(ctx, ".interpolate_at_offset(")
src_to_msl(ctx, &interpolation_intrin->src[0u]);
/* Need to add 0.5f since GLSL.std.450 marks the pixel center as the
* coordinate while Metal is at top-left. */
P(ctx, " + 0.5f)");
break;
case nir_intrinsic_load_barycentric_centroid:
P(ctx, ".interpolate_at_centroid()");
break;
case nir_intrinsic_load_barycentric_at_sample:
P(ctx, ".interpolate_at_sample(");
src_to_msl(ctx, &interpolation_intrin->src[0u]);
P(ctx, ")");
break;
case nir_intrinsic_load_barycentric_pixel:
P(ctx, ".interpolate_at_center()");
break;
default:
break;
}
}
static void
intrinsic_to_msl(struct nir_to_msl_ctx *ctx, nir_intrinsic_instr *instr)
{
@ -859,7 +887,9 @@ intrinsic_to_msl(struct nir_to_msl_ctx *ctx, nir_intrinsic_instr *instr)
* don't generate any code. */
if (instr->intrinsic == nir_intrinsic_load_barycentric_pixel ||
instr->intrinsic == nir_intrinsic_load_barycentric_centroid ||
instr->intrinsic == nir_intrinsic_load_barycentric_sample)
instr->intrinsic == nir_intrinsic_load_barycentric_sample ||
instr->intrinsic == nir_intrinsic_load_barycentric_at_sample ||
instr->intrinsic == nir_intrinsic_load_barycentric_at_offset)
return;
const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];
@ -978,7 +1008,10 @@ intrinsic_to_msl(struct nir_to_msl_ctx *ctx, nir_intrinsic_instr *instr)
P(ctx, "gl_SampleID;\n");
break;
case nir_intrinsic_load_sample_mask_in:
P(ctx, "gl_SampleMask;\n");
P(ctx, "gl_SampleMask & (1u << gl_SampleID);\n");
break;
case nir_intrinsic_load_sample_pos:
P(ctx, "get_sample_position(gl_SampleID);\n");
break;
case nir_intrinsic_load_amplification_id_kk:
P(ctx, "mtl_AmplificationID;\n");
@ -990,6 +1023,8 @@ intrinsic_to_msl(struct nir_to_msl_ctx *ctx, nir_intrinsic_instr *instr)
uint32_t location = io.location + idx;
msl_input_name(ctx, location, component);
if (ctx->inputs_info[location].uses_interpolant)
msl_interpolant_method(ctx, &instr->src[0u]);
if (instr->num_components < msl_input_num_components(ctx, location)) {
P(ctx, ".");
for (unsigned i = 0; i < instr->num_components; i++)
@ -1013,6 +1048,12 @@ intrinsic_to_msl(struct nir_to_msl_ctx *ctx, nir_intrinsic_instr *instr)
P(ctx, ";\n");
break;
}
case nir_intrinsic_load_sample_pos_from_id: {
P(ctx, "get_sample_position(");
src_to_msl(ctx, &instr->src[0]);
P(ctx, ");\n");
break;
}
case nir_intrinsic_load_output: {
unsigned idx = nir_src_as_uint(instr->src[0]);
nir_io_semantics io = nir_intrinsic_io_semantics(instr);
@ -2043,6 +2084,25 @@ msl_gather_info(struct nir_to_msl_ctx *ctx, struct nir_to_msl_options *options)
msl_gather_io_info(ctx, ctx->inputs_info, ctx->outputs_info);
if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT) {
/* Metal does not have a system value for sample position but an
* entrypoint "get_sample_position" that take the sample index aka
* sample id. Ensure we have the required system values present. */
if (BITSET_TEST(ctx->shader->info.system_values_read,
SYSTEM_VALUE_SAMPLE_POS)) {
BITSET_CLEAR(ctx->shader->info.system_values_read,
SYSTEM_VALUE_SAMPLE_POS);
BITSET_SET(ctx->shader->info.system_values_read,
SYSTEM_VALUE_SAMPLE_ID);
}
/* Metal's sample mask has all the bits when we only care about the bit
* the fragment was generated for. This is why we need to compute it
* using sample id. */
if (BITSET_TEST(ctx->shader->info.system_values_read,
SYSTEM_VALUE_SAMPLE_MASK_IN))
BITSET_SET(ctx->shader->info.system_values_read,
SYSTEM_VALUE_SAMPLE_ID);
for (uint32_t i = 0u; i < MAX_DRAW_BUFFERS; ++i) {
uint32_t location = i + FRAG_RESULT_DATA0;
ctx->outputs_info[location].num_components =

View file

@ -66,4 +66,5 @@ bool msl_ensure_vertex_position_output(nir_shader *nir);
bool msl_nir_fs_io_types(nir_shader *nir);
bool msl_nir_vs_io_types(nir_shader *nir);
bool msl_nir_fake_guard_for_discards(struct nir_shader *nir);
bool msl_nir_lower_sample_shading(nir_shader *nir);
void msl_lower_nir_late(nir_shader *nir);

View file

@ -171,6 +171,7 @@ kk_get_device_features(
.occlusionQueryPrecise = true,
.robustBufferAccess = true,
.samplerAnisotropy = true,
.sampleRateShading = true,
.shaderClipDistance = true,
.shaderImageGatherExtended = true,
.shaderInt16 = true,

View file

@ -406,6 +406,13 @@ static void
kk_lower_fs(struct kk_device *dev, nir_shader *nir,
const struct vk_graphics_pipeline_state *state)
{
/* msl_nir_lower_sample_shading needs to go before blending since
* nir_lower_blend will always set uses_sample_shading to true if there's any
* output read. I believe we do not need to lower it always, that is why it
* goes here but need to double check. */
if (nir->info.fs.uses_sample_shading)
NIR_PASS(_, nir, msl_nir_lower_sample_shading);
if (state->cb)
kk_lower_fs_blend(nir, state);
@ -425,8 +432,19 @@ kk_lower_fs(struct kk_device *dev, nir_shader *nir,
NIR_PASS(_, nir, msl_nir_fs_io_types);
if (state->ms && state->ms->rasterization_samples &&
state->ms->sample_mask != UINT16_MAX)
state->ms->sample_mask != UINT16_MAX) {
/* KK_WORKAROUND_7 */
if (!(dev->disabled_workarounds & BITFIELD64_BIT(7))) {
if (!nir->info.fs.early_fragment_tests) {
nir_function_impl *entrypoint = nir_shader_get_entrypoint(nir);
nir_builder b = nir_builder_at(nir_after_impl(entrypoint));
nir_discard_if(&b,
nir_ieq_imm(&b, nir_load_sample_mask_in(&b), 0u));
}
}
NIR_PASS(_, nir, msl_lower_static_sample_mask, state->ms->sample_mask);
}
/* Check https://github.com/KhronosGroup/Vulkan-Portability/issues/54 for
* explanation on why we need this. */
else if (nir->info.fs.needs_full_quad_helper_invocations ||