mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-18 18:08:15 +02:00
The old calculation depended on the sample count and gave subpar results for 8x MSAA with standard sample locations. The new calculation is based on the Intel pass, with the constants adjusted so that the number of covered samples is always proportional to alpha for 2x MSAA and 4x MSAA, and with an added rotation of the sample mask based on the pixel position. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39335>
176 lines
6 KiB
C
176 lines
6 KiB
C
/*
 * Copyright 2023 Alyssa Rosenzweig
 * Copyright 2021 Intel Corporation
 * SPDX-License-Identifier: MIT
 */
|
|
|
|
#include "nir.h"
|
|
#include "nir_builder.h"
|
|
|
|
static nir_def *
|
|
alpha_to_coverage(nir_builder *b, nir_def *alpha, bool has_intrinsic)
|
|
{
|
|
if (has_intrinsic)
|
|
return nir_alpha_to_coverage(b, alpha);
|
|
|
|
/* The following formula is used to compute final sample mask:
|
|
* m = int(round(16.0 * clamp(src0_alpha, 0.0, 1.0)))
|
|
* dither_mask = 0x1111 * ((0xf7540 >> (m & ~3)) & 0xf) |
|
|
* 0x0404 * (m & 2) | 0x0080 * (m & 1)
|
|
* sample_mask = sample_mask & dither_mask
|
|
*
|
|
* It gives a number of ones proportional to the alpha for 2, 4, 8 or 16
|
|
* least significant bits of the result:
|
|
*
|
|
* 0.0000 0000000000000000
|
|
* 0.0625 0000000010000000
|
|
* 0.1250 0000100000001000
|
|
* 0.1875 0000100010001000
|
|
* 0.2500 0100010001000100
|
|
* 0.3125 0100010011000100
|
|
* 0.3750 0100110001001100
|
|
* 0.4375 0100110011001100
|
|
* 0.5000 0101010101010101
|
|
* 0.5625 0101010111010101
|
|
* 0.6250 0101110101011101
|
|
* 0.6875 0101110111011101
|
|
* 0.7500 0111011101110111
|
|
* 0.8125 0111011111110111
|
|
* 0.8750 0111111101111111
|
|
* 0.9375 0111111111111111
|
|
* 1.0000 1111111111111111
|
|
*
|
|
* We use 16-bit math for the multiplies because the result always fits
|
|
* into 16 bits and that is typically way cheaper than full 32-bit
|
|
* multiplies.
|
|
*/
|
|
nir_def *m =
|
|
nir_f2i32(b, nir_fround_even(b, nir_fmul_imm(b, nir_fsat(b, alpha), 16.0)));
|
|
|
|
nir_def *part_a =
|
|
nir_u2u16(b, nir_iand_imm(b, nir_ushr(b, nir_imm_int(b, 0xf7540),
|
|
nir_iand_imm(b, m, ~3)),
|
|
0xf));
|
|
|
|
nir_def *part_b = nir_iand_imm(b, nir_u2u16(b, m), 2);
|
|
nir_def *part_c = nir_iand_imm(b, nir_u2u16(b, m), 1);
|
|
|
|
nir_def *mask = nir_ior(b, nir_imul_imm(b, part_a, 0x1111),
|
|
nir_ior(b, nir_imul_imm(b, part_b, 0x0404),
|
|
nir_imul_imm(b, part_c, 0x0080)));
|
|
|
|
/* Rotate the mask based on (pixel.x % 2) + (pixel.y % 2). This provides
|
|
* dithering and randomizes the sample locations.
|
|
*/
|
|
nir_def *pixel = nir_f2u32(b, nir_channels(b, nir_load_frag_coord(b), 0x3));
|
|
nir_def *rotate_amount =
|
|
nir_iadd(b, nir_iand_imm(b, nir_channel(b, pixel, 0), 0x1),
|
|
nir_iand_imm(b, nir_channel(b, pixel, 1), 0x1));
|
|
mask = nir_ior(b, nir_ushr(b, mask, rotate_amount),
|
|
nir_ishl(b, mask, nir_isub_imm(b, 16, rotate_amount)));
|
|
return nir_u2u32(b, mask);
|
|
}
|
|
|
|
/*
|
|
* Lower alpha-to-coverage to sample_mask and some math. May run on either a
|
|
* monolithic pixel shader or a fragment epilogue.
|
|
*/
|
|
bool
|
|
nir_lower_alpha_to_coverage(nir_shader *shader, bool has_intrinsic, nir_def *dyn_enable)
|
|
{
|
|
/* nir_lower_io_to_temporaries ensures that stores are in the last block */
|
|
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
|
|
nir_block *block = nir_impl_last_block(impl);
|
|
|
|
/* The store is probably at the end of the block, so search in reverse. */
|
|
nir_intrinsic_instr *store = NULL;
|
|
nir_foreach_instr_reverse(instr, block) {
|
|
if (instr->type != nir_instr_type_intrinsic)
|
|
continue;
|
|
|
|
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
|
if (intr->intrinsic != nir_intrinsic_store_output)
|
|
continue;
|
|
|
|
nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
|
|
if (sem.location != FRAG_RESULT_DATA0)
|
|
continue;
|
|
if (sem.dual_source_blend_index != 0)
|
|
continue;
|
|
|
|
store = intr;
|
|
break;
|
|
}
|
|
|
|
/* If render target 0 isn't written, the alpha value input to
|
|
* alpha-to-coverage is undefined. We assume that the alpha would be 1.0,
|
|
* which would effectively disable alpha-to-coverage, skipping the lowering.
|
|
*
|
|
* Similarly, if there are less than 4 components, alpha is undefined.
|
|
*/
|
|
nir_def *rgba = store ? store->src[0].ssa : NULL;
|
|
if (!rgba || rgba->num_components < 4) {
|
|
return nir_no_progress(impl);
|
|
}
|
|
|
|
nir_builder _b = nir_builder_at(nir_before_instr(&store->instr));
|
|
nir_builder *b = &_b;
|
|
|
|
nir_def *alpha = nir_channel(b, rgba, 3);
|
|
if (dyn_enable)
|
|
alpha = nir_bcsel(b, dyn_enable, alpha, nir_imm_floatN_t(b, 1.0f, alpha->bit_size));
|
|
nir_def *mask = alpha_to_coverage(b, alpha, has_intrinsic);
|
|
|
|
/* Discard samples that aren't covered */
|
|
nir_demote_samples(b, nir_inot(b, mask));
|
|
shader->info.fs.uses_discard = true;
|
|
return nir_progress(true, impl, nir_metadata_control_flow);
|
|
}
|
|
|
|
/*
|
|
* Modify the inputs to store_output instructions in a pixel shader when
|
|
* alpha-to-one is used. May run on either a monolithic pixel shader or a
|
|
* fragment epilogue.
|
|
*/
|
|
bool
|
|
nir_lower_alpha_to_one(nir_shader *shader)
|
|
{
|
|
bool progress = false;
|
|
|
|
/* nir_lower_io_to_temporaries ensures that stores are in the last block */
|
|
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
|
|
nir_block *block = nir_impl_last_block(impl);
|
|
|
|
nir_foreach_instr(instr, block) {
|
|
if (instr->type != nir_instr_type_intrinsic)
|
|
continue;
|
|
|
|
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
|
if (intr->intrinsic != nir_intrinsic_store_output)
|
|
continue;
|
|
|
|
/* The OpenGL spec is a bit confusing here, but seemingly alpha-to-one
|
|
* applies to all render targets. Piglit
|
|
* ext_framebuffer_multisample-draw-buffers-alpha-to-one checks this.
|
|
*
|
|
* Even more confusingly, it seems to apply to dual-source blending too.
|
|
* ext_framebuffer_multisample-alpha-to-one-dual-src-blend checks this.
|
|
*/
|
|
nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
|
|
if (sem.location < FRAG_RESULT_DATA0)
|
|
continue;
|
|
|
|
nir_def *rgba = intr->src[0].ssa;
|
|
if (rgba->num_components < 4)
|
|
continue;
|
|
|
|
nir_builder b = nir_builder_at(nir_before_instr(instr));
|
|
nir_def *rgb1 = nir_vector_insert_imm(
|
|
&b, rgba, nir_imm_floatN_t(&b, 1.0, rgba->bit_size), 3);
|
|
|
|
nir_src_rewrite(&intr->src[0], rgb1);
|
|
progress = true;
|
|
}
|
|
|
|
return nir_progress(progress, impl, nir_metadata_control_flow);
|
|
}
|