/* * Copyright 2023 Alyssa Rosenzweig * Copyright 2021 Intel Corporation * SPDX-License-Identifier: MIT */ #include "nir.h" #include "nir_builder.h" static nir_def * alpha_to_coverage(nir_builder *b, nir_def *alpha, bool has_intrinsic) { if (has_intrinsic) return nir_alpha_to_coverage(b, alpha); /* The following formula is used to compute final sample mask: * m = int(round(16.0 * clamp(src0_alpha, 0.0, 1.0))) * dither_mask = 0x1111 * ((0xf7540 >> (m & ~3)) & 0xf) | * 0x0404 * (m & 2) | 0x0080 * (m & 1) * sample_mask = sample_mask & dither_mask * * It gives a number of ones proportional to the alpha for 2, 4, 8 or 16 * least significant bits of the result: * * 0.0000 0000000000000000 * 0.0625 0000000010000000 * 0.1250 0000100000001000 * 0.1875 0000100010001000 * 0.2500 0100010001000100 * 0.3125 0100010011000100 * 0.3750 0100110001001100 * 0.4375 0100110011001100 * 0.5000 0101010101010101 * 0.5625 0101010111010101 * 0.6250 0101110101011101 * 0.6875 0101110111011101 * 0.7500 0111011101110111 * 0.8125 0111011111110111 * 0.8750 0111111101111111 * 0.9375 0111111111111111 * 1.0000 1111111111111111 * * We use 16-bit math for the multiplies because the result always fits * into 16 bits and that is typically way cheaper than full 32-bit * multiplies. */ nir_def *m = nir_f2i32(b, nir_fround_even(b, nir_fmul_imm(b, nir_fsat(b, alpha), 16.0))); nir_def *part_a = nir_u2u16(b, nir_iand_imm(b, nir_ushr(b, nir_imm_int(b, 0xf7540), nir_iand_imm(b, m, ~3)), 0xf)); nir_def *part_b = nir_iand_imm(b, nir_u2u16(b, m), 2); nir_def *part_c = nir_iand_imm(b, nir_u2u16(b, m), 1); nir_def *mask = nir_ior(b, nir_imul_imm(b, part_a, 0x1111), nir_ior(b, nir_imul_imm(b, part_b, 0x0404), nir_imul_imm(b, part_c, 0x0080))); /* Rotate the mask based on (pixel.x % 2) + (pixel.y % 2). This provides * dithering and randomizes the sample locations. */ nir_def *pixel = nir_f2u32(b, nir_channels(b, nir_load_frag_coord(b), 0x3)); nir_def *rotate_amount = nir_iadd(b, nir_iand_imm(b, nir_channel(b, pixel, 0), 0x1), nir_iand_imm(b, nir_channel(b, pixel, 1), 0x1)); mask = nir_ior(b, nir_ushr(b, mask, rotate_amount), nir_ishl(b, mask, nir_isub_imm(b, 16, rotate_amount))); return nir_u2u32(b, mask); } /* * Lower alpha-to-coverage to sample_mask and some math. May run on either a * monolithic pixel shader or a fragment epilogue. */ bool nir_lower_alpha_to_coverage(nir_shader *shader, bool has_intrinsic, nir_def *dyn_enable) { /* nir_lower_io_to_temporaries ensures that stores are in the last block */ nir_function_impl *impl = nir_shader_get_entrypoint(shader); nir_block *block = nir_impl_last_block(impl); /* The store is probably at the end of the block, so search in reverse. */ nir_intrinsic_instr *store = NULL; nir_foreach_instr_reverse(instr, block) { if (instr->type != nir_instr_type_intrinsic) continue; nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); if (intr->intrinsic != nir_intrinsic_store_output) continue; nir_io_semantics sem = nir_intrinsic_io_semantics(intr); if (sem.location != FRAG_RESULT_DATA0) continue; if (sem.dual_source_blend_index != 0) continue; store = intr; break; } /* If render target 0 isn't written, the alpha value input to * alpha-to-coverage is undefined. We assume that the alpha would be 1.0, * which would effectively disable alpha-to-coverage, skipping the lowering. * * Similarly, if there are less than 4 components, alpha is undefined. */ nir_def *rgba = store ? store->src[0].ssa : NULL; if (!rgba || rgba->num_components < 4) { return nir_no_progress(impl); } nir_builder _b = nir_builder_at(nir_before_instr(&store->instr)); nir_builder *b = &_b; nir_def *alpha = nir_channel(b, rgba, 3); if (dyn_enable) alpha = nir_bcsel(b, dyn_enable, alpha, nir_imm_floatN_t(b, 1.0f, alpha->bit_size)); nir_def *mask = alpha_to_coverage(b, alpha, has_intrinsic); /* Discard samples that aren't covered */ nir_demote_samples(b, nir_inot(b, mask)); shader->info.fs.uses_discard = true; return nir_progress(true, impl, nir_metadata_control_flow); } /* * Modify the inputs to store_output instructions in a pixel shader when * alpha-to-one is used. May run on either a monolithic pixel shader or a * fragment epilogue. */ bool nir_lower_alpha_to_one(nir_shader *shader) { bool progress = false; /* nir_lower_io_to_temporaries ensures that stores are in the last block */ nir_function_impl *impl = nir_shader_get_entrypoint(shader); nir_block *block = nir_impl_last_block(impl); nir_foreach_instr(instr, block) { if (instr->type != nir_instr_type_intrinsic) continue; nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); if (intr->intrinsic != nir_intrinsic_store_output) continue; /* The OpenGL spec is a bit confusing here, but seemingly alpha-to-one * applies to all render targets. Piglit * ext_framebuffer_multisample-draw-buffers-alpha-to-one checks this. * * Even more confusingly, it seems to apply to dual-source blending too. * ext_framebuffer_multisample-alpha-to-one-dual-src-blend checks this. */ nir_io_semantics sem = nir_intrinsic_io_semantics(intr); if (sem.location < FRAG_RESULT_DATA0) continue; nir_def *rgba = intr->src[0].ssa; if (rgba->num_components < 4) continue; nir_builder b = nir_builder_at(nir_before_instr(instr)); nir_def *rgb1 = nir_vector_insert_imm( &b, rgba, nir_imm_floatN_t(&b, 1.0, rgba->bit_size), 3); nir_src_rewrite(&intr->src[0], rgb1); progress = true; } return nir_progress(progress, impl, nir_metadata_control_flow); }