From 48898c47bf37ac026b9309f330cd210384e20dc4 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Wed, 9 Oct 2024 15:11:57 -0500 Subject: [PATCH] nvk: Rework setup of sample masks Annoyingly, 2x2 with 2 passes uses the masks 0xa and 0x5, not 0x3 and 0xc as the current code assumes. When we enable 4x2_D3D, that one gets even more wonky. This reworks things so that we have two copies of root descriptor sample_masks array in scratch states, one for 2pass and one for 4pass, that get copied into the push constants as needed. This should handle the needs of both 2x2 and 4x2_D3D as well as 4x4 when the time comes. Fixes: 6a84d5439d27 ("nvk: Move the ANTI_ALIAS_CONTROL logic to the MME") Part-of: --- src/nouveau/vulkan/nvk_cmd_draw.c | 119 +++++++++++++++++++----------- src/nouveau/vulkan/nvk_mme.h | 11 +++ 2 files changed, 87 insertions(+), 43 deletions(-) diff --git a/src/nouveau/vulkan/nvk_cmd_draw.c b/src/nouveau/vulkan/nvk_cmd_draw.c index 4f0217be575..758c9ff4b4e 100644 --- a/src/nouveau/vulkan/nvk_cmd_draw.c +++ b/src/nouveau/vulkan/nvk_cmd_draw.c @@ -741,11 +741,57 @@ nvk_cmd_set_sample_layout(struct nvk_cmd_buffer *cmd, enum nil_sample_layout sample_layout) { const uint32_t samples = nil_sample_layout_samples(sample_layout); - struct nv_push *p = nvk_cmd_buffer_push(cmd, 4); + struct nv_push *p = nvk_cmd_buffer_push(cmd, 14); P_IMMD(p, NV9097, SET_ANTI_ALIAS, nil_to_nv9097_samples_mode(sample_layout)); + switch (sample_layout) { + case NIL_SAMPLE_LAYOUT_1X1: + case NIL_SAMPLE_LAYOUT_2X1: + /* These only have two modes: Single-pass or per-sample */ + P_MTHD(p, NV9097, SET_MME_SHADOW_SCRATCH(NVK_MME_SCRATCH_SAMPLE_MASKS_2PASS_0)); + P_INLINE_DATA(p, 0); + P_INLINE_DATA(p, 0); + P_INLINE_DATA(p, 0); + P_INLINE_DATA(p, 0); + P_MTHD(p, NV9097, SET_MME_SHADOW_SCRATCH(NVK_MME_SCRATCH_SAMPLE_MASKS_4PASS_0)); + P_INLINE_DATA(p, 0); + P_INLINE_DATA(p, 0); + P_INLINE_DATA(p, 0); + P_INLINE_DATA(p, 0); + break; + + case NIL_SAMPLE_LAYOUT_2X2: + P_MTHD(p, NV9097, 
SET_MME_SHADOW_SCRATCH(NVK_MME_SCRATCH_SAMPLE_MASKS_2PASS_0)); + P_INLINE_DATA(p, 0x000a0005); + P_INLINE_DATA(p, 0x000a0005); + P_INLINE_DATA(p, 0); + P_INLINE_DATA(p, 0); + P_MTHD(p, NV9097, SET_MME_SHADOW_SCRATCH(NVK_MME_SCRATCH_SAMPLE_MASKS_4PASS_0)); + P_INLINE_DATA(p, 0); + P_INLINE_DATA(p, 0); + P_INLINE_DATA(p, 0); + P_INLINE_DATA(p, 0); + break; + + case NIL_SAMPLE_LAYOUT_4X2: + P_MTHD(p, NV9097, SET_MME_SHADOW_SCRATCH(NVK_MME_SCRATCH_SAMPLE_MASKS_2PASS_0)); + P_INLINE_DATA(p, 0x000f000f); + P_INLINE_DATA(p, 0x000f000f); + P_INLINE_DATA(p, 0x00f000f0); + P_INLINE_DATA(p, 0x00f000f0); + P_MTHD(p, NV9097, SET_MME_SHADOW_SCRATCH(NVK_MME_SCRATCH_SAMPLE_MASKS_4PASS_0)); + P_INLINE_DATA(p, 0x00030003); + P_INLINE_DATA(p, 0x000c000c); + P_INLINE_DATA(p, 0x00300030); + P_INLINE_DATA(p, 0x00c000c0); + break; + + default: + unreachable("Unknown sample layout"); + } + P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_SET_ANTI_ALIAS)); P_INLINE_DATA(p, nvk_mme_anti_alias_samples(samples)); } @@ -2156,23 +2202,8 @@ nvk_mme_set_anti_alias(struct mme_builder *b) mme_emit(b, aac); mme_free_reg(b, aac); - /* Now we need to emit sample masks per-sample: - * - * struct nak_sample_mask push_sm[NVK_MAX_SAMPLES]; - * uint32_t samples_per_pass = samples / passes; - * uint32_t sample_mask = BITFIELD_MASK(samples_per_pass); - * for (uint32_t s = 0; NVK_MAX_SAMPLES;) { - * push_sm[s] = (struct nak_sample_mask) { - * .sample_mask = sample_mask, - * }; - * - * s++; - * - * if (s & samples_per_pass) - * sample_mask <<= samples_per_pass; - * } - * - * Annoyingly, we have to pack these in pairs + /* Now we need to emit sample masks per-sample. Annoyingly, we have to + * pack these in pairs. 
*/ STATIC_ASSERT(sizeof(struct nak_sample_mask) == 2); @@ -2185,7 +2216,6 @@ nvk_mme_set_anti_alias(struct mme_builder *b) struct mme_value samples_per_pass_log2 = mme_sub(b, samples_log2, passes_log2); mme_free_reg(b, samples_log2); - mme_free_reg(b, passes_log2); mme_if(b, ieq, samples_per_pass_log2, mme_zero()) { /* One sample per pass, we can just blast it out */ @@ -2197,32 +2227,27 @@ nvk_mme_set_anti_alias(struct mme_builder *b) } mme_if(b, ine, samples_per_pass_log2, mme_zero()) { - struct mme_value samples_per_pass = - mme_sll(b, mme_imm(1), samples_per_pass_log2); + mme_if(b, ieq, passes_log2, mme_zero()) { + /* It's a single pass so we can use 0xffff */ + for (uint32_t i = 0; i < NVK_MAX_SAMPLES / 2; i++) + mme_emit(b, mme_imm(~0)); + } - /* sample_mask = (1 << samples_per_pass) - 1 */ - struct mme_value sample_mask = - mme_sll(b, mme_imm(1), samples_per_pass); - mme_sub_to(b, sample_mask, sample_mask, mme_imm(1)); + mme_if(b, ieq, passes_log2, mme_imm(1)) { + for (uint32_t i = 0; i < NVK_MAX_SAMPLES / 2; i++) { + struct mme_value mask = + nvk_mme_load_scratch_arr(b, SAMPLE_MASKS_2PASS_0, i); + mme_emit(b, mask); + mme_free_reg(b, mask); + } + } - struct mme_value mod_mask = mme_sub(b, samples_per_pass, mme_imm(1)); - - struct mme_value s = mme_mov(b, mme_zero()); - mme_while(b, ine, s, mme_imm(NVK_MAX_SAMPLES)) { - /* Since samples_per_pass >= 2, we know that both masks in the pair - * will be the same. 
- */ - struct mme_value packed = - mme_merge(b, sample_mask, sample_mask, 16, 16, 0); - mme_emit(b, packed); - mme_free_reg(b, packed); - - mme_add_to(b, s, s, mme_imm(2)); - - /* if (s % samples_per_pass == 0) */ - struct mme_value mod = mme_and(b, s, mod_mask); - mme_if(b, ieq, mod, mme_zero()) { - mme_sll_to(b, sample_mask, sample_mask, samples_per_pass); + mme_if(b, ieq, passes_log2, mme_imm(2)) { + for (uint32_t i = 0; i < NVK_MAX_SAMPLES / 2; i++) { + struct mme_value mask = + nvk_mme_load_scratch_arr(b, SAMPLE_MASKS_4PASS_0, i); + mme_emit(b, mask); + mme_free_reg(b, mask); } } } @@ -2279,6 +2304,10 @@ const struct nvk_mme_test_case nvk_mme_set_anti_alias_tests[] = {{ /* 8 samples, minSampleShading = 0.5 */ .init = (struct nvk_mme_mthd_data[]) { { NVK_SET_MME_SCRATCH(ANTI_ALIAS), 0x1 }, + { NVK_SET_MME_SCRATCH(SAMPLE_MASKS_4PASS_0), 0x030003 }, + { NVK_SET_MME_SCRATCH(SAMPLE_MASKS_4PASS_1), 0x0c000c }, + { NVK_SET_MME_SCRATCH(SAMPLE_MASKS_4PASS_2), 0x300030 }, + { NVK_SET_MME_SCRATCH(SAMPLE_MASKS_4PASS_3), 0xc000c0 }, { } }, .params = (uint32_t[]) { 0x00f00030 }, @@ -2297,6 +2326,10 @@ const struct nvk_mme_test_case nvk_mme_set_anti_alias_tests[] = {{ /* 8 samples, minSampleShading = 0.25 */ .init = (struct nvk_mme_mthd_data[]) { { NVK_SET_MME_SCRATCH(ANTI_ALIAS), 0x30 }, + { NVK_SET_MME_SCRATCH(SAMPLE_MASKS_2PASS_0), 0x0f000f }, + { NVK_SET_MME_SCRATCH(SAMPLE_MASKS_2PASS_1), 0x0f000f }, + { NVK_SET_MME_SCRATCH(SAMPLE_MASKS_2PASS_2), 0xf000f0 }, + { NVK_SET_MME_SCRATCH(SAMPLE_MASKS_2PASS_3), 0xf000f0 }, { } }, .params = (uint32_t[]) { 0x000f0002 }, diff --git a/src/nouveau/vulkan/nvk_mme.h b/src/nouveau/vulkan/nvk_mme.h index 6bb71a92512..1fa66843423 100644 --- a/src/nouveau/vulkan/nvk_mme.h +++ b/src/nouveau/vulkan/nvk_mme.h @@ -66,6 +66,14 @@ enum nvk_mme_scratch { NVK_MME_SCRATCH_TESS_PARAMS, /* Anti-aliasing state */ + NVK_MME_SCRATCH_SAMPLE_MASKS_2PASS_0, + NVK_MME_SCRATCH_SAMPLE_MASKS_2PASS_1, + NVK_MME_SCRATCH_SAMPLE_MASKS_2PASS_2, + 
NVK_MME_SCRATCH_SAMPLE_MASKS_2PASS_3, + NVK_MME_SCRATCH_SAMPLE_MASKS_4PASS_0, + NVK_MME_SCRATCH_SAMPLE_MASKS_4PASS_1, + NVK_MME_SCRATCH_SAMPLE_MASKS_4PASS_2, + NVK_MME_SCRATCH_SAMPLE_MASKS_4PASS_3, NVK_MME_SCRATCH_ANTI_ALIAS, /* Addres of cb0 */ @@ -111,6 +119,9 @@ _nvk_mme_load_scratch(struct mme_builder *b, enum nvk_mme_scratch scratch) #define nvk_mme_load_scratch(b, S) \ _nvk_mme_load_scratch(b, NVK_MME_SCRATCH_##S) +#define nvk_mme_load_scratch_arr(b, S, i) \ + _nvk_mme_load_scratch(b, NVK_MME_SCRATCH_##S + (i)) + static inline void _nvk_mme_store_scratch(struct mme_builder *b, enum nvk_mme_scratch scratch, struct mme_value data)