diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 75ec58100d0..aba5f11a60b 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -5306,6 +5306,11 @@ typedef struct nir_lower_image_options {
     * If true, lower cube size operations.
     */
    bool lower_cube_size;
+
+   /**
+    * If true, lower multisample image loads and samples_identical to use fragment_mask_load_amd.
+    */
+   bool lower_to_fragment_mask_load_amd;
 } nir_lower_image_options;
 
 bool nir_lower_image(nir_shader *nir,
diff --git a/src/compiler/nir/nir_lower_image.c b/src/compiler/nir/nir_lower_image.c
index 5dab8f5be9b..e9dfd82d4dd 100644
--- a/src/compiler/nir/nir_lower_image.c
+++ b/src/compiler/nir/nir_lower_image.c
@@ -61,6 +61,85 @@ lower_cube_size(nir_builder *b, nir_intrinsic_instr *intrin)
    nir_instr_free(&intrin->instr);
 }
 
+static void
+lower_image_to_fragment_mask_load(nir_builder *b, nir_intrinsic_instr *intrin)
+{
+   b->cursor = nir_before_instr(&intrin->instr);
+
+   nir_intrinsic_op fmask_op;
+   switch (intrin->intrinsic) {
+   case nir_intrinsic_image_load:
+      fmask_op = nir_intrinsic_image_fragment_mask_load_amd;
+      break;
+   case nir_intrinsic_image_deref_load:
+      fmask_op = nir_intrinsic_image_deref_fragment_mask_load_amd;
+      break;
+   case nir_intrinsic_bindless_image_load:
+      fmask_op = nir_intrinsic_bindless_image_fragment_mask_load_amd;
+      break;
+   default:
+      unreachable("bad intrinsic");
+      break;
+   }
+
+   nir_ssa_def *fmask =
+      nir_image_fragment_mask_load_amd(b, intrin->src[0].ssa, intrin->src[1].ssa,
+                                       .image_dim = nir_intrinsic_image_dim(intrin),
+                                       .image_array = nir_intrinsic_image_array(intrin),
+                                       .format = nir_intrinsic_format(intrin),
+                                       .access = nir_intrinsic_access(intrin));
+
+   /* Retarget the emitted fmask load to the variant selected by the switch above. */
+   nir_intrinsic_instr *fmask_load = nir_instr_as_intrinsic(fmask->parent_instr);
+   fmask_load->intrinsic = fmask_op;
+
+   /* Remap the sample index: ubfe the 4-bit field at bit offset (old index << 2) from fmask. */
+   nir_ssa_def *sample_index_old = intrin->src[2].ssa;
+   nir_ssa_def *fmask_offset = nir_ishl_imm(b, sample_index_old, 2);
+   nir_ssa_def *fmask_width = nir_imm_int(b, 4);
+   nir_ssa_def *sample_index_new = nir_ubfe(b, fmask, fmask_offset, fmask_width);
+
+   /* Point the original image load at the remapped sample index (src[2]). */
+   nir_instr_rewrite_src_ssa(&intrin->instr, &intrin->src[2], sample_index_new);
+
+   /* Set ACCESS_FMASK_LOWERED_AMD so this pass does not lower the same load again. */
+   enum gl_access_qualifier access = nir_intrinsic_access(intrin);
+   nir_intrinsic_set_access(intrin, access | ACCESS_FMASK_LOWERED_AMD);
+}
+
+static void
+lower_image_samples_identical_to_fragment_mask_load(nir_builder *b, nir_intrinsic_instr *intrin)
+{
+   b->cursor = nir_before_instr(&intrin->instr);
+
+   nir_intrinsic_instr *fmask_load =
+      nir_instr_as_intrinsic(nir_instr_clone(b->shader, &intrin->instr));
+
+   switch (intrin->intrinsic) {
+   case nir_intrinsic_image_samples_identical:
+      fmask_load->intrinsic = nir_intrinsic_image_fragment_mask_load_amd;
+      break;
+   case nir_intrinsic_image_deref_samples_identical:
+      fmask_load->intrinsic = nir_intrinsic_image_deref_fragment_mask_load_amd;
+      break;
+   case nir_intrinsic_bindless_image_samples_identical:
+      fmask_load->intrinsic = nir_intrinsic_bindless_image_fragment_mask_load_amd;
+      break;
+   default:
+      unreachable("bad intrinsic");
+      break;
+   }
+
+   nir_ssa_dest_init(&fmask_load->instr, &fmask_load->dest, 1, 32, NULL);
+   nir_builder_instr_insert(b, &fmask_load->instr);
+
+   nir_ssa_def *samples_identical = nir_ieq_imm(b, &fmask_load->dest.ssa, 0);
+   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, samples_identical);
+
+   nir_instr_remove(&intrin->instr);
+   nir_instr_free(&intrin->instr);
+}
+
 static bool
 lower_image_instr(nir_builder *b, nir_instr *instr, void *state)
 {
@@ -81,6 +160,28 @@ lower_image_instr(nir_builder *b, nir_instr *instr, void *state)
       }
       return false;
 
+   case nir_intrinsic_image_load:
+   case nir_intrinsic_image_deref_load:
+   case nir_intrinsic_bindless_image_load:
+      if (options->lower_to_fragment_mask_load_amd &&
+          nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_MS &&
+          /* Skip loads already marked ACCESS_FMASK_LOWERED_AMD by this pass. */
+          !(nir_intrinsic_access(intrin) & ACCESS_FMASK_LOWERED_AMD)) {
+         lower_image_to_fragment_mask_load(b, intrin);
+         return true;
+      }
+      return false;
+
+   case nir_intrinsic_image_samples_identical:
+   case nir_intrinsic_image_deref_samples_identical:
+   case nir_intrinsic_bindless_image_samples_identical:
+      if (options->lower_to_fragment_mask_load_amd &&
+          nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_MS) {
+         lower_image_samples_identical_to_fragment_mask_load(b, intrin);
+         return true;
+      }
+      return false;
+
    default:
       return false;
    }
diff --git a/src/compiler/shader_enums.h b/src/compiler/shader_enums.h
index ca80ac72927..b14a2736104 100644
--- a/src/compiler/shader_enums.h
+++ b/src/compiler/shader_enums.h
@@ -1030,6 +1030,12 @@ enum gl_access_qualifier
     * buffers and sampler buffers).
     */
    ACCESS_USES_FORMAT_AMD       = (1 << 10),
+
+   /**
+    * Whether a multisample image load intrinsic uses a sample index extracted
+    * from the fragment mask buffer.
+    */
+   ACCESS_FMASK_LOWERED_AMD = (1 << 11),
 };
 
 /**