diff --git a/src/panfrost/lib/pan_fb.h b/src/panfrost/lib/pan_fb.h
index f411c18e47e..95a5294f301 100644
--- a/src/panfrost/lib/pan_fb.h
+++ b/src/panfrost/lib/pan_fb.h
@@ -212,12 +212,25 @@ enum ENUM_PACKED pan_fb_msaa_copy_op {
     */
    PAN_FB_MSAA_COPY_SINGLE,
 
+   /** Copies assuming all source samples are identical
+    *
+    * Since the samples are known a priori to be identical, every resolve
+    * mode yields the same result and the copy is free to use any of them.
+    */
+   PAN_FB_MSAA_COPY_IDENTICAL,
+
    /** Copies sample 0 from the source to all samples in the destination */
    PAN_FB_MSAA_COPY_SAMPLE_0,
 
    /** Copies the average of the samples in the source to the destination */
    PAN_FB_MSAA_COPY_AVERAGE,
 
+   /** Copies the minimum of the samples in the source to the destination */
+   PAN_FB_MSAA_COPY_MIN,
+
+   /** Copies the maximum of the samples in the source to the destination */
+   PAN_FB_MSAA_COPY_MAX,
+
    PAN_FB_MSAA_COPY_OP_COUNT,
 };
 
@@ -440,8 +453,8 @@ enum ENUM_PACKED pan_fb_shader_data_type {
 
 static_assert(PAN_FB_SHADER_OP_COUNT <= (1 << 4),
               "pan_fb_shader_op fits in 4 bits");
-static_assert(PAN_FB_MSAA_COPY_OP_COUNT <= (1 << 2),
-              "pan_fb_msaa_copy_op fits in 2 bits");
+static_assert(PAN_FB_MSAA_COPY_OP_COUNT <= (1 << 3),
+              "pan_fb_msaa_copy_op fits in 3 bits");
 static_assert(PAN_FB_SHADER_DATA_TYPE_COUNT <= (1 << 2),
               "pan_fb_shader_data_type fits in 2 bits");
 
@@ -452,16 +465,17 @@ static_assert(PAN_FB_SHADER_DATA_TYPE_COUNT <= (1 << 2),
 PRAGMA_DIAGNOSTIC_PUSH
 PRAGMA_DIAGNOSTIC_ERROR(-Wpadded)
 struct pan_fb_shader_key_target {
-   uint16_t in_bounds_op : 4;
-   uint16_t border_op : 4;
-   uint16_t image_msaa : 2;
-   uint16_t image_dim : 2;
-   uint16_t image_is_array : 1;
-   uint16_t data_type : 2;
-   uint16_t _pad : 1;
+   uint32_t in_bounds_op : 4;
+   uint32_t border_op : 4;
+   uint32_t image_msaa : 3;
+   uint32_t image_dim : 2;
+   uint32_t image_is_array : 1;
+   uint32_t image_samples_log2 : 3;
+   uint32_t data_type : 2;
+   uint32_t pad : 13;
 };
 PRAGMA_DIAGNOSTIC_POP
-static_assert(sizeof(struct pan_fb_shader_key_target) == 2,
+static_assert(sizeof(struct pan_fb_shader_key_target) == 4,
               "This struct has no holes");
 
 /** Whether or not the given target is written by the FB shader
@@ -484,7 +498,7 @@ struct pan_fb_shader_key {
    struct pan_fb_shader_key_target z, s;
 };
 PRAGMA_DIAGNOSTIC_POP
-static_assert(sizeof(struct pan_fb_shader_key) == 2 * (PAN_MAX_RTS + 2),
+static_assert(sizeof(struct pan_fb_shader_key) == 4 * (PAN_MAX_RTS + 2),
               "This struct has no holes");
 
 #ifdef PAN_ARCH
diff --git a/src/panfrost/lib/pan_fb_nir.c b/src/panfrost/lib/pan_fb_nir.c
index 3f8525223f8..ccc8c4912b0 100644
--- a/src/panfrost/lib/pan_fb_nir.c
+++ b/src/panfrost/lib/pan_fb_nir.c
@@ -71,6 +71,14 @@ reduce_msaa_op(enum pan_fb_msaa_copy_op msaa,
    return msaa;
 }
 
+static bool
+msaa_op_needs_sample_count(enum pan_fb_msaa_copy_op msaa)
+{
+   return msaa == PAN_FB_MSAA_COPY_AVERAGE ||
+          msaa == PAN_FB_MSAA_COPY_MIN ||
+          msaa == PAN_FB_MSAA_COPY_MAX;
+}
+
 static const struct pan_fb_shader_key_target key_target_dont_care = {
    .in_bounds_op = PAN_FB_SHADER_DONT_CARE,
    .border_op = PAN_FB_SHADER_DONT_CARE,
@@ -142,6 +150,8 @@ get_key_target(enum pipe_format format,
       .image_msaa = msaa,
       .image_dim = dim,
       .image_is_array = is_array,
+      .image_samples_log2 = msaa_op_needs_sample_count(msaa)
+         ? util_logbase2(image_sample_count) : 0,
       .data_type = data_type_for_format(format),
    };
 }
@@ -192,6 +202,75 @@ GENX(pan_fb_load_shader_key_fill)(struct pan_fb_shader_key *key,
    }
 }
 
+static nir_def *
+combine_samples_no_div(nir_builder *b, nir_def **samples, uint8_t sample_count,
+                       const nir_alu_type nir_type,
+                       enum pan_fb_msaa_copy_op msaa)
+{
+   assert(util_is_power_of_two_nonzero(sample_count));
+   if (sample_count == 1)
+      return samples[0];
+
+   nir_def *lo = combine_samples_no_div(b, samples,
+                                        sample_count / 2, nir_type, msaa);
+   nir_def *hi = combine_samples_no_div(b, samples + (sample_count / 2),
+                                        sample_count / 2, nir_type, msaa);
+
+   /* We assume the first half's result is always emitted before the second
+    * half's, so placing the cursor right after the second half's result
+    * puts the combine instruction after both of its operands.
+    */
+   b->cursor = nir_after_instr(nir_def_instr(hi));
+
+   switch (msaa) {
+   case PAN_FB_MSAA_COPY_AVERAGE:
+      assert(nir_alu_type_get_base_type(nir_type) == nir_type_float);
+      return nir_fadd(b, lo, hi);
+
+   case PAN_FB_MSAA_COPY_MIN:
+      switch (nir_alu_type_get_base_type(nir_type)) {
+      case nir_type_float:
+         return nir_fmin(b, lo, hi);
+      case nir_type_uint:
+         return nir_umin(b, lo, hi);
+      case nir_type_int:
+         return nir_imin(b, lo, hi);
+      default:
+         UNREACHABLE("Unsupported NIR type");
+      }
+
+   case PAN_FB_MSAA_COPY_MAX:
+      switch (nir_alu_type_get_base_type(nir_type)) {
+      case nir_type_float:
+         return nir_fmax(b, lo, hi);
+      case nir_type_uint:
+         return nir_umax(b, lo, hi);
+      case nir_type_int:
+         return nir_imax(b, lo, hi);
+      default:
+         UNREACHABLE("Unsupported NIR type");
+      }
+
+   default:
+      UNREACHABLE("Invalid MSAA op");
+   }
+}
+
+static nir_def *
+combine_samples(nir_builder *b, nir_def **samples, uint8_t sample_count,
+                const nir_alu_type nir_type, enum pan_fb_msaa_copy_op msaa)
+{
+   if (msaa == PAN_FB_MSAA_COPY_SAMPLE_0)
+      return samples[0];
+
+   nir_def *val = combine_samples_no_div(b, samples, sample_count,
+                                         nir_type, msaa);
+   if (msaa == PAN_FB_MSAA_COPY_AVERAGE)
+      val = nir_fdiv_imm(b, val, sample_count);
+
+   return val;
+}
+
 static nir_def *
 build_sample_id(nir_builder *b)
 {
@@ -199,6 +278,92 @@ build_sample_id(nir_builder *b)
    return nir_load_sample_id(b);
 }
 
+static nir_def *
+build_image_load(nir_builder *b, const nir_alu_type nir_type,
+                 nir_def *pos, gl_frag_result location,
+                 enum pan_fb_msaa_copy_op msaa, uint8_t sample_count,
+                 enum mali_texture_dimension dim, bool is_array)
+{
+   assert(pos->num_components == 3);
+   switch (dim) {
+   case MALI_TEXTURE_DIMENSION_1D:
+      if (is_array)
+         pos = nir_channels(b, pos, 0b101);
+      else
+         pos = nir_channel(b, pos, 0);
+      break;
+
+   case MALI_TEXTURE_DIMENSION_CUBE:
+      assert(is_array);
+      break;
+
+   case MALI_TEXTURE_DIMENSION_2D:
+      if (!is_array)
+         pos = nir_channels(b, pos, 0b011);
+      break;
+
+   case MALI_TEXTURE_DIMENSION_3D:
+      break;
+
+   default:
+      UNREACHABLE("Unsupported dim");
+   }
+
+   nir_def *val;
+   switch (msaa) {
+   case PAN_FB_MSAA_COPY_SINGLE:
+      val = nir_txf(b, pos,
+                    .texture_index = location,
+                    .dim = mali_to_glsl_dim(dim),
+                    .dest_type = nir_type,
+                    .is_array = is_array);
+      break;
+
+   case PAN_FB_MSAA_COPY_ALL:
+   case PAN_FB_MSAA_COPY_IDENTICAL:
+   case PAN_FB_MSAA_COPY_SAMPLE_0: {
+      assert(dim == MALI_TEXTURE_DIMENSION_2D);
+
+      nir_def *sample_id = msaa == PAN_FB_MSAA_COPY_ALL
+         ? build_sample_id(b) : nir_imm_int(b, 0);
+
+      val = nir_txf_ms(b, pos, sample_id,
+                       .texture_index = location,
+                       .dim = GLSL_SAMPLER_DIM_MS,
+                       .dest_type = nir_type,
+                       .is_array = is_array);
+      break;
+   }
+
+   case PAN_FB_MSAA_COPY_AVERAGE:
+   case PAN_FB_MSAA_COPY_MIN:
+   case PAN_FB_MSAA_COPY_MAX: {
+      assert(dim == MALI_TEXTURE_DIMENSION_2D);
+      assert(sample_count > 0);
+
+      nir_def *samples[16];
+      assert(sample_count <= ARRAY_SIZE(samples));
+      for (uint32_t s = 0; s < sample_count; s++) {
+         samples[s] = nir_txf_ms(b, pos, nir_imm_int(b, s),
+                                 .texture_index = location,
+                                 .dim = GLSL_SAMPLER_DIM_MS,
+                                 .dest_type = nir_type,
+                                 .is_array = is_array);
+      }
+      val = combine_samples(b, samples, sample_count, nir_type, msaa);
+      break;
+   }
+
+   case PAN_FB_MSAA_COPY_OP_COUNT:
+      UNREACHABLE("Invalid copy op");
+   }
+
+   if (location == FRAG_RESULT_DEPTH || location == FRAG_RESULT_STENCIL)
+      val = nir_channel(b, val, 0);
+
+   return val;
+}
+
 static nir_def *
 build_load(nir_builder *b, nir_def *pos,
            enum pan_fb_shader_op op,
@@ -218,59 +383,11 @@ build_load(nir_builder *b, nir_def *pos,
                              .io_semantics.location = location,
                              .dest_type = nir_type);
 
-   case PAN_FB_SHADER_LOAD_IMAGE: {
-      assert(pos->num_components == 3);
-      switch (target->image_dim) {
-      case MALI_TEXTURE_DIMENSION_1D:
-         if (target->image_is_array)
-            pos = nir_channels(b, pos, 0b101);
-         else
-            pos = nir_channel(b, pos, 0);
-         break;
-
-      case MALI_TEXTURE_DIMENSION_CUBE:
-         assert(target->image_is_array);
-         break;
-
-      case MALI_TEXTURE_DIMENSION_2D:
-         if (!target->image_is_array)
-            pos = nir_channels(b, pos, 0b011);
-         break;
-
-      case MALI_TEXTURE_DIMENSION_3D:
-         break;
-
-      default:
-         UNREACHABLE("Unsupported dim");
-      }
-
-      nir_def *val;
-      if (target->image_msaa == PAN_FB_MSAA_COPY_SINGLE) {
-         val = nir_txf(b, pos,
-                       .texture_index = location,
-                       .dim = mali_to_glsl_dim(target->image_dim),
-                       .dest_type = nir_type,
-                       .is_array = target->image_is_array);
-      } else {
-         assert(target->image_dim == MALI_TEXTURE_DIMENSION_2D);
-
-         assert(target->image_msaa == PAN_FB_MSAA_COPY_ALL ||
-                target->image_msaa == PAN_FB_MSAA_COPY_SAMPLE_0);
-         nir_def *sample_id = target->image_msaa == PAN_FB_MSAA_COPY_ALL
-            ? build_sample_id(b) : nir_imm_int(b, 0);
-
-         val = nir_txf_ms(b, pos, sample_id,
-                          .texture_index = location,
-                          .dim = GLSL_SAMPLER_DIM_MS,
-                          .dest_type = nir_type,
-                          .is_array = target->image_is_array);
-      }
-
-      if (is_zs)
-         val = nir_channel(b, val, 0);
-
-      return val;
-   }
+   case PAN_FB_SHADER_LOAD_IMAGE:
+      return build_image_load(b, nir_type, pos, location,
+                              target->image_msaa,
+                              1 << target->image_samples_log2,
+                              target->image_dim, target->image_is_array);
 
    default:
       UNREACHABLE("Unsupported load op");