From bddfbe7fb1e3d8fe947beefdf6d260d13ddefe2a Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 2 Sep 2025 14:20:34 +0300 Subject: [PATCH] brw/blorp: lower MCS fetching in NIR One advantage here of moving a bunch of stuff to NIR is that we can now have consistent payload types straight from the NIR conversion to BRW. This massively simplifies the BRW lowering code and avoids type errors that are quite common to make in the backend. Signed-off-by: Lionel Landwerlin Reviewed-by: Kenneth Graunke Part-of: --- src/intel/blorp/blorp_blit.c | 92 ++++++------ src/intel/blorp/blorp_clear.c | 3 +- src/intel/blorp/blorp_nir_builder.h | 9 +- src/intel/compiler/brw_from_nir.cpp | 142 +++++++----------- .../compiler/brw_lower_logical_sends.cpp | 13 +- src/intel/compiler/brw_nir.c | 2 + src/intel/compiler/brw_nir.h | 3 + src/intel/compiler/brw_nir_lower_texture.c | 129 ++++++++++++++-- 8 files changed, 243 insertions(+), 150 deletions(-) diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c index bb353ae6cc5..312e455764a 100644 --- a/src/intel/blorp/blorp_blit.c +++ b/src/intel/blorp/blorp_blit.c @@ -131,10 +131,19 @@ blorp_blit_apply_transform(nir_builder *b, nir_def *src_pos, return nir_fadd(b, nir_fmul(b, src_pos, mul), offset); } +static bool +tex_needs_16bits(nir_texop op, const struct intel_device_info *devinfo) +{ + return devinfo->verx10 >= 125 && + (op == nir_texop_txf_ms || + op == nir_texop_txf_ms_mcs_intel); +} + static nir_tex_instr * blorp_create_nir_tex_instr(nir_builder *b, struct blorp_blit_vars *v, nir_texop op, nir_def *pos, unsigned num_srcs, - nir_alu_type dst_type) + nir_alu_type dst_type, + const struct intel_device_info *devinfo) { nir_tex_instr *tex = nir_tex_instr_create(b->shader, num_srcs); @@ -161,7 +170,9 @@ blorp_create_nir_tex_instr(nir_builder *b, struct blorp_blit_vars *v, nir_load_var(b, v->v_src_z)); } - tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord, pos); + tex->src[0] = nir_tex_src_for_ssa( + nir_tex_src_coord, + tex_needs_16bits(op, devinfo) ? nir_u2u16(b, pos) : pos); tex->coord_components = 3; nir_def_init(&tex->instr, &tex->def, 4, 32); @@ -171,7 +182,8 @@ blorp_create_nir_tex_instr(nir_builder *b, struct blorp_blit_vars *v, static nir_def * blorp_nir_tex(nir_builder *b, struct blorp_blit_vars *v, - const struct blorp_blit_prog_key *key, nir_def *pos) + const struct blorp_blit_prog_key *key, nir_def *pos, + const struct intel_device_info *devinfo) { if (key->need_src_offset) pos = nir_fadd(b, pos, nir_i2f32(b, nir_load_var(b, v->v_src_offset))); @@ -182,11 +194,13 @@ blorp_nir_tex(nir_builder *b, struct blorp_blit_vars *v, nir_tex_instr *tex = blorp_create_nir_tex_instr(b, v, nir_texop_txl, pos, 2, - key->texture_data_type); + key->texture_data_type, devinfo); assert(pos->num_components == 2); tex->sampler_dim = GLSL_SAMPLER_DIM_2D; - tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_lod, nir_imm_int(b, 0)); + tex->src[1] = nir_tex_src_for_ssa( + nir_tex_src_lod, + nir_imm_intN_t(b, 0, tex_needs_16bits(nir_texop_txl, devinfo) ? 16 : 32)); nir_builder_instr_insert(b, &tex->instr); @@ -195,10 +209,11 @@ blorp_nir_tex(nir_builder *b, struct blorp_blit_vars *v, static nir_def * blorp_nir_txf(nir_builder *b, struct blorp_blit_vars *v, - nir_def *pos, nir_alu_type dst_type) + nir_def *pos, nir_alu_type dst_type, + const struct intel_device_info *devinfo) { nir_tex_instr *tex = - blorp_create_nir_tex_instr(b, v, nir_texop_txf, pos, 2, dst_type); + blorp_create_nir_tex_instr(b, v, nir_texop_txf, pos, 2, dst_type, devinfo); tex->sampler_dim = GLSL_SAMPLER_DIM_3D; tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_lod, nir_imm_int(b, 0)); @@ -210,26 +225,26 @@ blorp_nir_txf(nir_builder *b, struct blorp_blit_vars *v, static nir_def * blorp_nir_txf_ms(nir_builder *b, struct blorp_blit_vars *v, - nir_def *pos, nir_def *mcs, nir_alu_type dst_type) + nir_def *pos, nir_alu_type dst_type, + const struct intel_device_info *devinfo) { - nir_tex_instr *tex = - blorp_create_nir_tex_instr(b, v, nir_texop_txf_ms, pos, 3, dst_type); + nir_tex_instr *tex = blorp_create_nir_tex_instr( + b, v, nir_texop_txf_ms, pos, 2, dst_type, devinfo); tex->sampler_dim = GLSL_SAMPLER_DIM_MS; tex->src[1].src_type = nir_tex_src_ms_index; if (pos->num_components == 2) { - tex->src[1].src = nir_src_for_ssa(nir_imm_int(b, 0)); + tex->src[1].src = nir_src_for_ssa( + nir_imm_intN_t(b, 0, tex_needs_16bits(nir_texop_txf_ms, devinfo) ? 16 : 32)); } else { assert(pos->num_components == 3); - tex->src[1].src = nir_src_for_ssa(nir_channel(b, pos, 2)); + tex->src[1].src = nir_src_for_ssa( + tex_needs_16bits(nir_texop_txf_ms, devinfo) ? + nir_u2u16(b, nir_channel(b, pos, 2)) : + nir_channel(b, pos, 2)); } - if (!mcs) - mcs = nir_imm_zero(b, 4, 32); - - tex->src[2] = nir_tex_src_for_ssa(nir_tex_src_ms_mcs_intel, mcs); - nir_builder_instr_insert(b, &tex->instr); return &tex->def; @@ -237,11 +252,12 @@ blorp_nir_txf_ms(nir_builder *b, struct blorp_blit_vars *v, static nir_def * blorp_blit_txf_ms_mcs(nir_builder *b, struct blorp_blit_vars *v, - nir_def *pos) + nir_def *pos, + const struct intel_device_info *devinfo) { nir_tex_instr *tex = blorp_create_nir_tex_instr(b, v, nir_texop_txf_ms_mcs_intel, - pos, 1, nir_type_int); + pos, 1, nir_type_int, devinfo); tex->sampler_dim = GLSL_SAMPLER_DIM_MS; @@ -558,14 +574,15 @@ blorp_nir_combine_samples(nir_builder *b, struct blorp_blit_vars *v, nir_def *pos, unsigned tex_samples, enum isl_aux_usage tex_aux_usage, nir_alu_type dst_type, - enum blorp_filter filter) + enum blorp_filter filter, + const struct intel_device_info *devinfo) { nir_variable *color = nir_local_variable_create(b->impl, glsl_vec4_type(), "color"); nir_def *mcs = NULL; if (isl_aux_usage_has_mcs(tex_aux_usage)) - mcs = blorp_blit_txf_ms_mcs(b, v, pos); + mcs = blorp_blit_txf_ms_mcs(b, v, pos, devinfo); nir_op combine_op; switch (filter) { @@ -641,7 +658,7 @@ blorp_nir_combine_samples(nir_builder *b, struct blorp_blit_vars *v, nir_def *ms_pos = nir_vec3(b, nir_channel(b, pos, 0), nir_channel(b, pos, 1), nir_imm_int(b, i)); - texture_data[stack_depth++] = blorp_nir_txf_ms(b, v, ms_pos, mcs, dst_type); + texture_data[stack_depth++] = blorp_nir_txf_ms(b, v, ms_pos, dst_type, devinfo); if (i == 0 && isl_aux_usage_has_mcs(tex_aux_usage)) { /* The Ivy Bridge PRM, Vol4 Part1 p27 (Multisample Control Surface) @@ -711,7 +728,8 @@ static nir_def * blorp_nir_manual_blend_bilinear(nir_builder *b, nir_def *pos, unsigned tex_samples, const struct blorp_blit_prog_key *key, - struct blorp_blit_vars *v) + struct blorp_blit_vars *v, + const struct intel_device_info *devinfo) { nir_def *pos_xy = nir_trim_vector(b, pos, 2); nir_def *rect_grid = nir_load_var(b, v->v_rect_grid); @@ -747,15 +765,6 @@ blorp_nir_manual_blend_bilinear(nir_builder *b, nir_def *pos, nir_def *sample_coords = nir_fadd(b, pos_xy, sample_off); nir_def *sample_coords_int = nir_f2i32(b, sample_coords); - /* The MCS value we fetch has to match up with the pixel that we're - * sampling from. Since we sample from different pixels in each - * iteration of this "for" loop, the call to mcs_fetch() should be - * here inside the loop after computing the pixel coordinates. - */ - nir_def *mcs = NULL; - if (isl_aux_usage_has_mcs(key->tex_aux_usage)) - mcs = blorp_blit_txf_ms_mcs(b, v, sample_coords_int); - /* Compute sample index and map the sample index to a sample number. * Sample index layout shows the numbering of slots in a rectangular * grid of samples with in a pixel. Sample number layout shows the @@ -835,7 +844,7 @@ blorp_nir_manual_blend_bilinear(nir_builder *b, nir_def *pos, nir_def *pos_ms = nir_vec3(b, nir_channel(b, sample_coords_int, 0), nir_channel(b, sample_coords_int, 1), sample); - tex_data[i] = blorp_nir_txf_ms(b, v, pos_ms, mcs, key->texture_data_type); + tex_data[i] = blorp_nir_txf_ms(b, v, pos_ms, key->texture_data_type, devinfo); } nir_def *frac_x = nir_channel(b, frac_xy, 0); @@ -1347,13 +1356,9 @@ blorp_build_nir_shader(struct blorp_context *blorp, * memory location. So we can fetch the texel now. */ if (key->src_samples == 1) { - color = blorp_nir_txf(&b, &v, src_pos, key->texture_data_type); + color = blorp_nir_txf(&b, &v, src_pos, key->texture_data_type, devinfo); } else { - nir_def *mcs = NULL; - if (isl_aux_usage_has_mcs(key->tex_aux_usage)) - mcs = blorp_blit_txf_ms_mcs(&b, &v, src_pos); - - color = blorp_nir_txf_ms(&b, &v, src_pos, mcs, key->texture_data_type); + color = blorp_nir_txf_ms(&b, &v, src_pos, key->texture_data_type, devinfo); } break; @@ -1363,11 +1368,11 @@ blorp_build_nir_shader(struct blorp_context *blorp, assert(key->tex_layout == key->src_layout); if (key->src_samples == 1) { - color = blorp_nir_tex(&b, &v, key, src_pos); + color = blorp_nir_tex(&b, &v, key, src_pos, devinfo); } else { assert(!key->use_kill); color = blorp_nir_manual_blend_bilinear(&b, src_pos, key->src_samples, - key, &v); + key, &v, devinfo); } break; @@ -1396,13 +1401,14 @@ blorp_build_nir_shader(struct blorp_context *blorp, src_pos = nir_fadd_imm(&b, nir_i2f32(&b, src_pos), 0.5f); - color = blorp_nir_tex(&b, &v, key, src_pos); + color = blorp_nir_tex(&b, &v, key, src_pos, devinfo); } else { /* Gfx7+ hardware doesn't automatically blend. */ color = blorp_nir_combine_samples(&b, &v, src_pos, key->src_samples, key->tex_aux_usage, key->texture_data_type, - key->filter); + key->filter, + devinfo); } break; diff --git a/src/intel/blorp/blorp_clear.c b/src/intel/blorp/blorp_clear.c index 853a14a2653..845ebfb42e1 100644 --- a/src/intel/blorp/blorp_clear.c +++ b/src/intel/blorp/blorp_clear.c @@ -1433,7 +1433,8 @@ blorp_params_get_mcs_partial_resolve_kernel(struct blorp_batch *batch, /* Do an MCS fetch and check if it is equal to the magic clear value */ nir_def *mcs = blorp_nir_txf_ms_mcs(&b, nir_f2i32(&b, nir_load_frag_coord(&b)), - nir_load_layer_id(&b)); + nir_load_layer_id(&b), + blorp->isl_dev->info); nir_def *is_clear = blorp_nir_mcs_is_clear_color(&b, mcs, blorp_key.num_samples); diff --git a/src/intel/blorp/blorp_nir_builder.h b/src/intel/blorp/blorp_nir_builder.h index 2b308b46797..1dbd29b73b8 100644 --- a/src/intel/blorp/blorp_nir_builder.h +++ b/src/intel/blorp/blorp_nir_builder.h @@ -21,6 +21,8 @@ * IN THE SOFTWARE. */ +#include "dev/intel_device_info.h" + #include "compiler/nir/nir_builder.h" #include "blorp_priv.h" @@ -42,7 +44,8 @@ blorp_nir_init_shader(nir_builder *b, } static inline nir_def * -blorp_nir_txf_ms_mcs(nir_builder *b, nir_def *xy_pos, nir_def *layer) +blorp_nir_txf_ms_mcs(nir_builder *b, nir_def *xy_pos, nir_def *layer, + const struct intel_device_info *devinfo) { nir_tex_instr *tex = nir_tex_instr_create(b->shader, 1); tex->op = nir_texop_txf_ms_mcs_intel; @@ -61,7 +64,9 @@ blorp_nir_txf_ms_mcs(nir_builder *b, nir_def *xy_pos, nir_def *layer) tex->coord_components = 2; coord = nir_trim_vector(b, xy_pos, 2); } - tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord, coord); + tex->src[0] = nir_tex_src_for_ssa( + nir_tex_src_coord, + devinfo->verx10 >= 125 ? nir_u2u16(b, coord) : coord); /* Blorp only has one texture and it's bound at unit 0 */ tex->texture_index = 0; diff --git a/src/intel/compiler/brw_from_nir.cpp b/src/intel/compiler/brw_from_nir.cpp index 8a733e062e5..8fe989ae760 100644 --- a/src/intel/compiler/brw_from_nir.cpp +++ b/src/intel/compiler/brw_from_nir.cpp @@ -3619,34 +3619,6 @@ fetch_viewport_index(const brw_builder &bld) } } -/* Sample from the MCS surface attached to this multisample texture. */ -static brw_reg -emit_mcs_fetch(nir_to_brw_state &ntb, const brw_reg &coordinate, unsigned components, - const brw_reg &texture, - const brw_reg &texture_handle) -{ - const brw_builder &bld = ntb.bld; - - const brw_reg dest = bld.vgrf(BRW_TYPE_UD, 4); - - brw_reg srcs[TEX_LOGICAL_NUM_SRCS]; - srcs[TEX_LOGICAL_SRC_COORDINATE] = coordinate; - srcs[TEX_LOGICAL_SRC_SURFACE] = texture; - srcs[TEX_LOGICAL_SRC_SAMPLER] = brw_imm_ud(0); - srcs[TEX_LOGICAL_SRC_SURFACE_HANDLE] = texture_handle; - - brw_tex_inst *tex = bld.emit(SHADER_OPCODE_TXF_MCS_LOGICAL, dest, srcs, - ARRAY_SIZE(srcs))->as_tex(); - tex->coord_components = components; - - /* We only care about one or two regs of response, but the sampler always - * writes 4/8. - */ - tex->size_written = 4 * dest.component_size(tex->exec_size); - - return dest; -} - /** * Actual coherent framebuffer read implemented using the native render target * read message. Requires SKL+. @@ -7363,6 +7335,24 @@ brw_from_nir_emit_texture(nir_to_brw_state &ntb, ASSERTED bool got_bias = false; bool pack_lod_bias_and_offset = false; uint32_t header_bits = 0; + + brw_reg_type default_src_type; + switch (instr->op) { + case nir_texop_txf_ms: + case nir_texop_txf_ms_mcs_intel: + default_src_type = devinfo->verx10 >= 125 ? BRW_TYPE_W : BRW_TYPE_D; + break; + + case nir_texop_txf: + case nir_texop_txs: + default_src_type = BRW_TYPE_D; + break; + + default: + default_src_type = BRW_TYPE_F; + break; + } + for (unsigned i = 0; i < instr->num_srcs; i++) { nir_src nir_src = instr->src[i].src; brw_reg src = get_nir_src(ntb, nir_src, -1); @@ -7374,62 +7364,57 @@ brw_from_nir_emit_texture(nir_to_brw_state &ntb, if (nir_tex_instr_src_size(instr, i) == 1) src = offset(src, bld, 0); + brw_reg_type src_type = BRW_TYPE_F; + switch (instr->src[i].src_type) { + case nir_tex_src_sampler_offset: + case nir_tex_src_texture_offset: + case nir_tex_src_sampler_handle: + case nir_tex_src_texture_handle: + case nir_tex_src_offset: + src_type = BRW_TYPE_D; + break; + + case nir_tex_src_backend1: + case nir_tex_src_backend2: + src_type = BRW_TYPE_UD; + break; + + default: + src_type = default_src_type; + } + switch (instr->src[i].src_type) { case nir_tex_src_bias: assert(!got_lod); got_bias = true; - srcs[TEX_LOGICAL_SRC_LOD] = - retype(get_nir_src_imm(ntb, instr->src[i].src), BRW_TYPE_F); + retype(get_nir_src_imm(ntb, instr->src[i].src), src_type); break; case nir_tex_src_comparator: - srcs[TEX_LOGICAL_SRC_SHADOW_C] = retype(src, BRW_TYPE_F); + srcs[TEX_LOGICAL_SRC_SHADOW_C] = retype(src, src_type); break; case nir_tex_src_coord: - switch (instr->op) { - case nir_texop_txf: - case nir_texop_txf_ms: - case nir_texop_txf_ms_mcs_intel: - case nir_texop_samples_identical: - srcs[TEX_LOGICAL_SRC_COORDINATE] = retype(src, BRW_TYPE_D); - break; - default: - srcs[TEX_LOGICAL_SRC_COORDINATE] = retype(src, BRW_TYPE_F); - break; - } + srcs[TEX_LOGICAL_SRC_COORDINATE] = retype(src, src_type); break; case nir_tex_src_ddx: - srcs[TEX_LOGICAL_SRC_LOD] = retype(src, BRW_TYPE_F); + srcs[TEX_LOGICAL_SRC_LOD] = retype(src, src_type); lod_components = nir_tex_instr_src_size(instr, i); break; case nir_tex_src_ddy: - srcs[TEX_LOGICAL_SRC_LOD2] = retype(src, BRW_TYPE_F); + srcs[TEX_LOGICAL_SRC_LOD2] = retype(src, src_type); break; case nir_tex_src_lod: assert(!got_bias); got_lod = true; - - switch (instr->op) { - case nir_texop_txs: - srcs[TEX_LOGICAL_SRC_LOD] = - retype(get_nir_src_imm(ntb, instr->src[i].src), BRW_TYPE_UD); - break; - case nir_texop_txf: - srcs[TEX_LOGICAL_SRC_LOD] = - retype(get_nir_src_imm(ntb, instr->src[i].src), BRW_TYPE_D); - break; - default: - srcs[TEX_LOGICAL_SRC_LOD] = - retype(get_nir_src_imm(ntb, instr->src[i].src), BRW_TYPE_F); - break; - } + srcs[TEX_LOGICAL_SRC_LOD] = + retype(get_nir_src_imm(ntb, instr->src[i].src), src_type); break; case nir_tex_src_min_lod: srcs[TEX_LOGICAL_SRC_MIN_LOD] = - retype(get_nir_src_imm(ntb, instr->src[i].src), BRW_TYPE_F); + retype(get_nir_src_imm(ntb, instr->src[i].src), src_type); break; case nir_tex_src_ms_index: - srcs[TEX_LOGICAL_SRC_SAMPLE_INDEX] = retype(src, BRW_TYPE_UD); + srcs[TEX_LOGICAL_SRC_SAMPLE_INDEX] = retype(src, src_type); break; case nir_tex_src_offset: { @@ -7443,7 +7428,7 @@ brw_from_nir_emit_texture(nir_to_brw_state &ntb, */ assert(devinfo->verx10 < 125); srcs[TEX_LOGICAL_SRC_TG4_OFFSET] = - retype(src, BRW_TYPE_D); + retype(src, src_type); } break; } @@ -7483,7 +7468,7 @@ brw_from_nir_emit_texture(nir_to_brw_state &ntb, case nir_tex_src_ms_mcs_intel: assert(instr->op == nir_texop_txf_ms); - srcs[TEX_LOGICAL_SRC_MCS] = retype(src, BRW_TYPE_D); + srcs[TEX_LOGICAL_SRC_MCS] = retype(src, src_type); break; /* If this parameter is present, we are packing offset U, V and LOD/Bias @@ -7493,7 +7478,7 @@ brw_from_nir_emit_texture(nir_to_brw_state &ntb, assert(instr->op == nir_texop_tg4); pack_lod_bias_and_offset = true; srcs[TEX_LOGICAL_SRC_LOD] = - retype(get_nir_src_imm(ntb, instr->src[i].src), BRW_TYPE_F); + retype(get_nir_src_imm(ntb, instr->src[i].src), src_type); break; /* If this parameter is present, we are packing either the explicit LOD @@ -7505,7 +7490,7 @@ brw_from_nir_emit_texture(nir_to_brw_state &ntb, got_lod = true; assert(instr->op == nir_texop_txl || instr->op == nir_texop_txb); srcs[TEX_LOGICAL_SRC_LOD] = - retype(get_nir_src_imm(ntb, instr->src[i].src), BRW_TYPE_F); + retype(get_nir_src_imm(ntb, instr->src[i].src), src_type); break; default: @@ -7523,15 +7508,8 @@ brw_from_nir_emit_texture(nir_to_brw_state &ntb, srcs[TEX_LOGICAL_SRC_SAMPLER_HANDLE].file == BAD_FILE) srcs[TEX_LOGICAL_SRC_SAMPLER] = brw_imm_ud(instr->sampler_index); - if (srcs[TEX_LOGICAL_SRC_MCS].file == BAD_FILE && - (instr->op == nir_texop_txf_ms || - instr->op == nir_texop_samples_identical)) { - srcs[TEX_LOGICAL_SRC_MCS] = - emit_mcs_fetch(ntb, srcs[TEX_LOGICAL_SRC_COORDINATE], - instr->coord_components, - srcs[TEX_LOGICAL_SRC_SURFACE], - srcs[TEX_LOGICAL_SRC_SURFACE_HANDLE]); - } + assert(srcs[TEX_LOGICAL_SRC_MCS].file != BAD_FILE || + instr->op != nir_texop_txf_ms); enum opcode opcode; switch (instr->op) { @@ -7605,22 +7583,6 @@ brw_from_nir_emit_texture(nir_to_brw_state &ntb, case nir_texop_texture_samples: opcode = SHADER_OPCODE_SAMPLEINFO_LOGICAL; break; - case nir_texop_samples_identical: { - brw_reg dst = retype(get_nir_def(ntb, instr->def), BRW_TYPE_D); - - /* If mcs is an immediate value, it means there is no MCS. In that case - * just return false. - */ - if (srcs[TEX_LOGICAL_SRC_MCS].file == IMM) { - bld.MOV(dst, brw_imm_ud(0u)); - } else { - brw_reg tmp = - bld.OR(srcs[TEX_LOGICAL_SRC_MCS], - offset(srcs[TEX_LOGICAL_SRC_MCS], bld, 1)); - bld.CMP(dst, tmp, brw_imm_ud(0u), BRW_CONDITIONAL_EQ); - } - return; - } default: UNREACHABLE("unknown texture opcode"); } diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp index 437a4345b0c..e66a102e1bd 100644 --- a/src/intel/compiler/brw_lower_logical_sends.cpp +++ b/src/intel/compiler/brw_lower_logical_sends.cpp @@ -799,6 +799,12 @@ get_sampler_msg_payload_type_bit_size(const intel_device_info *devinfo, */ if (inst->opcode != SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL) { for (unsigned i = 0; i < TEX_LOGICAL_NUM_SRCS; i++) { + /* surface/sampler don't go in the payload */ + if (i == TEX_LOGICAL_SRC_SURFACE || + i == TEX_LOGICAL_SRC_SAMPLER || + i == TEX_LOGICAL_SRC_SURFACE_HANDLE || + i == TEX_LOGICAL_SRC_SAMPLER_HANDLE) + continue; assert(src[i].file == BAD_FILE || brw_type_size_bytes(src[i].type) == src_type_size); } @@ -1093,16 +1099,13 @@ lower_sampler_logical_send(const brw_builder &bld, brw_tex_inst *tex) * ld2dms_w si mcs0 mcs1 mcs2 mcs3 u v r */ if (op == SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL) { - brw_reg tmp = offset(mcs, bld, i); sources[length] = retype(sources[length], payload_unsigned_type); bld.MOV(sources[length++], - mcs.file == IMM ? mcs : - brw_reg(subscript(tmp, payload_unsigned_type, 0))); + mcs.file == IMM ? mcs : offset(mcs, bld, 2 * i + 0)); sources[length] = retype(sources[length], payload_unsigned_type); bld.MOV(sources[length++], - mcs.file == IMM ? mcs : - brw_reg(subscript(tmp, payload_unsigned_type, 1))); + mcs.file == IMM ? mcs : offset(mcs, bld, 2 * i + 1)); } else { sources[length] = retype(sources[length], payload_unsigned_type); bld.MOV(sources[length++], diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index 962debf6989..3cac5eee9ea 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -2158,6 +2158,8 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler, if (OPT(nir_lower_tex, &tex_options)) OPT(nir_lower_tex, &tex_options); + OPT(brw_nir_lower_mcs_fetch, devinfo); + const struct brw_nir_lower_texture_opts brw_tex_options = { .combined_lod_and_array_index = compiler->devinfo->ver >= 20, .combined_lod_or_bias_and_offset = compiler->devinfo->ver >= 20, diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h index fee89cca3fb..330d6c4a25c 100644 --- a/src/intel/compiler/brw_nir.h +++ b/src/intel/compiler/brw_nir.h @@ -212,6 +212,9 @@ bool brw_nir_lower_texel_address(nir_shader *shader, const struct intel_device_info *devinfo, enum isl_tiling tiling); +bool brw_nir_lower_mcs_fetch(nir_shader *shader, + const struct intel_device_info *devinfo); + struct brw_nir_lower_texture_opts { bool combined_lod_and_array_index; bool combined_lod_or_bias_and_offset; diff --git a/src/intel/compiler/brw_nir_lower_texture.c b/src/intel/compiler/brw_nir_lower_texture.c index aa478175bcc..772cde96c4d 100644 --- a/src/intel/compiler/brw_nir_lower_texture.c +++ b/src/intel/compiler/brw_nir_lower_texture.c @@ -163,13 +163,9 @@ pack_lod_or_bias_and_offset(nir_builder *b, nir_tex_instr *tex) } static bool -brw_nir_lower_texture_instr(nir_builder *b, nir_instr *instr, void *cb_data) +brw_nir_lower_texture_instr(nir_builder *b, nir_tex_instr *tex, void *cb_data) { - if (instr->type != nir_instr_type_tex) - return false; - const struct brw_nir_lower_texture_opts *opts = cb_data; - nir_tex_instr *tex = nir_instr_as_tex(instr); switch (tex->op) { case nir_texop_txl: @@ -186,6 +182,7 @@ brw_nir_lower_texture_instr(nir_builder *b, nir_instr *instr, void *cb_data) } return false; + default: /* Nothing to do */ return false; @@ -198,8 +195,122 @@ bool brw_nir_lower_texture(nir_shader *shader, const struct brw_nir_lower_texture_opts *opts) { - return nir_shader_instructions_pass(shader, - brw_nir_lower_texture_instr, - nir_metadata_none, - (void *)opts); + return nir_shader_tex_pass(shader, + brw_nir_lower_texture_instr, + nir_metadata_control_flow, + (void *)opts); +} + +static bool +brw_nir_lower_mcs_fetch_instr(nir_builder *b, nir_tex_instr *tex, void *cb_data) +{ + switch (tex->op) { + case nir_texop_txf_ms: + case nir_texop_samples_identical: + break; + + default: + /* Nothing to do */ + return false; + } + + /* Only happens with BLORP shaders */ + if (nir_tex_instr_src_index(tex, nir_tex_src_ms_mcs_intel) != -1) + return false; + + const struct intel_device_info *devinfo = cb_data; + const bool needs_16bit_txf_ms_payload = devinfo->verx10 >= 125; + + b->cursor = nir_before_instr(&tex->instr); + + /* Convert all sources to 16bit */ + unsigned n_mcs_sources = 0; + for (uint32_t i = 0; i < tex->num_srcs; i++) { + switch (tex->src[i].src_type) { + case nir_tex_src_texture_handle: + case nir_tex_src_texture_offset: + case nir_tex_src_texture_deref: + n_mcs_sources++; + break; + + case nir_tex_src_coord: + case nir_tex_src_lod: + n_mcs_sources++; + FALLTHROUGH; + default: + if (needs_16bit_txf_ms_payload) { + nir_src_rewrite(&tex->src[i].src, + nir_u2u16(b, tex->src[i].src.ssa)); + } + break; + } + } + + nir_tex_instr *mcs_tex = nir_tex_instr_create(b->shader, n_mcs_sources); + mcs_tex->op = nir_texop_txf_ms_mcs_intel; + mcs_tex->dest_type = nir_type_uint32; + mcs_tex->sampler_dim = tex->sampler_dim; + mcs_tex->coord_components = tex->coord_components; + mcs_tex->texture_index = tex->texture_index; + mcs_tex->sampler_index = tex->sampler_index; + mcs_tex->is_array = tex->is_array; + mcs_tex->can_speculate = tex->can_speculate; + + uint32_t mcs_src = 0; + for (uint32_t i = 0; i < tex->num_srcs; i++) { + switch (tex->src[i].src_type) { + case nir_tex_src_texture_handle: + case nir_tex_src_texture_offset: + case nir_tex_src_texture_deref: + case nir_tex_src_coord: + case nir_tex_src_lod: + assert(mcs_src < mcs_tex->num_srcs); + mcs_tex->src[mcs_src++] = + nir_tex_src_for_ssa(tex->src[i].src_type, + tex->src[i].src.ssa); + break; + + default: + continue; + } + } + + nir_def_init(&mcs_tex->instr, &mcs_tex->def, 4, 32); + nir_builder_instr_insert(b, &mcs_tex->instr); + + nir_def *mcs_data = &mcs_tex->def; + if (tex->op == nir_texop_txf_ms) { + if (needs_16bit_txf_ms_payload) { + mcs_data = + nir_vec4(b, + nir_unpack_32_2x16_split_x(b, nir_channel(b, mcs_data, 0)), + nir_unpack_32_2x16_split_y(b, nir_channel(b, mcs_data, 0)), + nir_unpack_32_2x16_split_x(b, nir_channel(b, mcs_data, 1)), + nir_unpack_32_2x16_split_y(b, nir_channel(b, mcs_data, 1))); + } + + nir_tex_instr_add_src(tex, nir_tex_src_ms_mcs_intel, mcs_data); + } else { + assert(tex->op == nir_texop_samples_identical); + + nir_def_replace(&tex->def, + nir_ieq_imm( + b, + nir_ior(b, + nir_channel(b, mcs_data, 0), + nir_channel(b, mcs_data, 1)), + 0)); + } + + return true; +} + +bool +brw_nir_lower_mcs_fetch(nir_shader *shader, + const struct intel_device_info *devinfo) +{ + return nir_shader_tex_pass(shader, + brw_nir_lower_mcs_fetch_instr, + nir_metadata_control_flow, + (void *)devinfo); }