From b34b2bdff3a89988bab3d2dc2ebb6a6d86a7592a Mon Sep 17 00:00:00 2001 From: Sagar Ghuge Date: Wed, 14 Feb 2024 11:12:12 -0800 Subject: [PATCH] intel/compiler: Adjust sample_b parameter according to new layout On Xe2+, we need to pack LOD with array index for cube array surfaces; with that, the mlod parameter gets adjusted to different indices based on the layout. So track if we are packing LOD with array index in fs_inst and propagate that to sampler lowering code to adjust param location. Reviewed-by: Ian Romanick Signed-off-by: Sagar Ghuge Part-of: --- src/intel/compiler/brw_fs_nir.cpp | 5 +++- src/intel/compiler/brw_ir_fs.h | 3 +++ .../compiler/brw_lower_logical_sends.cpp | 27 ++++++++++++++++--- 3 files changed, 30 insertions(+), 5 deletions(-) diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index d16ca1a5ae8..67c38603aa3 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -8087,6 +8087,7 @@ fs_nir_emit_texture(nir_to_brw_state &ntb, ASSERTED bool got_lod = false; ASSERTED bool got_bias = false; + bool pack_lod_and_array_index = false; uint32_t header_bits = 0; for (unsigned i = 0; i < instr->num_srcs; i++) { nir_src nir_src = instr->src[i].src; @@ -8224,7 +8225,7 @@ fs_nir_emit_texture(nir_to_brw_state &ntb, case nir_tex_src_backend1: assert(!got_lod && !got_bias); got_lod = true; - + pack_lod_and_array_index = true; assert(instr->op == nir_texop_txl || instr->op == nir_texop_txb); srcs[TEX_LOGICAL_SRC_LOD] = retype(get_nir_src_imm(ntb, instr->src[i].src), BRW_REGISTER_TYPE_F); @@ -8350,6 +8351,8 @@ fs_nir_emit_texture(nir_to_brw_state &ntb, fs_inst *inst = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs)); inst->offset = header_bits; + inst->has_packed_lod_ai_src = pack_lod_and_array_index; + const unsigned dest_size = nir_tex_instr_dest_size(instr); if (devinfo->ver >= 9 && instr->op != nir_texop_tg4 && instr->op != nir_texop_query_levels) { diff --git a/src/intel/compiler/brw_ir_fs.h 
b/src/intel/compiler/brw_ir_fs.h index cc59b36c3e7..dc335d7e4e2 100644 --- a/src/intel/compiler/brw_ir_fs.h +++ b/src/intel/compiler/brw_ir_fs.h @@ -429,6 +429,9 @@ public: bool keep_payload_trailing_zeros; tgl_swsb sched; /**< Scheduling info. */ + + /* Hint that this instruction has combined LOD/LOD bias with array index */ + bool has_packed_lod_ai_src; }; /** diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp index 943be05fd3f..a1b49a7bb48 100644 --- a/src/intel/compiler/brw_lower_logical_sends.cpp +++ b/src/intel/compiler/brw_lower_logical_sends.cpp @@ -1281,7 +1281,22 @@ lower_sampler_logical_send_gfx7(const fs_builder &bld, fs_inst *inst, opcode op, if (min_lod.file != BAD_FILE && !min_lod_is_first) { /* Account for all of the missing coordinate sources */ - if (op == SHADER_OPCODE_TXD && devinfo->verx10 >= 125) { + if (op == FS_OPCODE_TXB && devinfo->ver >= 20 && + inst->has_packed_lod_ai_src) { + /* Bspec 64985: + * + * For sample_b sampler message format: + * + * SIMD16H/SIMD32H + * Param Number 0 1 2 3 4 5 + * Param BIAS U V R Ai MLOD + * + * SIMD16/SIMD32 + * Param Number 0 1 2 3 4 + * Param BIAS_AI U V R MLOD + */ + length += 3 - coord_components; + } else if (op == SHADER_OPCODE_TXD && devinfo->verx10 >= 125) { /* On DG2 and newer platforms, sample_d can only be used with 1D and * 2D surfaces, so the maximum number of gradient components is 2. 
* In spite of this limitation, the Bspec lists a mysterious R @@ -1433,8 +1448,10 @@ lower_sampler_logical_send_gfx7(const fs_builder &bld, fs_inst *inst, opcode op, static unsigned get_sampler_msg_payload_type_bit_size(const intel_device_info *devinfo, - opcode op, const fs_reg *src) + opcode op, const fs_inst *inst) { + assert(inst); + const fs_reg *src = inst->src; unsigned src_type_size = 0; /* All sources need to have the same size, therefore seek the first valid @@ -1480,7 +1497,9 @@ get_sampler_msg_payload_type_bit_size(const intel_device_info *devinfo, if (op == SHADER_OPCODE_TXF_CMS_W || op == SHADER_OPCODE_TXF_CMS || op == SHADER_OPCODE_TXF_UMS || - op == SHADER_OPCODE_TXF_MCS) + op == SHADER_OPCODE_TXF_MCS || + (op == FS_OPCODE_TXB && !inst->has_packed_lod_ai_src && + devinfo->ver >= 20)) src_type_size = 2; return src_type_size * 8; @@ -1513,7 +1532,7 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op) if (devinfo->ver >= 7) { const unsigned msg_payload_type_bit_size = - get_sampler_msg_payload_type_bit_size(devinfo, op, inst->src); + get_sampler_msg_payload_type_bit_size(devinfo, op, inst); /* 16-bit payloads are available only on gfx11+ */ assert(msg_payload_type_bit_size != 16 || devinfo->ver >= 11);