diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index d16ca1a5ae8..67c38603aa3 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -8087,6 +8087,7 @@ fs_nir_emit_texture(nir_to_brw_state &ntb, ASSERTED bool got_lod = false; ASSERTED bool got_bias = false; + bool pack_lod_and_array_index = false; uint32_t header_bits = 0; for (unsigned i = 0; i < instr->num_srcs; i++) { nir_src nir_src = instr->src[i].src; @@ -8224,7 +8225,7 @@ fs_nir_emit_texture(nir_to_brw_state &ntb, case nir_tex_src_backend1: assert(!got_lod && !got_bias); got_lod = true; - + pack_lod_and_array_index = true; assert(instr->op == nir_texop_txl || instr->op == nir_texop_txb); srcs[TEX_LOGICAL_SRC_LOD] = retype(get_nir_src_imm(ntb, instr->src[i].src), BRW_REGISTER_TYPE_F); @@ -8350,6 +8351,8 @@ fs_nir_emit_texture(nir_to_brw_state &ntb, fs_inst *inst = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs)); inst->offset = header_bits; + inst->has_packed_lod_ai_src = pack_lod_and_array_index; + const unsigned dest_size = nir_tex_instr_dest_size(instr); if (devinfo->ver >= 9 && instr->op != nir_texop_tg4 && instr->op != nir_texop_query_levels) { diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h index cc59b36c3e7..dc335d7e4e2 100644 --- a/src/intel/compiler/brw_ir_fs.h +++ b/src/intel/compiler/brw_ir_fs.h @@ -429,6 +429,9 @@ public: bool keep_payload_trailing_zeros; tgl_swsb sched; /**< Scheduling info. */ + + /* Hint that this instruction has combined LOD/LOD bias with array index */ + bool has_packed_lod_ai_src; }; /** diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp index 943be05fd3f..a1b49a7bb48 100644 --- a/src/intel/compiler/brw_lower_logical_sends.cpp +++ b/src/intel/compiler/brw_lower_logical_sends.cpp @@ -1281,7 +1281,22 @@ lower_sampler_logical_send_gfx7(const fs_builder &bld, fs_inst *inst, opcode op, if (min_lod.file != BAD_FILE && !min_lod_is_first) { /* Account for all of the missing coordinate sources */ - if (op == SHADER_OPCODE_TXD && devinfo->verx10 >= 125) { + if (op == FS_OPCODE_TXB && devinfo->ver >= 20 && + inst->has_packed_lod_ai_src) { + /* Bspec 64985: + * + * For sample_b sampler message format: + * + * SIMD16H/SIMD32H + * Param Number 0 1 2 3 4 5 + * Param BIAS U V R Ai MLOD + * + * SIMD16/SIMD32 + * Param Number 0 1 2 3 4 + * Param BIAS_AI U V R MLOD + */ + length += 3 - coord_components; + } else if (op == SHADER_OPCODE_TXD && devinfo->verx10 >= 125) { /* On DG2 and newer platforms, sample_d can only be used with 1D and * 2D surfaces, so the maximum number of gradient components is 2. * In spite of this limitation, the Bspec lists a mysterious R @@ -1433,8 +1448,10 @@ lower_sampler_logical_send_gfx7(const fs_builder &bld, fs_inst *inst, opcode op, static unsigned get_sampler_msg_payload_type_bit_size(const intel_device_info *devinfo, - opcode op, const fs_reg *src) + opcode op, const fs_inst *inst) { + assert(inst); + const fs_reg *src = inst->src; unsigned src_type_size = 0; /* All sources need to have the same size, therefore seek the first valid @@ -1480,7 +1497,9 @@ get_sampler_msg_payload_type_bit_size(const intel_device_info *devinfo, if (op == SHADER_OPCODE_TXF_CMS_W || op == SHADER_OPCODE_TXF_CMS || op == SHADER_OPCODE_TXF_UMS || - op == SHADER_OPCODE_TXF_MCS) + op == SHADER_OPCODE_TXF_MCS || + (op == FS_OPCODE_TXB && !inst->has_packed_lod_ai_src && + devinfo->ver >= 20)) src_type_size = 2; return src_type_size * 8; @@ -1513,7 +1532,7 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op) if (devinfo->ver >= 7) { const unsigned msg_payload_type_bit_size = - get_sampler_msg_payload_type_bit_size(devinfo, op, inst->src); + get_sampler_msg_payload_type_bit_size(devinfo, op, inst); /* 16-bit payloads are available only on gfx11+ */ assert(msg_payload_type_bit_size != 16 || devinfo->ver >= 11);