diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index b052d884ecb..9fae079e635 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -1530,7 +1530,8 @@ brw_send_indirect_split_message(struct brw_codegen *p, if (ex_desc_imm_inst) { /* Write the immediate extended descriptor immediate value, but only * the part used for encoding an offset. This matches to bits - * 12:15-19:31 as described in BSpec 70586. + * 12:15-19:31 as described in BSpec 70586 (extended descriptor + * format) & BSpec 56890 (SEND instruction format). */ assert(devinfo->ver >= 20); brw_eu_inst_set_bits(send, 127, 124, GET_BITS(ex_desc_imm_inst, 31, 28)); diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp index 21fbbbb95c6..ab8e4aaa424 100644 --- a/src/intel/compiler/brw_lower_logical_sends.cpp +++ b/src/intel/compiler/brw_lower_logical_sends.cpp @@ -1455,10 +1455,9 @@ setup_lsc_surface_descriptors(const brw_builder &bld, brw_inst *inst, inst->src[1] = brw_imm_ud(lsc_bti_ex_desc(devinfo, surface.ud, base_offset_bits)); } else { + assert(base_offset == 0); const brw_builder ubld = bld.uniform(); - brw_reg tmp = ubld.OR( - ubld.SHL(surface, brw_imm_ud(24)), - brw_imm_ud(base_offset << 12)); + brw_reg tmp = ubld.SHL(surface, brw_imm_ud(24)); inst->src[1] = component(tmp, 0); } break; diff --git a/src/intel/compiler/brw_nir_lower_immediate_offsets.c b/src/intel/compiler/brw_nir_lower_immediate_offsets.c index 6b0ea8b9a4a..73ca91a2904 100644 --- a/src/intel/compiler/brw_nir_lower_immediate_offsets.c +++ b/src/intel/compiler/brw_nir_lower_immediate_offsets.c @@ -33,14 +33,38 @@ lower_immediate_offsets(nir_builder *b, nir_intrinsic_instr *intrin, void *data) nir_instr_as_intrinsic(binding->ssa->parent_instr)->intrinsic == nir_intrinsic_resource_intel; bool ss_binding = false; + bool bti_is_const; if (has_resource) { nir_intrinsic_instr *resource = 
nir_instr_as_intrinsic(binding->ssa->parent_instr); ss_binding = (nir_intrinsic_resource_access_intel(resource) & nir_resource_intel_bindless) != 0; + bti_is_const = nir_src_is_const(resource->src[1]); + } else { + bti_is_const = nir_src_is_const(*nir_get_io_index_src(intrin)); } - max_bits = ss_binding ? - LSC_ADDRESS_OFFSET_SS_BITS : LSC_ADDRESS_OFFSET_BTI_BITS; + /* The BTI index and the base offset go into the extended descriptor + * (see BSpec 63997 for the format). + * + * When the BTI index is constant, the extended descriptor is encoded in + * the SEND instruction (no need to use the address register, see BSpec + * 56890). This is referred to as the extended descriptor immediate. + * + * When BTI is not a constant, the extended descriptor is put into the + * address register but only the BTI index part of it. The base offset + * needs to go in the SEND instruction (see programming note on BSpec + * 63997). + * + * When the extended descriptor is coming from the address register, + * some of the bits in the SEND instruction cannot be used for the + * immediate extended descriptor part and that includes bits you would + * want to use for the base offset... Slow clap to the HW design here. + * + * So set max bits to 0 in that case and set the base offset to 0 + * since it's unusable. + */ + max_bits = ss_binding ? LSC_ADDRESS_OFFSET_SS_BITS : + bti_is_const ?
LSC_ADDRESS_OFFSET_BTI_BITS : 0; break; } default: @@ -49,8 +73,22 @@ lower_immediate_offsets(nir_builder *b, nir_intrinsic_instr *intrin, void *data) assert(nir_intrinsic_has_base(intrin)); + if (nir_intrinsic_base(intrin) == 0) + return false; + b->cursor = nir_before_instr(&intrin->instr); + nir_src *offset_src = nir_get_io_offset_src(intrin); + + if (max_bits == 0) { + nir_src_rewrite( + offset_src, + nir_iadd_imm( + b, offset_src->ssa, nir_intrinsic_base(intrin))); + nir_intrinsic_set_base(intrin, 0); + return true; + } + const int32_t min = u_intN_min(max_bits); const int32_t max = u_intN_max(max_bits); @@ -63,8 +101,9 @@ lower_immediate_offsets(nir_builder *b, nir_intrinsic_instr *intrin, void *data) assert(new_base >= min && new_base <= max); - nir_src *offset_src = nir_get_io_offset_src(intrin); - nir_src_rewrite(offset_src, nir_iadd_imm(b, offset_src->ssa, base - new_base)); + nir_src_rewrite( + offset_src, nir_iadd_imm( + b, offset_src->ssa, base - new_base)); nir_intrinsic_set_base(intrin, new_base); return true;