From 80c89909f36045305ef151bd66fec7dd922df81e Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Mon, 15 Sep 2025 11:59:52 +0300 Subject: [PATCH] brw: fixup immediate bindless surface handling This is unused at the moment but the backend incorrectly assumes immediate handles are for the binding table (therefore not bindless). Some new CTS tests are using an immediate bindless handle which is broken. Signed-off-by: Lionel Landwerlin Acked-by: Alyssa Rosenzweig Part-of: --- src/intel/compiler/brw/brw_generator.cpp | 13 ++++-- src/intel/compiler/brw/brw_generator.h | 3 +- src/intel/compiler/brw/brw_inst.h | 5 ++- .../compiler/brw/brw_lower_logical_sends.cpp | 40 +++++++++---------- 4 files changed, 33 insertions(+), 28 deletions(-) diff --git a/src/intel/compiler/brw/brw_generator.cpp b/src/intel/compiler/brw/brw_generator.cpp index a4b8d57d88f..04041a9518c 100644 --- a/src/intel/compiler/brw/brw_generator.cpp +++ b/src/intel/compiler/brw/brw_generator.cpp @@ -166,7 +166,8 @@ brw_generator::generate_send(brw_send_inst *inst, struct brw_reg desc, struct brw_reg ex_desc, struct brw_reg payload, - struct brw_reg payload2) + struct brw_reg payload2, + bool ex_bso) { const bool gather = inst->opcode == SHADER_OPCODE_SEND_GATHER; if (gather) { @@ -192,7 +193,7 @@ brw_generator::generate_send(brw_send_inst *inst, brw_send_indirect_split_message(p, inst->sfid, dst, payload, payload2, desc, ex_desc, inst->ex_desc_imm ? inst->offset : 0, - inst->ex_mlen, inst->ex_bso, + inst->ex_mlen, ex_bso, inst->eot, gather); if (inst->check_tdr) brw_eu_inst_set_opcode(p->isa, brw_last_inst, @@ -1166,14 +1167,18 @@ brw_generator::generate_code(const brw_shader &s, case SHADER_OPCODE_SEND: generate_send(inst->as_send(), dst, src[SEND_SRC_DESC], src[SEND_SRC_EX_DESC], - src[SEND_SRC_PAYLOAD1], src[SEND_SRC_PAYLOAD2]); + src[SEND_SRC_PAYLOAD1], src[SEND_SRC_PAYLOAD2], + inst->as_send()->bindless_surface && + compiler->extended_bindless_surface_offset); send_count++; break; case SHADER_OPCODE_SEND_GATHER: generate_send(inst->as_send(), dst, src[SEND_GATHER_SRC_DESC], src[SEND_GATHER_SRC_EX_DESC], - src[SEND_GATHER_SRC_SCALAR], brw_null_reg()); + src[SEND_GATHER_SRC_SCALAR], brw_null_reg(), + inst->as_send()->bindless_surface && + compiler->extended_bindless_surface_offset); send_count++; break; diff --git a/src/intel/compiler/brw/brw_generator.h b/src/intel/compiler/brw/brw_generator.h index b6fbcfbdcca..753f47d9d09 100644 --- a/src/intel/compiler/brw/brw_generator.h +++ b/src/intel/compiler/brw/brw_generator.h @@ -30,7 +30,8 @@ private: struct brw_reg desc, struct brw_reg ex_desc, struct brw_reg payload, - struct brw_reg payload2); + struct brw_reg payload2, + bool ex_bso); void generate_barrier(brw_inst *inst, struct brw_reg src); void generate_ddx(const brw_inst *inst, struct brw_reg dst, struct brw_reg src); diff --git a/src/intel/compiler/brw/brw_inst.h b/src/intel/compiler/brw/brw_inst.h index f867120073d..4e9d7a12cd8 100644 --- a/src/intel/compiler/brw/brw_inst.h +++ b/src/intel/compiler/brw/brw_inst.h @@ -257,9 +257,10 @@ struct brw_send_inst : brw_inst { bool is_volatile:1; /** - * Use extended bindless surface offset (26bits instead of 20bits) + * The surface used for this message is bindless and therefore should + * go into the address register. */ - bool ex_bso:1; + bool bindless_surface:1; /** * Serialize the message (Gfx12.x only) diff --git a/src/intel/compiler/brw/brw_lower_logical_sends.cpp b/src/intel/compiler/brw/brw_lower_logical_sends.cpp index 4ccf5990db5..b7dd3c07642 100644 --- a/src/intel/compiler/brw/brw_lower_logical_sends.cpp +++ b/src/intel/compiler/brw/brw_lower_logical_sends.cpp @@ -953,6 +953,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_tex_inst *tex) send->mlen = mlen; send->header_size = header_size; send->sfid = BRW_SFID_SAMPLER; + send->bindless_surface = surface_bindless; uint sampler_ret_type = brw_type_size_bits(send->dst.type) == 16 ? GFX8_SAMPLER_RETURN_FORMAT_16BITS : GFX8_SAMPLER_RETURN_FORMAT_32BITS; @@ -992,7 +993,6 @@ lower_sampler_logical_send(const brw_builder &bld, brw_tex_inst *tex) * we can use the surface handle directly as the extended descriptor. */ send->src[SEND_SRC_EX_DESC] = retype(surface, BRW_TYPE_UD); - send->ex_bso = compiler->extended_bindless_surface_offset; } else { /* Immediate portion of the descriptor */ send->desc = brw_sampler_desc(devinfo, @@ -1072,8 +1072,6 @@ static void setup_surface_descriptors(const brw_builder &bld, brw_send_inst *send, uint32_t desc, const brw_reg &surface, const brw_reg &surface_handle) { - const brw_compiler *compiler = bld.shader->compiler; - /* We must have exactly one of surface and surface_handle */ assert((surface.file == BAD_FILE) != (surface_handle.file == BAD_FILE)); @@ -1090,7 +1088,6 @@ setup_surface_descriptors(const brw_builder &bld, brw_send_inst *send, uint32_t * we can use the surface handle directly as the extended descriptor. */ send->src[SEND_SRC_EX_DESC] = retype(surface_handle, BRW_TYPE_UD); - send->ex_bso = compiler->extended_bindless_surface_offset; } else { send->desc = desc; const brw_builder ubld = bld.uniform(); @@ -1099,6 +1096,8 @@ setup_surface_descriptors(const brw_builder &bld, brw_send_inst *send, uint32_t send->src[SEND_SRC_DESC] = component(tmp, 0); send->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); } + + send->bindless_surface = surface_handle.file != BAD_FILE; } static void @@ -1107,7 +1106,6 @@ setup_lsc_surface_descriptors(const brw_builder &bld, brw_send_inst *send, int32_t base_offset) { const ASSERTED intel_device_info *devinfo = bld.shader->devinfo; - const brw_compiler *compiler = bld.shader->compiler; assert(base_offset == 0 || devinfo->ver >= 20); @@ -1123,19 +1121,18 @@ setup_lsc_surface_descriptors(const brw_builder &bld, brw_send_inst *send, const unsigned base_offset_bits = util_bitpack_sint(base_offset, 0, max_imm_bits - 1); + send->bindless_surface = + surf_type == LSC_ADDR_SURFTYPE_BSS || + surf_type == LSC_ADDR_SURFTYPE_SS; + switch (surf_type) { case LSC_ADDR_SURFTYPE_BSS: - send->ex_bso = compiler->extended_bindless_surface_offset; - FALLTHROUGH; case LSC_ADDR_SURFTYPE_SS: assert(surface.file != BAD_FILE); /* We assume that the driver provided the handle in the top 20 bits so * we can use the surface handle directly as the extended descriptor. */ send->src[SEND_SRC_EX_DESC] = retype(surface, BRW_TYPE_UD); - /* Gfx20+ assumes ExBSO with UGM */ - if (devinfo->ver >= 20 && send->sfid == BRW_SFID_UGM) - send->ex_bso = true; /* We're already using the extended descriptor to hold the surface * handle. But now the immediate extended descriptor bits in the @@ -1407,7 +1404,6 @@ static void lower_hdc_memory_logical_send(const brw_builder &bld, brw_mem_inst *mem) { const intel_device_info *devinfo = bld.shader->devinfo; - const brw_compiler *compiler = bld.shader->compiler; /* Get the logical send arguments. */ brw_reg binding = mem->src[MEMORY_LOGICAL_BINDING]; @@ -1643,6 +1639,10 @@ lower_hdc_memory_logical_send(const brw_builder &bld, brw_mem_inst *mem) send->exec_size = components > 8 ? 16 : 8; } + send->bindless_surface = + binding_type == LSC_ADDR_SURFTYPE_BSS || + binding_type == LSC_ADDR_SURFTYPE_SS; + /* Set up descriptors */ switch (binding_type) { case LSC_ADDR_SURFTYPE_FLAT: @@ -1650,8 +1650,6 @@ lower_hdc_memory_logical_send(const brw_builder &bld, brw_mem_inst *mem) send->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); break; case LSC_ADDR_SURFTYPE_BSS: - send->ex_bso = compiler->extended_bindless_surface_offset; - FALLTHROUGH; case LSC_ADDR_SURFTYPE_SS: desc |= GFX9_BTI_BINDLESS; @@ -1713,8 +1711,8 @@ lower_lsc_varying_pull_constant_logical_send(const brw_builder &bld, inst = NULL; send->sfid = BRW_SFID_UGM; - send->ex_bso = surf_type == LSC_ADDR_SURFTYPE_BSS && - compiler->extended_bindless_surface_offset; + send->bindless_surface = (surf_type == LSC_ADDR_SURFTYPE_BSS || + surf_type == LSC_ADDR_SURFTYPE_SS); assert(!compiler->indirect_ubos_use_sampler); @@ -2424,8 +2422,7 @@ brw_lower_uniform_pull_constant_loads(brw_shader &s) LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS)); send->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, 1); - send->ex_bso = surface_handle.file != BAD_FILE && - s.compiler->extended_bindless_surface_offset; + send->bindless_surface = surface_handle.file != BAD_FILE; send->ex_mlen = 0; send->header_size = 0; send->has_side_effects = false; @@ -2520,17 +2517,18 @@ brw_lower_send_descriptors(brw_shader &s) uint32_t ex_desc_imm = send->ex_desc | brw_message_ex_desc(devinfo, send->ex_mlen); - if (ex_desc.file == IMM) + if (ex_desc.file == IMM && !send->bindless_surface) ex_desc_imm |= ex_desc.ud; bool needs_addr_reg = false; - if (ex_desc.file != IMM) + if (ex_desc.file != IMM || send->bindless_surface) needs_addr_reg = true; if (devinfo->ver < 12 && ex_desc.file == IMM && (ex_desc_imm & INTEL_MASK(15, 12)) != 0) needs_addr_reg = true; - if (send->ex_bso) { + if (send->bindless_surface && + s.compiler->extended_bindless_surface_offset) { needs_addr_reg = true; /* When using the extended bindless offset, the whole extended * descriptor is the surface handle. @@ -2544,7 +2542,7 @@ brw_lower_send_descriptors(brw_shader &s) if (needs_addr_reg) { brw_reg addr_reg = ubld.vaddr(BRW_TYPE_UD, BRW_ADDRESS_SUBREG_INDIRECT_EX_DESC); - if (ex_desc.file == IMM) + if (ex_desc.file == IMM && !send->bindless_surface) ubld.MOV(addr_reg, brw_imm_ud(ex_desc_imm)); else if (ex_desc_imm == 0) ubld.MOV(addr_reg, ex_desc);