pan/bi,nir: Decouple memory_access from segments

Valhall removed Bifrost's memory segments and introduced memory access
modes in their place.  These were bolted onto the segment enum's reserved
bits as "pseudo-segments", and the emitter would catch them and emit the
right memory access.  This commit cleans that up by making memory_access
available directly on the instruction and exposing it to NIR (this will
be useful later).

Signed-off-by: Lorenzo Rossi <lorenzo.rossi@collabora.com>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40391>
Lorenzo Rossi 2026-03-11 17:45:33 +01:00 committed by Marge Bot
parent c730e41ed5
commit 43ffcf06f4
6 changed files with 80 additions and 45 deletions
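For context, the removed va_pack_memory_access() helper (see the packing
hunk at the end of this page) shows how the old pseudo-segment scheme
worked; a minimal before/after sketch, reproduced from the hunks below:

/* Before: the packer derived the access mode from the (pseudo-)segment. */
static uint64_t
va_pack_memory_access(const bi_instr *I)
{
   switch (I->seg) {
   case BI_SEG_TL:   return VA_MEMORY_ACCESS_FORCE;
   case BI_SEG_POS:  return VA_MEMORY_ACCESS_ISTREAM;
   case BI_SEG_VARY: return VA_MEMORY_ACCESS_ESTREAM;
   default:          return VA_MEMORY_ACCESS_NONE;
   }
}

/* After: the emitter sets I->mem_access directly and the packer only
 * shifts it into the encoding: */
hex |= (uint64_t)I->mem_access << 24;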

View file

@@ -900,6 +900,8 @@ print_access(enum gl_access_qualifier access, print_state *state, const char *se
{ ACCESS_ATOMIC, "atomic" },
{ ACCESS_FUSED_EU_DISABLE_INTEL, "fused-eu-disable-intel" },
{ ACCESS_SPARSE, "sparse" },
{ ACCESS_ISTREAM_PAN, "istream-pan" },
{ ACCESS_ESTREAM_PAN, "estream-pan" },
};
bool first = true;

View file

@@ -1266,6 +1266,24 @@ enum gl_access_qualifier
* intrinsics for sparse.
*/
ACCESS_SPARSE = (1 << 20),
/**
* Internal streaming access (v9+)
*
* Whether the memory is accessed in a streaming fashion inside the GPU.
* Since the data is likely to be read back by the GPU, the hardware will
* try to keep it in the L2 cache.
*/
ACCESS_ISTREAM_PAN = (1 << 21),
/**
* External streaming access (v9+)
*
* Whether the memory is accessed in a streaming fashion outside the GPU.
* This hints to the hardware that the data should not be cached, which is
* useful for one-time accesses or for data larger than the cache can hold.
*/
ACCESS_ESTREAM_PAN = (1 << 22),
};
enum gl_tess_spacing

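A pass that wants these hints can OR them into an intrinsic's access
qualifier with the standard NIR helpers; a minimal sketch (the helper name
and the choice of intrinsic are hypothetical, only the flag comes from this
patch):

#include "nir.h"

/* Hypothetical helper: mark an intrinsic as external-streaming so the
 * Valhall backend emits VA_MEMORY_ACCESS_ESTREAM for it. */
static void
tag_estream(nir_intrinsic_instr *intr)
{
   nir_intrinsic_set_access(intr,
                            nir_intrinsic_access(intr) | ACCESS_ESTREAM_PAN);
}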
View file

@@ -1146,9 +1146,9 @@ bi_emit_store_vary(bi_builder *b, nir_intrinsic_instr *instr)
if (index_offset != 0)
index = bi_iadd_imm_i32(b, index, index_offset);
const enum bi_seg seg =
slot->section == PAN_VARYING_SECTION_GENERIC ? BI_SEG_VARY
: BI_SEG_POS;
const enum va_memory_access mem_access =
slot->section == PAN_VARYING_SECTION_GENERIC ? VA_MEMORY_ACCESS_ESTREAM
: VA_MEMORY_ACCESS_ISTREAM;
nir_src *offset_src = nir_get_io_offset_src(instr);
assert(nir_src_is_const(*offset_src) && "assumes immediate offset");
@@ -1169,7 +1169,9 @@ bi_emit_store_vary(bi_builder *b, nir_intrinsic_instr *instr)
bi_emit_split_i32(b, a, address, 2);
bi_instr *S = bi_store(b, nr * src_bit_sz, data, a[0], a[1], seg, offset);
bi_instr *S = bi_store(b, nr * src_bit_sz, data, a[0], a[1], BI_SEG_NONE,
offset);
S->mem_access = mem_access;
S->is_psiz_write = slot->location == VARYING_SLOT_PSIZ;
} else {
assert(T_size == 32 || T_size == 16);
@@ -1310,7 +1312,8 @@ bi_handle_segment(bi_builder *b, bi_index *addr_lo, bi_index *addr_hi,
}
static void
bi_emit_load(bi_builder *b, nir_intrinsic_instr *instr, enum bi_seg seg)
bi_emit_load(bi_builder *b, nir_intrinsic_instr *instr, enum bi_seg seg,
enum va_memory_access mem_access)
{
int16_t offset = 0;
unsigned bits = instr->num_components * instr->def.bit_size;
@@ -1320,12 +1323,14 @@ bi_emit_load(bi_builder *b, nir_intrinsic_instr *instr, enum bi_seg seg)
bi_handle_segment(b, &addr_lo, &addr_hi, seg, &offset);
bi_load_to(b, bits, dest, addr_lo, addr_hi, seg, offset);
bi_instr *I = bi_load_to(b, bits, dest, addr_lo, addr_hi, seg, offset);
I->mem_access = mem_access;
bi_emit_cached_split(b, dest, bits);
}
static bi_instr *
bi_emit_store(bi_builder *b, nir_intrinsic_instr *instr, enum bi_seg seg)
bi_emit_store(bi_builder *b, nir_intrinsic_instr *instr, enum bi_seg seg,
enum va_memory_access mem_access)
{
/* Require contiguous masks, guaranteed by nir_lower_wrmasks */
assert(nir_intrinsic_write_mask(instr) ==
@@ -1340,6 +1345,7 @@ bi_emit_store(bi_builder *b, nir_intrinsic_instr *instr, enum bi_seg seg)
bi_instr *I =
bi_store(b, instr->num_components * nir_src_bit_size(instr->src[0]),
bi_src_index(&instr->src[0]), addr_lo, addr_hi, seg, offset);
I->mem_access = mem_access;
return I;
}
@@ -1744,19 +1750,23 @@ va_emit_load_texel_buf_index_address(bi_builder *b, bi_index dst,
}
static void
bi_emit_load_cvt(bi_builder *b, bi_index dst, nir_intrinsic_instr *instr)
bi_emit_load_cvt(bi_builder *b, bi_index dst, nir_intrinsic_instr *instr,
enum va_memory_access mem_access)
{
bi_index addr = bi_src_index(&instr->src[0]);
bi_index icd = bi_src_index(&instr->src[1]);
bi_ld_cvt_to(b, dst, bi_extract(b, addr, 0), bi_extract(b, addr, 1), icd,
bi_reg_fmt_for_nir(nir_intrinsic_dest_type(instr)),
instr->def.num_components - 1);
bi_instr *I =
bi_ld_cvt_to(b, dst, bi_extract(b, addr, 0), bi_extract(b, addr, 1), icd,
bi_reg_fmt_for_nir(nir_intrinsic_dest_type(instr)),
instr->def.num_components - 1);
I->mem_access = mem_access;
bi_emit_cached_split_i32(b, dst, instr->def.num_components);
}
static void
bi_emit_store_cvt(bi_builder *b, nir_intrinsic_instr *instr)
bi_emit_store_cvt(bi_builder *b, nir_intrinsic_instr *instr,
enum va_memory_access mem_access)
{
bi_index value = bi_src_index(&instr->src[0]);
bi_index addr = bi_src_index(&instr->src[1]);
@@ -1773,8 +1783,10 @@ bi_emit_store_cvt(bi_builder *b, nir_intrinsic_instr *instr)
regfmt = bi_reg_fmt_for_nir(src_type);
}
bi_st_cvt(b, value, bi_extract(b, addr, 0), bi_extract(b, addr, 1), icd,
regfmt, instr->num_components - 1);
bi_instr *I =
bi_st_cvt(b, value, bi_extract(b, addr, 0), bi_extract(b, addr, 1), icd,
regfmt, instr->num_components - 1);
I->mem_access = mem_access;
}
static void
@@ -1980,6 +1992,17 @@ bi_subgroup_from_cluster_size(unsigned cluster_size)
}
}
static enum va_memory_access
va_memory_access_from_nir(const nir_intrinsic_instr *intr)
{
const enum gl_access_qualifier access = nir_intrinsic_access(intr);
if (access & ACCESS_ISTREAM_PAN)
return VA_MEMORY_ACCESS_ISTREAM;
if (access & ACCESS_ESTREAM_PAN)
return VA_MEMORY_ACCESS_ESTREAM;
return VA_MEMORY_ACCESS_NONE;
}
static void
bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
{
@@ -2139,30 +2162,31 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
case nir_intrinsic_load_global:
case nir_intrinsic_load_global_constant:
bi_emit_load(b, instr, BI_SEG_NONE);
bi_emit_load(b, instr, BI_SEG_NONE, va_memory_access_from_nir(instr));
break;
case nir_intrinsic_store_global:
case nir_intrinsic_store_global_psiz_pan: {
bi_instr *I = bi_emit_store(b, instr, BI_SEG_NONE);
bi_instr *I = bi_emit_store(b, instr, BI_SEG_NONE,
va_memory_access_from_nir(instr));
I->is_psiz_write = instr->intrinsic == nir_intrinsic_store_global_psiz_pan;
break;
}
case nir_intrinsic_load_scratch:
bi_emit_load(b, instr, BI_SEG_TL);
bi_emit_load(b, instr, BI_SEG_TL, VA_MEMORY_ACCESS_FORCE);
break;
case nir_intrinsic_store_scratch:
bi_emit_store(b, instr, BI_SEG_TL);
bi_emit_store(b, instr, BI_SEG_TL, VA_MEMORY_ACCESS_FORCE);
break;
case nir_intrinsic_load_shared:
bi_emit_load(b, instr, BI_SEG_WLS);
bi_emit_load(b, instr, BI_SEG_WLS, VA_MEMORY_ACCESS_NONE);
break;
case nir_intrinsic_store_shared:
bi_emit_store(b, instr, BI_SEG_WLS);
bi_emit_store(b, instr, BI_SEG_WLS, VA_MEMORY_ACCESS_NONE);
break;
case nir_intrinsic_barrier:
@@ -2304,11 +2328,11 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
break;
case nir_intrinsic_load_global_cvt_pan:
bi_emit_load_cvt(b, dst, instr);
bi_emit_load_cvt(b, dst, instr, va_memory_access_from_nir(instr));
break;
case nir_intrinsic_store_global_cvt_pan:
bi_emit_store_cvt(b, instr);
bi_emit_store_cvt(b, instr, va_memory_access_from_nir(instr));
break;
case nir_intrinsic_load_tile_pan:

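One detail worth noting in va_memory_access_from_nir() above: ISTREAM is
tested before ESTREAM, so if both bits were ever set (they are presumably
meant to be mutually exclusive), ISTREAM wins; a sketch of a hypothetical
intrinsic with both flags set:

nir_intrinsic_set_access(intr, ACCESS_ISTREAM_PAN | ACCESS_ESTREAM_PAN);
assert(va_memory_access_from_nir(intr) == VA_MEMORY_ACCESS_ISTREAM);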
View file

@@ -637,6 +637,7 @@ typedef struct {
struct {
enum bi_seg seg; /* LOAD, STORE, SEG_ADD, SEG_SUB */
enum va_memory_access mem_access; /* LOAD, STORE, LD_CVT, ST_CVT */
bool preserve_null; /* SEG_ADD, SEG_SUB */
enum bi_extend extend; /* LOAD, IMUL */
};

View file

@@ -291,11 +291,12 @@ TEST_F(ValhallPacking, LeaBufImm)
0x005e84040000007b);
}
TEST_F(ValhallPacking, StoreSegment)
TEST_F(ValhallPacking, StoreMemoryAccess)
{
CASE(bi_store_i96(b, bi_register(0), bi_discard(bi_register(4)),
bi_discard(bi_register(5)), BI_SEG_VARY, 0),
0x0061400632000044);
bi_instr *I = bi_store_i96(b, bi_register(0), bi_discard(bi_register(4)),
bi_discard(bi_register(5)), BI_SEG_NONE, 0);
I->mem_access = VA_MEMORY_ACCESS_ESTREAM;
CASE(I, 0x0061400632000044);
}
TEST_F(ValhallPacking, Convert16To32)

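In the packing changes below, the field lands at a different bit position
depending on the opcode; summarizing the hunks that follow:

/* LOAD / STORE: */
hex |= (uint64_t)I->mem_access << 24;

/* LD_CVT / ST_CVT: */
hex |= (uint64_t)I->mem_access << 37;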
View file

@@ -737,35 +737,21 @@ va_pack_load(const bi_instr *I, bool buffer_descriptor)
hex |= va_pack_byte_offset(I);
hex |= (uint64_t)va_pack_src(I, 0) << 0;
hex |= (uint64_t)I->mem_access << 24;
if (buffer_descriptor)
hex |= (uint64_t)va_pack_src(I, 1) << 8;
return hex;
}
static uint64_t
va_pack_memory_access(const bi_instr *I)
{
switch (I->seg) {
case BI_SEG_TL:
return VA_MEMORY_ACCESS_FORCE;
case BI_SEG_POS:
return VA_MEMORY_ACCESS_ISTREAM;
case BI_SEG_VARY:
return VA_MEMORY_ACCESS_ESTREAM;
default:
return VA_MEMORY_ACCESS_NONE;
}
}
static uint64_t
va_pack_store(const bi_instr *I)
{
uint64_t hex = va_pack_memory_access(I) << 24;
uint64_t hex = 0;
va_validate_register_pair(I, 1);
hex |= (uint64_t)va_pack_src(I, 1) << 0;
hex |= I->mem_access << 24;
hex |= va_pack_byte_offset(I);
@@ -971,15 +957,18 @@ va_pack_instr(const bi_instr *I, unsigned arch)
/* Conversion descriptor */
hex |= (uint64_t)va_pack_src(I, 2) << 16;
hex |= va_pack_memory_access(I) << 37;
hex |= (uint64_t)I->mem_access << 37;
break;
case BI_OPCODE_ST_CVT:
/* Staging read */
hex |= va_pack_store(I);
va_validate_register_pair(I, 1);
hex |= (uint64_t)va_pack_src(I, 1) << 0;
hex |= va_pack_byte_offset(I);
/* Conversion descriptor */
hex |= (uint64_t)va_pack_src(I, 3) << 16;
hex |= (uint64_t)I->mem_access << 37;
break;
case BI_OPCODE_BLEND: {