mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-03-23 20:10:36 +01:00
pan/bi,nir: Divide memory_access from segments
Valhall removed Bifrost's memory segments and added memory accesses in their place. Those were bolted onto reserved bits as "pseudo-segments" and the emitter would catch these and emit the right memory access. This commit cleans it up a bit by making memory_access available directly and exposing it to NIR (this will be useful later). Signed-off-by: Lorenzo Rossi <lorenzo.rossi@collabora.com> Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40391>
This commit is contained in:
parent
c730e41ed5
commit
43ffcf06f4
6 changed files with 80 additions and 45 deletions
|
|
@ -900,6 +900,8 @@ print_access(enum gl_access_qualifier access, print_state *state, const char *se
|
|||
{ ACCESS_ATOMIC, "atomic" },
|
||||
{ ACCESS_FUSED_EU_DISABLE_INTEL, "fused-eu-disable-intel" },
|
||||
{ ACCESS_SPARSE, "sparse" },
|
||||
{ ACCESS_ISTREAM_PAN, "istream-pan" },
|
||||
{ ACCESS_ESTREAM_PAN, "estream-pan" },
|
||||
};
|
||||
|
||||
bool first = true;
|
||||
|
|
|
|||
|
|
@ -1266,6 +1266,24 @@ enum gl_access_qualifier
|
|||
* intrinsics for sparse.
|
||||
*/
|
||||
ACCESS_SPARSE = (1 << 20),
|
||||
|
||||
/**
|
||||
* Internal streaming access (v9+)
|
||||
*
|
||||
* Whether the memory is accessed in a streaming fashion inside of the GPU.
|
||||
* Since the data is likely to be read inside of the GPU, the hardware will
|
||||
* try to store it in level 2 cache.
|
||||
*/
|
||||
ACCESS_ISTREAM_PAN = (1 << 21),
|
||||
|
||||
/**
|
||||
* External streaming access (v9+)
|
||||
*
|
||||
* Whether the memory is accessed in a streaming fashion outside of the GPU.
|
||||
* This hints the hardware to not cache the data, it could be useful for
|
||||
* one-time accesses or if the data is larger than what the memory can store.
|
||||
*/
|
||||
ACCESS_ESTREAM_PAN = (1 << 22),
|
||||
};
|
||||
|
||||
enum gl_tess_spacing
|
||||
|
|
|
|||
|
|
@ -1146,9 +1146,9 @@ bi_emit_store_vary(bi_builder *b, nir_intrinsic_instr *instr)
|
|||
if (index_offset != 0)
|
||||
index = bi_iadd_imm_i32(b, index, index_offset);
|
||||
|
||||
const enum bi_seg seg =
|
||||
slot->section == PAN_VARYING_SECTION_GENERIC ? BI_SEG_VARY
|
||||
: BI_SEG_POS;
|
||||
const enum va_memory_access mem_access =
|
||||
slot->section == PAN_VARYING_SECTION_GENERIC ? VA_MEMORY_ACCESS_ESTREAM
|
||||
: VA_MEMORY_ACCESS_ISTREAM;
|
||||
|
||||
nir_src *offset_src = nir_get_io_offset_src(instr);
|
||||
assert(nir_src_is_const(*offset_src) && "assumes immediate offset");
|
||||
|
|
@ -1169,7 +1169,9 @@ bi_emit_store_vary(bi_builder *b, nir_intrinsic_instr *instr)
|
|||
|
||||
bi_emit_split_i32(b, a, address, 2);
|
||||
|
||||
bi_instr *S = bi_store(b, nr * src_bit_sz, data, a[0], a[1], seg, offset);
|
||||
bi_instr *S = bi_store(b, nr * src_bit_sz, data, a[0], a[1], BI_SEG_NONE,
|
||||
offset);
|
||||
S->mem_access = mem_access;
|
||||
S->is_psiz_write = slot->location == VARYING_SLOT_PSIZ;
|
||||
} else {
|
||||
assert(T_size == 32 || T_size == 16);
|
||||
|
|
@ -1310,7 +1312,8 @@ bi_handle_segment(bi_builder *b, bi_index *addr_lo, bi_index *addr_hi,
|
|||
}
|
||||
|
||||
static void
|
||||
bi_emit_load(bi_builder *b, nir_intrinsic_instr *instr, enum bi_seg seg)
|
||||
bi_emit_load(bi_builder *b, nir_intrinsic_instr *instr, enum bi_seg seg,
|
||||
enum va_memory_access mem_access)
|
||||
{
|
||||
int16_t offset = 0;
|
||||
unsigned bits = instr->num_components * instr->def.bit_size;
|
||||
|
|
@ -1320,12 +1323,14 @@ bi_emit_load(bi_builder *b, nir_intrinsic_instr *instr, enum bi_seg seg)
|
|||
|
||||
bi_handle_segment(b, &addr_lo, &addr_hi, seg, &offset);
|
||||
|
||||
bi_load_to(b, bits, dest, addr_lo, addr_hi, seg, offset);
|
||||
bi_instr *I = bi_load_to(b, bits, dest, addr_lo, addr_hi, seg, offset);
|
||||
I->mem_access = mem_access;
|
||||
bi_emit_cached_split(b, dest, bits);
|
||||
}
|
||||
|
||||
static bi_instr *
|
||||
bi_emit_store(bi_builder *b, nir_intrinsic_instr *instr, enum bi_seg seg)
|
||||
bi_emit_store(bi_builder *b, nir_intrinsic_instr *instr, enum bi_seg seg,
|
||||
enum va_memory_access mem_access)
|
||||
{
|
||||
/* Require contiguous masks, gauranteed by nir_lower_wrmasks */
|
||||
assert(nir_intrinsic_write_mask(instr) ==
|
||||
|
|
@ -1340,6 +1345,7 @@ bi_emit_store(bi_builder *b, nir_intrinsic_instr *instr, enum bi_seg seg)
|
|||
bi_instr *I =
|
||||
bi_store(b, instr->num_components * nir_src_bit_size(instr->src[0]),
|
||||
bi_src_index(&instr->src[0]), addr_lo, addr_hi, seg, offset);
|
||||
I->mem_access = mem_access;
|
||||
return I;
|
||||
}
|
||||
|
||||
|
|
@ -1744,19 +1750,23 @@ va_emit_load_texel_buf_index_address(bi_builder *b, bi_index dst,
|
|||
}
|
||||
|
||||
static void
|
||||
bi_emit_load_cvt(bi_builder *b, bi_index dst, nir_intrinsic_instr *instr)
|
||||
bi_emit_load_cvt(bi_builder *b, bi_index dst, nir_intrinsic_instr *instr,
|
||||
enum va_memory_access mem_access)
|
||||
{
|
||||
bi_index addr = bi_src_index(&instr->src[0]);
|
||||
bi_index icd = bi_src_index(&instr->src[1]);
|
||||
|
||||
bi_ld_cvt_to(b, dst, bi_extract(b, addr, 0), bi_extract(b, addr, 1), icd,
|
||||
bi_reg_fmt_for_nir(nir_intrinsic_dest_type(instr)),
|
||||
instr->def.num_components - 1);
|
||||
bi_instr *I =
|
||||
bi_ld_cvt_to(b, dst, bi_extract(b, addr, 0), bi_extract(b, addr, 1), icd,
|
||||
bi_reg_fmt_for_nir(nir_intrinsic_dest_type(instr)),
|
||||
instr->def.num_components - 1);
|
||||
I->mem_access = mem_access;
|
||||
bi_emit_cached_split_i32(b, dst, instr->def.num_components);
|
||||
}
|
||||
|
||||
static void
|
||||
bi_emit_store_cvt(bi_builder *b, nir_intrinsic_instr *instr)
|
||||
bi_emit_store_cvt(bi_builder *b, nir_intrinsic_instr *instr,
|
||||
enum va_memory_access mem_access)
|
||||
{
|
||||
bi_index value = bi_src_index(&instr->src[0]);
|
||||
bi_index addr = bi_src_index(&instr->src[1]);
|
||||
|
|
@ -1773,8 +1783,10 @@ bi_emit_store_cvt(bi_builder *b, nir_intrinsic_instr *instr)
|
|||
regfmt = bi_reg_fmt_for_nir(src_type);
|
||||
}
|
||||
|
||||
bi_st_cvt(b, value, bi_extract(b, addr, 0), bi_extract(b, addr, 1), icd,
|
||||
regfmt, instr->num_components - 1);
|
||||
bi_instr *I =
|
||||
bi_st_cvt(b, value, bi_extract(b, addr, 0), bi_extract(b, addr, 1), icd,
|
||||
regfmt, instr->num_components - 1);
|
||||
I->mem_access = mem_access;
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -1980,6 +1992,17 @@ bi_subgroup_from_cluster_size(unsigned cluster_size)
|
|||
}
|
||||
}
|
||||
|
||||
static enum va_memory_access
|
||||
va_memory_access_from_nir(const nir_intrinsic_instr *intr)
|
||||
{
|
||||
const enum gl_access_qualifier access = nir_intrinsic_access(intr);
|
||||
if (access & ACCESS_ISTREAM_PAN)
|
||||
return VA_MEMORY_ACCESS_ISTREAM;
|
||||
if (access & ACCESS_ESTREAM_PAN)
|
||||
return VA_MEMORY_ACCESS_ESTREAM;
|
||||
return VA_MEMORY_ACCESS_NONE;
|
||||
}
|
||||
|
||||
static void
|
||||
bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
|
||||
{
|
||||
|
|
@ -2139,30 +2162,31 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
|
|||
|
||||
case nir_intrinsic_load_global:
|
||||
case nir_intrinsic_load_global_constant:
|
||||
bi_emit_load(b, instr, BI_SEG_NONE);
|
||||
bi_emit_load(b, instr, BI_SEG_NONE, va_memory_access_from_nir(instr));
|
||||
break;
|
||||
|
||||
case nir_intrinsic_store_global:
|
||||
case nir_intrinsic_store_global_psiz_pan: {
|
||||
bi_instr *I = bi_emit_store(b, instr, BI_SEG_NONE);
|
||||
bi_instr *I = bi_emit_store(b, instr, BI_SEG_NONE,
|
||||
va_memory_access_from_nir(instr));
|
||||
I->is_psiz_write = instr->intrinsic == nir_intrinsic_store_global_psiz_pan;
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_scratch:
|
||||
bi_emit_load(b, instr, BI_SEG_TL);
|
||||
bi_emit_load(b, instr, BI_SEG_TL, VA_MEMORY_ACCESS_FORCE);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_store_scratch:
|
||||
bi_emit_store(b, instr, BI_SEG_TL);
|
||||
bi_emit_store(b, instr, BI_SEG_TL, VA_MEMORY_ACCESS_FORCE);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_shared:
|
||||
bi_emit_load(b, instr, BI_SEG_WLS);
|
||||
bi_emit_load(b, instr, BI_SEG_WLS, VA_MEMORY_ACCESS_NONE);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_store_shared:
|
||||
bi_emit_store(b, instr, BI_SEG_WLS);
|
||||
bi_emit_store(b, instr, BI_SEG_WLS, VA_MEMORY_ACCESS_NONE);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_barrier:
|
||||
|
|
@ -2304,11 +2328,11 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
|
|||
break;
|
||||
|
||||
case nir_intrinsic_load_global_cvt_pan:
|
||||
bi_emit_load_cvt(b, dst, instr);
|
||||
bi_emit_load_cvt(b, dst, instr, va_memory_access_from_nir(instr));
|
||||
break;
|
||||
|
||||
case nir_intrinsic_store_global_cvt_pan:
|
||||
bi_emit_store_cvt(b, instr);
|
||||
bi_emit_store_cvt(b, instr, va_memory_access_from_nir(instr));
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_tile_pan:
|
||||
|
|
|
|||
|
|
@ -637,6 +637,7 @@ typedef struct {
|
|||
|
||||
struct {
|
||||
enum bi_seg seg; /* LOAD, STORE, SEG_ADD, SEG_SUB */
|
||||
enum va_memory_access mem_access; /* LOAD, STORE, LD_CVT, ST_CVT */
|
||||
bool preserve_null; /* SEG_ADD, SEG_SUB */
|
||||
enum bi_extend extend; /* LOAD, IMUL */
|
||||
};
|
||||
|
|
|
|||
|
|
@ -291,11 +291,12 @@ TEST_F(ValhallPacking, LeaBufImm)
|
|||
0x005e84040000007b);
|
||||
}
|
||||
|
||||
TEST_F(ValhallPacking, StoreSegment)
|
||||
TEST_F(ValhallPacking, StoreMemoryAccess)
|
||||
{
|
||||
CASE(bi_store_i96(b, bi_register(0), bi_discard(bi_register(4)),
|
||||
bi_discard(bi_register(5)), BI_SEG_VARY, 0),
|
||||
0x0061400632000044);
|
||||
bi_instr *I = bi_store_i96(b, bi_register(0), bi_discard(bi_register(4)),
|
||||
bi_discard(bi_register(5)), BI_SEG_NONE, 0);
|
||||
I->mem_access = VA_MEMORY_ACCESS_ESTREAM;
|
||||
CASE(I, 0x0061400632000044);
|
||||
}
|
||||
|
||||
TEST_F(ValhallPacking, Convert16To32)
|
||||
|
|
|
|||
|
|
@ -737,35 +737,21 @@ va_pack_load(const bi_instr *I, bool buffer_descriptor)
|
|||
hex |= va_pack_byte_offset(I);
|
||||
|
||||
hex |= (uint64_t)va_pack_src(I, 0) << 0;
|
||||
hex |= (uint64_t)I->mem_access << 24;
|
||||
|
||||
if (buffer_descriptor)
|
||||
hex |= (uint64_t)va_pack_src(I, 1) << 8;
|
||||
|
||||
return hex;
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
va_pack_memory_access(const bi_instr *I)
|
||||
{
|
||||
switch (I->seg) {
|
||||
case BI_SEG_TL:
|
||||
return VA_MEMORY_ACCESS_FORCE;
|
||||
case BI_SEG_POS:
|
||||
return VA_MEMORY_ACCESS_ISTREAM;
|
||||
case BI_SEG_VARY:
|
||||
return VA_MEMORY_ACCESS_ESTREAM;
|
||||
default:
|
||||
return VA_MEMORY_ACCESS_NONE;
|
||||
}
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
va_pack_store(const bi_instr *I)
|
||||
{
|
||||
uint64_t hex = va_pack_memory_access(I) << 24;
|
||||
uint64_t hex = 0;
|
||||
|
||||
va_validate_register_pair(I, 1);
|
||||
hex |= (uint64_t)va_pack_src(I, 1) << 0;
|
||||
hex |= I->mem_access << 24;
|
||||
|
||||
hex |= va_pack_byte_offset(I);
|
||||
|
||||
|
|
@ -971,15 +957,18 @@ va_pack_instr(const bi_instr *I, unsigned arch)
|
|||
|
||||
/* Conversion descriptor */
|
||||
hex |= (uint64_t)va_pack_src(I, 2) << 16;
|
||||
hex |= va_pack_memory_access(I) << 37;
|
||||
hex |= (uint64_t)I->mem_access << 37;
|
||||
break;
|
||||
|
||||
case BI_OPCODE_ST_CVT:
|
||||
/* Staging read */
|
||||
hex |= va_pack_store(I);
|
||||
va_validate_register_pair(I, 1);
|
||||
hex |= (uint64_t)va_pack_src(I, 1) << 0;
|
||||
hex |= va_pack_byte_offset(I);
|
||||
|
||||
/* Conversion descriptor */
|
||||
hex |= (uint64_t)va_pack_src(I, 3) << 16;
|
||||
hex |= (uint64_t)I->mem_access << 37;
|
||||
break;
|
||||
|
||||
case BI_OPCODE_BLEND: {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue