2024-04-19 11:29:22 -07:00
|
|
|
/*
|
|
|
|
|
* Copyright 2024 Intel Corporation
|
|
|
|
|
* SPDX-License-Identifier: MIT
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#include "intel_compute_slm.h"
|
|
|
|
|
|
|
|
|
|
#include <assert.h>
|
|
|
|
|
|
|
|
|
|
#include "util/macros.h"
|
|
|
|
|
#include "util/u_math.h"
|
|
|
|
|
|
2024-04-08 10:22:05 -07:00
|
|
|
/* One row of an SLM size table: pairs an allocation size with the value
 * stored in the corresponding `encode` column for that size.
 */
struct slm_encode {
   /* Value returned to callers that ask for the encoding of this size. */
   uint32_t encode;
   /* Allocation size of this row, in kilobytes (1024-byte units). */
   uint32_t size_in_kb;
};
|
|
|
|
|
|
|
|
|
|
static inline struct slm_encode *
|
|
|
|
|
slm_encode_lookup(struct slm_encode *table, unsigned int table_len, uint32_t bytes)
|
|
|
|
|
{
|
|
|
|
|
const uint32_t kbytes = DIV_ROUND_UP(bytes, 1024);
|
|
|
|
|
unsigned int i;
|
|
|
|
|
|
|
|
|
|
assert(kbytes <= table[table_len - 1].size_in_kb);
|
|
|
|
|
for (i = 0; i < table_len; i++) {
|
|
|
|
|
if (table[i].size_in_kb >= kbytes)
|
|
|
|
|
return &table[i];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return &table[table_len - 1];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* SLM allocation sizes used when gen >= 20.
 *
 * Rows are kept in ascending size order because slm_encode_lookup() returns
 * the first row that is large enough.  Note that the encode values are not
 * monotonic in size (e.g. 24 kB is 0x8 while 32 kB is 0x6).
 */
static struct slm_encode xe2_slm_allocation_size_table[] = {
   { .size_in_kb =   0, .encode = 0x0 },
   { .size_in_kb =   1, .encode = 0x1 },
   { .size_in_kb =   2, .encode = 0x2 },
   { .size_in_kb =   4, .encode = 0x3 },
   { .size_in_kb =   8, .encode = 0x4 },
   { .size_in_kb =  16, .encode = 0x5 },
   { .size_in_kb =  24, .encode = 0x8 },
   { .size_in_kb =  32, .encode = 0x6 },
   { .size_in_kb =  48, .encode = 0x9 },
   { .size_in_kb =  64, .encode = 0x7 },
   { .size_in_kb =  96, .encode = 0xa },
   { .size_in_kb = 128, .encode = 0xb },
   { .size_in_kb = 192, .encode = 0xc },
   { .size_in_kb = 256, .encode = 0xd },
   { .size_in_kb = 384, .encode = 0xe },
};
|
|
|
|
|
|
2024-04-19 11:29:22 -07:00
|
|
|
/* Shared Local Memory Size is specified as powers of two,
|
|
|
|
|
* and also have a Gen-dependent minimum value if not zero.
|
|
|
|
|
*/
|
|
|
|
|
uint32_t
|
|
|
|
|
intel_compute_slm_calculate_size(unsigned gen, uint32_t bytes)
|
|
|
|
|
{
|
2024-04-08 10:22:05 -07:00
|
|
|
if (gen >= 20) {
|
|
|
|
|
struct slm_encode *slm_encode;
|
|
|
|
|
|
|
|
|
|
slm_encode = slm_encode_lookup(xe2_slm_allocation_size_table,
|
|
|
|
|
ARRAY_SIZE(xe2_slm_allocation_size_table),
|
|
|
|
|
bytes);
|
|
|
|
|
return slm_encode->size_in_kb * 1024;
|
|
|
|
|
}
|
|
|
|
|
|
2024-04-19 11:29:22 -07:00
|
|
|
assert(bytes <= 64 * 1024);
|
|
|
|
|
if (bytes > 0)
|
|
|
|
|
return MAX2(util_next_power_of_two(bytes), gen >= 9 ? 1024 : 4096);
|
|
|
|
|
else
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint32_t
|
|
|
|
|
intel_compute_slm_encode_size(unsigned gen, uint32_t bytes)
|
|
|
|
|
{
|
2024-04-08 10:22:05 -07:00
|
|
|
uint32_t slm_size;
|
|
|
|
|
|
|
|
|
|
if (bytes == 0)
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
|
|
if (gen >= 20) {
|
|
|
|
|
struct slm_encode *slm_encode;
|
|
|
|
|
|
|
|
|
|
slm_encode = slm_encode_lookup(xe2_slm_allocation_size_table,
|
|
|
|
|
ARRAY_SIZE(xe2_slm_allocation_size_table),
|
|
|
|
|
bytes);
|
|
|
|
|
return slm_encode->encode;
|
|
|
|
|
}
|
2024-04-19 11:29:22 -07:00
|
|
|
|
|
|
|
|
/* Shared Local Memory is specified as powers of two, and encoded in
|
|
|
|
|
* INTERFACE_DESCRIPTOR_DATA with the following representations:
|
|
|
|
|
*
|
|
|
|
|
* Size | 0 kB | 1 kB | 2 kB | 4 kB | 8 kB | 16 kB | 32 kB | 64 kB |
|
|
|
|
|
* -------------------------------------------------------------------
|
|
|
|
|
* Gfx7-8 | 0 | none | none | 1 | 2 | 4 | 8 | 16 |
|
|
|
|
|
* -------------------------------------------------------------------
|
|
|
|
|
* Gfx9+ | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
|
|
|
|
|
*/
|
|
|
|
|
|
2024-04-08 10:22:05 -07:00
|
|
|
slm_size = intel_compute_slm_calculate_size(gen, bytes);
|
|
|
|
|
assert(util_is_power_of_two_nonzero(slm_size));
|
|
|
|
|
|
|
|
|
|
if (gen >= 9) {
|
|
|
|
|
/* Turn an exponent of 10 (1024 kB) into 1. */
|
|
|
|
|
assert(slm_size >= 1024);
|
|
|
|
|
slm_size = ffs(slm_size) - 10;
|
|
|
|
|
} else {
|
|
|
|
|
assert(slm_size >= 4096);
|
|
|
|
|
/* Convert to the pre-Gfx9 representation. */
|
|
|
|
|
slm_size = slm_size / 4096;
|
2024-04-19 11:29:22 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return slm_size;
|
|
|
|
|
}
|
2024-04-08 10:39:25 -07:00
|
|
|
|
|
|
|
|
/* encode = 0 sets to largest SLM size supported in subslice */
|
|
|
|
|
static struct slm_encode preferred_slm_allocation_size_table[] = {
|
|
|
|
|
{ .encode = 0x8, .size_in_kb = 0, },
|
|
|
|
|
{ .encode = 0x9, .size_in_kb = 16, },
|
|
|
|
|
{ .encode = 0xa, .size_in_kb = 32, },
|
|
|
|
|
{ .encode = 0xb, .size_in_kb = 64, },
|
|
|
|
|
{ .encode = 0xc, .size_in_kb = 96, },
|
|
|
|
|
{ .encode = 0xd, .size_in_kb = 128, },
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/* Preferred SLM allocation sizes used when gen >= 20.
 *
 * Rows are kept in ascending size order because slm_encode_lookup() returns
 * the first row that is large enough.
 */
static struct slm_encode xe2_preferred_slm_allocation_size_table[] = {
   { .size_in_kb =   0, .encode = 0x0 },
   { .size_in_kb =  16, .encode = 0x1 },
   { .size_in_kb =  32, .encode = 0x2 },
   { .size_in_kb =  64, .encode = 0x3 },
   { .size_in_kb =  96, .encode = 0x4 },
   { .size_in_kb = 128, .encode = 0x5 },
   { .size_in_kb = 160, .encode = 0x6 },
   { .size_in_kb = 192, .encode = 0x7 },
   { .size_in_kb = 224, .encode = 0x8 },
   { .size_in_kb = 256, .encode = 0x9 },
   { .size_in_kb = 384, .encode = 0xa },
};
|
|
|
|
|
|
2024-04-05 13:12:32 -07:00
|
|
|
static uint32_t
|
2024-04-08 10:39:25 -07:00
|
|
|
intel_compute_preferred_slm_encode_size(unsigned gen, uint32_t bytes)
|
|
|
|
|
{
|
|
|
|
|
struct slm_encode *table;
|
|
|
|
|
unsigned int table_len;
|
|
|
|
|
|
|
|
|
|
if (gen >= 20) {
|
|
|
|
|
table = xe2_preferred_slm_allocation_size_table;
|
|
|
|
|
table_len = ARRAY_SIZE(xe2_preferred_slm_allocation_size_table);
|
|
|
|
|
} else {
|
|
|
|
|
table = preferred_slm_allocation_size_table;
|
|
|
|
|
table_len = ARRAY_SIZE(preferred_slm_allocation_size_table);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return slm_encode_lookup(table, table_len, bytes)->encode;
|
|
|
|
|
}
|
2024-04-05 13:12:32 -07:00
|
|
|
|
2024-04-19 14:11:03 -07:00
|
|
|
/**
|
|
|
|
|
* Compute a shared local memory size to be allocated for each sub-slice.
|
|
|
|
|
* It estimate how many workgroups will run concurrently per sub-slice and
|
|
|
|
|
* multiply that per each workgroup SLM size.
|
|
|
|
|
*/
|
2024-04-05 13:12:32 -07:00
|
|
|
uint32_t
|
2024-04-19 14:11:03 -07:00
|
|
|
intel_compute_preferred_slm_calc_encode_size(const struct intel_device_info *devinfo,
|
|
|
|
|
const uint32_t slm_size_per_workgroup,
|
|
|
|
|
const uint32_t invocations_per_workgroup,
|
|
|
|
|
const uint8_t cs_simd)
|
2024-04-05 13:12:32 -07:00
|
|
|
{
|
2024-04-19 14:11:03 -07:00
|
|
|
const uint32_t max_preferred_slm_size = intel_device_info_get_max_preferred_slm_size(devinfo);
|
|
|
|
|
const uint32_t invocations_per_ss = intel_device_info_get_eu_count_first_subslice(devinfo) *
|
|
|
|
|
devinfo->num_thread_per_eu * cs_simd;
|
|
|
|
|
uint32_t preferred_slm_size;
|
2024-04-05 13:12:32 -07:00
|
|
|
|
2024-04-19 14:11:03 -07:00
|
|
|
if (slm_size_per_workgroup) {
|
|
|
|
|
uint32_t workgroups_per_ss = invocations_per_ss / invocations_per_workgroup;
|
|
|
|
|
|
|
|
|
|
preferred_slm_size = workgroups_per_ss * slm_size_per_workgroup;
|
|
|
|
|
preferred_slm_size = MIN2(preferred_slm_size, max_preferred_slm_size);
|
|
|
|
|
} else {
|
|
|
|
|
preferred_slm_size = 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
assert(preferred_slm_size >= slm_size_per_workgroup);
|
|
|
|
|
return intel_compute_preferred_slm_encode_size(devinfo->ver, preferred_slm_size);
|
2024-04-05 13:12:32 -07:00
|
|
|
}
|