intel/dev: Adjust prefetch_size values for MTL engines

MTL has different CS prefetch sizes for each CS type.
Replace the cs_prefetch_size field in struct intel_device_info
with a function that takes the i915 engine class as its argument.

Fixes:
- func.cmd-buffer.small-secondaries.q0
- dEQP-VK.multiview.secondary_cmd_buffer.*
- Several other VK CTS tests that use secondary_cmd_buffer

v2:
- renamed to intel_device_info_get_engine_prefetch() (Jordan)

v3:
- renamed to intel_device_info_calc_engine_prefetch()
- store each engine class prefetch in intel_device_info

BSpec: 45718
Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18597>
This commit is contained in:
José Roberto de Souza 2022-09-12 10:10:07 -07:00 committed by Marge Bot
parent 029522f67d
commit 89d2cdad37
9 changed files with 49 additions and 29 deletions

View file

@ -1164,7 +1164,7 @@ mi_store_address(struct mi_builder *b, struct mi_value addr_reg)
}
static inline void
mi_self_mod_barrier(struct mi_builder *b)
mi_self_mod_barrier(struct mi_builder *b, unsigned cs_prefetch_size)
{
/* First make sure all the memory writes from previous modifying commands
* have landed. We want to do this before going through the CS cache,
@ -1177,7 +1177,7 @@ mi_self_mod_barrier(struct mi_builder *b)
* but experiment show it doesn't work properly, so for now just get over
* the CS prefetch.
*/
for (uint32_t i = 0; i < (b->devinfo->cs_prefetch_size / 4); i++)
for (uint32_t i = 0; i < (cs_prefetch_size / 4); i++)
mi_builder_emit(b, GENX(MI_NOOP), noop);
}

View file

@ -100,7 +100,6 @@ static const struct intel_device_info intel_device_info_gfx3 = {
.max_eus_per_subslice = 8,
.num_thread_per_eu = 4,
.timestamp_frequency = 12500000,
.cs_prefetch_size = 512,
};
static const struct intel_device_info intel_device_info_i965 = {
@ -119,7 +118,6 @@ static const struct intel_device_info intel_device_info_i965 = {
},
.timestamp_frequency = 12500000,
.simulator_id = -1,
.cs_prefetch_size = 512,
};
static const struct intel_device_info intel_device_info_g4x = {
@ -141,7 +139,6 @@ static const struct intel_device_info intel_device_info_g4x = {
},
.timestamp_frequency = 12500000,
.simulator_id = -1,
.cs_prefetch_size = 512,
};
static const struct intel_device_info intel_device_info_ilk = {
@ -162,7 +159,6 @@ static const struct intel_device_info intel_device_info_ilk = {
},
.timestamp_frequency = 12500000,
.simulator_id = -1,
.cs_prefetch_size = 512,
};
static const struct intel_device_info intel_device_info_snb_gt1 = {
@ -193,7 +189,6 @@ static const struct intel_device_info intel_device_info_snb_gt1 = {
},
.timestamp_frequency = 12500000,
.simulator_id = -1,
.cs_prefetch_size = 512,
};
static const struct intel_device_info intel_device_info_snb_gt2 = {
@ -224,7 +219,6 @@ static const struct intel_device_info intel_device_info_snb_gt2 = {
},
.timestamp_frequency = 12500000,
.simulator_id = -1,
.cs_prefetch_size = 512,
};
#define GFX7_FEATURES \
@ -236,8 +230,7 @@ static const struct intel_device_info intel_device_info_snb_gt2 = {
.has_64bit_float = true, \
.has_surface_tile_offset = true, \
.timestamp_frequency = 12500000, \
.max_constant_urb_size_kb = 16, \
.cs_prefetch_size = 512
.max_constant_urb_size_kb = 16
static const struct intel_device_info intel_device_info_ivb_gt1 = {
GFX7_FEATURES, .platform = INTEL_PLATFORM_IVB, .gt = 1,
@ -439,8 +432,7 @@ static const struct intel_device_info intel_device_info_hsw_gt3 = {
.max_wm_threads = 384, \
.max_threads_per_psd = 64, \
.timestamp_frequency = 12500000, \
.max_constant_urb_size_kb = 32, \
.cs_prefetch_size = 512
.max_constant_urb_size_kb = 32
static const struct intel_device_info intel_device_info_bdw_gt1 = {
GFX8_FEATURES, .gt = 1,
@ -550,7 +542,6 @@ static const struct intel_device_info intel_device_info_chv = {
.max_threads_per_psd = 64, \
.max_cs_threads = 56, \
.timestamp_frequency = 12000000, \
.cs_prefetch_size = 512, \
.urb = { \
.min_entries = { \
[MESA_SHADER_VERTEX] = 64, \
@ -835,8 +826,7 @@ static const struct intel_device_info intel_device_info_cfl_gt3 = {
.max_tcs_threads = 224, \
.max_tes_threads = 364, \
.max_threads_per_psd = 64, \
.max_cs_threads = 56, \
.cs_prefetch_size = 512
.max_cs_threads = 56
#define GFX11_FEATURES(_gt, _slices, _subslices, _l3, _platform) \
GFX8_FEATURES, \
@ -971,8 +961,7 @@ static const struct intel_device_info intel_device_info_ehl_2x4 = {
.has_integer_dword_mul = false, \
.gt = _gt, .num_slices = _slices, .l3_banks = _l3, \
.simulator_id = 22, \
.max_eus_per_subslice = 16, \
.cs_prefetch_size = 512
.max_eus_per_subslice = 16
#define dual_subslices(args...) { args, }
@ -1061,8 +1050,7 @@ static const struct intel_device_info intel_device_info_sg1 = {
.has_llc = false, \
.has_local_mem = true, \
.has_aux_map = false, \
.simulator_id = 29, \
.cs_prefetch_size = 1024
.simulator_id = 29
#define DG2_FEATURES \
/* (Sub)slice info comes from the kernel topology info */ \
@ -1930,6 +1918,27 @@ init_max_scratch_ids(struct intel_device_info *devinfo)
}
}
/**
 * Compute the command streamer prefetch size for one engine class.
 *
 * Selection rules:
 *  - devices with verx10 below 125 always use 512;
 *  - MTL uses 2048 for the render engine, 1024 for the compute engine and
 *    512 for every other engine class;
 *  - all remaining devices use 1024.
 *
 * NOTE(review): the value is presumably in bytes (callers emit one MI_NOOP
 * per 4 units) -- confirm against the hardware documentation.
 *
 * @param devinfo       device description; only its version/platform is read
 * @param engine_class  i915 engine class the prefetch size is wanted for
 * @return the prefetch size for @p engine_class on @p devinfo
 */
static unsigned
intel_device_info_calc_engine_prefetch(const struct intel_device_info *devinfo,
                                       enum drm_i915_gem_engine_class engine_class)
{
   unsigned prefetch;

   if (devinfo->verx10 < 125) {
      /* Older hardware shares a single prefetch size across all engines. */
      prefetch = 512;
   } else if (!intel_device_info_is_mtl(devinfo)) {
      /* verx10 >= 125 platforms other than MTL. */
      prefetch = 1024;
   } else if (engine_class == I915_ENGINE_CLASS_RENDER) {
      prefetch = 2048;
   } else if (engine_class == I915_ENGINE_CLASS_COMPUTE) {
      prefetch = 1024;
   } else {
      /* Every other MTL engine class. */
      prefetch = 512;
   }

   return prefetch;
}
bool
intel_get_device_info_from_fd(int fd, struct intel_device_info *devinfo)
{
@ -2045,6 +2054,11 @@ intel_get_device_info_from_fd(int fd, struct intel_device_info *devinfo)
init_max_scratch_ids(devinfo);
for (enum drm_i915_gem_engine_class engine = I915_ENGINE_CLASS_RENDER;
engine < ARRAY_SIZE(devinfo->engine_class_prefetch); engine++)
devinfo->engine_class_prefetch[engine] =
intel_device_info_calc_engine_prefetch(devinfo, engine);
return true;
}

View file

@ -28,6 +28,8 @@
#include <stdbool.h>
#include <stdint.h>
#include "drm-uapi/i915_drm.h"
#include "util/macros.h"
#include "compiler/shader_enums.h"
@ -349,7 +351,7 @@ struct intel_device_info
* Size of the command streamer prefetch. This is important to know for
* self modifying batches.
*/
unsigned cs_prefetch_size;
unsigned engine_class_prefetch[I915_ENGINE_CLASS_COMPUTE + 1];
/**
* For the longest time the timestamp frequency for Gen's timestamp counter

View file

@ -71,6 +71,7 @@ main(int argc, char *argv[])
fprintf(stderr, "%u\n", devinfo.verx10);
assert(devinfo.verx10 == verx10);
verify_device_info(&devinfo);
assert(devinfo.engine_class_prefetch[I915_ENGINE_CLASS_RENDER] > 0);
}
return 0;

View file

@ -12,7 +12,6 @@ verify_device_info(const struct intel_device_info *devinfo)
assert(devinfo->max_eus_per_subslice != 0);
assert(devinfo->num_thread_per_eu != 0);
assert(devinfo->timestamp_frequency != 0);
assert(devinfo->cs_prefetch_size > 0);
assert(devinfo->ver < 7 || devinfo->max_constant_urb_size_kb > 0);
assert(devinfo->ver < 8 || devinfo->max_threads_per_psd > 0);

View file

@ -879,10 +879,10 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer)
*/
if (cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) {
const struct intel_device_info *devinfo = cmd_buffer->device->info;
const enum drm_i915_gem_engine_class engine_class = cmd_buffer->queue_family->engine_class;
/* Careful to have everything in signed integer. */
int32_t prefetch_len = devinfo->cs_prefetch_size;
int32_t batch_len =
cmd_buffer->batch.next - cmd_buffer->batch.start;
int32_t prefetch_len = devinfo->engine_class_prefetch[engine_class];
int32_t batch_len = cmd_buffer->batch.next - cmd_buffer->batch.start;
for (int32_t i = 0; i < (prefetch_len - batch_len); i += 4)
anv_batch_emit(&cmd_buffer->batch, GFX8_MI_NOOP, noop);

View file

@ -994,7 +994,9 @@ void genX(CmdBeginQueryIndexedEXT)(
assert(reloc_idx == pdevice->n_perf_query_commands);
mi_self_mod_barrier(&b);
const struct intel_device_info *devinfo = cmd_buffer->device->info;
const enum drm_i915_gem_engine_class engine_class = cmd_buffer->queue_family->engine_class;
mi_self_mod_barrier(&b, devinfo->engine_class_prefetch[engine_class]);
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
pc.CommandStreamerStallEnable = true;

View file

@ -1019,10 +1019,10 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer)
*/
if (cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) {
const struct intel_device_info *devinfo = cmd_buffer->device->info;
const enum drm_i915_gem_engine_class engine_class = cmd_buffer->queue_family->engine_class;
/* Careful to have everything in signed integer. */
int32_t prefetch_len = devinfo->cs_prefetch_size;
int32_t batch_len =
cmd_buffer->batch.next - cmd_buffer->batch.start;
int32_t prefetch_len = devinfo->engine_class_prefetch[engine_class];
int batch_len = cmd_buffer->batch.next - cmd_buffer->batch.start;
for (int32_t i = 0; i < (prefetch_len - batch_len); i += 4)
anv_batch_emit(&cmd_buffer->batch, GFX8_MI_NOOP, noop);

View file

@ -1015,7 +1015,9 @@ void genX(CmdBeginQueryIndexedEXT)(
assert(reloc_idx == pdevice->n_perf_query_commands);
mi_self_mod_barrier(&b);
const struct intel_device_info *devinfo = cmd_buffer->device->info;
const enum drm_i915_gem_engine_class engine_class = cmd_buffer->queue_family->engine_class;
mi_self_mod_barrier(&b, devinfo->engine_class_prefetch[engine_class]);
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
pc.CommandStreamerStallEnable = true;