mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-26 06:20:09 +01:00
intel/dev: Adjust prefetch_size values for MTL engines
MTL has a different CS prefetch size for each CS type, so replace the cs_prefetch_size field in the intel_device_info struct with a function that takes the i915 engine class as its argument. Fixes: - func.cmd-buffer.small-secondaries.q0 - dEQP-VK.multiview.secondary_cmd_buffer.* - Several other VK CTS tests that use secondary_cmd_buffer v2: - renamed to intel_device_info_get_engine_prefetch() (Jordan) v3: - renamed to intel_device_info_calc_engine_prefetch() - store each engine class prefetch in intel_device_info BSpec: 45718 Signed-off-by: José Roberto de Souza <jose.souza@intel.com> Reviewed-by: Jordan Justen <jordan.l.justen@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18597>
This commit is contained in:
parent
029522f67d
commit
89d2cdad37
9 changed files with 49 additions and 29 deletions
|
|
@ -1164,7 +1164,7 @@ mi_store_address(struct mi_builder *b, struct mi_value addr_reg)
|
|||
}
|
||||
|
||||
static inline void
|
||||
mi_self_mod_barrier(struct mi_builder *b)
|
||||
mi_self_mod_barrier(struct mi_builder *b, unsigned cs_prefetch_size)
|
||||
{
|
||||
/* First make sure all the memory writes from previous modifying commands
|
||||
* have landed. We want to do this before going through the CS cache,
|
||||
|
|
@ -1177,7 +1177,7 @@ mi_self_mod_barrier(struct mi_builder *b)
|
|||
* but experiments show it doesn't work properly, so for now just get over
|
||||
* the CS prefetch.
|
||||
*/
|
||||
for (uint32_t i = 0; i < (b->devinfo->cs_prefetch_size / 4); i++)
|
||||
for (uint32_t i = 0; i < (cs_prefetch_size / 4); i++)
|
||||
mi_builder_emit(b, GENX(MI_NOOP), noop);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -100,7 +100,6 @@ static const struct intel_device_info intel_device_info_gfx3 = {
|
|||
.max_eus_per_subslice = 8,
|
||||
.num_thread_per_eu = 4,
|
||||
.timestamp_frequency = 12500000,
|
||||
.cs_prefetch_size = 512,
|
||||
};
|
||||
|
||||
static const struct intel_device_info intel_device_info_i965 = {
|
||||
|
|
@ -119,7 +118,6 @@ static const struct intel_device_info intel_device_info_i965 = {
|
|||
},
|
||||
.timestamp_frequency = 12500000,
|
||||
.simulator_id = -1,
|
||||
.cs_prefetch_size = 512,
|
||||
};
|
||||
|
||||
static const struct intel_device_info intel_device_info_g4x = {
|
||||
|
|
@ -141,7 +139,6 @@ static const struct intel_device_info intel_device_info_g4x = {
|
|||
},
|
||||
.timestamp_frequency = 12500000,
|
||||
.simulator_id = -1,
|
||||
.cs_prefetch_size = 512,
|
||||
};
|
||||
|
||||
static const struct intel_device_info intel_device_info_ilk = {
|
||||
|
|
@ -162,7 +159,6 @@ static const struct intel_device_info intel_device_info_ilk = {
|
|||
},
|
||||
.timestamp_frequency = 12500000,
|
||||
.simulator_id = -1,
|
||||
.cs_prefetch_size = 512,
|
||||
};
|
||||
|
||||
static const struct intel_device_info intel_device_info_snb_gt1 = {
|
||||
|
|
@ -193,7 +189,6 @@ static const struct intel_device_info intel_device_info_snb_gt1 = {
|
|||
},
|
||||
.timestamp_frequency = 12500000,
|
||||
.simulator_id = -1,
|
||||
.cs_prefetch_size = 512,
|
||||
};
|
||||
|
||||
static const struct intel_device_info intel_device_info_snb_gt2 = {
|
||||
|
|
@ -224,7 +219,6 @@ static const struct intel_device_info intel_device_info_snb_gt2 = {
|
|||
},
|
||||
.timestamp_frequency = 12500000,
|
||||
.simulator_id = -1,
|
||||
.cs_prefetch_size = 512,
|
||||
};
|
||||
|
||||
#define GFX7_FEATURES \
|
||||
|
|
@ -236,8 +230,7 @@ static const struct intel_device_info intel_device_info_snb_gt2 = {
|
|||
.has_64bit_float = true, \
|
||||
.has_surface_tile_offset = true, \
|
||||
.timestamp_frequency = 12500000, \
|
||||
.max_constant_urb_size_kb = 16, \
|
||||
.cs_prefetch_size = 512
|
||||
.max_constant_urb_size_kb = 16
|
||||
|
||||
static const struct intel_device_info intel_device_info_ivb_gt1 = {
|
||||
GFX7_FEATURES, .platform = INTEL_PLATFORM_IVB, .gt = 1,
|
||||
|
|
@ -439,8 +432,7 @@ static const struct intel_device_info intel_device_info_hsw_gt3 = {
|
|||
.max_wm_threads = 384, \
|
||||
.max_threads_per_psd = 64, \
|
||||
.timestamp_frequency = 12500000, \
|
||||
.max_constant_urb_size_kb = 32, \
|
||||
.cs_prefetch_size = 512
|
||||
.max_constant_urb_size_kb = 32
|
||||
|
||||
static const struct intel_device_info intel_device_info_bdw_gt1 = {
|
||||
GFX8_FEATURES, .gt = 1,
|
||||
|
|
@ -550,7 +542,6 @@ static const struct intel_device_info intel_device_info_chv = {
|
|||
.max_threads_per_psd = 64, \
|
||||
.max_cs_threads = 56, \
|
||||
.timestamp_frequency = 12000000, \
|
||||
.cs_prefetch_size = 512, \
|
||||
.urb = { \
|
||||
.min_entries = { \
|
||||
[MESA_SHADER_VERTEX] = 64, \
|
||||
|
|
@ -835,8 +826,7 @@ static const struct intel_device_info intel_device_info_cfl_gt3 = {
|
|||
.max_tcs_threads = 224, \
|
||||
.max_tes_threads = 364, \
|
||||
.max_threads_per_psd = 64, \
|
||||
.max_cs_threads = 56, \
|
||||
.cs_prefetch_size = 512
|
||||
.max_cs_threads = 56
|
||||
|
||||
#define GFX11_FEATURES(_gt, _slices, _subslices, _l3, _platform) \
|
||||
GFX8_FEATURES, \
|
||||
|
|
@ -971,8 +961,7 @@ static const struct intel_device_info intel_device_info_ehl_2x4 = {
|
|||
.has_integer_dword_mul = false, \
|
||||
.gt = _gt, .num_slices = _slices, .l3_banks = _l3, \
|
||||
.simulator_id = 22, \
|
||||
.max_eus_per_subslice = 16, \
|
||||
.cs_prefetch_size = 512
|
||||
.max_eus_per_subslice = 16
|
||||
|
||||
#define dual_subslices(args...) { args, }
|
||||
|
||||
|
|
@ -1061,8 +1050,7 @@ static const struct intel_device_info intel_device_info_sg1 = {
|
|||
.has_llc = false, \
|
||||
.has_local_mem = true, \
|
||||
.has_aux_map = false, \
|
||||
.simulator_id = 29, \
|
||||
.cs_prefetch_size = 1024
|
||||
.simulator_id = 29
|
||||
|
||||
#define DG2_FEATURES \
|
||||
/* (Sub)slice info comes from the kernel topology info */ \
|
||||
|
|
@ -1930,6 +1918,27 @@ init_max_scratch_ids(struct intel_device_info *devinfo)
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Compute the command streamer (CS) prefetch size for one i915 engine class.
 *
 * Selection rules, in order:
 *   - devinfo->verx10 < 125: 512 for every engine class.
 *   - intel_device_info_is_mtl(devinfo): 2048 for I915_ENGINE_CLASS_RENDER,
 *     1024 for I915_ENGINE_CLASS_COMPUTE, 512 for any other class.
 *   - everything else (verx10 >= 125 and not MTL): 1024 for every class.
 *
 * NOTE(review): the unit is presumably bytes -- mi_self_mod_barrier() divides
 * the value by 4 to get the number of MI_NOOPs to emit -- confirm.
 * The commit introducing this helper cites BSpec 45718 for the MTL values.
 *
 * \param devinfo       device description; only verx10 and the MTL platform
 *                      check are consulted here.
 * \param engine_class  i915 engine class to look up.
 * \return the prefetch size for that engine class on this device.
 */
static unsigned
|
||||
intel_device_info_calc_engine_prefetch(const struct intel_device_info *devinfo,
|
||||
enum drm_i915_gem_engine_class engine_class)
|
||||
{
|
||||
/* Everything before verx10 125 uses one prefetch size for all engines. */
if (devinfo->verx10 < 125)
|
||||
return 512;
|
||||
|
||||
/* MTL is the only platform handled here whose size depends on the class. */
if (intel_device_info_is_mtl(devinfo)) {
|
||||
switch (engine_class) {
|
||||
case I915_ENGINE_CLASS_RENDER:
|
||||
return 2048;
|
||||
case I915_ENGINE_CLASS_COMPUTE:
|
||||
return 1024;
|
||||
default:
|
||||
return 512;
|
||||
}
|
||||
}
|
||||
|
||||
/* Remaining verx10 >= 125 platforms: same size for every engine class. */
return 1024;
|
||||
}
|
||||
|
||||
bool
|
||||
intel_get_device_info_from_fd(int fd, struct intel_device_info *devinfo)
|
||||
{
|
||||
|
|
@ -2045,6 +2054,11 @@ intel_get_device_info_from_fd(int fd, struct intel_device_info *devinfo)
|
|||
|
||||
init_max_scratch_ids(devinfo);
|
||||
|
||||
for (enum drm_i915_gem_engine_class engine = I915_ENGINE_CLASS_RENDER;
|
||||
engine < ARRAY_SIZE(devinfo->engine_class_prefetch); engine++)
|
||||
devinfo->engine_class_prefetch[engine] =
|
||||
intel_device_info_calc_engine_prefetch(devinfo, engine);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -28,6 +28,8 @@
|
|||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "drm-uapi/i915_drm.h"
|
||||
|
||||
#include "util/macros.h"
|
||||
#include "compiler/shader_enums.h"
|
||||
|
||||
|
|
@ -349,7 +351,7 @@ struct intel_device_info
|
|||
* Size of the command streamer prefetch. This is important to know for
|
||||
* self modifying batches.
|
||||
*/
|
||||
unsigned cs_prefetch_size;
|
||||
unsigned engine_class_prefetch[I915_ENGINE_CLASS_COMPUTE + 1];
|
||||
|
||||
/**
|
||||
* For the longest time the timestamp frequency for Gen's timestamp counter
|
||||
|
|
|
|||
|
|
@ -71,6 +71,7 @@ main(int argc, char *argv[])
|
|||
fprintf(stderr, "%u\n", devinfo.verx10);
|
||||
assert(devinfo.verx10 == verx10);
|
||||
verify_device_info(&devinfo);
|
||||
assert(devinfo.engine_class_prefetch[I915_ENGINE_CLASS_RENDER] > 0);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
|
|||
|
|
@ -12,7 +12,6 @@ verify_device_info(const struct intel_device_info *devinfo)
|
|||
assert(devinfo->max_eus_per_subslice != 0);
|
||||
assert(devinfo->num_thread_per_eu != 0);
|
||||
assert(devinfo->timestamp_frequency != 0);
|
||||
assert(devinfo->cs_prefetch_size > 0);
|
||||
|
||||
assert(devinfo->ver < 7 || devinfo->max_constant_urb_size_kb > 0);
|
||||
assert(devinfo->ver < 8 || devinfo->max_threads_per_psd > 0);
|
||||
|
|
|
|||
|
|
@ -879,10 +879,10 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer)
|
|||
*/
|
||||
if (cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) {
|
||||
const struct intel_device_info *devinfo = cmd_buffer->device->info;
|
||||
const enum drm_i915_gem_engine_class engine_class = cmd_buffer->queue_family->engine_class;
|
||||
/* Careful to have everything in signed integer. */
|
||||
int32_t prefetch_len = devinfo->cs_prefetch_size;
|
||||
int32_t batch_len =
|
||||
cmd_buffer->batch.next - cmd_buffer->batch.start;
|
||||
int32_t prefetch_len = devinfo->engine_class_prefetch[engine_class];
|
||||
int32_t batch_len = cmd_buffer->batch.next - cmd_buffer->batch.start;
|
||||
|
||||
for (int32_t i = 0; i < (prefetch_len - batch_len); i += 4)
|
||||
anv_batch_emit(&cmd_buffer->batch, GFX8_MI_NOOP, noop);
|
||||
|
|
|
|||
|
|
@ -994,7 +994,9 @@ void genX(CmdBeginQueryIndexedEXT)(
|
|||
|
||||
assert(reloc_idx == pdevice->n_perf_query_commands);
|
||||
|
||||
mi_self_mod_barrier(&b);
|
||||
const struct intel_device_info *devinfo = cmd_buffer->device->info;
|
||||
const enum drm_i915_gem_engine_class engine_class = cmd_buffer->queue_family->engine_class;
|
||||
mi_self_mod_barrier(&b, devinfo->engine_class_prefetch[engine_class]);
|
||||
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
|
||||
pc.CommandStreamerStallEnable = true;
|
||||
|
|
|
|||
|
|
@ -1019,10 +1019,10 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer)
|
|||
*/
|
||||
if (cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) {
|
||||
const struct intel_device_info *devinfo = cmd_buffer->device->info;
|
||||
const enum drm_i915_gem_engine_class engine_class = cmd_buffer->queue_family->engine_class;
|
||||
/* Careful to have everything in signed integer. */
|
||||
int32_t prefetch_len = devinfo->cs_prefetch_size;
|
||||
int32_t batch_len =
|
||||
cmd_buffer->batch.next - cmd_buffer->batch.start;
|
||||
int32_t prefetch_len = devinfo->engine_class_prefetch[engine_class];
|
||||
int batch_len = cmd_buffer->batch.next - cmd_buffer->batch.start;
|
||||
|
||||
for (int32_t i = 0; i < (prefetch_len - batch_len); i += 4)
|
||||
anv_batch_emit(&cmd_buffer->batch, GFX8_MI_NOOP, noop);
|
||||
|
|
|
|||
|
|
@ -1015,7 +1015,9 @@ void genX(CmdBeginQueryIndexedEXT)(
|
|||
|
||||
assert(reloc_idx == pdevice->n_perf_query_commands);
|
||||
|
||||
mi_self_mod_barrier(&b);
|
||||
const struct intel_device_info *devinfo = cmd_buffer->device->info;
|
||||
const enum drm_i915_gem_engine_class engine_class = cmd_buffer->queue_family->engine_class;
|
||||
mi_self_mod_barrier(&b, devinfo->engine_class_prefetch[engine_class]);
|
||||
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
|
||||
pc.CommandStreamerStallEnable = true;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue