intel: Move subslice_total into devinfo

Reworks:
 * Move asserts for subslice_total into intel_device_info.c (suggested by Ken)
 * Drop now unused intel_device_info_subslice_total (suggested by Ken)
 * Add comment for subslice_total (Ken)

Suggested-by: Kenneth Graunke <kenneth@whitecape.org>
Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12799>
This commit is contained in:
Jordan Justen 2021-09-08 16:20:24 -07:00
parent 3c18e69078
commit 32e848aeaa
20 changed files with 30 additions and 65 deletions

View file

@ -2645,9 +2645,8 @@ crocus_get_scratch_space(struct crocus_context *ice,
struct crocus_bo **bop = &ice->shaders.scratch_bos[encoded_size][stage]; struct crocus_bo **bop = &ice->shaders.scratch_bos[encoded_size][stage];
unsigned subslice_total = screen->subslice_total; /* TODO: This doesn't seem to match brw_alloc_stage_scratch */
subslice_total = 4 * devinfo->num_slices; unsigned cs_subslices = 4 * devinfo->num_slices;
// assert(subslice_total >= screen->subslice_total);
if (!*bop) { if (!*bop) {
unsigned scratch_ids_per_subslice = devinfo->max_cs_threads; unsigned scratch_ids_per_subslice = devinfo->max_cs_threads;
@ -2658,7 +2657,7 @@ crocus_get_scratch_space(struct crocus_context *ice,
[MESA_SHADER_TESS_EVAL] = devinfo->max_tes_threads, [MESA_SHADER_TESS_EVAL] = devinfo->max_tes_threads,
[MESA_SHADER_GEOMETRY] = devinfo->max_gs_threads, [MESA_SHADER_GEOMETRY] = devinfo->max_gs_threads,
[MESA_SHADER_FRAGMENT] = devinfo->max_wm_threads, [MESA_SHADER_FRAGMENT] = devinfo->max_wm_threads,
[MESA_SHADER_COMPUTE] = scratch_ids_per_subslice * subslice_total, [MESA_SHADER_COMPUTE] = scratch_ids_per_subslice * cs_subslices,
}; };
uint32_t size = per_thread_scratch * max_threads[stage]; uint32_t size = per_thread_scratch * max_threads[stage];

View file

@ -813,9 +813,6 @@ crocus_screen_create(int fd, const struct pipe_screen_config *config)
slab_create_parent(&screen->transfer_pool, slab_create_parent(&screen->transfer_pool,
sizeof(struct crocus_transfer), 64); sizeof(struct crocus_transfer), 64);
screen->subslice_total = intel_device_info_subslice_total(&screen->devinfo);
assert(screen->subslice_total >= 1);
struct pipe_screen *pscreen = &screen->base; struct pipe_screen *pscreen = &screen->base;
crocus_init_screen_fence_functions(pscreen); crocus_init_screen_fence_functions(pscreen);

View file

@ -201,8 +201,6 @@ struct crocus_screen {
bool always_flush_cache; bool always_flush_cache;
} driconf; } driconf;
unsigned subslice_total;
uint64_t aperture_bytes; uint64_t aperture_bytes;
struct intel_device_info devinfo; struct intel_device_info devinfo;

View file

@ -8075,7 +8075,7 @@ crocus_upload_compute_state(struct crocus_context *ice,
} }
vfe.MaximumNumberofThreads = vfe.MaximumNumberofThreads =
devinfo->max_cs_threads * screen->subslice_total - 1; devinfo->max_cs_threads * devinfo->subslice_total - 1;
vfe.ResetGatewayTimer = vfe.ResetGatewayTimer =
Resettingrelativetimerandlatchingtheglobaltimestamp; Resettingrelativetimerandlatchingtheglobaltimestamp;
vfe.BypassGatewayControl = true; vfe.BypassGatewayControl = true;

View file

@ -2334,7 +2334,7 @@ iris_get_scratch_space(struct iris_context *ice,
* For Gfx11+, scratch space allocation is based on the number of threads * For Gfx11+, scratch space allocation is based on the number of threads
* in the base configuration. * in the base configuration.
*/ */
unsigned subslice_total = screen->subslice_total; unsigned subslice_total = devinfo->subslice_total;
if (devinfo->verx10 == 125) if (devinfo->verx10 == 125)
subslice_total = 32; subslice_total = 32;
else if (devinfo->ver == 12) else if (devinfo->ver == 12)
@ -2343,7 +2343,7 @@ iris_get_scratch_space(struct iris_context *ice,
subslice_total = 8; subslice_total = 8;
else if (devinfo->ver < 11) else if (devinfo->ver < 11)
subslice_total = 4 * devinfo->num_slices; subslice_total = 4 * devinfo->num_slices;
assert(subslice_total >= screen->subslice_total); assert(subslice_total >= devinfo->subslice_total);
if (!*bop) { if (!*bop) {
unsigned scratch_ids_per_subslice = devinfo->max_cs_threads; unsigned scratch_ids_per_subslice = devinfo->max_cs_threads;

View file

@ -872,9 +872,6 @@ iris_screen_create(int fd, const struct pipe_screen_config *config)
slab_create_parent(&screen->transfer_pool, slab_create_parent(&screen->transfer_pool,
sizeof(struct iris_transfer), 64); sizeof(struct iris_transfer), 64);
screen->subslice_total = intel_device_info_subslice_total(&screen->devinfo);
assert(screen->subslice_total >= 1);
iris_detect_kernel_features(screen); iris_detect_kernel_features(screen);
struct pipe_screen *pscreen = &screen->base; struct pipe_screen *pscreen = &screen->base;

View file

@ -185,8 +185,6 @@ struct iris_screen {
unsigned kernel_features; unsigned kernel_features;
#define KERNEL_HAS_WAIT_FOR_SUBMIT (1<<0) #define KERNEL_HAS_WAIT_FOR_SUBMIT (1<<0)
unsigned subslice_total;
uint64_t aperture_bytes; uint64_t aperture_bytes;
/** /**

View file

@ -6920,7 +6920,7 @@ iris_upload_compute_walker(struct iris_context *ice,
if (stage_dirty & IRIS_STAGE_DIRTY_CS) { if (stage_dirty & IRIS_STAGE_DIRTY_CS) {
iris_emit_cmd(batch, GENX(CFE_STATE), cfe) { iris_emit_cmd(batch, GENX(CFE_STATE), cfe) {
cfe.MaximumNumberofThreads = cfe.MaximumNumberofThreads =
devinfo->max_cs_threads * screen->subslice_total - 1; devinfo->max_cs_threads * devinfo->subslice_total - 1;
if (prog_data->total_scratch > 0) { if (prog_data->total_scratch > 0) {
cfe.ScratchSpaceBuffer = cfe.ScratchSpaceBuffer =
iris_get_scratch_surf(ice, prog_data->total_scratch)->offset >> 4; iris_get_scratch_surf(ice, prog_data->total_scratch)->offset >> 4;
@ -7003,7 +7003,7 @@ iris_upload_gpgpu_walker(struct iris_context *ice,
} }
vfe.MaximumNumberofThreads = vfe.MaximumNumberofThreads =
devinfo->max_cs_threads * screen->subslice_total - 1; devinfo->max_cs_threads * devinfo->subslice_total - 1;
#if GFX_VER < 11 #if GFX_VER < 11
vfe.ResetGatewayTimer = vfe.ResetGatewayTimer =
Resettingrelativetimerandlatchingtheglobaltimestamp; Resettingrelativetimerandlatchingtheglobaltimestamp;

View file

@ -1538,5 +1538,13 @@ intel_get_device_info_from_fd(int fd, struct intel_device_info *devinfo)
intel_get_aperture_size(fd, &devinfo->aperture_bytes); intel_get_aperture_size(fd, &devinfo->aperture_bytes);
devinfo->has_tiling_uapi = has_get_tiling(fd); devinfo->has_tiling_uapi = has_get_tiling(fd);
devinfo->subslice_total = 0;
for (uint32_t i = 0; i < devinfo->num_slices; i++)
devinfo->subslice_total += __builtin_popcount(devinfo->subslice_masks[i]);
/* Gfx7 and older do not support EU/Subslice info */
assert(devinfo->subslice_total >= 1 || devinfo->ver <= 7);
devinfo->subslice_total = MAX2(devinfo->subslice_total, 1);
return true; return true;
} }

View file

@ -166,6 +166,12 @@ struct intel_device_info
uint8_t subslice_masks[INTEL_DEVICE_MAX_SLICES * uint8_t subslice_masks[INTEL_DEVICE_MAX_SLICES *
DIV_ROUND_UP(INTEL_DEVICE_MAX_SUBSLICES, 8)]; DIV_ROUND_UP(INTEL_DEVICE_MAX_SUBSLICES, 8)];
/**
* The number of enabled subslices (considering fusing). For exactly which
* subslices are enabled, see subslice_masks[].
*/
unsigned subslice_total;
/** /**
* An array of bit mask of EUs available, use eu_slice_stride & * An array of bit mask of EUs available, use eu_slice_stride &
* eu_subslice_stride to access this array. * eu_subslice_stride to access this array.
@ -332,17 +338,6 @@ intel_device_info_eu_available(const struct intel_device_info *devinfo,
return (devinfo->eu_masks[subslice_offset + eu / 8] & (1U << eu % 8)) != 0; return (devinfo->eu_masks[subslice_offset + eu / 8] & (1U << eu % 8)) != 0;
} }
/**
 * Count the enabled subslices of a device.
 *
 * Walks subslice_masks[] for every index below num_slices and adds up the
 * number of bits set in each entry.
 *
 * @param devinfo  device description to read num_slices and subslice_masks from
 * @return         sum of the set bits over the visited mask entries
 */
static inline uint32_t
intel_device_info_subslice_total(const struct intel_device_info *devinfo)
{
   uint32_t enabled = 0;
   uint32_t slice = 0;

   while (slice < devinfo->num_slices) {
      enabled += __builtin_popcount(devinfo->subslice_masks[slice]);
      slice++;
   }

   return enabled;
}
static inline uint32_t static inline uint32_t
intel_device_info_eu_total(const struct intel_device_info *devinfo) intel_device_info_eu_total(const struct intel_device_info *devinfo)
{ {

View file

@ -1464,7 +1464,7 @@ anv_scratch_pool_alloc(struct anv_device *device, struct anv_scratch_pool *pool,
if (bo != NULL) if (bo != NULL)
return bo; return bo;
unsigned subslices = MAX2(device->physical->subslice_total, 1); unsigned subslices = devinfo->subslice_total;
/* The documentation for 3DSTATE_PS "Scratch Space Base Pointer" says: /* The documentation for 3DSTATE_PS "Scratch Space Base Pointer" says:
* *

View file

@ -940,9 +940,6 @@ anv_physical_device_try_create(struct anv_instance *instance,
device->has_userptr_probe = device->has_userptr_probe =
anv_gem_get_param(fd, I915_PARAM_HAS_USERPTR_PROBE); anv_gem_get_param(fd, I915_PARAM_HAS_USERPTR_PROBE);
/* GENs prior to 8 do not support EU/Subslice info */
device->subslice_total = intel_device_info_subslice_total(&device->info);
device->compiler = brw_compiler_create(NULL, &device->info); device->compiler = brw_compiler_create(NULL, &device->info);
if (device->compiler == NULL) { if (device->compiler == NULL) {
result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

View file

@ -960,8 +960,6 @@ struct anv_physical_device {
bool always_flush_cache; bool always_flush_cache;
uint32_t subslice_total;
struct { struct {
uint32_t family_count; uint32_t family_count;
struct anv_queue_family families[ANV_MAX_QUEUE_FAMILIES]; struct anv_queue_family families[ANV_MAX_QUEUE_FAMILIES];

View file

@ -5401,11 +5401,9 @@ genX(flush_pipeline_select)(struct anv_cmd_buffer *cmd_buffer,
* GPGPU and 3D are back-to-back and this seems to fix it. We don't * GPGPU and 3D are back-to-back and this seems to fix it. We don't
* really know why. * really know why.
*/ */
const uint32_t subslices =
MAX2(cmd_buffer->device->physical->subslice_total, 1);
anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_VFE_STATE), vfe) { anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_VFE_STATE), vfe) {
vfe.MaximumNumberofThreads = vfe.MaximumNumberofThreads =
devinfo->max_cs_threads * subslices - 1; devinfo->max_cs_threads * devinfo->subslice_total - 1;
vfe.NumberofURBEntries = 2; vfe.NumberofURBEntries = 2;
vfe.URBEntryAllocationSize = 2; vfe.URBEntryAllocationSize = 2;
} }

View file

@ -2589,14 +2589,12 @@ emit_compute_state(struct anv_compute_pipeline *pipeline,
const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
anv_pipeline_setup_l3_config(&pipeline->base, cs_prog_data->base.total_shared > 0); anv_pipeline_setup_l3_config(&pipeline->base, cs_prog_data->base.total_shared > 0);
const uint32_t subslices = MAX2(device->physical->subslice_total, 1);
const UNUSED struct anv_shader_bin *cs_bin = pipeline->cs; const UNUSED struct anv_shader_bin *cs_bin = pipeline->cs;
const struct intel_device_info *devinfo = &device->info; const struct intel_device_info *devinfo = &device->info;
anv_batch_emit(&pipeline->base.batch, GENX(CFE_STATE), cfe) { anv_batch_emit(&pipeline->base.batch, GENX(CFE_STATE), cfe) {
cfe.MaximumNumberofThreads = cfe.MaximumNumberofThreads =
devinfo->max_cs_threads * subslices - 1; devinfo->max_cs_threads * devinfo->subslice_total - 1;
cfe.ScratchSpaceBuffer = get_scratch_surf(&pipeline->base, cs_bin); cfe.ScratchSpaceBuffer = get_scratch_surf(&pipeline->base, cs_bin);
} }
} }
@ -2618,8 +2616,6 @@ emit_compute_state(struct anv_compute_pipeline *pipeline,
ALIGN(cs_prog_data->push.per_thread.regs * dispatch.threads + ALIGN(cs_prog_data->push.per_thread.regs * dispatch.threads +
cs_prog_data->push.cross_thread.regs, 2); cs_prog_data->push.cross_thread.regs, 2);
const uint32_t subslices = MAX2(device->physical->subslice_total, 1);
const struct anv_shader_bin *cs_bin = pipeline->cs; const struct anv_shader_bin *cs_bin = pipeline->cs;
anv_batch_emit(&pipeline->base.batch, GENX(MEDIA_VFE_STATE), vfe) { anv_batch_emit(&pipeline->base.batch, GENX(MEDIA_VFE_STATE), vfe) {
@ -2629,7 +2625,7 @@ emit_compute_state(struct anv_compute_pipeline *pipeline,
vfe.GPGPUMode = true; vfe.GPGPUMode = true;
#endif #endif
vfe.MaximumNumberofThreads = vfe.MaximumNumberofThreads =
devinfo->max_cs_threads * subslices - 1; devinfo->max_cs_threads * devinfo->subslice_total - 1;
vfe.NumberofURBEntries = GFX_VER <= 7 ? 0 : 2; vfe.NumberofURBEntries = GFX_VER <= 7 ? 0 : 2;
#if GFX_VER < 11 #if GFX_VER < 11
vfe.ResetGatewayTimer = true; vfe.ResetGatewayTimer = true;

View file

@ -497,9 +497,8 @@ brw_emit_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline)
/* We seem to have issues with geometry flickering when 3D and compute /* We seem to have issues with geometry flickering when 3D and compute
* are combined in the same batch and this appears to fix it. * are combined in the same batch and this appears to fix it.
*/ */
const uint32_t subslices = MAX2(brw->screen->subslice_total, 1);
const uint32_t maxNumberofThreads = const uint32_t maxNumberofThreads =
devinfo->max_cs_threads * subslices - 1; devinfo->max_cs_threads * devinfo->subslice_total - 1;
BEGIN_BATCH(9); BEGIN_BATCH(9);
OUT_BATCH(MEDIA_VFE_STATE << 16 | (9 - 2)); OUT_BATCH(MEDIA_VFE_STATE << 16 | (9 - 2));

View file

@ -455,7 +455,7 @@ brw_alloc_stage_scratch(struct brw_context *brw,
thread_count = devinfo->max_wm_threads; thread_count = devinfo->max_wm_threads;
break; break;
case MESA_SHADER_COMPUTE: { case MESA_SHADER_COMPUTE: {
unsigned subslices = MAX2(brw->screen->subslice_total, 1); unsigned subslices = devinfo->subslice_total;
/* The documentation for 3DSTATE_PS "Scratch Space Base Pointer" says: /* The documentation for 3DSTATE_PS "Scratch Space Base Pointer" says:
* *

View file

@ -2611,9 +2611,6 @@ __DRIconfig **brw_init_screen(__DRIscreen *dri_screen)
isl_device_init(&screen->isl_dev, &screen->devinfo, isl_device_init(&screen->isl_dev, &screen->devinfo,
screen->hw_has_swizzling); screen->hw_has_swizzling);
/* GENs prior to 8 do not support EU/Subslice info */
screen->subslice_total = intel_device_info_subslice_total(devinfo);
/* Gfx7-7.5 kernel requirements / command parser saga: /* Gfx7-7.5 kernel requirements / command parser saga:
* *
* - pre-v3.16: * - pre-v3.16:

View file

@ -107,11 +107,6 @@ struct brw_screen
*/ */
int cmd_parser_version; int cmd_parser_version;
/**
* Number of subslices reported by the I915_PARAM_SUBSLICE_TOTAL parameter
*/
int subslice_total;
bool mesa_format_supports_texture[MESA_FORMAT_COUNT]; bool mesa_format_supports_texture[MESA_FORMAT_COUNT];
bool mesa_format_supports_render[MESA_FORMAT_COUNT]; bool mesa_format_supports_render[MESA_FORMAT_COUNT];
enum isl_format mesa_to_isl_render_format[MESA_FORMAT_COUNT]; enum isl_format mesa_to_isl_render_format[MESA_FORMAT_COUNT];

View file

@ -4321,15 +4321,8 @@ genX(upload_cs_state)(struct brw_context *brw)
vfe.PerThreadScratchSpace = per_thread_scratch_value; vfe.PerThreadScratchSpace = per_thread_scratch_value;
} }
/* If brw->screen->subslice_total is greater than one, then vfe.MaximumNumberofThreads =
* devinfo->max_cs_threads stores number of threads per sub-slice; devinfo->max_cs_threads * devinfo->subslice_total - 1;
* thus we need to multiply by that number by subslices to get
* the actual maximum number of threads; the -1 is because the HW
* has a bias of 1 (would not make sense to say the maximum number
* of threads is 0).
*/
const uint32_t subslices = MAX2(brw->screen->subslice_total, 1);
vfe.MaximumNumberofThreads = devinfo->max_cs_threads * subslices - 1;
vfe.NumberofURBEntries = GFX_VER >= 8 ? 2 : 0; vfe.NumberofURBEntries = GFX_VER >= 8 ? 2 : 0;
#if GFX_VER < 11 #if GFX_VER < 11
vfe.ResetGatewayTimer = vfe.ResetGatewayTimer =