mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 11:38:05 +02:00
i965: Hard code CS scratch_ids_per_subslice for Cherryview
Ken suggested that we might be underallocating scratch space on HD
400. Allocating scratch space as though there were actually 8 EUs
seems to help with a GPU hang seen on synmark CSDof.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104636
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105290
Cc: <mesa-stable@lists.freedesktop.org>
Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Tested-by: Eero Tamminen <eero.t.tamminen@intel.com>
(cherry picked from commit 06e3bd02c0)
This commit is contained in:
parent
aaefff1c6f
commit
5baea7bb20
1 changed files with 27 additions and 17 deletions
|
|
@ -360,23 +360,33 @@ brw_alloc_stage_scratch(struct brw_context *brw,
|
|||
case MESA_SHADER_COMPUTE: {
|
||||
const unsigned subslices = MAX2(brw->screen->subslice_total, 1);
|
||||
|
||||
/* WaCSScratchSize:hsw
|
||||
*
|
||||
* Haswell's scratch space address calculation appears to be sparse
|
||||
* rather than tightly packed. The Thread ID has bits indicating
|
||||
* which subslice, EU within a subslice, and thread within an EU
|
||||
* it is. There's a maximum of two slices and two subslices, so these
|
||||
* can be stored with a single bit. Even though there are only 10 EUs
|
||||
* per subslice, this is stored in 4 bits, so there's an effective
|
||||
* maximum value of 16 EUs. Similarly, although there are only 7
|
||||
* threads per EU, this is stored in a 3 bit number, giving an effective
|
||||
* maximum value of 8 threads per EU.
|
||||
*
|
||||
* This means that we need to use 16 * 8 instead of 10 * 7 for the
|
||||
* number of threads per subslice.
|
||||
*/
|
||||
const unsigned scratch_ids_per_subslice =
|
||||
devinfo->is_haswell ? 16 * 8 : devinfo->max_cs_threads;
|
||||
unsigned scratch_ids_per_subslice;
|
||||
if (devinfo->is_haswell) {
|
||||
/* WaCSScratchSize:hsw
|
||||
*
|
||||
* Haswell's scratch space address calculation appears to be sparse
|
||||
* rather than tightly packed. The Thread ID has bits indicating
|
||||
* which subslice, EU within a subslice, and thread within an EU it
|
||||
* is. There's a maximum of two slices and two subslices, so these
|
||||
* can be stored with a single bit. Even though there are only 10 EUs
|
||||
* per subslice, this is stored in 4 bits, so there's an effective
|
||||
* maximum value of 16 EUs. Similarly, although there are only 7
|
||||
* threads per EU, this is stored in a 3 bit number, giving an
|
||||
* effective maximum value of 8 threads per EU.
|
||||
*
|
||||
* This means that we need to use 16 * 8 instead of 10 * 7 for the
|
||||
* number of threads per subslice.
|
||||
*/
|
||||
scratch_ids_per_subslice = 16 * 8;
|
||||
} else if (devinfo->is_cherryview) {
|
||||
/* Cherryview devices have either 6 or 8 EUs per subslice, and each
|
||||
* EU has 7 threads. The 6 EU devices appear to calculate thread IDs
|
||||
* as if they had 8 EUs.
|
||||
*/
|
||||
scratch_ids_per_subslice = 8 * 7;
|
||||
} else {
|
||||
scratch_ids_per_subslice = devinfo->max_cs_threads;
|
||||
}
|
||||
|
||||
thread_count = scratch_ids_per_subslice * subslices;
|
||||
break;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue