intel: Fix pixel shader scratch space allocation on Gen9+ platforms.

We had missed a bit of errata - pixel shader (PS) scratch space needs to be
computed as if there were 4 subslices per slice, rather than the actual
count (the old values assumed 2 or 3).

This is a conservative backport of commit aaee3daa90.
It only increases the scratch amount, unlike the original commit, which
decreases it on Skylake GT1-3 to avoid overallocating.

Cc: "12.0 11.2" <mesa-stable@lists.freedesktop.org>
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
Kenneth Graunke 2016-11-08 10:29:08 -08:00
parent 352902218e
commit e6bc5248aa

View file

@ -336,7 +336,7 @@ static const struct brw_device_info brw_device_info_chv = {
.max_gs_threads = 336, \ .max_gs_threads = 336, \
.max_hs_threads = 336, \ .max_hs_threads = 336, \
.max_ds_threads = 336, \ .max_ds_threads = 336, \
.max_wm_threads = 64 * 9, \ .max_wm_threads = 64 * 12, \
.max_cs_threads = 56, \ .max_cs_threads = 56, \
.urb = { \ .urb = { \
.size = 384, \ .size = 384, \
@ -389,7 +389,7 @@ static const struct brw_device_info brw_device_info_bxt = {
.max_hs_threads = 112, .max_hs_threads = 112,
.max_ds_threads = 112, .max_ds_threads = 112,
.max_gs_threads = 112, .max_gs_threads = 112,
.max_wm_threads = 64 * 3, .max_wm_threads = 64 * 4,
.max_cs_threads = 6 * 6, .max_cs_threads = 6 * 6,
.urb = { .urb = {
.size = 192, .size = 192,
@ -412,7 +412,7 @@ static const struct brw_device_info brw_device_info_bxt_2x6 = {
.max_hs_threads = 56, /* XXX: guess */ .max_hs_threads = 56, /* XXX: guess */
.max_ds_threads = 56, .max_ds_threads = 56,
.max_gs_threads = 56, .max_gs_threads = 56,
.max_wm_threads = 64 * 2, .max_wm_threads = 64 * 4,
.max_cs_threads = 6 * 6, .max_cs_threads = 6 * 6,
.urb = { .urb = {
.size = 128, .size = 128,
@ -439,7 +439,7 @@ static const struct brw_device_info brw_device_info_kbl_gt1 = {
.gt = 1, .gt = 1,
.max_cs_threads = 7 * 6, .max_cs_threads = 7 * 6,
.max_wm_threads = KBL_MAX_THREADS_PER_PSD * 2, .max_wm_threads = KBL_MAX_THREADS_PER_PSD * 4,
.urb.size = 192, .urb.size = 192,
.num_slices = 1, .num_slices = 1,
}; };
@ -449,7 +449,7 @@ static const struct brw_device_info brw_device_info_kbl_gt1_5 = {
.gt = 1, .gt = 1,
.max_cs_threads = 7 * 6, .max_cs_threads = 7 * 6,
.max_wm_threads = KBL_MAX_THREADS_PER_PSD * 3, .max_wm_threads = KBL_MAX_THREADS_PER_PSD * 4,
.num_slices = 1, .num_slices = 1,
}; };
@ -457,7 +457,7 @@ static const struct brw_device_info brw_device_info_kbl_gt2 = {
GEN9_FEATURES, GEN9_FEATURES,
.gt = 2, .gt = 2,
.max_wm_threads = KBL_MAX_THREADS_PER_PSD * 3, .max_wm_threads = KBL_MAX_THREADS_PER_PSD * 4,
.num_slices = 1, .num_slices = 1,
}; };
@ -465,7 +465,7 @@ static const struct brw_device_info brw_device_info_kbl_gt3 = {
GEN9_FEATURES, GEN9_FEATURES,
.gt = 3, .gt = 3,
.max_wm_threads = KBL_MAX_THREADS_PER_PSD * 6, .max_wm_threads = KBL_MAX_THREADS_PER_PSD * 8,
.num_slices = 2, .num_slices = 2,
}; };
@ -473,7 +473,7 @@ static const struct brw_device_info brw_device_info_kbl_gt4 = {
GEN9_FEATURES, GEN9_FEATURES,
.gt = 4, .gt = 4,
.max_wm_threads = KBL_MAX_THREADS_PER_PSD * 9, .max_wm_threads = KBL_MAX_THREADS_PER_PSD * 12,
/* /*
* From the "L3 Allocation and Programming" documentation: * From the "L3 Allocation and Programming" documentation:
* *