mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 15:10:10 +01:00
intel/xehp: Switch to coarser cross-slice pixel hashing with table permutation.
The coarser 32x32 cross-slice hashing mode seems to lead to better L1 and L2 utilization due to the improved execution locality. However, it can also lead to a bottleneck in a single slice, especially in workloads that concentrate heavy rendering in small areas of the screen (e.g. SynMark2 OglGeomPoint, OglTerrain*). This effect is mitigated here by performing a permutation of the pixel pipe hashing tables that ensures that adjacent rows map to pixel pipes as far away from each other as possible in the caching hierarchy. Reviewed-by: Caio Oliveira <caio.oliveira@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13569>
This commit is contained in:
parent
ef675e6857
commit
074bde9989
4 changed files with 51 additions and 3 deletions
|
|
@ -939,6 +939,9 @@ upload_pixel_hashing_tables(struct iris_batch *batch)
|
|||
iris_emit_cmd(batch, GENX(3DSTATE_3D_MODE), mode) {
|
||||
mode.SliceHashingTableEnable = true;
|
||||
mode.SliceHashingTableEnableMask = true;
|
||||
mode.CrossSliceHashingMode = (util_bitcount(ppipe_mask) > 1 ?
|
||||
hashing32x32 : NormalMode);
|
||||
mode.CrossSliceHashingModeMask = -1;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
|
|
|||
|
|
@ -85,12 +85,54 @@ intel_compute_pixel_hash_table_nway(unsigned n, unsigned m, uint32_t mask,
|
|||
|
||||
assert(num_ids > 0);
|
||||
|
||||
/* Compute a permutation of the above indices that assigns indices
|
||||
* as far apart as possible to adjacent entries. This permutation is
|
||||
* designed to be equivalent to the bit reversal of each index in
|
||||
* cases where num_ids is a power of two, but doesn't actually
|
||||
* require it to be a power of two in order to satisfy the required
|
||||
* properties (which is necessary to handle configurations with
|
||||
* arbitrary non-power of two fusing). By construction, flipping
|
||||
* bit l of its input will lead to a change in its result of the
|
||||
* order of num_ids/2^(l+1) (see variable t below). The
|
||||
* bijectivity of this permutation can be verified easily by
|
||||
* induction.
|
||||
*/
|
||||
const unsigned bits = util_logbase2_ceil(num_ids);
|
||||
unsigned swz[ARRAY_SIZE(phys_ids)];
|
||||
|
||||
for (unsigned k = 0; k < num_ids; k++) {
|
||||
unsigned t = num_ids;
|
||||
unsigned s = 0;
|
||||
|
||||
for (unsigned l = 0; l < bits; l++) {
|
||||
if (k & (1u << l)) {
|
||||
s += (t + 1) >> 1;
|
||||
t >>= 1;
|
||||
} else {
|
||||
t = (t + 1) >> 1;
|
||||
}
|
||||
}
|
||||
|
||||
swz[k] = s;
|
||||
}
|
||||
|
||||
/* Initialize the table with the cyclic repetition of a
|
||||
* num_ids-periodic pattern.
|
||||
*
|
||||
* Note that the swz permutation only affects the ordering of rows.
|
||||
* This is intentional in order to minimize the size of the
|
||||
* contiguous area that needs to be rendered in parallel in order
|
||||
* to utilize the whole GPU: A rendering rectangle of width W will
|
||||
* need to be at least H blocks high, where H is bounded by
|
||||
* 2^ceil(log2(num_ids/W)) thanks to the above definition of the swz
|
||||
* permutation.
|
||||
*/
|
||||
for (unsigned i = 0; i < n; i++) {
|
||||
for (unsigned j = 0; j < m; j++)
|
||||
p[j + m * i] = phys_ids[(j + i) % num_ids];
|
||||
const unsigned k = i % num_ids;
|
||||
assert(swz[k] < num_ids);
|
||||
for (unsigned j = 0; j < m; j++) {
|
||||
p[j + m * i] = phys_ids[(j + swz[k]) % num_ids];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1372,7 +1372,7 @@
|
|||
<field name="3D Scoreboard Hashing Mode" start="36" end="36" type="bool"/>
|
||||
<field name="Subslice Hashing Table Enable" start="37" end="37" type="bool"/>
|
||||
<field name="Slice Hashing Table Enable" start="38" end="38" type="bool"/>
|
||||
<field name="Cross Slice Hashing Mode Mask" start="48" end="49" type="uint"/>
|
||||
<field name="Cross Slice Hashing Mode Mask" start="48" end="49" type="int"/>
|
||||
<field name="3D Scoreboard Hashing Mode Mask" start="52" end="52" type="bool"/>
|
||||
<field name="Subslice Hashing Table Enable Mask" start="53" end="53" type="bool"/>
|
||||
<field name="Slice Hashing Table Enable Mask" start="54" end="54" type="bool"/>
|
||||
|
|
|
|||
|
|
@ -151,6 +151,9 @@ genX(emit_slice_hashing_state)(struct anv_device *device,
|
|||
anv_batch_emit(batch, GENX(3DSTATE_3D_MODE), mode) {
|
||||
mode.SliceHashingTableEnable = true;
|
||||
mode.SliceHashingTableEnableMask = true;
|
||||
mode.CrossSliceHashingMode = (util_bitcount(ppipe_mask) > 1 ?
|
||||
hashing32x32 : NormalMode);
|
||||
mode.CrossSliceHashingModeMask = -1;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue