mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-04 02:40:11 +01:00
i965/gen9+: Switch thread scratch space to non-coherent stateless access.
The thread scratch space is thread-local, so using the full IA-coherent stateless surface index (255 since Gen8) is unnecessary and potentially expensive. On Gen8 and early steppings of Gen9 this is not a functional change because the kernel already sets bit 4 of HDC_CHICKEN0, which overrides all HDC memory access to be non-coherent in order to work around a hardware bug. This happens to fix a full system hang when running any spilling code on a pre-production SKL GT4e machine I have on my desk (forcing all HDC access to non-coherent from the kernel up to stepping F0 might be a good idea though regardless of this patch), and improves performance of the OglPSBump2 SynMark benchmark run with INTEL_DEBUG=spill_fs by 33% (11 runs, 5% significance) on a production SKL GT2 (on which HDC IA-coherency is apparently functional so it wouldn't make sense to disable globally). Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
This commit is contained in:
parent
bc8182808a
commit
55ffa64daf
3 changed files with 19 additions and 4 deletions
|
|
@ -315,6 +315,8 @@ void brw_oword_block_read(struct brw_codegen *p,
|
|||
uint32_t offset,
|
||||
uint32_t bind_table_index);
|
||||
|
||||
unsigned brw_scratch_surface_idx(const struct brw_codegen *p);
|
||||
|
||||
void brw_oword_block_read_scratch(struct brw_codegen *p,
|
||||
struct brw_reg dest,
|
||||
struct brw_reg mrf,
|
||||
|
|
|
|||
|
|
@ -1997,6 +1997,19 @@ void gen6_math(struct brw_codegen *p,
|
|||
brw_set_src1(p, insn, src1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the right surface index to access the thread scratch space using
|
||||
* stateless dataport messages.
|
||||
*/
|
||||
unsigned
|
||||
brw_scratch_surface_idx(const struct brw_codegen *p)
|
||||
{
|
||||
/* The scratch space is thread-local so IA coherency is unnecessary. */
|
||||
if (p->devinfo->gen >= 8)
|
||||
return GEN8_BTI_STATELESS_NON_COHERENT;
|
||||
else
|
||||
return BRW_BTI_STATELESS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Write a block of OWORDs (half a GRF each) from the scratch buffer,
|
||||
|
|
@ -2097,7 +2110,7 @@ void brw_oword_block_write_scratch(struct brw_codegen *p,
|
|||
|
||||
brw_set_dp_write_message(p,
|
||||
insn,
|
||||
255, /* binding table index (255=stateless) */
|
||||
brw_scratch_surface_idx(p),
|
||||
msg_control,
|
||||
msg_type,
|
||||
mlen,
|
||||
|
|
@ -2183,7 +2196,7 @@ brw_oword_block_read_scratch(struct brw_codegen *p,
|
|||
|
||||
brw_set_dp_read_message(p,
|
||||
insn,
|
||||
255, /* binding table index (255=stateless) */
|
||||
brw_scratch_surface_idx(p),
|
||||
msg_control,
|
||||
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
|
||||
BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
|
||||
|
|
|
|||
|
|
@ -802,7 +802,7 @@ generate_scratch_read(struct brw_codegen *p,
|
|||
if (devinfo->gen < 6)
|
||||
brw_inst_set_cond_modifier(devinfo, send, inst->base_mrf);
|
||||
brw_set_dp_read_message(p, send,
|
||||
255, /* binding table index: stateless access */
|
||||
brw_scratch_surface_idx(p),
|
||||
BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
|
||||
msg_type,
|
||||
BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
|
||||
|
|
@ -875,7 +875,7 @@ generate_scratch_write(struct brw_codegen *p,
|
|||
if (devinfo->gen < 6)
|
||||
brw_inst_set_cond_modifier(p->devinfo, send, inst->base_mrf);
|
||||
brw_set_dp_write_message(p, send,
|
||||
255, /* binding table index: stateless access */
|
||||
brw_scratch_surface_idx(p),
|
||||
BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
|
||||
msg_type,
|
||||
3, /* mlen */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue