i965: Keep track of the per-thread scratch allocation in brw_stage_state.
This will be used to find out what per-thread slot size a previously
allocated scratch BO was used with in order to fix a hardware race
condition without introducing additional stalls or memory allocations.
Instead of calling brw_get_scratch_bo() manually from the various
codegen functions, call a new helper function that keeps track of the
per-thread scratch size and conditionally allocates a larger scratch
BO.
v2: Handle BO allocation manually instead of relying on
brw_get_scratch_bo (Ken).
Cc: <mesa-stable@lists.freedesktop.org>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
parent 013ae4a70a
commit d960284e44

8 changed files with 68 additions and 47 deletions
src/mesa/drivers/dri/i965/brw_context.h
@@ -677,6 +677,12 @@ struct brw_stage_state
     */
    drm_intel_bo *scratch_bo;

+   /**
+    * Scratch slot size allocated for each thread in the buffer object given
+    * by \c scratch_bo.
+    */
+   uint32_t per_thread_scratch;
+
    /** Offset in the program cache to the program */
    uint32_t prog_offset;
@@ -1481,6 +1487,10 @@ brw_get_scratch_size(int size)
 }
 void brw_get_scratch_bo(struct brw_context *brw,
                         drm_intel_bo **scratch_bo, int size);
+void brw_alloc_stage_scratch(struct brw_context *brw,
+                             struct brw_stage_state *stage_state,
+                             unsigned per_thread_size,
+                             unsigned thread_count);
 void brw_init_shader_time(struct brw_context *brw);
 int brw_get_shader_time_index(struct brw_context *brw,
                               struct gl_shader_program *shader_prog,
src/mesa/drivers/dri/i965/brw_cs.c
@@ -148,31 +148,29 @@ brw_codegen_cs_prog(struct brw_context *brw,
       }
    }

-   if (prog_data.base.total_scratch) {
-      const unsigned subslices = MAX2(brw->intelScreen->subslice_total, 1);
+   const unsigned subslices = MAX2(brw->intelScreen->subslice_total, 1);

-      /* WaCSScratchSize:hsw
-       *
-       * Haswell's scratch space address calculation appears to be sparse
-       * rather than tightly packed. The Thread ID has bits indicating
-       * which subslice, EU within a subslice, and thread within an EU
-       * it is. There's a maximum of two slices and two subslices, so these
-       * can be stored with a single bit. Even though there are only 10 EUs
-       * per subslice, this is stored in 4 bits, so there's an effective
-       * maximum value of 16 EUs. Similarly, although there are only 7
-       * threads per EU, this is stored in a 3 bit number, giving an effective
-       * maximum value of 8 threads per EU.
-       *
-       * This means that we need to use 16 * 8 instead of 10 * 7 for the
-       * number of threads per subslice.
-       */
-      const unsigned scratch_ids_per_subslice =
-         brw->is_haswell ? 16 * 8 : brw->max_cs_threads;
+   /* WaCSScratchSize:hsw
+    *
+    * Haswell's scratch space address calculation appears to be sparse
+    * rather than tightly packed. The Thread ID has bits indicating
+    * which subslice, EU within a subslice, and thread within an EU
+    * it is. There's a maximum of two slices and two subslices, so these
+    * can be stored with a single bit. Even though there are only 10 EUs
+    * per subslice, this is stored in 4 bits, so there's an effective
+    * maximum value of 16 EUs. Similarly, although there are only 7
+    * threads per EU, this is stored in a 3 bit number, giving an effective
+    * maximum value of 8 threads per EU.
+    *
+    * This means that we need to use 16 * 8 instead of 10 * 7 for the
+    * number of threads per subslice.
+    */
+   const unsigned scratch_ids_per_subslice =
+      brw->is_haswell ? 16 * 8 : brw->max_cs_threads;

-      brw_get_scratch_bo(brw, &brw->cs.base.scratch_bo,
-                         prog_data.base.total_scratch *
-                         scratch_ids_per_subslice * subslices);
-   }
+   brw_alloc_stage_scratch(brw, &brw->cs.base,
+                           prog_data.base.total_scratch,
+                           scratch_ids_per_subslice * subslices);

    if (unlikely(INTEL_DEBUG & DEBUG_CS))
       fprintf(stderr, "\n");
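The 16 * 8 figure above matters because the hardware derives each thread's scratch offset from its (sparse) thread ID, so the buffer must be sized for the largest encodable ID rather than for the number of threads that can actually run. A worked example with assumed numbers (2 KiB of per-thread scratch and 2 subslices, neither value taken from the patch):

/* Hypothetical sizing example; total_scratch and subslice count are
 * made-up values, not from the patch. */
const unsigned per_thread_scratch = 2048;         /* bytes per thread */
const unsigned subslices = 2;
const unsigned scratch_ids_per_subslice = 16 * 8; /* Haswell: sparse IDs */
const unsigned bo_size =
   per_thread_scratch * scratch_ids_per_subslice * subslices;
/* bo_size = 2048 * 128 * 2 = 524288 bytes (512 KiB). Sizing for the
 * 10 * 7 = 70 threads that can really run would give only 286720
 * bytes, and threads with high sparse IDs would access scratch
 * memory past the end of the buffer. */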
src/mesa/drivers/dri/i965/brw_gs.c
@@ -180,11 +180,9 @@ brw_codegen_gs_prog(struct brw_context *brw,
    }

    /* Scratch space is used for register spilling */
-   if (prog_data.base.base.total_scratch) {
-      brw_get_scratch_bo(brw, &stage_state->scratch_bo,
-                         prog_data.base.base.total_scratch *
-                         brw->max_gs_threads);
-   }
+   brw_alloc_stage_scratch(brw, stage_state,
+                           prog_data.base.base.total_scratch,
+                           brw->max_gs_threads);

    brw_upload_cache(&brw->cache, BRW_CACHE_GS_PROG,
                     key, sizeof(*key),
src/mesa/drivers/dri/i965/brw_program.c
@@ -345,6 +345,28 @@ brw_get_scratch_bo(struct brw_context *brw,
    }
 }

+/**
+ * Reserve enough scratch space for the given stage to hold \p per_thread_size
+ * bytes times the given \p thread_count.
+ */
+void
+brw_alloc_stage_scratch(struct brw_context *brw,
+                        struct brw_stage_state *stage_state,
+                        unsigned per_thread_size,
+                        unsigned thread_count)
+{
+   if (stage_state->per_thread_scratch < per_thread_size) {
+      stage_state->per_thread_scratch = per_thread_size;
+
+      if (stage_state->scratch_bo)
+         drm_intel_bo_unreference(stage_state->scratch_bo);
+
+      stage_state->scratch_bo =
+         drm_intel_bo_alloc(brw->bufmgr, "shader scratch space",
+                            per_thread_size * thread_count, 4096);
+   }
+}
+
 void brwInitFragProgFuncs( struct dd_function_table *functions )
 {
    assert(functions->ProgramStringNotify == _tnl_program_string);
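Note the grow-only policy in the helper above: the BO is reallocated only when per_thread_size exceeds the recorded high-water mark, so shrinking requests reuse the existing, larger buffer. A hypothetical call sequence (the sizes are illustrative, not from the patch):

/* Illustrative only; per-thread sizes are made up. */
brw_alloc_stage_scratch(brw, &brw->vs.base, 1024, brw->max_vs_threads);
   /* first use: allocates a new scratch BO */
brw_alloc_stage_scratch(brw, &brw->vs.base, 512, brw->max_vs_threads);
   /* 512 <= 1024: keeps the existing BO, no stall or allocation */
brw_alloc_stage_scratch(brw, &brw->vs.base, 2048, brw->max_vs_threads);
   /* 2048 > 1024: unreferences the old BO and allocates a larger one */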
src/mesa/drivers/dri/i965/brw_tcs.c
@@ -294,11 +294,9 @@ brw_codegen_tcs_prog(struct brw_context *brw,
    }

    /* Scratch space is used for register spilling */
-   if (prog_data.base.base.total_scratch) {
-      brw_get_scratch_bo(brw, &stage_state->scratch_bo,
-                         prog_data.base.base.total_scratch *
-                         brw->max_hs_threads);
-   }
+   brw_alloc_stage_scratch(brw, stage_state,
+                           prog_data.base.base.total_scratch,
+                           brw->max_hs_threads);

    brw_upload_cache(&brw->cache, BRW_CACHE_TCS_PROG,
                     key, sizeof(*key),
src/mesa/drivers/dri/i965/brw_tes.c
@@ -214,11 +214,9 @@ brw_codegen_tes_prog(struct brw_context *brw,
    }

    /* Scratch space is used for register spilling */
-   if (prog_data.base.base.total_scratch) {
-      brw_get_scratch_bo(brw, &stage_state->scratch_bo,
-                         prog_data.base.base.total_scratch *
-                         brw->max_ds_threads);
-   }
+   brw_alloc_stage_scratch(brw, stage_state,
+                           prog_data.base.base.total_scratch,
+                           brw->max_ds_threads);

    brw_upload_cache(&brw->cache, BRW_CACHE_TES_PROG,
                     key, sizeof(*key),
src/mesa/drivers/dri/i965/brw_vs.c
@@ -208,11 +208,9 @@ brw_codegen_vs_prog(struct brw_context *brw,
    }

    /* Scratch space is used for register spilling */
-   if (prog_data.base.base.total_scratch) {
-      brw_get_scratch_bo(brw, &brw->vs.base.scratch_bo,
-                         prog_data.base.base.total_scratch *
-                         brw->max_vs_threads);
-   }
+   brw_alloc_stage_scratch(brw, &brw->vs.base,
+                           prog_data.base.base.total_scratch,
+                           brw->max_vs_threads);

    brw_upload_cache(&brw->cache, BRW_CACHE_VS_PROG,
                     key, sizeof(struct brw_vs_prog_key),
src/mesa/drivers/dri/i965/brw_wm.c
@@ -163,10 +163,9 @@ brw_codegen_wm_prog(struct brw_context *brw,
       }
    }

-   if (prog_data.base.total_scratch) {
-      brw_get_scratch_bo(brw, &brw->wm.base.scratch_bo,
-                         prog_data.base.total_scratch * brw->max_wm_threads);
-   }
+   brw_alloc_stage_scratch(brw, &brw->wm.base,
+                           prog_data.base.total_scratch,
+                           brw->max_wm_threads);

    if (unlikely(INTEL_DEBUG & DEBUG_WM))
       fprintf(stderr, "\n");