mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 15:38:09 +02:00
i965: Calculate thread_count in brw_alloc_stage_scratch
Previously, thread_count was passed in by each stage after some stage-specific calculations. Those stage-specific calculations have been moved into brw_alloc_stage_scratch, which will allow the shader cache to use the same calculations.

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
parent
f082d7f64f
commit
f9d5a7add4
8 changed files with 62 additions and 45 deletions
|
|
@@ -1345,8 +1345,7 @@ void brw_get_scratch_bo(struct brw_context *brw,
|
|||
struct brw_bo **scratch_bo, int size);
|
||||
void brw_alloc_stage_scratch(struct brw_context *brw,
|
||||
struct brw_stage_state *stage_state,
|
||||
unsigned per_thread_size,
|
||||
unsigned thread_count);
|
||||
unsigned per_thread_size);
|
||||
void brw_init_shader_time(struct brw_context *brw);
|
||||
int brw_get_shader_time_index(struct brw_context *brw,
|
||||
struct gl_program *prog,
|
||||
|
|
|
|||
|
|
@@ -114,29 +114,7 @@ brw_codegen_cs_prog(struct brw_context *brw,
|
|||
}
|
||||
}
|
||||
|
||||
const unsigned subslices = MAX2(brw->screen->subslice_total, 1);
|
||||
|
||||
/* WaCSScratchSize:hsw
|
||||
*
|
||||
* Haswell's scratch space address calculation appears to be sparse
|
||||
* rather than tightly packed. The Thread ID has bits indicating
|
||||
* which subslice, EU within a subslice, and thread within an EU
|
||||
* it is. There's a maximum of two slices and two subslices, so these
|
||||
* can be stored with a single bit. Even though there are only 10 EUs
|
||||
* per subslice, this is stored in 4 bits, so there's an effective
|
||||
* maximum value of 16 EUs. Similarly, although there are only 7
|
||||
* threads per EU, this is stored in a 3 bit number, giving an effective
|
||||
* maximum value of 8 threads per EU.
|
||||
*
|
||||
* This means that we need to use 16 * 8 instead of 10 * 7 for the
|
||||
* number of threads per subslice.
|
||||
*/
|
||||
const unsigned scratch_ids_per_subslice =
|
||||
devinfo->is_haswell ? 16 * 8 : devinfo->max_cs_threads;
|
||||
|
||||
brw_alloc_stage_scratch(brw, &brw->cs.base,
|
||||
prog_data.base.total_scratch,
|
||||
scratch_ids_per_subslice * subslices);
|
||||
brw_alloc_stage_scratch(brw, &brw->cs.base, prog_data.base.total_scratch);
|
||||
|
||||
/* The param and pull_param arrays will be freed by the shader cache. */
|
||||
ralloc_steal(NULL, prog_data.base.param);
|
||||
|
|
|
|||
|
|
@@ -138,8 +138,7 @@ brw_codegen_gs_prog(struct brw_context *brw,
|
|||
|
||||
/* Scratch space is used for register spilling */
|
||||
brw_alloc_stage_scratch(brw, stage_state,
|
||||
prog_data.base.base.total_scratch,
|
||||
devinfo->max_gs_threads);
|
||||
prog_data.base.base.total_scratch);
|
||||
|
||||
/* The param and pull_param arrays will be freed by the shader cache. */
|
||||
ralloc_steal(NULL, prog_data.base.base.param);
|
||||
|
|
|
|||
|
|
@@ -328,19 +328,65 @@ brw_get_scratch_bo(struct brw_context *brw,
|
|||
void
|
||||
brw_alloc_stage_scratch(struct brw_context *brw,
|
||||
struct brw_stage_state *stage_state,
|
||||
unsigned per_thread_size,
|
||||
unsigned thread_count)
|
||||
unsigned per_thread_size)
|
||||
{
|
||||
if (stage_state->per_thread_scratch < per_thread_size) {
|
||||
stage_state->per_thread_scratch = per_thread_size;
|
||||
if (stage_state->per_thread_scratch >= per_thread_size)
|
||||
return;
|
||||
|
||||
if (stage_state->scratch_bo)
|
||||
brw_bo_unreference(stage_state->scratch_bo);
|
||||
stage_state->per_thread_scratch = per_thread_size;
|
||||
|
||||
stage_state->scratch_bo =
|
||||
brw_bo_alloc(brw->bufmgr, "shader scratch space",
|
||||
per_thread_size * thread_count, 4096);
|
||||
if (stage_state->scratch_bo)
|
||||
brw_bo_unreference(stage_state->scratch_bo);
|
||||
|
||||
const struct gen_device_info *devinfo = &brw->screen->devinfo;
|
||||
unsigned thread_count;
|
||||
switch(stage_state->stage) {
|
||||
case MESA_SHADER_VERTEX:
|
||||
thread_count = devinfo->max_vs_threads;
|
||||
break;
|
||||
case MESA_SHADER_TESS_CTRL:
|
||||
thread_count = devinfo->max_tcs_threads;
|
||||
break;
|
||||
case MESA_SHADER_TESS_EVAL:
|
||||
thread_count = devinfo->max_tes_threads;
|
||||
break;
|
||||
case MESA_SHADER_GEOMETRY:
|
||||
thread_count = devinfo->max_gs_threads;
|
||||
break;
|
||||
case MESA_SHADER_FRAGMENT:
|
||||
thread_count = devinfo->max_wm_threads;
|
||||
break;
|
||||
case MESA_SHADER_COMPUTE: {
|
||||
const unsigned subslices = MAX2(brw->screen->subslice_total, 1);
|
||||
|
||||
/* WaCSScratchSize:hsw
|
||||
*
|
||||
* Haswell's scratch space address calculation appears to be sparse
|
||||
* rather than tightly packed. The Thread ID has bits indicating
|
||||
* which subslice, EU within a subslice, and thread within an EU
|
||||
* it is. There's a maximum of two slices and two subslices, so these
|
||||
* can be stored with a single bit. Even though there are only 10 EUs
|
||||
* per subslice, this is stored in 4 bits, so there's an effective
|
||||
* maximum value of 16 EUs. Similarly, although there are only 7
|
||||
* threads per EU, this is stored in a 3 bit number, giving an effective
|
||||
* maximum value of 8 threads per EU.
|
||||
*
|
||||
* This means that we need to use 16 * 8 instead of 10 * 7 for the
|
||||
* number of threads per subslice.
|
||||
*/
|
||||
const unsigned scratch_ids_per_subslice =
|
||||
devinfo->is_haswell ? 16 * 8 : devinfo->max_cs_threads;
|
||||
|
||||
thread_count = scratch_ids_per_subslice * subslices;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
unreachable("Unsupported stage!");
|
||||
}
|
||||
|
||||
stage_state->scratch_bo =
|
||||
brw_bo_alloc(brw->bufmgr, "shader scratch space",
|
||||
per_thread_size * thread_count, 4096);
|
||||
}
|
||||
|
||||
void brwInitFragProgFuncs( struct dd_function_table *functions )
|
||||
|
|
|
|||
|
|
@@ -259,8 +259,7 @@ brw_codegen_tcs_prog(struct brw_context *brw, struct brw_program *tcp,
|
|||
|
||||
/* Scratch space is used for register spilling */
|
||||
brw_alloc_stage_scratch(brw, stage_state,
|
||||
prog_data.base.base.total_scratch,
|
||||
devinfo->max_tcs_threads);
|
||||
prog_data.base.base.total_scratch);
|
||||
|
||||
/* The param and pull_param arrays will be freed by the shader cache. */
|
||||
ralloc_steal(NULL, prog_data.base.base.param);
|
||||
|
|
|
|||
|
|
@@ -129,8 +129,7 @@ brw_codegen_tes_prog(struct brw_context *brw,
|
|||
|
||||
/* Scratch space is used for register spilling */
|
||||
brw_alloc_stage_scratch(brw, stage_state,
|
||||
prog_data.base.base.total_scratch,
|
||||
devinfo->max_tes_threads);
|
||||
prog_data.base.base.total_scratch);
|
||||
|
||||
/* The param and pull_param arrays will be freed by the shader cache. */
|
||||
ralloc_steal(NULL, prog_data.base.base.param);
|
||||
|
|
|
|||
|
|
@@ -248,8 +248,7 @@ brw_codegen_vs_prog(struct brw_context *brw,
|
|||
|
||||
/* Scratch space is used for register spilling */
|
||||
brw_alloc_stage_scratch(brw, &brw->vs.base,
|
||||
prog_data.base.base.total_scratch,
|
||||
devinfo->max_vs_threads);
|
||||
prog_data.base.base.total_scratch);
|
||||
|
||||
/* The param and pull_param arrays will be freed by the shader cache. */
|
||||
ralloc_steal(NULL, prog_data.base.base.param);
|
||||
|
|
|
|||
|
|
@@ -209,9 +209,7 @@ brw_codegen_wm_prog(struct brw_context *brw,
|
|||
}
|
||||
}
|
||||
|
||||
brw_alloc_stage_scratch(brw, &brw->wm.base,
|
||||
prog_data.base.total_scratch,
|
||||
devinfo->max_wm_threads);
|
||||
brw_alloc_stage_scratch(brw, &brw->wm.base, prog_data.base.total_scratch);
|
||||
|
||||
if (unlikely((INTEL_DEBUG & DEBUG_WM) && fp->program.is_arb_asm))
|
||||
fprintf(stderr, "\n");
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue