mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 19:40:10 +01:00
intel/compiler: Add and use helpers for working with KSP indices
The pixel shader dispatch table is kind of a confusing mess. This adds some helpers for dealing with it and for easily extracting the correct data from wm_prog_data. Reviewed-by: Matt Turner <mattst88@gmail.com>
This commit is contained in:
parent
85750348bc
commit
9d78abbef8
5 changed files with 186 additions and 58 deletions
|
|
@ -763,17 +763,22 @@ blorp_emit_ps_config(struct blorp_batch *batch,
|
|||
}
|
||||
|
||||
if (prog_data) {
|
||||
ps.DispatchGRFStartRegisterForConstantSetupData0 =
|
||||
prog_data->base.dispatch_grf_start_reg;
|
||||
ps.DispatchGRFStartRegisterForConstantSetupData2 =
|
||||
prog_data->dispatch_grf_start_reg_2;
|
||||
|
||||
ps._8PixelDispatchEnable = prog_data->dispatch_8;
|
||||
ps._16PixelDispatchEnable = prog_data->dispatch_16;
|
||||
|
||||
ps.KernelStartPointer0 = params->wm_prog_kernel;
|
||||
ps.KernelStartPointer2 =
|
||||
params->wm_prog_kernel + prog_data->prog_offset_2;
|
||||
ps.DispatchGRFStartRegisterForConstantSetupData0 =
|
||||
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0);
|
||||
ps.DispatchGRFStartRegisterForConstantSetupData1 =
|
||||
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 1);
|
||||
ps.DispatchGRFStartRegisterForConstantSetupData2 =
|
||||
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 2);
|
||||
|
||||
ps.KernelStartPointer0 = params->wm_prog_kernel +
|
||||
brw_wm_prog_data_prog_offset(prog_data, ps, 0);
|
||||
ps.KernelStartPointer1 = params->wm_prog_kernel +
|
||||
brw_wm_prog_data_prog_offset(prog_data, ps, 1);
|
||||
ps.KernelStartPointer2 = params->wm_prog_kernel +
|
||||
brw_wm_prog_data_prog_offset(prog_data, ps, 2);
|
||||
}
|
||||
|
||||
/* 3DSTATE_PS expects the number of threads per PSD, which is always 64
|
||||
|
|
@ -867,18 +872,23 @@ blorp_emit_ps_config(struct blorp_batch *batch,
|
|||
#endif
|
||||
|
||||
if (prog_data) {
|
||||
ps.DispatchGRFStartRegisterForConstantSetupData0 =
|
||||
prog_data->base.dispatch_grf_start_reg;
|
||||
ps.DispatchGRFStartRegisterForConstantSetupData2 =
|
||||
prog_data->dispatch_grf_start_reg_2;
|
||||
|
||||
ps.KernelStartPointer0 = params->wm_prog_kernel;
|
||||
ps.KernelStartPointer2 =
|
||||
params->wm_prog_kernel + prog_data->prog_offset_2;
|
||||
|
||||
ps._8PixelDispatchEnable = prog_data->dispatch_8;
|
||||
ps._16PixelDispatchEnable = prog_data->dispatch_16;
|
||||
|
||||
ps.DispatchGRFStartRegisterForConstantSetupData0 =
|
||||
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0);
|
||||
ps.DispatchGRFStartRegisterForConstantSetupData1 =
|
||||
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 1);
|
||||
ps.DispatchGRFStartRegisterForConstantSetupData2 =
|
||||
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 2);
|
||||
|
||||
ps.KernelStartPointer0 = params->wm_prog_kernel +
|
||||
brw_wm_prog_data_prog_offset(prog_data, ps, 0);
|
||||
ps.KernelStartPointer1 = params->wm_prog_kernel +
|
||||
brw_wm_prog_data_prog_offset(prog_data, ps, 1);
|
||||
ps.KernelStartPointer2 = params->wm_prog_kernel +
|
||||
brw_wm_prog_data_prog_offset(prog_data, ps, 2);
|
||||
|
||||
ps.AttributeEnable = prog_data->num_varying_inputs > 0;
|
||||
} else {
|
||||
/* Gen7 hardware gets angry if we don't enable at least one dispatch
|
||||
|
|
@ -929,18 +939,23 @@ blorp_emit_ps_config(struct blorp_batch *batch,
|
|||
if (prog_data) {
|
||||
wm.ThreadDispatchEnable = true;
|
||||
|
||||
wm.DispatchGRFStartRegisterForConstantSetupData0 =
|
||||
prog_data->base.dispatch_grf_start_reg;
|
||||
wm.DispatchGRFStartRegisterForConstantSetupData2 =
|
||||
prog_data->dispatch_grf_start_reg_2;
|
||||
|
||||
wm.KernelStartPointer0 = params->wm_prog_kernel;
|
||||
wm.KernelStartPointer2 =
|
||||
params->wm_prog_kernel + prog_data->prog_offset_2;
|
||||
|
||||
wm._8PixelDispatchEnable = prog_data->dispatch_8;
|
||||
wm._16PixelDispatchEnable = prog_data->dispatch_16;
|
||||
|
||||
wm.DispatchGRFStartRegisterForConstantSetupData0 =
|
||||
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, wm, 0);
|
||||
wm.DispatchGRFStartRegisterForConstantSetupData1 =
|
||||
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, wm, 1);
|
||||
wm.DispatchGRFStartRegisterForConstantSetupData2 =
|
||||
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, wm, 2);
|
||||
|
||||
wm.KernelStartPointer0 = params->wm_prog_kernel +
|
||||
brw_wm_prog_data_prog_offset(prog_data, wm, 0);
|
||||
wm.KernelStartPointer1 = params->wm_prog_kernel +
|
||||
brw_wm_prog_data_prog_offset(prog_data, wm, 1);
|
||||
wm.KernelStartPointer2 = params->wm_prog_kernel +
|
||||
brw_wm_prog_data_prog_offset(prog_data, wm, 2);
|
||||
|
||||
wm.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -745,6 +745,91 @@ struct brw_wm_prog_data {
|
|||
int urb_setup[VARYING_SLOT_MAX];
|
||||
};
|
||||
|
||||
/**
 * Reverse lookup from a kernel start pointer (KSP) slot to the SIMD width
 * of the kernel that occupies it.
 *
 * The "Variable Pixel Dispatch" table in the PRM (see, for example, Vol. 7
 * of the SKL PRM) maps dispatch widths to KSP indices depending on which
 * dispatch widths are enabled. This helper goes the other way: given a slot
 * and the set of enabled widths, it reports which width lives in that slot.
 *
 * \param ksp_idx         KSP slot to query; must be 0, 1 or 2
 * \param simd8_enabled   whether SIMD8 dispatch is enabled
 * \param simd16_enabled  whether SIMD16 dispatch is enabled
 * \param simd32_enabled  whether SIMD32 dispatch is enabled
 *
 * \return 8, 16 or 32 when the slot is in use for the given enables, or 0
 *         when the slot is not valid for that combination.
 */
static inline unsigned
brw_fs_simd_width_for_ksp(unsigned ksp_idx, bool simd8_enabled,
                          bool simd16_enabled, bool simd32_enabled)
{
   /* Contiguous dispatch is deliberately not taken into account here. */

   if (ksp_idx == 0) {
      /* Slot 0 holds the SIMD8 kernel whenever SIMD8 is enabled. Without
       * SIMD8 it holds the single wider kernel, but only when exactly one
       * of SIMD16/SIMD32 is enabled.
       */
      if (simd8_enabled)
         return 8;
      if (simd16_enabled != simd32_enabled)
         return simd16_enabled ? 16 : 32;
      return 0;
   }

   if (ksp_idx == 1) {
      /* Slot 1 is SIMD32, used only alongside some other enabled width. */
      const bool other_width_enabled = simd16_enabled || simd8_enabled;
      return (simd32_enabled && other_width_enabled) ? 32 : 0;
   }

   if (ksp_idx == 2) {
      /* Slot 2 is SIMD16, used only alongside some other enabled width. */
      const bool other_width_enabled = simd32_enabled || simd8_enabled;
      return (simd16_enabled && other_width_enabled) ? 16 : 0;
   }

   unreachable("Invalid KSP index");
}
|
||||
|
||||
/* Convenience wrapper around brw_fs_simd_width_for_ksp() that pulls the
 * 8/16/32 pixel dispatch enables out of a state struct (anything with
 * _8PixelDispatchEnable, _16PixelDispatchEnable and _32PixelDispatchEnable
 * members). Note that wm_state is expanded three times.
 */
#define brw_wm_state_simd_width_for_ksp(wm_state, ksp_idx)         \
   brw_fs_simd_width_for_ksp((ksp_idx),                            \
                             (wm_state)._8PixelDispatchEnable,     \
                             (wm_state)._16PixelDispatchEnable,    \
                             (wm_state)._32PixelDispatchEnable)
|
||||
|
||||
/* Evaluates to non-zero when KSP slot ksp_idx is in use for the dispatch
 * enables currently set in wm_state, i.e. when the slot maps to a non-zero
 * SIMD width.
 */
#define brw_wm_state_has_ksp(wm_state, ksp_idx)                    \
   (brw_wm_state_simd_width_for_ksp((wm_state), (ksp_idx)) != 0)
|
||||
|
||||
static inline uint32_t
|
||||
_brw_wm_prog_data_prog_offset(const struct brw_wm_prog_data *prog_data,
|
||||
unsigned ksp_idx)
|
||||
{
|
||||
switch (ksp_idx) {
|
||||
case 0: return 0;
|
||||
case 1: return 0;
|
||||
case 2: return prog_data->prog_offset_2;
|
||||
default:
|
||||
unreachable("Invalid KSP index");
|
||||
}
|
||||
}
|
||||
|
||||
/* Program offset for KSP slot ksp_idx. The wm_state argument is accepted
 * for symmetry with the brw_wm_state_*() helpers but is not used by the
 * expansion.
 */
#define brw_wm_prog_data_prog_offset(prog_data, wm_state, ksp_idx) \
   _brw_wm_prog_data_prog_offset((prog_data), (ksp_idx))
|
||||
|
||||
static inline uint8_t
|
||||
_brw_wm_prog_data_dispatch_grf_start_reg(const struct brw_wm_prog_data *prog_data,
|
||||
unsigned ksp_idx)
|
||||
{
|
||||
switch (ksp_idx) {
|
||||
case 0: return prog_data->base.dispatch_grf_start_reg;
|
||||
case 1: return 0;
|
||||
case 2: return prog_data->dispatch_grf_start_reg_2;
|
||||
default:
|
||||
unreachable("Invalid KSP index");
|
||||
}
|
||||
}
|
||||
|
||||
/* Dispatch GRF start register for KSP slot ksp_idx. The wm_state argument
 * is accepted for symmetry with the brw_wm_state_*() helpers but is not
 * used by the expansion.
 */
#define brw_wm_prog_data_dispatch_grf_start_reg(prog_data, wm_state, ksp_idx) \
   _brw_wm_prog_data_dispatch_grf_start_reg((prog_data), (ksp_idx))
|
||||
|
||||
static inline uint8_t
|
||||
_brw_wm_prog_data_reg_blocks(const struct brw_wm_prog_data *prog_data,
|
||||
unsigned ksp_idx)
|
||||
{
|
||||
switch (ksp_idx) {
|
||||
case 0: return prog_data->reg_blocks_0;
|
||||
case 1: return 0;
|
||||
case 2: return prog_data->reg_blocks_2;
|
||||
default:
|
||||
unreachable("Invalid KSP index");
|
||||
}
|
||||
}
|
||||
|
||||
/* Register block count for KSP slot ksp_idx. The wm_state argument is
 * accepted for symmetry with the brw_wm_state_*() helpers but is not used
 * by the expansion.
 */
#define brw_wm_prog_data_reg_blocks(prog_data, wm_state, ksp_idx) \
   _brw_wm_prog_data_reg_blocks((prog_data), (ksp_idx))
|
||||
|
||||
struct brw_push_const_block {
|
||||
unsigned dwords; /* Dword count, not reg aligned */
|
||||
unsigned regs;
|
||||
|
|
|
|||
|
|
@ -1488,14 +1488,17 @@ emit_3dstate_ps(struct anv_pipeline *pipeline,
|
|||
#endif
|
||||
|
||||
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) {
|
||||
ps.KernelStartPointer0 = fs_bin->kernel.offset;
|
||||
ps.KernelStartPointer1 = 0;
|
||||
ps.KernelStartPointer2 = fs_bin->kernel.offset +
|
||||
wm_prog_data->prog_offset_2;
|
||||
ps._8PixelDispatchEnable = wm_prog_data->dispatch_8;
|
||||
ps._16PixelDispatchEnable = wm_prog_data->dispatch_16;
|
||||
ps._32PixelDispatchEnable = false;
|
||||
|
||||
ps.KernelStartPointer0 = fs_bin->kernel.offset +
|
||||
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
|
||||
ps.KernelStartPointer1 = fs_bin->kernel.offset +
|
||||
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1);
|
||||
ps.KernelStartPointer2 = fs_bin->kernel.offset +
|
||||
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2);
|
||||
|
||||
ps.SingleProgramFlow = false;
|
||||
ps.VectorMaskEnable = true;
|
||||
ps.SamplerCount = get_sampler_count(fs_bin);
|
||||
|
|
@ -1526,10 +1529,11 @@ emit_3dstate_ps(struct anv_pipeline *pipeline,
|
|||
#endif
|
||||
|
||||
ps.DispatchGRFStartRegisterForConstantSetupData0 =
|
||||
wm_prog_data->base.dispatch_grf_start_reg;
|
||||
ps.DispatchGRFStartRegisterForConstantSetupData1 = 0;
|
||||
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
|
||||
ps.DispatchGRFStartRegisterForConstantSetupData1 =
|
||||
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1);
|
||||
ps.DispatchGRFStartRegisterForConstantSetupData2 =
|
||||
wm_prog_data->dispatch_grf_start_reg_2;
|
||||
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);
|
||||
|
||||
ps.PerThreadScratchSpace = get_scratch_space(fs_bin);
|
||||
ps.ScratchSpaceBasePointer =
|
||||
|
|
|
|||
|
|
@ -136,13 +136,17 @@ blorp_emit_wm_state(struct blorp_batch *batch,
|
|||
#if GEN_GEN == 4
|
||||
wm.KernelStartPointer0 =
|
||||
instruction_state_address(batch, params->wm_prog_kernel);
|
||||
wm.GRFRegisterCount0 = prog_data->reg_blocks_0;
|
||||
wm.GRFRegisterCount0 = brw_wm_prog_data_reg_blocks(prog_data, wm, 0);
|
||||
#else
|
||||
wm.KernelStartPointer0 = params->wm_prog_kernel;
|
||||
wm.GRFRegisterCount0 = prog_data->reg_blocks_0;
|
||||
wm.KernelStartPointer2 =
|
||||
params->wm_prog_kernel + prog_data->prog_offset_2;
|
||||
wm.GRFRegisterCount2 = prog_data->reg_blocks_2;
|
||||
wm.KernelStartPointer0 = params->wm_prog_kernel +
|
||||
brw_wm_prog_data_prog_offset(prog_data, wm, 0);
|
||||
wm.KernelStartPointer1 = params->wm_prog_kernel +
|
||||
brw_wm_prog_data_prog_offset(prog_data, wm, 1);
|
||||
wm.KernelStartPointer2 = params->wm_prog_kernel +
|
||||
brw_wm_prog_data_prog_offset(prog_data, wm, 2);
|
||||
wm.GRFRegisterCount0 = brw_wm_prog_data_reg_blocks(prog_data, wm, 0);
|
||||
wm.GRFRegisterCount1 = brw_wm_prog_data_reg_blocks(prog_data, wm, 1);
|
||||
wm.GRFRegisterCount2 = brw_wm_prog_data_reg_blocks(prog_data, wm, 2);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1899,43 +1899,57 @@ genX(upload_wm)(struct brw_context *brw)
|
|||
|
||||
#if GEN_GEN == 4
|
||||
/* On gen4, we only have one shader kernel */
|
||||
if (wm_prog_data->dispatch_8 || wm_prog_data->dispatch_16) {
|
||||
if (brw_wm_state_has_ksp(wm, 0)) {
|
||||
assert(brw_wm_prog_data_prog_offset(wm_prog_data, wm, 0) == 0);
|
||||
wm.KernelStartPointer0 = KSP(brw, stage_state->prog_offset);
|
||||
wm.GRFRegisterCount0 = wm_prog_data->reg_blocks_0;
|
||||
wm.GRFRegisterCount0 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 0);
|
||||
wm.DispatchGRFStartRegisterForConstantSetupData0 =
|
||||
wm_prog_data->base.dispatch_grf_start_reg;
|
||||
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 0);
|
||||
}
|
||||
#elif GEN_GEN == 5
|
||||
/* On gen5, we have multiple shader kernels but only one GRF start
|
||||
* register for all kernels
|
||||
*/
|
||||
wm.KernelStartPointer0 = stage_state->prog_offset;
|
||||
wm.KernelStartPointer0 = stage_state->prog_offset +
|
||||
brw_wm_prog_data_prog_offset(wm_prog_data, wm, 0);
|
||||
wm.KernelStartPointer1 = stage_state->prog_offset +
|
||||
brw_wm_prog_data_prog_offset(wm_prog_data, wm, 1);
|
||||
wm.KernelStartPointer2 = stage_state->prog_offset +
|
||||
wm_prog_data->prog_offset_2;
|
||||
brw_wm_prog_data_prog_offset(wm_prog_data, wm, 2);
|
||||
|
||||
wm.GRFRegisterCount0 = wm_prog_data->reg_blocks_0;
|
||||
wm.GRFRegisterCount2 = wm_prog_data->reg_blocks_2;
|
||||
wm.GRFRegisterCount0 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 0);
|
||||
wm.GRFRegisterCount1 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 1);
|
||||
wm.GRFRegisterCount2 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 2);
|
||||
|
||||
wm.DispatchGRFStartRegisterForConstantSetupData0 =
|
||||
wm_prog_data->base.dispatch_grf_start_reg;
|
||||
|
||||
/* Dispatch GRF Start should be the same for all shaders on gen5 */
|
||||
if (wm_prog_data->dispatch_8 && wm_prog_data->dispatch_16) {
|
||||
if (brw_wm_state_has_ksp(wm, 1)) {
|
||||
assert(wm_prog_data->base.dispatch_grf_start_reg ==
|
||||
wm_prog_data->dispatch_grf_start_reg_2);
|
||||
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 1));
|
||||
}
|
||||
if (brw_wm_state_has_ksp(wm, 2)) {
|
||||
assert(wm_prog_data->base.dispatch_grf_start_reg ==
|
||||
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 2));
|
||||
}
|
||||
#elif GEN_GEN == 6
|
||||
/* On gen6, we have multiple shader kernels and we no longer specify a
|
||||
* register count for each one.
|
||||
*/
|
||||
wm.KernelStartPointer0 = stage_state->prog_offset;
|
||||
wm.KernelStartPointer0 = stage_state->prog_offset +
|
||||
brw_wm_prog_data_prog_offset(wm_prog_data, wm, 0);
|
||||
wm.KernelStartPointer1 = stage_state->prog_offset +
|
||||
brw_wm_prog_data_prog_offset(wm_prog_data, wm, 1);
|
||||
wm.KernelStartPointer2 = stage_state->prog_offset +
|
||||
wm_prog_data->prog_offset_2;
|
||||
brw_wm_prog_data_prog_offset(wm_prog_data, wm, 2);
|
||||
|
||||
wm.DispatchGRFStartRegisterForConstantSetupData0 =
|
||||
wm_prog_data->base.dispatch_grf_start_reg;
|
||||
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 0);
|
||||
wm.DispatchGRFStartRegisterForConstantSetupData1 =
|
||||
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 1);
|
||||
wm.DispatchGRFStartRegisterForConstantSetupData2 =
|
||||
wm_prog_data->dispatch_grf_start_reg_2;
|
||||
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 2);
|
||||
#endif
|
||||
|
||||
#if GEN_GEN <= 5
|
||||
|
|
@ -4015,14 +4029,20 @@ genX(upload_ps)(struct brw_context *brw)
|
|||
|
||||
ps._8PixelDispatchEnable = prog_data->dispatch_8;
|
||||
ps._16PixelDispatchEnable = prog_data->dispatch_16;
|
||||
ps.DispatchGRFStartRegisterForConstantSetupData0 =
|
||||
prog_data->base.dispatch_grf_start_reg;
|
||||
ps.DispatchGRFStartRegisterForConstantSetupData2 =
|
||||
prog_data->dispatch_grf_start_reg_2;
|
||||
|
||||
ps.KernelStartPointer0 = stage_state->prog_offset;
|
||||
ps.DispatchGRFStartRegisterForConstantSetupData0 =
|
||||
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0);
|
||||
ps.DispatchGRFStartRegisterForConstantSetupData1 =
|
||||
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 1);
|
||||
ps.DispatchGRFStartRegisterForConstantSetupData2 =
|
||||
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 2);
|
||||
|
||||
ps.KernelStartPointer0 = stage_state->prog_offset +
|
||||
brw_wm_prog_data_prog_offset(prog_data, ps, 0);
|
||||
ps.KernelStartPointer1 = stage_state->prog_offset +
|
||||
brw_wm_prog_data_prog_offset(prog_data, ps, 1);
|
||||
ps.KernelStartPointer2 = stage_state->prog_offset +
|
||||
prog_data->prog_offset_2;
|
||||
brw_wm_prog_data_prog_offset(prog_data, ps, 2);
|
||||
|
||||
if (prog_data->base.total_scratch) {
|
||||
ps.ScratchSpaceBasePointer =
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue