mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-28 23:20:08 +01:00
blorp: Add initial state setup support for SIMD8 dispatch
Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
This commit is contained in:
parent
cd5a2905cf
commit
353eadb170
7 changed files with 67 additions and 34 deletions
|
|
@ -137,7 +137,11 @@ brw_blorp_compute_tile_offsets(const struct brw_blorp_surface_info *info,
|
|||
void
|
||||
brw_blorp_prog_data_init(struct brw_blorp_prog_data *prog_data)
|
||||
{
|
||||
prog_data->first_curbe_grf = 0;
|
||||
prog_data->dispatch_8 = false;
|
||||
prog_data->dispatch_16 = true;
|
||||
prog_data->first_curbe_grf_0 = 0;
|
||||
prog_data->first_curbe_grf_2 = 0;
|
||||
prog_data->ksp_offset_2 = 0;
|
||||
prog_data->persample_msaa_dispatch = false;
|
||||
|
||||
prog_data->nr_params = BRW_BLORP_NUM_PUSH_CONSTANT_DWORDS;
|
||||
|
|
|
|||
|
|
@ -208,7 +208,13 @@ static const unsigned int BRW_BLORP_NUM_PUSH_CONST_REGS =
|
|||
|
||||
struct brw_blorp_prog_data
|
||||
{
|
||||
unsigned int first_curbe_grf;
|
||||
bool dispatch_8;
|
||||
bool dispatch_16;
|
||||
|
||||
uint8_t first_curbe_grf_0;
|
||||
uint8_t first_curbe_grf_2;
|
||||
|
||||
uint32_t ksp_offset_2;
|
||||
|
||||
/**
|
||||
* True if the WM program should be run in MSDISPMODE_PERSAMPLE with more
|
||||
|
|
|
|||
|
|
@ -778,7 +778,7 @@ brw_blorp_blit_program::alloc_regs()
|
|||
int reg = 0;
|
||||
this->R0 = retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW);
|
||||
this->R1 = retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW);
|
||||
prog_data.first_curbe_grf = reg;
|
||||
prog_data.first_curbe_grf_0 = reg;
|
||||
alloc_push_const_regs(reg);
|
||||
reg += BRW_BLORP_NUM_PUSH_CONST_REGS;
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(texture_data); ++i) {
|
||||
|
|
|
|||
|
|
@ -86,7 +86,7 @@ brw_blorp_const_color_program::brw_blorp_const_color_program(
|
|||
clear_rgba(),
|
||||
base_mrf(0)
|
||||
{
|
||||
prog_data.first_curbe_grf = 0;
|
||||
prog_data.first_curbe_grf_0 = 0;
|
||||
prog_data.persample_msaa_dispatch = false;
|
||||
brw_init_codegen(brw->intelScreen->devinfo, &func, mem_ctx);
|
||||
}
|
||||
|
|
@ -145,7 +145,7 @@ brw_blorp_const_color_program::alloc_regs()
|
|||
this->R0 = retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW);
|
||||
this->R1 = retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW);
|
||||
|
||||
prog_data.first_curbe_grf = reg;
|
||||
prog_data.first_curbe_grf_0 = reg;
|
||||
clear_rgba = retype(brw_vec4_grf(reg++, 0), BRW_REGISTER_TYPE_F);
|
||||
reg += BRW_BLORP_NUM_PUSH_CONST_REGS;
|
||||
|
||||
|
|
|
|||
|
|
@ -619,7 +619,7 @@ gen6_blorp_emit_wm_config(struct brw_context *brw,
|
|||
const struct brw_blorp_params *params)
|
||||
{
|
||||
const struct brw_blorp_prog_data *prog_data = params->wm_prog_data;
|
||||
uint32_t dw2, dw4, dw5, dw6;
|
||||
uint32_t dw2, dw4, dw5, dw6, ksp0, ksp2;
|
||||
|
||||
/* Even when thread dispatch is disabled, max threads (dw5.25:31) must be
|
||||
* nonzero to prevent the GPU from hanging. While the documentation doesn't
|
||||
|
|
@ -630,7 +630,7 @@ gen6_blorp_emit_wm_config(struct brw_context *brw,
|
|||
* configure the WM state whether or not there is a WM program.
|
||||
*/
|
||||
|
||||
dw2 = dw4 = dw5 = dw6 = 0;
|
||||
dw2 = dw4 = dw5 = dw6 = ksp0 = ksp2 = 0;
|
||||
switch (params->hiz_op) {
|
||||
case GEN6_HIZ_OP_DEPTH_CLEAR:
|
||||
dw4 |= GEN6_WM_DEPTH_CLEAR;
|
||||
|
|
@ -652,9 +652,18 @@ gen6_blorp_emit_wm_config(struct brw_context *brw,
|
|||
dw6 |= 0 << GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT; /* No interp */
|
||||
dw6 |= 0 << GEN6_WM_NUM_SF_OUTPUTS_SHIFT; /* No inputs from SF */
|
||||
if (params->wm_prog_data) {
|
||||
dw4 |= prog_data->first_curbe_grf << GEN6_WM_DISPATCH_START_GRF_SHIFT_0;
|
||||
dw5 |= GEN6_WM_16_DISPATCH_ENABLE;
|
||||
dw5 |= GEN6_WM_DISPATCH_ENABLE; /* We are rendering */
|
||||
|
||||
dw4 |= prog_data->first_curbe_grf_0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_0;
|
||||
dw4 |= prog_data->first_curbe_grf_2 << GEN6_WM_DISPATCH_START_GRF_SHIFT_2;
|
||||
|
||||
ksp0 = params->wm_prog_kernel;
|
||||
ksp2 = params->wm_prog_kernel + params->wm_prog_data->ksp_offset_2;
|
||||
|
||||
if (params->wm_prog_data->dispatch_8)
|
||||
dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
|
||||
if (params->wm_prog_data->dispatch_16)
|
||||
dw5 |= GEN6_WM_16_DISPATCH_ENABLE;
|
||||
}
|
||||
|
||||
if (params->src.mt) {
|
||||
|
|
@ -675,14 +684,14 @@ gen6_blorp_emit_wm_config(struct brw_context *brw,
|
|||
|
||||
BEGIN_BATCH(9);
|
||||
OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2));
|
||||
OUT_BATCH(params->wm_prog_kernel);
|
||||
OUT_BATCH(ksp0);
|
||||
OUT_BATCH(dw2);
|
||||
OUT_BATCH(0); /* No scratch needed */
|
||||
OUT_BATCH(dw4);
|
||||
OUT_BATCH(dw5);
|
||||
OUT_BATCH(dw6);
|
||||
OUT_BATCH(0); /* No other programs */
|
||||
OUT_BATCH(0); /* No other programs */
|
||||
OUT_BATCH(0); /* kernel 1 pointer */
|
||||
OUT_BATCH(ksp2);
|
||||
ADVANCE_BATCH();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -526,26 +526,33 @@ gen7_blorp_emit_ps_config(struct brw_context *brw,
|
|||
const struct brw_blorp_params *params)
|
||||
{
|
||||
const struct brw_blorp_prog_data *prog_data = params->wm_prog_data;
|
||||
uint32_t dw2, dw4, dw5;
|
||||
uint32_t dw2, dw4, dw5, ksp0, ksp2;
|
||||
const int max_threads_shift = brw->is_haswell ?
|
||||
HSW_PS_MAX_THREADS_SHIFT : IVB_PS_MAX_THREADS_SHIFT;
|
||||
|
||||
dw2 = dw4 = dw5 = 0;
|
||||
dw2 = dw4 = dw5 = ksp0 = ksp2 = 0;
|
||||
dw4 |= (brw->max_wm_threads - 1) << max_threads_shift;
|
||||
|
||||
/* If there's a WM program, we need to do 16-pixel dispatch since that's
|
||||
* what the program is compiled for. If there isn't, then it shouldn't
|
||||
* matter because no program is actually being run. However, the hardware
|
||||
* gets angry if we don't enable at least one dispatch mode, so just enable
|
||||
* 16-pixel dispatch unconditionally.
|
||||
*/
|
||||
dw4 |= GEN7_PS_16_DISPATCH_ENABLE;
|
||||
|
||||
if (brw->is_haswell)
|
||||
dw4 |= SET_FIELD(1, HSW_PS_SAMPLE_MASK); /* 1 sample for now */
|
||||
if (params->wm_prog_data) {
|
||||
dw4 |= GEN7_PS_PUSH_CONSTANT_ENABLE;
|
||||
dw5 |= prog_data->first_curbe_grf << GEN7_PS_DISPATCH_START_GRF_SHIFT_0;
|
||||
|
||||
dw5 |= prog_data->first_curbe_grf_0 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0;
|
||||
dw5 |= prog_data->first_curbe_grf_2 << GEN7_PS_DISPATCH_START_GRF_SHIFT_2;
|
||||
|
||||
ksp0 = params->wm_prog_kernel;
|
||||
ksp2 = params->wm_prog_kernel + params->wm_prog_data->ksp_offset_2;
|
||||
|
||||
if (params->wm_prog_data->dispatch_8)
|
||||
dw4 |= GEN7_PS_8_DISPATCH_ENABLE;
|
||||
if (params->wm_prog_data->dispatch_16)
|
||||
dw4 |= GEN7_PS_16_DISPATCH_ENABLE;
|
||||
} else {
|
||||
/* The hardware gets angry if we don't enable at least one dispatch
|
||||
* mode, so just enable 16-pixel dispatch if we don't have a program.
|
||||
*/
|
||||
dw4 |= GEN7_PS_16_DISPATCH_ENABLE;
|
||||
}
|
||||
|
||||
if (params->src.mt)
|
||||
|
|
@ -555,13 +562,13 @@ gen7_blorp_emit_ps_config(struct brw_context *brw,
|
|||
|
||||
BEGIN_BATCH(8);
|
||||
OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2));
|
||||
OUT_BATCH(params->wm_prog_kernel);
|
||||
OUT_BATCH(ksp0);
|
||||
OUT_BATCH(dw2);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(dw4);
|
||||
OUT_BATCH(dw5);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0); /* kernel 1 pointer */
|
||||
OUT_BATCH(ksp2);
|
||||
ADVANCE_BATCH();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -372,13 +372,11 @@ gen8_blorp_emit_ps_config(struct brw_context *brw,
|
|||
const struct brw_blorp_params *params)
|
||||
{
|
||||
const struct brw_blorp_prog_data *prog_data = params->wm_prog_data;
|
||||
uint32_t dw3, dw5, dw6, dw7;
|
||||
uint32_t dw3, dw5, dw6, dw7, ksp0, ksp2;
|
||||
|
||||
dw3 = dw5 = dw6 = dw7 = 0;
|
||||
dw3 = dw5 = dw6 = dw7 = ksp0 = ksp2 = 0;
|
||||
dw3 |= GEN7_PS_VECTOR_MASK_ENABLE;
|
||||
|
||||
dw6 |= GEN7_PS_16_DISPATCH_ENABLE;
|
||||
|
||||
if (params->src.mt) {
|
||||
dw3 |= 1 << GEN7_PS_SAMPLER_COUNT_SHIFT; /* Up to 4 samplers */
|
||||
dw3 |= 2 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT; /* Two surfaces */
|
||||
|
|
@ -387,7 +385,16 @@ gen8_blorp_emit_ps_config(struct brw_context *brw,
|
|||
}
|
||||
|
||||
dw6 |= GEN7_PS_PUSH_CONSTANT_ENABLE;
|
||||
dw7 |= prog_data->first_curbe_grf << GEN7_PS_DISPATCH_START_GRF_SHIFT_0;
|
||||
dw7 |= prog_data->first_curbe_grf_0 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0;
|
||||
dw7 |= prog_data->first_curbe_grf_2 << GEN7_PS_DISPATCH_START_GRF_SHIFT_2;
|
||||
|
||||
if (params->wm_prog_data->dispatch_8)
|
||||
dw6 |= GEN7_PS_8_DISPATCH_ENABLE;
|
||||
if (params->wm_prog_data->dispatch_16)
|
||||
dw6 |= GEN7_PS_16_DISPATCH_ENABLE;
|
||||
|
||||
ksp0 = params->wm_prog_kernel;
|
||||
ksp2 = params->wm_prog_kernel + params->wm_prog_data->ksp_offset_2;
|
||||
|
||||
/* 3DSTATE_PS expects the number of threads per PSD, which is always 64;
|
||||
* it implicitly scales for different GT levels (which have some # of PSDs).
|
||||
|
|
@ -404,16 +411,16 @@ gen8_blorp_emit_ps_config(struct brw_context *brw,
|
|||
|
||||
BEGIN_BATCH(12);
|
||||
OUT_BATCH(_3DSTATE_PS << 16 | (12 - 2));
|
||||
OUT_BATCH(params->wm_prog_kernel);
|
||||
OUT_BATCH(ksp0);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(dw3);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(dw6);
|
||||
OUT_BATCH(dw7);
|
||||
OUT_BATCH(0); /* kernel 1 pointer */
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(ksp2);
|
||||
OUT_BATCH(0);
|
||||
ADVANCE_BATCH();
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue