ilo: add ilo_state_ps

We want to make ilo_shader_cso a union of ilo_state_{vs,hs,ds,gs,ps}.
This commit is contained in:
Chia-I Wu 2015-06-12 14:56:56 +08:00
parent df9f846ac6
commit 6dad848d1a
3 changed files with 848 additions and 0 deletions

View file

@ -32,6 +32,7 @@ C_SOURCES := \
core/ilo_state_sbe.c \
core/ilo_state_sbe.h \
core/ilo_state_shader.c \
core/ilo_state_shader_ps.c \
core/ilo_state_shader.h \
core/ilo_state_sol.c \
core/ilo_state_sol.h \

View file

@ -123,6 +123,54 @@ struct ilo_state_gs_info {
bool stats_enable;
};
/*
 * Inputs read and outputs written by the pixel shader kernels.  The PS state
 * code asserts that this struct is all zeros when no kernel is valid.
 */
struct ilo_state_ps_io_info {
/* inputs */
/* position offset mode; GEN6_POSOFFSET_SAMPLE is asserted to need per-sample dispatch */
enum gen_position_offset posoffset;
/* attribute count: programmed as a count on Gen6, only tested for non-zero on Gen7+ */
uint8_t attr_count;
/* sets the "PS uses depth" bit */
bool use_z;
/* sets the "PS uses W" bit */
bool use_w;
/* sets the "PS uses coverage mask" bit on Gen7+; asserted to be false on Gen6 */
bool use_coverage_mask;
/* outputs */
/* computed-depth mode; any value but GEN7_PSCDEPTH_OFF counts as an oDepth write */
enum gen_pscdepth_mode pscdepth;
/* kernel writes render targets; one of the conditions that can require dispatch */
bool has_rt_write;
/* kernel may change the pixel mask; treated as "PS may kill" */
bool write_pixel_mask;
/* kernel outputs oMask; also treated as "PS may kill" */
bool write_omask;
};
/*
 * Parameters that come from other pipeline states.  They can be changed after
 * initialization with ilo_state_ps_set_params().
 */
struct ilo_state_ps_params_info {
/* compatibility with raster states */
/* only the low 8 bits are used, and only on Gen7.5 */
uint32_t sample_mask;
/* when set, thread dispatch is always required */
bool earlyz_control_psexec;
/* compatibility with cc states */
/* color calculator may kill pixels; enables the kill-pixel bit on Gen6-Gen7.5 */
bool alpha_may_kill;
/* sets the dual-source-blend bit on Gen6-Gen7.5 */
bool dual_source_blending;
/* RT writes from the PS require dispatch only when this is also set */
bool has_writeable_rt;
};
/*
 * Complete description of a pixel shader, consumed by ilo_state_ps_init().
 * An all-zero struct describes a disabled PS (see
 * ilo_state_ps_init_disabled()).
 */
struct ilo_state_ps_info {
/* one kernel description per dispatch width; all three are validated */
struct ilo_state_shader_kernel_info kernel_8;
struct ilo_state_shader_kernel_info kernel_16;
struct ilo_state_shader_kernel_info kernel_32;
/* sampler/surface counts and UAV usage */
struct ilo_state_shader_resource_info resource;
struct ilo_state_ps_io_info io;
struct ilo_state_ps_params_info params;
/* bitmask of GEN6_PS_DISPATCH_x */
uint8_t valid_kernels;
/* per-sample (rather than per-pixel) dispatch */
bool per_sample_dispatch;
/* when set, per-sample dispatch does not limit the PS to one dispatch mode */
bool sample_count_one;
/* asserted to be false unless per_sample_dispatch is set */
bool cv_per_sample_interp;
/* asserted to be false whenever any kernel is valid */
bool cv_has_earlyz_op;
/* either of these disables 8-pixel dispatch; on Gen7+ they also set the
 * fast-clear / resolve bits */
bool rt_clear_enable;
bool rt_resolve_enable;
/* on Gen6, asserted to be true when pscdepth is not GEN7_PSCDEPTH_OFF */
bool cv_has_depth_buffer;
};
/*
 * Vertex shader state: five 32-bit words.  NOTE(review): the code that fills
 * and consumes them is not part of this view.
 */
struct ilo_state_vs {
uint32_t vs[5];
};
@ -140,6 +188,20 @@ struct ilo_state_gs {
uint32_t gs[5];
};
/*
 * Pixel shader state.  It must be zeroed before ilo_state_ps_init() is called
 * on it (the initializer asserts this).
 */
struct ilo_state_ps {
/*
 * Pre-packed command dwords.  The layout depends on the hardware generation:
 * Gen6 fills 7 entries, Gen7/Gen7.5 fill all 8, and Gen8 fills 6.
 */
uint32_t ps[8];
/*
 * Shader-derived conditions saved at init time so that
 * ilo_state_ps_set_params() can re-evaluate thread dispatch and pixel kill
 * without the full ilo_state_ps_info.
 */
struct ilo_state_ps_dispatch_conds {
/* at least one kernel is valid */
bool ps_valid;
bool has_rt_write;
bool write_odepth;
/* currently always false */
bool write_ostencil;
bool has_uav_write;
/* the PS writes the pixel mask or oMask */
bool ps_may_kill;
} conds;
};
bool
ilo_state_vs_init(struct ilo_state_vs *vs,
const struct ilo_dev *dev,
@ -177,4 +239,18 @@ bool
ilo_state_gs_init_disabled(struct ilo_state_gs *gs,
const struct ilo_dev *dev);
/*
 * Initialize a zeroed ilo_state_ps from info.  Invalid combinations are
 * reported through asserts.
 */
bool
ilo_state_ps_init(struct ilo_state_ps *ps,
const struct ilo_dev *dev,
const struct ilo_state_ps_info *info);
/*
 * Initialize a zeroed ilo_state_ps as if from an all-zero ilo_state_ps_info,
 * i.e. with no valid kernel.
 */
bool
ilo_state_ps_init_disabled(struct ilo_state_ps *ps,
const struct ilo_dev *dev);
/*
 * Update the parts of an initialized ilo_state_ps that depend on params:
 * the sample mask on Gen7.5, and thread dispatch enable, pixel kill, and
 * dual-source blending on Gen6 through Gen7.5.  Nothing is modified on Gen8.
 */
bool
ilo_state_ps_set_params(struct ilo_state_ps *ps,
const struct ilo_dev *dev,
const struct ilo_state_ps_params_info *params);
#endif /* ILO_STATE_SHADER_H */

View file

@ -0,0 +1,771 @@
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2015 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
#include "ilo_debug.h"
#include "ilo_state_shader.h"
/*
 * Generation-independent values derived from ilo_state_ps_info by
 * ps_get_gen6_ff(), before they are packed into command dwords.
 */
struct pixel_ff {
/* bitmask of GEN6_PS_DISPATCH_x that will actually be enabled */
uint8_t dispatch_modes;
/* with a single dispatch mode only slot 0 is used; otherwise the slots hold
 * the SIMD8, SIMD32, and SIMD16 kernels, in that order */
uint32_t kernel_offsets[3];
uint8_t grf_starts[3];
/* any enabled kernel has a non-zero pcb_attr_count */
bool pcb_enable;
/* encoded per-thread scratch space; 0 for 1KB or less */
uint8_t scratch_space;
/* encoded sampler count: in units of four samplers, clamped to 4 */
uint8_t sampler_count;
uint8_t surface_count;
bool has_uav;
/* maximum thread count, already adjusted for the hardware encoding */
uint16_t thread_count;
struct ilo_state_ps_dispatch_conds conds;
bool kill_pixel;
bool dispatch_enable;
bool dual_source_blending;
uint32_t sample_mask;
};
/*
 * Sanity-check one kernel description against the limits of the PS state.
 * Violations trip asserts; the function itself always reports success.
 */
static bool
ps_kernel_validate_gen6(const struct ilo_dev *dev,
                        const struct ilo_state_shader_kernel_info *kernel)
{
   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 271:
    *
    *     "(Per-Thread Scratch Space)
    *      Range [0,11] indicating [1k bytes, 2M bytes] in powers of two"
    */
   const uint32_t scratch_size_limit = 2 * 1024 * 1024;
   /* "Dispatch GRF Start Register for Constant/Setup Data" is U7 */
   const uint8_t grf_start_limit = 128;

   ILO_DEV_ASSERT(dev, 6, 8);

   /* "Kernel Start Pointer" is 64-byte aligned */
   assert(kernel->offset % 64 == 0);

   assert(kernel->grf_start < grf_start_limit);
   assert(kernel->scratch_size <= scratch_size_limit);

   return true;
}
/*
 * Check the whole PS description for combinations that are not allowed.
 * Each restriction is enforced with an assert; false is returned only when
 * one of the per-kernel validators reports failure.
 */
static bool
ps_validate_gen6(const struct ilo_dev *dev,
                 const struct ilo_state_ps_info *info)
{
   const struct ilo_state_shader_kernel_info *kernels[3] = {
      &info->kernel_8,
      &info->kernel_16,
      &info->kernel_32,
   };
   const struct ilo_state_ps_io_info *io = &info->io;
   bool is_gen6;
   int i;

   ILO_DEV_ASSERT(dev, 6, 8);

   /* stop at the first kernel that fails validation */
   for (i = 0; i < 3; i++) {
      if (!ps_kernel_validate_gen6(dev, kernels[i]))
         return false;
   }

   is_gen6 = (ilo_dev_gen(dev) == ILO_GEN(6));

   /* the coverage mask input is unsupported on Gen6 */
   assert(!is_gen6 || !io->use_coverage_mask);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 275:
    *
    *     "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth
    *      field must be set to disabled."
    */
   assert(!is_gen6 || io->pscdepth == GEN7_PSCDEPTH_OFF ||
          info->cv_has_depth_buffer);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 281:
    *
    *     "MSDISPMODE_PERSAMPLE is required in order to select
    *      POSOFFSET_SAMPLE."
    */
   assert(info->per_sample_dispatch ||
          io->posoffset != GEN6_POSOFFSET_SAMPLE);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 282:
    *
    *     "MSDISPMODE_PERSAMPLE is required in order to select
    *      INTERP_SAMPLE."
    *
    * From the Sandy Bridge PRM, volume 2 part 1, page 283:
    *
    *     "MSDISPMODE_PERSAMPLE is required in order to select Perspective
    *      Sample or Non-perspective Sample barycentric coordinates."
    */
   assert(info->per_sample_dispatch || !info->cv_per_sample_interp);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 314:
    *
    *     "Pixel Shader Dispatch, Alpha... must all be disabled."
    *
    * Simply disallow any valid kernel when there is an early-z op.  And when
    * there is no valid kernel at all, io is expected to be zeroed.
    */
   if (!info->valid_kernels)
      assert(ilo_is_zeroed(io, sizeof(*io)));
   else
      assert(!info->cv_has_earlyz_op);

   return true;
}
/*
 * Narrow info->valid_kernels down to the dispatch modes that may actually be
 * enabled.  Returns 0 when there is no valid kernel; otherwise the result is
 * asserted to be non-zero.
 */
static uint8_t
ps_get_gen6_dispatch_modes(const struct ilo_dev *dev,
                           const struct ilo_state_ps_info *info)
{
   const struct ilo_state_ps_io_info *io = &info->io;
   uint8_t modes = info->valid_kernels;

   ILO_DEV_ASSERT(dev, 6, 8);

   if (modes == 0)
      return 0;

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 334:
    *
    *     "Not valid on [DevSNB] if 4x PERPIXEL mode with pixel shader
    *      computed depth."
    *
    *     "Valid on all products, except when in non-1x PERSAMPLE mode
    *      (applies to [DevSNB+] only)"
    *
    * From the Sandy Bridge PRM, volume 4 part 1, page 239:
    *
    *     "[DevSNB]: When Pixel Shader outputs oDepth and PS invocation mode
    *      is PERPIXEL, Message Type for Render Target Write must be SIMD8.
    *
    *      Errata: [DevSNB+]: When Pixel Shader outputs oMask, this message
    *      type is not supported: SIMD8 (including SIMD8_DUALSRC_xx)."
    *
    * It is hard to tell which combinations are valid on which platforms.
    * Judging from the restrictions on RT write messages on Gen6, the oDepth
    * and oMask issues are treated as Gen6-specific, while the PERSAMPLE
    * issue is treated as universal and rules out multiple dispatch modes.
    */
   if (ilo_dev_gen(dev) == ILO_GEN(6)) {
      const bool per_pixel_odepth =
         (io->pscdepth != GEN7_PSCDEPTH_OFF && !info->per_sample_dispatch);

      /* SIMD8 only */
      if (per_pixel_odepth)
         modes &= GEN6_PS_DISPATCH_8;

      /* anything but SIMD8 */
      if (io->write_omask)
         modes &= ~GEN6_PS_DISPATCH_8;
   }

   if (info->per_sample_dispatch && !info->sample_count_one) {
      /* keep a single mode: prefer 32 over 16 over 8 */
      const uint8_t preferred =
         (modes & GEN6_PS_DISPATCH_32) ? GEN6_PS_DISPATCH_32 :
         (modes & GEN6_PS_DISPATCH_16) ? GEN6_PS_DISPATCH_16 :
                                         GEN6_PS_DISPATCH_8;

      modes &= preferred;
   }

   /*
    * From the Broadwell PRM, volume 2b, page 149:
    *
    *     "When Render Target Fast Clear Enable is ENABLED or Render Target
    *      Resolve Type = RESOLVE_PARTIAL or RESOLVE_FULL, this bit (8 Pixel
    *      Dispatch or Dual-8 Pixel Dispatch Enable) must be DISABLED."
    */
   if (info->rt_clear_enable || info->rt_resolve_enable)
      modes &= ~GEN6_PS_DISPATCH_8;

   assert(modes);

   return modes;
}
/*
 * Return the value for the "Maximum Number of Threads" field of the PS state:
 * a per-generation (and per-GT) thread count, minus one.
 */
static uint16_t
ps_get_gen6_thread_count(const struct ilo_dev *dev,
                         const struct ilo_state_ps_info *info)
{
   uint16_t max_threads;

   ILO_DEV_ASSERT(dev, 6, 8);

   if (ilo_dev_gen(dev) == ILO_GEN(8)) {
      /* scaled automatically */
      max_threads = 64 - 1;
   } else if (ilo_dev_gen(dev) == ILO_GEN(7.5)) {
      if (dev->gt == 3)
         max_threads = 408;
      else if (dev->gt == 2)
         max_threads = 204;
      else
         max_threads = 102;
   } else if (ilo_dev_gen(dev) == ILO_GEN(7)) {
      max_threads = (dev->gt == 2) ? 172 : 48;
   } else {
      /* Gen6 and anything else: from the classic driver instead of the PRM */
      max_threads = (dev->gt == 2) ? 80 : 40;
   }

   return max_threads - 1;
}
/*
 * Decide whether the "Pixel Shader Kill Pixel" bit must be set: true when
 * either the PS itself or the color calculator may kill pixels.
 */
static bool
ps_params_get_gen6_kill_pixel(const struct ilo_dev *dev,
                              const struct ilo_state_ps_params_info *params,
                              const struct ilo_state_ps_dispatch_conds *conds)
{
   ILO_DEV_ASSERT(dev, 6, 8);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 275:
    *
    *     "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the
    *      PS kernel or color calculator has the ability to kill (discard)
    *      pixels or samples, other than due to depth or stencil testing.
    *      This bit is required to be ENABLED in the following situations:
    *
    *      The API pixel shader program contains "killpix" or "discard"
    *      instructions, or other code in the pixel shader kernel that can
    *      cause the final pixel mask to differ from the pixel mask received
    *      on dispatch.
    *
    *      A sampler with chroma key enabled with kill pixel mode is used by
    *      the pixel shader.
    *
    *      Any render target has Alpha Test Enable or AlphaToCoverage Enable
    *      enabled.
    *
    *      The pixel shader kernel generates and outputs oMask.
    *
    *      Note: As ClipDistance clipping is fully supported in hardware and
    *      therefore not via PS instructions, there should be no need to
    *      ENABLE this bit due to ClipDistance clipping."
    */
   if (conds->ps_may_kill)
      return true;

   return params->alpha_may_kill;
}
/*
 * Decide whether PS thread dispatch must be enabled for the given shader
 * conditions and external parameters.  When dispatch is required, the PS is
 * asserted to be valid.
 */
static bool
ps_params_get_gen6_dispatch_enable(const struct ilo_dev *dev,
                                   const struct ilo_state_ps_params_info *params,
                                   const struct ilo_state_ps_dispatch_conds *conds)
{
   bool need_dispatch;

   ILO_DEV_ASSERT(dev, 6, 8);

   /*
    * We want to skip dispatching when EarlyZ suffices.  The conditions that
    * require dispatching are
    *
    *  - PS writes RTs and RTs are writeable
    *  - PS changes depth value and depth test/write is enabled
    *  - PS changes stencil value and stencil test is enabled
    *  - PS writes UAVs
    *  - PS or CC kills pixels
    *  - EDSC is PSEXEC, and depth test/write or stencil test is enabled
    */
   need_dispatch = (conds->has_rt_write && params->has_writeable_rt);
   need_dispatch = need_dispatch ||
                   conds->write_odepth ||
                   conds->write_ostencil ||
                   conds->has_uav_write;
   need_dispatch = need_dispatch ||
                   ps_params_get_gen6_kill_pixel(dev, params, conds) ||
                   params->earlyz_control_psexec;

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 280:
    *
    *     "If EDSC_PSEXEC mode is selected, Thread Dispatch Enable must be
    *      set."
    *
    * (earlyz_control_psexec is already one of the conditions above, so this
    * only restates the requirement explicitly.)
    */
   if (params->earlyz_control_psexec && ilo_dev_gen(dev) < ILO_GEN(8))
      need_dispatch = true;

   /* dispatching is only valid when there is a valid PS */
   assert(!need_dispatch || conds->ps_valid);

   return need_dispatch;
}
static bool
ps_get_gen6_ff_kernels(const struct ilo_dev *dev,
const struct ilo_state_ps_info *info,
struct pixel_ff *ff)
{
const struct ilo_state_shader_kernel_info *kernel_8 = &info->kernel_8;
const struct ilo_state_shader_kernel_info *kernel_16 = &info->kernel_16;
const struct ilo_state_shader_kernel_info *kernel_32 = &info->kernel_32;
uint32_t scratch_size;
ILO_DEV_ASSERT(dev, 6, 8);
ff->dispatch_modes = ps_get_gen6_dispatch_modes(dev, info);
/* initialize kernel offsets and GRF starts */
if (util_is_power_of_two(ff->dispatch_modes)) {
if (ff->dispatch_modes & GEN6_PS_DISPATCH_8) {
ff->kernel_offsets[0] = kernel_8->offset;
ff->grf_starts[0] = kernel_8->grf_start;
} else if (ff->dispatch_modes & GEN6_PS_DISPATCH_16) {
ff->kernel_offsets[0] = kernel_16->offset;
ff->grf_starts[0] = kernel_16->grf_start;
} else if (ff->dispatch_modes & GEN6_PS_DISPATCH_32) {
ff->kernel_offsets[0] = kernel_32->offset;
ff->grf_starts[0] = kernel_32->grf_start;
}
} else {
ff->kernel_offsets[0] = kernel_8->offset;
ff->kernel_offsets[1] = kernel_32->offset;
ff->kernel_offsets[2] = kernel_16->offset;
ff->grf_starts[0] = kernel_8->grf_start;
ff->grf_starts[1] = kernel_32->grf_start;
ff->grf_starts[2] = kernel_16->grf_start;
}
/* we do not want to save it */
assert(ff->kernel_offsets[0] == 0);
ff->pcb_enable = (((ff->dispatch_modes & GEN6_PS_DISPATCH_8) &&
kernel_8->pcb_attr_count) ||
((ff->dispatch_modes & GEN6_PS_DISPATCH_16) &&
kernel_16->pcb_attr_count) ||
((ff->dispatch_modes & GEN6_PS_DISPATCH_32) &&
kernel_32->pcb_attr_count));
scratch_size = 0;
if ((ff->dispatch_modes & GEN6_PS_DISPATCH_8) &&
scratch_size < kernel_8->scratch_size)
scratch_size = kernel_8->scratch_size;
if ((ff->dispatch_modes & GEN6_PS_DISPATCH_16) &&
scratch_size < kernel_16->scratch_size)
scratch_size = kernel_16->scratch_size;
if ((ff->dispatch_modes & GEN6_PS_DISPATCH_32) &&
scratch_size < kernel_32->scratch_size)
scratch_size = kernel_32->scratch_size;
/* next power of two, starting from 1KB */
ff->scratch_space = (scratch_size > 1024) ?
(util_last_bit(scratch_size - 1) - 10): 0;
/* GPU hangs on Haswell if none of the dispatch mode bits is set */
if (ilo_dev_gen(dev) == ILO_GEN(7.5) && !ff->dispatch_modes)
ff->dispatch_modes |= GEN6_PS_DISPATCH_8;
return true;
}
static bool
ps_get_gen6_ff(const struct ilo_dev *dev,
const struct ilo_state_ps_info *info,
struct pixel_ff *ff)
{
const struct ilo_state_shader_resource_info *resource = &info->resource;
const struct ilo_state_ps_io_info *io = &info->io;
const struct ilo_state_ps_params_info *params = &info->params;
ILO_DEV_ASSERT(dev, 6, 8);
memset(ff, 0, sizeof(*ff));
if (!ps_validate_gen6(dev, info) || !ps_get_gen6_ff_kernels(dev, info, ff))
return false;
ff->sampler_count = (resource->sampler_count <= 12) ?
(resource->sampler_count + 3) / 4 : 4;
ff->surface_count = resource->surface_count;
ff->has_uav = resource->has_uav;
ff->thread_count = ps_get_gen6_thread_count(dev, info);
ff->conds.ps_valid = (info->valid_kernels != 0x0);
ff->conds.has_rt_write = io->has_rt_write;
ff->conds.write_odepth = (io->pscdepth != GEN7_PSCDEPTH_OFF);
ff->conds.write_ostencil = false;
ff->conds.has_uav_write = resource->has_uav;
ff->conds.ps_may_kill = (io->write_pixel_mask || io->write_omask);
ff->kill_pixel = ps_params_get_gen6_kill_pixel(dev, params, &ff->conds);
ff->dispatch_enable =
ps_params_get_gen6_dispatch_enable(dev, params, &ff->conds);
ff->dual_source_blending = params->dual_source_blending;
ff->sample_mask = params->sample_mask;
return true;
}
/*
 * Pack the Gen6 3DSTATE_WM dwords into ps->ps[0..6]:
 *
 *   ps[0..4]  DW2 through DW6
 *   ps[5..6]  kernel offsets 1 and 2
 *
 * Always returns true.
 */
static bool
ps_set_gen6_3dstate_wm(struct ilo_state_ps *ps,
                       const struct ilo_dev *dev,
                       const struct ilo_state_ps_info *info,
                       const struct pixel_ff *ff)
{
   const struct ilo_state_ps_io_info *io = &info->io;
   uint32_t dw2, dw3, dw4, dw5, dw6;

   ILO_DEV_ASSERT(dev, 6, 6);

   dw2 = ff->sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
         ff->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;

   /* alternate floating-point mode is never selected */
   if (false)
      dw2 |= GEN6_THREADDISP_FP_MODE_ALT;

   dw3 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;

   dw4 = ff->grf_starts[0] << GEN6_WM_DW4_URB_GRF_START0__SHIFT |
         ff->grf_starts[1] << GEN6_WM_DW4_URB_GRF_START1__SHIFT |
         ff->grf_starts[2] << GEN6_WM_DW4_URB_GRF_START2__SHIFT;

   /*
    * Widen the thread count before shifting: a uint16_t operand is promoted
    * to (signed) int, and a left shift that reaches the sign bit is
    * undefined.
    */
   dw5 = (uint32_t) ff->thread_count << GEN6_WM_DW5_MAX_THREADS__SHIFT |
         ff->dispatch_modes << GEN6_WM_DW5_PS_DISPATCH_MODE__SHIFT;

   if (ff->kill_pixel)
      dw5 |= GEN6_WM_DW5_PS_KILL_PIXEL;

   if (io->pscdepth != GEN7_PSCDEPTH_OFF)
      dw5 |= GEN6_WM_DW5_PS_COMPUTE_DEPTH;
   if (io->use_z)
      dw5 |= GEN6_WM_DW5_PS_USE_DEPTH;

   if (ff->dispatch_enable)
      dw5 |= GEN6_WM_DW5_PS_DISPATCH_ENABLE;

   if (io->write_omask)
      dw5 |= GEN6_WM_DW5_PS_COMPUTE_OMASK;
   if (io->use_w)
      dw5 |= GEN6_WM_DW5_PS_USE_W;

   if (ff->dual_source_blending)
      dw5 |= GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND;

   dw6 = io->attr_count << GEN6_WM_DW6_SF_ATTR_COUNT__SHIFT |
         io->posoffset << GEN6_WM_DW6_PS_POSOFFSET__SHIFT;

   dw6 |= (info->per_sample_dispatch) ?
      GEN6_WM_DW6_MSDISPMODE_PERSAMPLE : GEN6_WM_DW6_MSDISPMODE_PERPIXEL;

   STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 7);
   ps->ps[0] = dw2;
   ps->ps[1] = dw3;
   ps->ps[2] = dw4;
   ps->ps[3] = dw5;
   ps->ps[4] = dw6;
   ps->ps[5] = ff->kernel_offsets[1];
   ps->ps[6] = ff->kernel_offsets[2];

   return true;
}
/*
 * Pack DW1 and DW2 of the Gen7/Gen7.5 3DSTATE_WM into ps->ps[0] and
 * ps->ps[1].  Always returns true.
 */
static bool
ps_set_gen7_3dstate_wm(struct ilo_state_ps *ps,
                       const struct ilo_dev *dev,
                       const struct ilo_state_ps_info *info,
                       const struct pixel_ff *ff)
{
   const struct ilo_state_ps_io_info *io = &info->io;
   uint32_t wm_dw1, wm_dw2;

   ILO_DEV_ASSERT(dev, 7, 7.5);

   wm_dw1 = io->pscdepth << GEN7_WM_DW1_PSCDEPTH__SHIFT;

   /* values that depend on the external params */
   if (ff->dispatch_enable)
      wm_dw1 |= GEN7_WM_DW1_PS_DISPATCH_ENABLE;
   if (ff->kill_pixel)
      wm_dw1 |= GEN7_WM_DW1_PS_KILL_PIXEL;

   /* shader inputs */
   if (io->use_z)
      wm_dw1 |= GEN7_WM_DW1_PS_USE_DEPTH;
   if (io->use_w)
      wm_dw1 |= GEN7_WM_DW1_PS_USE_W;
   if (io->use_coverage_mask)
      wm_dw1 |= GEN7_WM_DW1_PS_USE_COVERAGE_MASK;

   if (info->per_sample_dispatch)
      wm_dw2 = GEN7_WM_DW2_MSDISPMODE_PERSAMPLE;
   else
      wm_dw2 = GEN7_WM_DW2_MSDISPMODE_PERPIXEL;

   STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 2);
   ps->ps[0] = wm_dw1;
   ps->ps[1] = wm_dw2;

   return true;
}
/*
 * Pack the Gen7/Gen7.5 3DSTATE_PS dwords into ps->ps[2..7]:
 *
 *   ps[2..5]  DW2 through DW5
 *   ps[6..7]  kernel offsets 1 and 2
 *
 * ps->ps[0] and ps->ps[1] are left alone; ps_set_gen7_3dstate_wm() owns
 * them.  Always returns true.
 */
static bool
ps_set_gen7_3DSTATE_PS(struct ilo_state_ps *ps,
                       const struct ilo_dev *dev,
                       const struct ilo_state_ps_info *info,
                       const struct pixel_ff *ff)
{
   const struct ilo_state_ps_io_info *io = &info->io;
   /*
    * Widen the thread count before shifting: a uint16_t operand is promoted
    * to (signed) int, and a left shift that reaches the sign bit is
    * undefined.
    */
   const uint32_t thread_count = ff->thread_count;
   uint32_t dw2, dw3, dw4, dw5;

   ILO_DEV_ASSERT(dev, 7, 7.5);

   dw2 = ff->sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
         ff->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;

   /* alternate floating-point mode is never selected */
   if (false)
      dw2 |= GEN6_THREADDISP_FP_MODE_ALT;

   dw3 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;

   dw4 = io->posoffset << GEN7_PS_DW4_POSOFFSET__SHIFT |
         ff->dispatch_modes << GEN7_PS_DW4_DISPATCH_MODE__SHIFT;

   /* Gen7.5 uses a different thread-count shift and adds the sample mask */
   if (ilo_dev_gen(dev) == ILO_GEN(7.5)) {
      dw4 |= thread_count << GEN75_PS_DW4_MAX_THREADS__SHIFT |
             (ff->sample_mask & 0xff) << GEN75_PS_DW4_SAMPLE_MASK__SHIFT;
   } else {
      dw4 |= thread_count << GEN7_PS_DW4_MAX_THREADS__SHIFT;
   }

   if (ff->pcb_enable)
      dw4 |= GEN7_PS_DW4_PUSH_CONSTANT_ENABLE;
   if (io->attr_count)
      dw4 |= GEN7_PS_DW4_ATTR_ENABLE;
   if (io->write_omask)
      dw4 |= GEN7_PS_DW4_COMPUTE_OMASK;
   if (info->rt_clear_enable)
      dw4 |= GEN7_PS_DW4_RT_FAST_CLEAR;
   if (ff->dual_source_blending)
      dw4 |= GEN7_PS_DW4_DUAL_SOURCE_BLEND;
   if (info->rt_resolve_enable)
      dw4 |= GEN7_PS_DW4_RT_RESOLVE;
   if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff->has_uav)
      dw4 |= GEN75_PS_DW4_ACCESS_UAV;

   dw5 = ff->grf_starts[0] << GEN7_PS_DW5_URB_GRF_START0__SHIFT |
         ff->grf_starts[1] << GEN7_PS_DW5_URB_GRF_START1__SHIFT |
         ff->grf_starts[2] << GEN7_PS_DW5_URB_GRF_START2__SHIFT;

   STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 8);
   ps->ps[2] = dw2;
   ps->ps[3] = dw3;
   ps->ps[4] = dw4;
   ps->ps[5] = dw5;
   ps->ps[6] = ff->kernel_offsets[1];
   ps->ps[7] = ff->kernel_offsets[2];

   return true;
}
/*
 * Pack the Gen8 3DSTATE_PS dwords into ps->ps[0..5]:
 *
 *   ps[0..1]  DW3 and DW4
 *   ps[2..3]  DW6 and DW7
 *   ps[4..5]  kernel offsets 1 and 2
 *
 * NOTE(review): ps_set_gen8_3DSTATE_PS_EXTRA() also stores into ps->ps[4]
 * and is called after this function by ilo_state_ps_init(), so the kernel
 * offset written to ps[4] here does not survive.  Confirm the intended
 * layout against the code that consumes ps->ps.
 *
 * Always returns true.
 */
static bool
ps_set_gen8_3DSTATE_PS(struct ilo_state_ps *ps,
                       const struct ilo_dev *dev,
                       const struct ilo_state_ps_info *info,
                       const struct pixel_ff *ff)
{
   const struct ilo_state_ps_io_info *io = &info->io;
   uint32_t thread_disp, scratch, ps_ctrl, grf_starts;

   ILO_DEV_ASSERT(dev, 8, 8);

   /* DW3 */
   thread_disp =
      ff->sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
      ff->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;

   /* alternate floating-point mode is never selected */
   if (false)
      thread_disp |= GEN6_THREADDISP_FP_MODE_ALT;

   /* DW4 */
   scratch = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;

   /* DW6 */
   ps_ctrl = ff->thread_count << GEN8_PS_DW6_MAX_THREADS__SHIFT |
             io->posoffset << GEN8_PS_DW6_POSOFFSET__SHIFT |
             ff->dispatch_modes << GEN8_PS_DW6_DISPATCH_MODE__SHIFT;

   if (ff->pcb_enable)
      ps_ctrl |= GEN8_PS_DW6_PUSH_CONSTANT_ENABLE;
   if (info->rt_clear_enable)
      ps_ctrl |= GEN8_PS_DW6_RT_FAST_CLEAR;
   if (info->rt_resolve_enable)
      ps_ctrl |= GEN8_PS_DW6_RT_RESOLVE;

   /* DW7 */
   grf_starts = ff->grf_starts[0] << GEN8_PS_DW7_URB_GRF_START0__SHIFT |
                ff->grf_starts[1] << GEN8_PS_DW7_URB_GRF_START1__SHIFT |
                ff->grf_starts[2] << GEN8_PS_DW7_URB_GRF_START2__SHIFT;

   STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 6);
   ps->ps[0] = thread_disp;
   ps->ps[1] = scratch;
   ps->ps[2] = ps_ctrl;
   ps->ps[3] = grf_starts;
   ps->ps[4] = ff->kernel_offsets[1];
   ps->ps[5] = ff->kernel_offsets[2];

   return true;
}
/*
 * Pack DW1 of the Gen8 3DSTATE_PS_EXTRA into ps->ps[4].  The dword is left
 * at zero when there is no valid kernel.
 *
 * NOTE(review): ps_set_gen8_3DSTATE_PS() stores ff->kernel_offsets[1] in
 * ps->ps[4] as well, and ilo_state_ps_init() calls this function second, so
 * that offset is overwritten here.  Confirm the intended layout against the
 * code that consumes ps->ps before relying on multiple dispatch modes on
 * Gen8.
 *
 * Always returns true.
 */
static bool
ps_set_gen8_3DSTATE_PS_EXTRA(struct ilo_state_ps *ps,
                             const struct ilo_dev *dev,
                             const struct ilo_state_ps_info *info,
                             const struct pixel_ff *ff)
{
   const struct ilo_state_ps_io_info *io = &info->io;
   uint32_t psx_dw1 = 0;

   ILO_DEV_ASSERT(dev, 8, 8);

   /*
    * From the Broadwell PRM, volume 2b, page 151:
    *
    *     "When this bit (Pixel Shader Valid) clear the rest of this command
    *      should also be clear.
    *
    * So the dword is only built when there is a valid kernel.
    */
   if (info->valid_kernels) {
      psx_dw1 = io->pscdepth << GEN8_PSX_DW1_PSCDEPTH__SHIFT |
                GEN8_PSX_DW1_VALID;

      /* outputs */
      if (!io->has_rt_write)
         psx_dw1 |= GEN8_PSX_DW1_UAV_ONLY;
      if (io->write_omask)
         psx_dw1 |= GEN8_PSX_DW1_COMPUTE_OMASK;
      if (io->write_pixel_mask)
         psx_dw1 |= GEN8_PSX_DW1_KILL_PIXEL;

      /* inputs */
      if (io->use_z)
         psx_dw1 |= GEN8_PSX_DW1_USE_DEPTH;
      if (io->use_w)
         psx_dw1 |= GEN8_PSX_DW1_USE_W;
      if (io->attr_count)
         psx_dw1 |= GEN8_PSX_DW1_ATTR_ENABLE;
      if (io->use_coverage_mask)
         psx_dw1 |= GEN8_PSX_DW1_USE_COVERAGE_MASK;

      if (info->per_sample_dispatch)
         psx_dw1 |= GEN8_PSX_DW1_PER_SAMPLE;
      if (ff->has_uav)
         psx_dw1 |= GEN8_PSX_DW1_ACCESS_UAV;
   }

   STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 5);
   ps->ps[4] = psx_dw1;

   return true;
}
/*
 * Initialize ps, which must be zeroed, from info: derive the
 * generation-independent values, pack the command dwords for the device's
 * generation, and save the dispatch conditions for later
 * ilo_state_ps_set_params() calls.  Success is asserted and returned.
 */
bool
ilo_state_ps_init(struct ilo_state_ps *ps,
                  const struct ilo_dev *dev,
                  const struct ilo_state_ps_info *info)
{
   struct pixel_ff ff;
   bool ok;

   assert(ilo_is_zeroed(ps, sizeof(*ps)));

   ok = ps_get_gen6_ff(dev, info, &ff);

   /* every packing step runs, even if an earlier one reported failure */
   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
      ok = ps_set_gen8_3DSTATE_PS(ps, dev, info, &ff) && ok;
      ok = ps_set_gen8_3DSTATE_PS_EXTRA(ps, dev, info, &ff) && ok;
   } else if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
      ok = ps_set_gen7_3dstate_wm(ps, dev, info, &ff) && ok;
      ok = ps_set_gen7_3DSTATE_PS(ps, dev, info, &ff) && ok;
   } else {
      ok = ps_set_gen6_3dstate_wm(ps, dev, info, &ff) && ok;
   }

   /* save conditions */
   ps->conds = ff.conds;

   assert(ok);

   return ok;
}
bool
ilo_state_ps_init_disabled(struct ilo_state_ps *ps,
const struct ilo_dev *dev)
{
struct ilo_state_ps_info info;
memset(&info, 0, sizeof(info));
return ilo_state_ps_init(ps, dev, &info);
}
/*
 * Re-apply the external params to an initialized ps, using the conditions
 * saved at init time:
 *
 *  - Gen7.5 only: replace the sample mask in ps->ps[4]
 *  - Gen6 to Gen7.5: update the dispatch-enable, kill-pixel, and
 *    dual-source-blend bits
 *
 * Nothing is modified on Gen8.  Always returns true.
 */
bool
ilo_state_ps_set_params(struct ilo_state_ps *ps,
                        const struct ilo_dev *dev,
                        const struct ilo_state_ps_params_info *params)
{
   uint32_t dispatch_bit, kill_bit, dual_src_bit;
   uint32_t *wm_dw, *blend_dw;
   bool dispatch, kill;

   ILO_DEV_ASSERT(dev, 6, 8);

   if (ilo_dev_gen(dev) >= ILO_GEN(8))
      return true;

   /* modify sample mask */
   if (ilo_dev_gen(dev) == ILO_GEN(7.5)) {
      const uint32_t mask_bits =
         (params->sample_mask & 0xff) << GEN75_PS_DW4_SAMPLE_MASK__SHIFT;

      ps->ps[4] &= ~GEN75_PS_DW4_SAMPLE_MASK__MASK;
      ps->ps[4] |= mask_bits;
   }

   /* locate the dwords and bits for this generation */
   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
      wm_dw = &ps->ps[0];
      blend_dw = &ps->ps[4];

      dispatch_bit = GEN7_WM_DW1_PS_DISPATCH_ENABLE;
      kill_bit = GEN7_WM_DW1_PS_KILL_PIXEL;
      dual_src_bit = GEN7_PS_DW4_DUAL_SOURCE_BLEND;
   } else {
      /* on Gen6 all three bits live in the same dword */
      wm_dw = &ps->ps[3];
      blend_dw = &ps->ps[3];

      dispatch_bit = GEN6_WM_DW5_PS_DISPATCH_ENABLE;
      kill_bit = GEN6_WM_DW5_PS_KILL_PIXEL;
      dual_src_bit = GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND;
   }

   /* modify dispatch enable, pixel kill, and dual source blending */
   dispatch = ps_params_get_gen6_dispatch_enable(dev, params, &ps->conds);
   *wm_dw = dispatch ? (*wm_dw | dispatch_bit) : (*wm_dw & ~dispatch_bit);

   kill = ps_params_get_gen6_kill_pixel(dev, params, &ps->conds);
   *wm_dw = kill ? (*wm_dw | kill_bit) : (*wm_dw & ~kill_bit);

   *blend_dw = params->dual_source_blending ?
      (*blend_dw | dual_src_bit) : (*blend_dw & ~dual_src_bit);

   return true;
}