panvk: Emit the fragment shader RSD dynamically

This is the final step of the dynamic graphics state transition,
making the panvk_pipeline logic a dumb layer on top of panvk_shader
whose sole responsibility is to compile+link shaders, and call some
vk_graphics_pipeline helpers to store the static state in a
vk_dynamic_graphics_state object that can be copied to the command
buffer dynamic graphics state at pipeline bind time.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Mary Guillemard <mary.guillemard@collabora.com>
Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28927>
This commit is contained in:
Boris Brezillon 2024-04-26 15:24:52 +02:00 committed by Marge Bot
parent 4335560bb7
commit 4b6f7613c0
6 changed files with 288 additions and 557 deletions

View file

@ -133,6 +133,11 @@ struct panvk_cmd_graphics_state {
} ib;
struct {
enum vk_rp_attachment_flags bound_attachments;
struct {
VkFormat fmts[MAX_RTS];
uint8_t samples[MAX_RTS];
} color_attachments;
struct pan_fb_info info;
bool crc_valid[MAX_RTS];
uint32_t bo_count;

View file

@ -63,23 +63,6 @@ struct panvk_graphics_pipeline {
struct panvk_pipeline_shader fs;
struct {
struct {
bool required;
bool dynamic_rsd;
uint8_t rt_mask;
struct mali_renderer_state_packed rsd_template;
} fs;
struct {
struct pan_blend_state pstate;
struct {
uint8_t index;
uint16_t bifrost_factor;
} constant[8];
struct mali_blend_packed bd_template[8];
bool reads_dest;
} blend;
struct vk_dynamic_graphics_state dynamic;
struct vk_vertex_input_state vi;
struct vk_sample_locations_state sl;

View file

@ -76,7 +76,6 @@ bool panvk_per_arch(blend_needs_lowering)(const struct panvk_device *dev,
struct panvk_shader *panvk_per_arch(shader_create)(
struct panvk_device *dev, const VkPipelineShaderStageCreateInfo *stage_info,
const struct panvk_pipeline_layout *layout,
struct pan_blend_state *blend_state, bool static_blend_constants,
const VkAllocationCallbacks *alloc);
void panvk_per_arch(shader_destroy)(struct panvk_device *dev,

View file

@ -44,9 +44,11 @@
#include "pan_blitter.h"
#include "pan_desc.h"
#include "pan_earlyzs.h"
#include "pan_encoder.h"
#include "pan_props.h"
#include "pan_samples.h"
#include "pan_shader.h"
#include "util/rounding.h"
#include "util/u_pack_color.h"
@ -533,80 +535,287 @@ panvk_cmd_prepare_samplers(struct panvk_cmd_buffer *cmdbuf,
desc_state->samplers = samplers.gpu;
}
static bool
has_depth_att(struct panvk_cmd_buffer *cmdbuf)
{
return (cmdbuf->state.gfx.fb.bound_attachments &
MESA_VK_RP_ATTACHMENT_DEPTH_BIT) != 0;
}
static bool
has_stencil_att(struct panvk_cmd_buffer *cmdbuf)
{
return (cmdbuf->state.gfx.fb.bound_attachments &
MESA_VK_RP_ATTACHMENT_STENCIL_BIT) != 0;
}
static bool
writes_depth(struct panvk_cmd_buffer *cmdbuf)
{
const struct vk_depth_stencil_state *ds =
&cmdbuf->vk.dynamic_graphics_state.ds;
return has_depth_att(cmdbuf) && ds->depth.test_enable &&
ds->depth.write_enable && ds->depth.compare_op != VK_COMPARE_OP_NEVER;
}
static bool
writes_stencil(struct panvk_cmd_buffer *cmdbuf)
{
const struct vk_depth_stencil_state *ds =
&cmdbuf->vk.dynamic_graphics_state.ds;
return has_stencil_att(cmdbuf) && ds->stencil.test_enable &&
((ds->stencil.front.write_mask &&
(ds->stencil.front.op.fail != VK_STENCIL_OP_KEEP ||
ds->stencil.front.op.pass != VK_STENCIL_OP_KEEP ||
ds->stencil.front.op.depth_fail != VK_STENCIL_OP_KEEP)) ||
(ds->stencil.back.write_mask &&
(ds->stencil.back.op.fail != VK_STENCIL_OP_KEEP ||
ds->stencil.back.op.pass != VK_STENCIL_OP_KEEP ||
ds->stencil.back.op.depth_fail != VK_STENCIL_OP_KEEP)));
}
/* True when no fragment can be rejected by the depth/stencil test:
 * either there is no depth attachment, or every enabled test uses
 * COMPARE_OP_ALWAYS. */
static bool
ds_test_always_passes(struct panvk_cmd_buffer *cmdbuf)
{
   const struct vk_depth_stencil_state *ds =
      &cmdbuf->vk.dynamic_graphics_state.ds;

   if (!has_depth_att(cmdbuf))
      return true;

   bool depth_may_fail =
      ds->depth.test_enable && ds->depth.compare_op != VK_COMPARE_OP_ALWAYS;

   bool stencil_may_fail =
      ds->stencil.test_enable &&
      (ds->stencil.front.op.compare != VK_COMPARE_OP_ALWAYS ||
       ds->stencil.back.op.compare != VK_COMPARE_OP_ALWAYS);

   return !depth_may_fail && !stencil_may_fail;
}
/* Convert a Vulkan compare op to the Mali HW encoding.
 *
 * The two enums are defined with identical numerical values, so this is
 * just a cast; the STATIC_ASSERTs prove that assumption at compile time
 * and will break the build if either enum ever diverges. */
static inline enum mali_func
translate_compare_func(VkCompareOp comp)
{
   STATIC_ASSERT(VK_COMPARE_OP_NEVER == (VkCompareOp)MALI_FUNC_NEVER);
   STATIC_ASSERT(VK_COMPARE_OP_LESS == (VkCompareOp)MALI_FUNC_LESS);
   STATIC_ASSERT(VK_COMPARE_OP_EQUAL == (VkCompareOp)MALI_FUNC_EQUAL);
   STATIC_ASSERT(VK_COMPARE_OP_LESS_OR_EQUAL == (VkCompareOp)MALI_FUNC_LEQUAL);
   STATIC_ASSERT(VK_COMPARE_OP_GREATER == (VkCompareOp)MALI_FUNC_GREATER);
   STATIC_ASSERT(VK_COMPARE_OP_NOT_EQUAL == (VkCompareOp)MALI_FUNC_NOT_EQUAL);
   STATIC_ASSERT(VK_COMPARE_OP_GREATER_OR_EQUAL ==
                 (VkCompareOp)MALI_FUNC_GEQUAL);
   STATIC_ASSERT(VK_COMPARE_OP_ALWAYS == (VkCompareOp)MALI_FUNC_ALWAYS);

   return (enum mali_func)comp;
}
/* Convert a Vulkan stencil op to its Mali HW equivalent. Unlike the
 * compare functions, these enums are not assumed to share an encoding,
 * so the mapping is explicit. */
static enum mali_stencil_op
translate_stencil_op(VkStencilOp in)
{
   switch (in) {
   case VK_STENCIL_OP_KEEP:                return MALI_STENCIL_OP_KEEP;
   case VK_STENCIL_OP_ZERO:                return MALI_STENCIL_OP_ZERO;
   case VK_STENCIL_OP_REPLACE:             return MALI_STENCIL_OP_REPLACE;
   case VK_STENCIL_OP_INVERT:              return MALI_STENCIL_OP_INVERT;
   case VK_STENCIL_OP_INCREMENT_AND_CLAMP: return MALI_STENCIL_OP_INCR_SAT;
   case VK_STENCIL_OP_DECREMENT_AND_CLAMP: return MALI_STENCIL_OP_DECR_SAT;
   case VK_STENCIL_OP_INCREMENT_AND_WRAP:  return MALI_STENCIL_OP_INCR_WRAP;
   case VK_STENCIL_OP_DECREMENT_AND_WRAP:  return MALI_STENCIL_OP_DECR_WRAP;
   default:
      unreachable("Invalid stencil op");
   }
}
static bool
fs_required(const struct vk_color_blend_state *cb,
const struct pan_shader_info *fs_info)
{
/* If we generally have side effects */
if (fs_info->fs.sidefx)
return true;
/* If colour is written we need to execute */
for (unsigned i = 0; i < cb->attachment_count; ++i) {
if ((cb->color_write_enables & BITFIELD_BIT(i)) &&
cb->attachments[i].write_mask)
return true;
}
/* If depth is written and not implied we need to execute.
* TODO: Predicate on Z/S writes being enabled */
return (fs_info->fs.writes_depth || fs_info->fs.writes_stencil);
}
static void
panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf,
struct panvk_draw_info *draw)
{
const struct panvk_graphics_pipeline *pipeline = cmdbuf->state.gfx.pipeline;
if (!pipeline->state.fs.dynamic_rsd) {
draw->fs.rsd = pipeline->fs.rsd;
bool dirty =
is_dirty(cmdbuf, RS_RASTERIZER_DISCARD_ENABLE) ||
is_dirty(cmdbuf, RS_DEPTH_CLAMP_ENABLE) ||
is_dirty(cmdbuf, RS_DEPTH_BIAS_ENABLE) ||
is_dirty(cmdbuf, RS_DEPTH_BIAS_FACTORS) ||
is_dirty(cmdbuf, CB_LOGIC_OP_ENABLE) || is_dirty(cmdbuf, CB_LOGIC_OP) ||
is_dirty(cmdbuf, CB_ATTACHMENT_COUNT) ||
is_dirty(cmdbuf, CB_COLOR_WRITE_ENABLES) ||
is_dirty(cmdbuf, CB_BLEND_ENABLES) ||
is_dirty(cmdbuf, CB_BLEND_EQUATIONS) ||
is_dirty(cmdbuf, CB_WRITE_MASKS) ||
is_dirty(cmdbuf, CB_BLEND_CONSTANTS) ||
is_dirty(cmdbuf, DS_DEPTH_TEST_ENABLE) ||
is_dirty(cmdbuf, DS_DEPTH_WRITE_ENABLE) ||
is_dirty(cmdbuf, DS_DEPTH_COMPARE_OP) ||
is_dirty(cmdbuf, DS_DEPTH_COMPARE_OP) ||
is_dirty(cmdbuf, DS_STENCIL_TEST_ENABLE) ||
is_dirty(cmdbuf, DS_STENCIL_OP) ||
is_dirty(cmdbuf, DS_STENCIL_COMPARE_MASK) ||
is_dirty(cmdbuf, DS_STENCIL_WRITE_MASK) ||
is_dirty(cmdbuf, DS_STENCIL_REFERENCE) ||
is_dirty(cmdbuf, MS_RASTERIZATION_SAMPLES) ||
is_dirty(cmdbuf, MS_SAMPLE_MASK) ||
is_dirty(cmdbuf, MS_ALPHA_TO_COVERAGE_ENABLE) ||
is_dirty(cmdbuf, MS_ALPHA_TO_ONE_ENABLE) || !cmdbuf->state.gfx.fs.rsd;
if (!dirty) {
draw->fs.rsd = cmdbuf->state.gfx.fs.rsd;
return;
}
bool dirty = is_dirty(cmdbuf, RS_DEPTH_BIAS_FACTORS) ||
is_dirty(cmdbuf, CB_BLEND_CONSTANTS) ||
is_dirty(cmdbuf, DS_STENCIL_COMPARE_MASK) ||
is_dirty(cmdbuf, DS_STENCIL_WRITE_MASK) ||
is_dirty(cmdbuf, DS_STENCIL_REFERENCE) ||
!cmdbuf->state.gfx.fs.rsd;
struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
const struct vk_dynamic_graphics_state *dyns =
&cmdbuf->vk.dynamic_graphics_state;
const struct vk_rasterization_state *rs = &dyns->rs;
const struct vk_color_blend_state *cb = &dyns->cb;
const struct vk_depth_stencil_state *ds = &dyns->ds;
const struct pan_shader_info *fs_info = &pipeline->fs.info;
unsigned bd_count = MAX2(cb->attachment_count, 1);
bool test_s = has_stencil_att(cmdbuf) && ds->stencil.test_enable;
bool test_z = has_depth_att(cmdbuf) && ds->depth.test_enable;
bool writes_z = writes_depth(cmdbuf);
bool writes_s = writes_stencil(cmdbuf);
bool needs_fs = fs_required(cb, fs_info);
bool blend_shader_loads_blend_const = false;
bool blend_reads_dest = false;
if (dirty) {
const struct vk_rasterization_state *rs =
&cmdbuf->vk.dynamic_graphics_state.rs;
const struct vk_color_blend_state *cb =
&cmdbuf->vk.dynamic_graphics_state.cb;
const struct vk_depth_stencil_state *ds =
&cmdbuf->vk.dynamic_graphics_state.ds;
struct panfrost_ptr rsd = pan_pool_alloc_desc_aggregate(
&cmdbuf->desc_pool.base, PAN_DESC(RENDERER_STATE),
PAN_DESC_ARRAY(pipeline->state.blend.pstate.rt_count, BLEND));
struct panfrost_ptr ptr = pan_pool_alloc_desc_aggregate(
&cmdbuf->desc_pool.base, PAN_DESC(RENDERER_STATE),
PAN_DESC_ARRAY(bd_count, BLEND));
struct mali_renderer_state_packed *rsd = ptr.cpu;
struct mali_blend_packed *bds = ptr.cpu + pan_size(RENDERER_STATE);
struct mali_renderer_state_packed rsd_dyn;
const struct mali_renderer_state_packed *rsd_templ =
&pipeline->state.fs.rsd_template;
panvk_per_arch(blend_emit_descs)(
dev, cb, cmdbuf->state.gfx.fb.color_attachments.fmts,
cmdbuf->state.gfx.fb.color_attachments.samples, fs_info,
pipeline->fs.code, bds, &blend_reads_dest,
&blend_shader_loads_blend_const);
pan_pack(&rsd_dyn, RENDERER_STATE, cfg) {
cfg.depth_units = rs->depth_bias.constant * 2.0f;
cfg.depth_factor = rs->depth_bias.slope;
cfg.depth_bias_clamp = rs->depth_bias.clamp;
pan_pack(rsd, RENDERER_STATE, cfg) {
bool alpha_to_coverage = dyns->ms.alpha_to_coverage_enable;
cfg.stencil_front.mask = ds->stencil.front.compare_mask;
cfg.stencil_back.mask = ds->stencil.back.compare_mask;
cfg.stencil_mask_misc.stencil_mask_front =
ds->stencil.front.write_mask;
cfg.stencil_mask_misc.stencil_mask_back = ds->stencil.back.write_mask;
cfg.stencil_front.reference_value = ds->stencil.front.reference;
cfg.stencil_back.reference_value = ds->stencil.back.reference;
}
if (needs_fs) {
pan_shader_prepare_rsd(fs_info, pipeline->fs.code, &cfg);
pan_merge(rsd_dyn, (*rsd_templ), RENDERER_STATE);
memcpy(rsd.cpu, &rsd_dyn, sizeof(rsd_dyn));
void *bd = rsd.cpu + pan_size(RENDERER_STATE);
for (unsigned i = 0; i < pipeline->state.blend.pstate.rt_count; i++) {
if (pipeline->state.blend.constant[i].index != (uint8_t)~0) {
struct mali_blend_packed bd_dyn;
const struct mali_blend_packed *bd_templ =
&pipeline->state.blend.bd_template[i];
unsigned constant_idx = pipeline->state.blend.constant[i].index;
float constant = cb->blend_constants[constant_idx] *
pipeline->state.blend.constant[i].bifrost_factor;
pan_pack(&bd_dyn, BLEND, cfg) {
cfg.enable = false;
cfg.constant = constant;
}
pan_merge(bd_dyn, (*bd_templ), BLEND);
memcpy(bd, &bd_dyn, sizeof(bd_dyn));
if (blend_shader_loads_blend_const) {
/* Preload the blend constant if the blend shader depends on it. */
cfg.preload.uniform_count = MAX2(
cfg.preload.uniform_count,
DIV_ROUND_UP(256 + sizeof(struct panvk_graphics_sysvals), 8));
}
bd += pan_size(BLEND);
uint8_t rt_written = fs_info->outputs_written >> FRAG_RESULT_DATA0;
uint8_t rt_mask = cmdbuf->state.gfx.fb.bound_attachments &
MESA_VK_RP_ATTACHMENT_ANY_COLOR_BITS;
cfg.properties.allow_forward_pixel_to_kill =
fs_info->fs.can_fpk && !(rt_mask & ~rt_written) &&
!alpha_to_coverage && !blend_reads_dest;
bool writes_zs = writes_z || writes_s;
bool zs_always_passes = ds_test_always_passes(cmdbuf);
bool oq = false; /* TODO: Occlusion queries */
struct pan_earlyzs_state earlyzs =
pan_earlyzs_get(pan_earlyzs_analyze(fs_info), writes_zs || oq,
alpha_to_coverage, zs_always_passes);
cfg.properties.pixel_kill_operation = earlyzs.kill;
cfg.properties.zs_update_operation = earlyzs.update;
} else {
cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION;
cfg.properties.allow_forward_pixel_to_kill = true;
cfg.properties.allow_forward_pixel_to_be_killed = true;
cfg.properties.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
}
cmdbuf->state.gfx.fs.rsd = rsd.gpu;
bool msaa = dyns->ms.rasterization_samples > 1;
cfg.multisample_misc.multisample_enable = msaa;
cfg.multisample_misc.sample_mask =
msaa ? dyns->ms.sample_mask : UINT16_MAX;
cfg.multisample_misc.depth_function =
test_z ? translate_compare_func(ds->depth.compare_op)
: MALI_FUNC_ALWAYS;
cfg.multisample_misc.depth_write_mask = writes_z;
cfg.multisample_misc.fixed_function_near_discard =
!rs->depth_clamp_enable;
cfg.multisample_misc.fixed_function_far_discard = !rs->depth_clamp_enable;
cfg.multisample_misc.shader_depth_range_fixed = true;
cfg.stencil_mask_misc.stencil_enable = test_s;
cfg.stencil_mask_misc.alpha_to_coverage = alpha_to_coverage;
cfg.stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS;
cfg.stencil_mask_misc.front_facing_depth_bias = rs->depth_bias.enable;
cfg.stencil_mask_misc.back_facing_depth_bias = rs->depth_bias.enable;
cfg.stencil_mask_misc.single_sampled_lines =
dyns->ms.rasterization_samples <= 1;
cfg.depth_units = rs->depth_bias.constant * 2.0f;
cfg.depth_factor = rs->depth_bias.slope;
cfg.depth_bias_clamp = rs->depth_bias.clamp;
cfg.stencil_front.mask = ds->stencil.front.compare_mask;
cfg.stencil_back.mask = ds->stencil.back.compare_mask;
cfg.stencil_mask_misc.stencil_mask_front = ds->stencil.front.write_mask;
cfg.stencil_mask_misc.stencil_mask_back = ds->stencil.back.write_mask;
cfg.stencil_front.reference_value = ds->stencil.front.reference;
cfg.stencil_back.reference_value = ds->stencil.back.reference;
if (test_s) {
cfg.stencil_front.compare_function =
translate_compare_func(ds->stencil.front.op.compare);
cfg.stencil_front.stencil_fail =
translate_stencil_op(ds->stencil.front.op.fail);
cfg.stencil_front.depth_fail =
translate_stencil_op(ds->stencil.front.op.depth_fail);
cfg.stencil_front.depth_pass =
translate_stencil_op(ds->stencil.front.op.pass);
cfg.stencil_back.compare_function =
translate_compare_func(ds->stencil.back.op.compare);
cfg.stencil_back.stencil_fail =
translate_stencil_op(ds->stencil.back.op.fail);
cfg.stencil_back.depth_fail =
translate_stencil_op(ds->stencil.back.op.depth_fail);
cfg.stencil_back.depth_pass =
translate_stencil_op(ds->stencil.back.op.pass);
}
}
cmdbuf->state.gfx.fs.rsd = ptr.gpu;
draw->fs.rsd = cmdbuf->state.gfx.fs.rsd;
}
@ -1826,12 +2035,14 @@ panvk_cmd_begin_rendering_init_fbinfo(struct panvk_cmd_buffer *cmdbuf,
to_panvk_physical_device(dev->vk.physical);
struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.fb.info;
uint32_t att_width = 0, att_height = 0;
bool has_attachments = false;
cmdbuf->state.gfx.fb.bo_count = 0;
memset(cmdbuf->state.gfx.fb.bos, 0, sizeof(cmdbuf->state.gfx.fb.bos));
memset(cmdbuf->state.gfx.fb.crc_valid, 0,
sizeof(cmdbuf->state.gfx.fb.crc_valid));
memset(&cmdbuf->state.gfx.fb.color_attachments, 0,
sizeof(cmdbuf->state.gfx.fb.color_attachments));
cmdbuf->state.gfx.fb.bound_attachments = 0;
*fbinfo = (struct pan_fb_info){
.tile_buf_budget = panfrost_query_optimal_tib_size(phys_dev->model),
@ -1854,7 +2065,10 @@ panvk_cmd_begin_rendering_init_fbinfo(struct panvk_cmd_buffer *cmdbuf,
const VkExtent3D iview_size =
vk_image_mip_level_extent(&img->vk, iview->vk.base_mip_level);
has_attachments = true;
cmdbuf->state.gfx.fb.bound_attachments |=
MESA_VK_RP_ATTACHMENT_COLOR_BIT(i);
cmdbuf->state.gfx.fb.color_attachments.fmts[i] = iview->vk.format;
cmdbuf->state.gfx.fb.color_attachments.samples[i] = img->vk.samples;
att_width = MAX2(iview_size.width, att_width);
att_height = MAX2(iview_size.height, att_height);
@ -1888,7 +2102,7 @@ panvk_cmd_begin_rendering_init_fbinfo(struct panvk_cmd_buffer *cmdbuf,
const VkExtent3D iview_size =
vk_image_mip_level_extent(&img->vk, iview->vk.base_mip_level);
has_attachments = true;
cmdbuf->state.gfx.fb.bound_attachments |= MESA_VK_RP_ATTACHMENT_DEPTH_BIT;
att_width = MAX2(iview_size.width, att_width);
att_height = MAX2(iview_size.height, att_height);
@ -1914,7 +2128,8 @@ panvk_cmd_begin_rendering_init_fbinfo(struct panvk_cmd_buffer *cmdbuf,
const VkExtent3D iview_size =
vk_image_mip_level_extent(&img->vk, iview->vk.base_mip_level);
has_attachments = true;
cmdbuf->state.gfx.fb.bound_attachments |=
MESA_VK_RP_ATTACHMENT_STENCIL_BIT;
att_width = MAX2(iview_size.width, att_width);
att_height = MAX2(iview_size.height, att_height);
@ -1937,7 +2152,7 @@ panvk_cmd_begin_rendering_init_fbinfo(struct panvk_cmd_buffer *cmdbuf,
fbinfo->height = pRenderingInfo->renderArea.offset.y +
pRenderingInfo->renderArea.extent.height;
if (has_attachments) {
if (cmdbuf->state.gfx.fb.bound_attachments) {
/* We need the rendering area to be aligned on a 32x32 section for tile
* buffer preloading to work correctly.
*/
@ -1949,6 +2164,9 @@ panvk_cmd_begin_rendering_init_fbinfo(struct panvk_cmd_buffer *cmdbuf,
fbinfo->extent.maxx = fbinfo->width - 1;
fbinfo->extent.maxy = fbinfo->height - 1;
/* We need to re-emit the FS RSD when the color attachments change. */
cmdbuf->state.gfx.fs.rsd = 0;
}
VKAPI_ATTR void VKAPI_CALL

View file

@ -49,434 +49,8 @@
#include "panfrost/util/pan_lower_framebuffer.h"
#include "pan_earlyzs.h"
#include "pan_shader.h"
/* Test whether dynamic-state bit @id is flagged on @pipeline. A NULL
 * pipeline has no dynamic state, so the answer is false. */
static bool
dyn_state_is_set(const struct panvk_graphics_pipeline *pipeline, uint32_t id)
{
   return pipeline ? BITSET_TEST(pipeline->state.dynamic.set, id) : false;
}
static bool
writes_depth(const struct vk_depth_stencil_state *ds)
{
return ds && ds->depth.test_enable && ds->depth.write_enable &&
ds->depth.compare_op != VK_COMPARE_OP_NEVER;
}
static bool
writes_stencil(const struct vk_depth_stencil_state *ds)
{
return ds && ds->stencil.test_enable &&
((ds->stencil.front.write_mask &&
(ds->stencil.front.op.fail != VK_STENCIL_OP_KEEP ||
ds->stencil.front.op.pass != VK_STENCIL_OP_KEEP ||
ds->stencil.front.op.depth_fail != VK_STENCIL_OP_KEEP)) ||
(ds->stencil.back.write_mask &&
(ds->stencil.back.op.fail != VK_STENCIL_OP_KEEP ||
ds->stencil.back.op.pass != VK_STENCIL_OP_KEEP ||
ds->stencil.back.op.depth_fail != VK_STENCIL_OP_KEEP)));
}
/* True when the depth/stencil test cannot reject any fragment: either
 * there is no DS state at all, or every enabled test uses ALWAYS. */
static bool
ds_test_always_passes(const struct vk_depth_stencil_state *ds)
{
   if (!ds)
      return true;

   bool depth_may_fail =
      ds->depth.test_enable && ds->depth.compare_op != VK_COMPARE_OP_ALWAYS;

   bool stencil_may_fail =
      ds->stencil.test_enable &&
      (ds->stencil.front.op.compare != VK_COMPARE_OP_ALWAYS ||
       ds->stencil.back.op.compare != VK_COMPARE_OP_ALWAYS);

   return !depth_may_fail && !stencil_may_fail;
}
/* Convert a Vulkan compare op to the Mali HW encoding.
 *
 * The two enums share numerical values, so this is a plain cast; the
 * STATIC_ASSERTs verify the assumption at compile time. */
static inline enum mali_func
translate_compare_func(VkCompareOp comp)
{
   STATIC_ASSERT(VK_COMPARE_OP_NEVER == (VkCompareOp)MALI_FUNC_NEVER);
   STATIC_ASSERT(VK_COMPARE_OP_LESS == (VkCompareOp)MALI_FUNC_LESS);
   STATIC_ASSERT(VK_COMPARE_OP_EQUAL == (VkCompareOp)MALI_FUNC_EQUAL);
   STATIC_ASSERT(VK_COMPARE_OP_LESS_OR_EQUAL == (VkCompareOp)MALI_FUNC_LEQUAL);
   STATIC_ASSERT(VK_COMPARE_OP_GREATER == (VkCompareOp)MALI_FUNC_GREATER);
   STATIC_ASSERT(VK_COMPARE_OP_NOT_EQUAL == (VkCompareOp)MALI_FUNC_NOT_EQUAL);
   STATIC_ASSERT(VK_COMPARE_OP_GREATER_OR_EQUAL ==
                 (VkCompareOp)MALI_FUNC_GEQUAL);
   STATIC_ASSERT(VK_COMPARE_OP_ALWAYS == (VkCompareOp)MALI_FUNC_ALWAYS);

   return (enum mali_func)comp;
}
/* Convert a Vulkan stencil op to its Mali HW equivalent. These enums are
 * not assumed to share an encoding, hence the explicit mapping. */
static enum mali_stencil_op
translate_stencil_op(VkStencilOp in)
{
   switch (in) {
   case VK_STENCIL_OP_KEEP:
      return MALI_STENCIL_OP_KEEP;
   case VK_STENCIL_OP_ZERO:
      return MALI_STENCIL_OP_ZERO;
   case VK_STENCIL_OP_REPLACE:
      return MALI_STENCIL_OP_REPLACE;
   case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
      return MALI_STENCIL_OP_INCR_SAT;
   case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
      return MALI_STENCIL_OP_DECR_SAT;
   case VK_STENCIL_OP_INCREMENT_AND_WRAP:
      return MALI_STENCIL_OP_INCR_WRAP;
   case VK_STENCIL_OP_DECREMENT_AND_WRAP:
      return MALI_STENCIL_OP_DECR_WRAP;
   case VK_STENCIL_OP_INVERT:
      return MALI_STENCIL_OP_INVERT;
   default:
      unreachable("Invalid stencil op");
   }
}
/* Pack the fragment-shader RENDERER_STATE descriptor (or template, when
 * parts of the state are dynamic) from the pipeline's static state.
 * @rsd points at storage for one RENDERER_STATE descriptor.
 *
 * NOTE: ds/ms may be NULL in vk_graphics_pipeline_state; all accesses to
 * them below are NULL-guarded, but rs is dereferenced unconditionally, so
 * rasterization state is assumed present. */
static void
emit_base_fs_rsd(const struct panvk_graphics_pipeline *pipeline,
                 const struct vk_graphics_pipeline_state *state, void *rsd)
{
   const struct pan_shader_info *info = &pipeline->fs.info;
   const struct vk_rasterization_state *rs = state->rs;
   const struct vk_depth_stencil_state *ds = state->ds;
   const struct vk_multisample_state *ms = state->ms;
   bool test_s = ds && ds->stencil.test_enable;
   bool test_z = ds && ds->depth.test_enable;
   bool writes_z = writes_depth(ds);
   bool writes_s = writes_stencil(ds);

   pan_pack(rsd, RENDERER_STATE, cfg) {
      bool alpha_to_coverage = ms && ms->alpha_to_coverage_enable;

      if (pipeline->state.fs.required) {
         pan_shader_prepare_rsd(info, pipeline->fs.code, &cfg);

         /* Forward-pixel-kill is only safe when every bound RT is written
          * by the shader and nothing (a2c, blend) reads the destination. */
         uint8_t rt_written = info->outputs_written >> FRAG_RESULT_DATA0;
         uint8_t rt_mask = pipeline->state.fs.rt_mask;
         cfg.properties.allow_forward_pixel_to_kill =
            pipeline->fs.info.fs.can_fpk && !(rt_mask & ~rt_written) &&
            !alpha_to_coverage && !pipeline->state.blend.reads_dest;

         bool writes_zs = writes_z || writes_s;
         bool zs_always_passes = ds_test_always_passes(ds);
         bool oq = false; /* TODO: Occlusion queries */

         /* Let the shared early-ZS analysis pick kill/update operations. */
         struct pan_earlyzs_state earlyzs =
            pan_earlyzs_get(pan_earlyzs_analyze(info), writes_zs || oq,
                            alpha_to_coverage, zs_always_passes);

         cfg.properties.pixel_kill_operation = earlyzs.kill;
         cfg.properties.zs_update_operation = earlyzs.update;
      } else {
         /* No fragment shader: fixed-function depth, aggressive early kill. */
         cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION;
         cfg.properties.allow_forward_pixel_to_kill = true;
         cfg.properties.allow_forward_pixel_to_be_killed = true;
         cfg.properties.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
      }

      bool msaa = ms && ms->rasterization_samples > 1;
      cfg.multisample_misc.multisample_enable = msaa;
      cfg.multisample_misc.sample_mask = msaa ? ms->sample_mask : UINT16_MAX;

      cfg.multisample_misc.depth_function =
         test_z ? translate_compare_func(ds->depth.compare_op)
                : MALI_FUNC_ALWAYS;
      cfg.multisample_misc.depth_write_mask = writes_z;
      /* Depth clamp and near/far discard are mutually exclusive. */
      cfg.multisample_misc.fixed_function_near_discard =
         !rs->depth_clamp_enable;
      cfg.multisample_misc.fixed_function_far_discard = !rs->depth_clamp_enable;
      cfg.multisample_misc.shader_depth_range_fixed = true;

      cfg.stencil_mask_misc.stencil_enable = test_s;
      cfg.stencil_mask_misc.alpha_to_coverage = alpha_to_coverage;
      cfg.stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS;
      cfg.stencil_mask_misc.front_facing_depth_bias = rs->depth_bias.enable;
      cfg.stencil_mask_misc.back_facing_depth_bias = rs->depth_bias.enable;
      cfg.stencil_mask_misc.single_sampled_lines =
         !ms || ms->rasterization_samples <= 1;

      /* NOTE(review): the usual template-baking pattern writes static
       * values when the state is NOT dynamic; these three guards pack the
       * values when the state IS dynamic. Confirm the conditions are not
       * missing a '!' (the draw-time path merges a dynamically packed RSD
       * over this template). */
      if (dyn_state_is_set(pipeline, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_FACTORS)) {
         cfg.depth_units = rs->depth_bias.constant * 2.0f;
         cfg.depth_factor = rs->depth_bias.slope;
         cfg.depth_bias_clamp = rs->depth_bias.clamp;
      }

      if (dyn_state_is_set(pipeline, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK)) {
         cfg.stencil_front.mask = ds->stencil.front.compare_mask;
         cfg.stencil_back.mask = ds->stencil.back.compare_mask;
      }

      if (dyn_state_is_set(pipeline, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK)) {
         cfg.stencil_mask_misc.stencil_mask_front =
            ds->stencil.front.write_mask;
         cfg.stencil_mask_misc.stencil_mask_back = ds->stencil.back.write_mask;
      }

      if (dyn_state_is_set(pipeline, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE)) {
         cfg.stencil_front.reference_value = ds->stencil.front.reference;
         cfg.stencil_back.reference_value = ds->stencil.back.reference;
      }

      if (test_s) {
         cfg.stencil_front.compare_function =
            translate_compare_func(ds->stencil.front.op.compare);
         cfg.stencil_front.stencil_fail =
            translate_stencil_op(ds->stencil.front.op.fail);
         cfg.stencil_front.depth_fail =
            translate_stencil_op(ds->stencil.front.op.depth_fail);
         cfg.stencil_front.depth_pass =
            translate_stencil_op(ds->stencil.front.op.pass);
         cfg.stencil_back.compare_function =
            translate_compare_func(ds->stencil.back.op.compare);
         cfg.stencil_back.stencil_fail =
            translate_stencil_op(ds->stencil.back.op.fail);
         cfg.stencil_back.depth_fail =
            translate_stencil_op(ds->stencil.back.op.depth_fail);
         cfg.stencil_back.depth_pass =
            translate_stencil_op(ds->stencil.back.op.pass);
      }
   }
}
/* Map the NIR ALU type of a blend input to the Mali register file format. */
static enum mali_register_file_format
blend_type_from_nir(nir_alu_type nir_type)
{
   switch (nir_type) {
   case 0:
      /* Render target not in use */
      return 0;
   case nir_type_float16: return MALI_REGISTER_FILE_FORMAT_F16;
   case nir_type_float32: return MALI_REGISTER_FILE_FORMAT_F32;
   case nir_type_int16:   return MALI_REGISTER_FILE_FORMAT_I16;
   case nir_type_int32:   return MALI_REGISTER_FILE_FORMAT_I32;
   case nir_type_uint16:  return MALI_REGISTER_FILE_FORMAT_U16;
   case nir_type_uint32:  return MALI_REGISTER_FILE_FORMAT_U32;
   default:
      unreachable("Unsupported blend shader type for NIR alu type");
   }
}
/* Pack the BLEND descriptor for render target @rt from the pipeline's
 * static blend state into @bd. */
static void
emit_blend(const struct panvk_graphics_pipeline *pipeline, unsigned rt,
           void *bd)
{
   const struct pan_blend_state *blend = &pipeline->state.blend.pstate;
   const struct pan_blend_rt_state *rts = &blend->rts[rt];
   /* Dithering is currently never enabled in this path. */
   bool dithered = false;

   pan_pack(bd, BLEND, cfg) {
      /* Unused or fully-masked RT: disable blending entirely. */
      if (!blend->rt_count || !rts->equation.color_mask) {
         cfg.enable = false;
         cfg.internal.mode = MALI_BLEND_MODE_OFF;
         continue;
      }

      cfg.srgb = util_format_is_srgb(rts->format);
      cfg.load_destination = pan_blend_reads_dest(blend->rts[rt].equation);
      cfg.round_to_fb_precision = !dithered;

      /* Widest channel of the RT format, used to scale the blend constant. */
      const struct util_format_description *format_desc =
         util_format_description(rts->format);
      unsigned chan_size = 0;
      for (unsigned i = 0; i < format_desc->nr_channels; i++)
         chan_size = MAX2(format_desc->channel[i].size, chan_size);

      pan_blend_to_fixed_function_equation(blend->rts[rt].equation,
                                           &cfg.equation);

      /* Fixed point constant */
      float fconst = pan_blend_get_constant(
         pan_blend_constant_mask(blend->rts[rt].equation), blend->constants);
      u16 constant = fconst * ((1 << chan_size) - 1);
      constant <<= 16 - chan_size;
      cfg.constant = constant;

      if (pan_blend_is_opaque(blend->rts[rt].equation)) {
         cfg.internal.mode = MALI_BLEND_MODE_OPAQUE;
      } else {
         cfg.internal.mode = MALI_BLEND_MODE_FIXED_FUNCTION;
         cfg.internal.fixed_function.alpha_zero_nop =
            pan_blend_alpha_zero_nop(blend->rts[rt].equation);
         cfg.internal.fixed_function.alpha_one_store =
            pan_blend_alpha_one_store(blend->rts[rt].equation);
      }

      /* If we want the conversion to work properly,
       * num_comps must be set to 4
       */
      cfg.internal.fixed_function.num_comps = 4;
      cfg.internal.fixed_function.conversion.memory_format =
         GENX(panfrost_dithered_format_from_pipe_format)(rts->format, dithered);
      cfg.internal.fixed_function.conversion.register_format =
         blend_type_from_nir(pipeline->fs.info.bifrost.blend[rt].type);
      cfg.internal.fixed_function.rt = rt;
   }
}
/* Shorthand: test whether dynamic-state bit MESA_VK_DYNAMIC_<__name> is
 * flagged in the vk_graphics_pipeline_state @__state. */
#define is_dyn(__state, __name)                                                \
   BITSET_TEST((__state)->dynamic, MESA_VK_DYNAMIC_##__name)
static uint32_t
get_active_color_attachments(const struct vk_graphics_pipeline_state *state)
{
const struct vk_color_blend_state *cb = state->cb;
if (state->rs->rasterizer_discard_enable || !cb)
return 0;
return cb->color_write_enables & BITFIELD_MASK(cb->attachment_count);
}
/* Translate the Vulkan color-blend state into the pan_blend_state stored
 * on the pipeline, and precompute per-RT blend-constant factors.
 *
 * NOTE(review): ms is dereferenced for nr_samples whenever cb is present —
 * assumes multisample state always accompanies color-blend state here;
 * confirm against the pipeline-state collection logic. */
static void
parse_color_blend(struct panvk_graphics_pipeline *pipeline,
                  const struct vk_graphics_pipeline_state *state)
{
   const struct vk_color_blend_state *cb = state->cb;
   const struct vk_render_pass_state *rp = state->rp;
   const struct vk_multisample_state *ms = state->ms;
   struct panvk_device *dev = to_panvk_device(pipeline->base.base.device);

   /* No color-blend state (e.g. rasterization-discard-only pipelines):
    * nothing to parse. */
   if (!cb)
      return;

   uint32_t active_color_attachments = get_active_color_attachments(state);

   pipeline->state.blend.pstate.logicop_enable = cb->logic_op_enable;
   pipeline->state.blend.pstate.logicop_func =
      vk_logic_op_to_pipe(cb->logic_op);
   pipeline->state.blend.pstate.rt_count =
      util_last_bit(active_color_attachments);
   memcpy(pipeline->state.blend.pstate.constants, cb->blend_constants,
          sizeof(pipeline->state.blend.pstate.constants));

   for (unsigned i = 0; i < pipeline->state.blend.pstate.rt_count; i++) {
      const struct vk_color_blend_attachment_state *in = &cb->attachments[i];
      struct pan_blend_rt_state *out = &pipeline->state.blend.pstate.rts[i];

      out->format = vk_format_to_pipe_format(rp->color_attachment_formats[i]);

      bool dest_has_alpha = util_format_has_alpha(out->format);

      out->nr_samples = ms->rasterization_samples;
      out->equation.blend_enable = in->blend_enable;
      out->equation.color_mask = in->write_mask;
      out->equation.rgb_func = vk_blend_op_to_pipe(in->color_blend_op);
      out->equation.rgb_src_factor =
         vk_blend_factor_to_pipe(in->src_color_blend_factor);
      out->equation.rgb_dst_factor =
         vk_blend_factor_to_pipe(in->dst_color_blend_factor);
      out->equation.alpha_func = vk_blend_op_to_pipe(in->alpha_blend_op);
      out->equation.alpha_src_factor =
         vk_blend_factor_to_pipe(in->src_alpha_blend_factor);
      out->equation.alpha_dst_factor =
         vk_blend_factor_to_pipe(in->dst_alpha_blend_factor);

      /* Formats without alpha behave as if DST_ALPHA == 1.0. */
      if (!dest_has_alpha) {
         out->equation.rgb_src_factor =
            util_blend_dst_alpha_to_one(out->equation.rgb_src_factor);
         out->equation.rgb_dst_factor =
            util_blend_dst_alpha_to_one(out->equation.rgb_dst_factor);
         out->equation.alpha_src_factor =
            util_blend_dst_alpha_to_one(out->equation.alpha_src_factor);
         out->equation.alpha_dst_factor =
            util_blend_dst_alpha_to_one(out->equation.alpha_dst_factor);
      }

      pipeline->state.blend.reads_dest |= pan_blend_reads_dest(out->equation);

      /* When the blend equation gets lowered to a blend shader, the
       * constant is baked in, so no dynamic constant is tracked. */
      unsigned constant_mask = panvk_per_arch(blend_needs_lowering)(
                                  dev, &pipeline->state.blend.pstate, i)
                                  ? 0
                                  : pan_blend_constant_mask(out->equation);
      /* index is (uint8_t)~0 when no constant is used (ffs(0) - 1). */
      pipeline->state.blend.constant[i].index = ffs(constant_mask) - 1;
      if (constant_mask) {
         /* On Bifrost, the blend constant is expressed with a UNORM of the
          * size of the target format. The value is then shifted such that
          * used bits are in the MSB. Here we calculate the factor at pipeline
          * creation time so we only have to do a
          *   hw_constant = float_constant * factor;
          * at descriptor emission time.
          */
         const struct util_format_description *format_desc =
            util_format_description(out->format);
         unsigned chan_size = 0;
         for (unsigned c = 0; c < format_desc->nr_channels; c++)
            chan_size = MAX2(format_desc->channel[c].size, chan_size);
         pipeline->state.blend.constant[i].bifrost_factor =
            ((1 << chan_size) - 1) << (16 - chan_size);
      }
   }
}
/* Decide whether the fragment shader must execute, based on the
 * pipeline's compiled shader info and static blend state. */
static bool
fs_required(struct panvk_graphics_pipeline *pipeline)
{
   const struct pan_shader_info *info = &pipeline->fs.info;
   const struct pan_blend_state *blend = &pipeline->state.blend.pstate;

   /* Shaders with side effects must always run. */
   if (info->fs.sidefx)
      return true;

   /* Any unmasked color output forces execution. */
   for (unsigned rt = 0; rt < blend->rt_count; rt++) {
      if (blend->rts[rt].equation.color_mask)
         return true;
   }

   /* If depth is written and not implied we need to execute.
    * TODO: Predicate on Z/S writes being enabled */
   return info->fs.writes_depth || info->fs.writes_stencil;
}
/* Initialize the pipeline's fragment-shader state: decide whether the RSD
 * must be emitted at draw time (dynamic_rsd), then emit the RSD + BLEND
 * descriptors either into the pipeline descriptor pool (fully static) or
 * into the in-pipeline templates to be patched at draw time. */
static void
init_fs_state(struct panvk_graphics_pipeline *pipeline,
              const struct vk_graphics_pipeline_state *state)
{
   /* Any of these dynamic states is baked into the RSD/BLEND words, so the
    * descriptors must be re-packed at draw time when one of them is dynamic. */
   pipeline->state.fs.dynamic_rsd = is_dyn(state, RS_DEPTH_BIAS_FACTORS) ||
                                    is_dyn(state, CB_BLEND_CONSTANTS) ||
                                    is_dyn(state, DS_STENCIL_COMPARE_MASK) ||
                                    is_dyn(state, DS_STENCIL_WRITE_MASK) ||
                                    is_dyn(state, DS_STENCIL_REFERENCE);
   pipeline->state.fs.rt_mask = get_active_color_attachments(state);
   pipeline->state.fs.required = fs_required(pipeline);

   /* At least one BLEND descriptor is emitted even with zero RTs. */
   unsigned bd_count = MAX2(pipeline->state.blend.pstate.rt_count, 1);
   struct mali_renderer_state_packed *rsd = &pipeline->state.fs.rsd_template;
   struct mali_blend_packed *bds = pipeline->state.blend.bd_template;

   if (!pipeline->state.fs.dynamic_rsd) {
      /* Fully static: allocate GPU-visible storage once and point the
       * pack targets at it instead of the templates. */
      struct panfrost_ptr ptr = pan_pool_alloc_desc_aggregate(
         &pipeline->base.desc_pool.base, PAN_DESC(RENDERER_STATE),
         PAN_DESC_ARRAY(bd_count, BLEND));
      rsd = ptr.cpu;
      bds = ptr.cpu + pan_size(RENDERER_STATE);
      pipeline->fs.rsd = ptr.gpu;
   }

   emit_base_fs_rsd(pipeline, state, rsd);
   for (unsigned i = 0; i < bd_count; i++)
      emit_blend(pipeline, i, &bds[i]);
}
static VkResult
init_pipeline_shader(struct panvk_pipeline *pipeline,
const VkPipelineShaderStageCreateInfo *stage_info,
@ -484,15 +58,10 @@ init_pipeline_shader(struct panvk_pipeline *pipeline,
struct panvk_pipeline_shader *pshader)
{
struct panvk_device *dev = to_panvk_device(pipeline->base.device);
struct panvk_graphics_pipeline *gfx_pipeline =
panvk_pipeline_to_graphics_pipeline(pipeline);
struct panvk_shader *shader;
shader = panvk_per_arch(shader_create)(
dev, stage_info, pipeline->layout,
gfx_pipeline ? &gfx_pipeline->state.blend.pstate : NULL,
dyn_state_is_set(gfx_pipeline, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS),
alloc);
shader =
panvk_per_arch(shader_create)(dev, stage_info, pipeline->layout, alloc);
if (!shader)
return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
@ -764,8 +333,6 @@ panvk_graphics_pipeline_create(struct panvk_device *dev,
panvk_pool_init(&gfx_pipeline->base.desc_pool, dev, NULL, 0, 4096,
"Pipeline static state", false);
parse_color_blend(gfx_pipeline, &state);
/* Make sure the stage info is correct even if no stage info is provided for
* this stage in pStages.
*/
@ -793,7 +360,6 @@ panvk_graphics_pipeline_create(struct panvk_device *dev,
return result;
}
init_fs_state(gfx_pipeline, &state);
link_shaders(gfx_pipeline, &gfx_pipeline->vs, &gfx_pipeline->fs);
return VK_SUCCESS;

View file

@ -61,20 +61,12 @@ load_sysval_from_push_const(nir_builder *b, nir_intrinsic_instr *intr,
.range = intr->def.num_components * intr->def.bit_size / 8);
}
struct sysval_options {
/* If non-null, a vec4 of blend constants known at pipeline compile time. If
* null, blend constants are dynamic.
*/
float *static_blend_constants;
};
static bool
panvk_lower_sysvals(nir_builder *b, nir_instr *instr, void *data)
{
if (instr->type != nir_instr_type_intrinsic)
return false;
struct sysval_options *opts = data;
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
nir_def *val = NULL;
b->cursor = nir_before_instr(instr);
@ -110,19 +102,8 @@ panvk_lower_sysvals(nir_builder *b, nir_instr *instr, void *data)
SYSVAL(graphics, vs.base_instance));
break;
case nir_intrinsic_load_blend_const_color_rgba:
if (opts->static_blend_constants) {
const nir_const_value constants[4] = {
{.f32 = opts->static_blend_constants[0]},
{.f32 = opts->static_blend_constants[1]},
{.f32 = opts->static_blend_constants[2]},
{.f32 = opts->static_blend_constants[3]},
};
val = nir_build_imm(b, 4, 32, constants);
} else {
val = load_sysval_from_push_const(b, intr,
SYSVAL(graphics, blend.constants));
}
val = load_sysval_from_push_const(b, intr,
SYSVAL(graphics, blend.constants));
break;
case nir_intrinsic_load_layer_id:
@ -215,8 +196,6 @@ struct panvk_shader *
panvk_per_arch(shader_create)(struct panvk_device *dev,
const VkPipelineShaderStageCreateInfo *stage_info,
const struct panvk_pipeline_layout *layout,
struct pan_blend_state *blend_state,
bool static_blend_constants,
const VkAllocationCallbacks *alloc)
{
VK_FROM_HANDLE(vk_shader_module, module, stage_info->module);
@ -371,30 +350,11 @@ panvk_per_arch(shader_create)(struct panvk_device *dev,
pan_shader_preprocess(nir, inputs.gpu_id);
if (stage == MESA_SHADER_FRAGMENT) {
panvk_lower_blend(dev, nir, &inputs, blend_state);
}
if (stage == MESA_SHADER_VERTEX)
NIR_PASS_V(nir, pan_lower_image_index, MAX_VS_ATTRIBS);
struct sysval_options sysval_options = {
.static_blend_constants =
static_blend_constants ? blend_state->constants : NULL,
};
NIR_PASS_V(nir, nir_shader_instructions_pass, panvk_lower_sysvals,
nir_metadata_block_index | nir_metadata_dominance,
&sysval_options);
if (stage == MESA_SHADER_FRAGMENT) {
enum pipe_format rt_formats[MAX_RTS] = {PIPE_FORMAT_NONE};
for (unsigned rt = 0; rt < MAX_RTS; ++rt)
rt_formats[rt] = blend_state->rts[rt].format;
NIR_PASS_V(nir, GENX(pan_inline_rt_conversion), rt_formats);
}
nir_metadata_block_index | nir_metadata_dominance, NULL);
GENX(pan_shader_compile)(nir, &inputs, &shader->binary, &shader->info);