vc4: Move all of our fixed function fragment color handling to NIR.

This massively reduces our dependency on VC4-specific optimization passes.

shader-db:
total uniforms in shared programs: 32077 -> 32067 (-0.03%)
uniforms in affected programs:     149 -> 139 (-6.71%)
total instructions in shared programs: 98208 -> 98182 (-0.03%)
instructions in affected programs:     2154 -> 2128 (-1.21%)
This commit is contained in:
Eric Anholt 2015-04-13 21:36:24 -07:00
parent 38c6c0f5b4
commit bf3c50fba2
6 changed files with 538 additions and 388 deletions

View file

@ -19,6 +19,7 @@ C_SOURCES := \
vc4_fence.c \
vc4_formats.c \
vc4_job.c \
vc4_nir_lower_blend.c \
vc4_nir_lower_io.c \
vc4_opt_algebraic.c \
vc4_opt_constant_folding.c \

View file

@ -0,0 +1,431 @@
/*
* Copyright © 2015 Broadcom
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
/**
* Implements most of the fixed function fragment pipeline in shader code.
*
* VC4 doesn't have any hardware support for blending, alpha test, logic ops,
* or color mask. Instead, you read the current contents of the destination
* from the tile buffer after having waited for the scoreboard (which is
* handled by vc4_qpu_emit.c), then do math using your output color and that
* destination value, and update the output color appropriately.
*/
/**
* Lowers fixed-function blending to a load of the destination color and a
* series of ALU operations before the store of the output.
*/
#include "util/u_format.h"
#include "vc4_qir.h"
#include "glsl/nir/nir_builder.h"
#include "vc4_context.h"
/**
 * Emits a load of the previous fragment color from the tile buffer.
 *
 * The load is a one-component nir_intrinsic_load_input whose input index is
 * the special VC4_NIR_TLB_COLOR_READ_INPUT value.  It is inserted at the
 * builder's current position and its SSA result is returned.
 */
static nir_ssa_def *
vc4_nir_get_dst_color(nir_builder *b)
{
        nir_intrinsic_instr *tlb_read =
                nir_intrinsic_instr_create(b->shader,
                                           nir_intrinsic_load_input);

        tlb_read->const_index[0] = VC4_NIR_TLB_COLOR_READ_INPUT;
        tlb_read->num_components = 1;
        nir_ssa_dest_init(&tlb_read->instr, &tlb_read->dest, 1, NULL);

        nir_builder_instr_insert(b, &tlb_read->instr);

        return &tlb_read->dest.ssa;
}
/**
 * Builds the NIR that converts one sRGB-encoded channel to linear.
 *
 * Inputs below 0.04045 take the linear segment (srgb / 12.92); all other
 * inputs take the curve ((srgb + 0.055) / 1.055) ^ 2.4.  Both segments are
 * emitted and the result is chosen with a bcsel.
 */
static nir_ssa_def *
vc4_nir_srgb_decode(nir_builder *b, nir_ssa_def *srgb)
{
        nir_ssa_def *use_linear_segment =
                nir_flt(b, srgb, nir_imm_float(b, 0.04045));
        nir_ssa_def *linear_segment =
                nir_fmul(b, srgb, nir_imm_float(b, 1.0 / 12.92));

        nir_ssa_def *offset = nir_fadd(b, srgb, nir_imm_float(b, 0.055));
        nir_ssa_def *scaled =
                nir_fmul(b, offset, nir_imm_float(b, 1.0 / 1.055));
        nir_ssa_def *curve_segment =
                nir_fpow(b, scaled, nir_imm_float(b, 2.4));

        return nir_bcsel(b, use_linear_segment,
                         linear_segment, curve_segment);
}
/**
 * Builds the NIR that converts one linear channel to its sRGB encoding.
 *
 * Inputs below 0.0031308 take the linear segment (linear * 12.92); all other
 * inputs take the curve 1.055 * linear ^ 0.41666 - 0.055.  Both segments are
 * emitted and the result is chosen with a bcsel.
 */
static nir_ssa_def *
vc4_nir_srgb_encode(nir_builder *b, nir_ssa_def *linear)
{
        nir_ssa_def *use_linear_segment =
                nir_flt(b, linear, nir_imm_float(b, 0.0031308));
        nir_ssa_def *linear_segment =
                nir_fmul(b, linear, nir_imm_float(b, 12.92));

        /* The exponent 0.41666 approximates 1 / 2.4. */
        nir_ssa_def *gamma =
                nir_fpow(b, linear, nir_imm_float(b, 0.41666));
        nir_ssa_def *scaled = nir_fmul(b, nir_imm_float(b, 1.055), gamma);
        nir_ssa_def *curve_segment =
                nir_fsub(b, scaled, nir_imm_float(b, 0.055));

        return nir_bcsel(b, use_linear_segment,
                         linear_segment, curve_segment);
}
/** Emits (1.0 - value) as a float subtraction. */
static nir_ssa_def *
vc4_nir_one_minus(nir_builder *b, nir_ssa_def *value)
{
        return nir_fsub(b, nir_imm_float(b, 1.0), value);
}

/**
 * Returns the blend factor value for one channel.
 *
 * \param b        builder used to emit any needed instructions
 * \param src      the four source color channels
 * \param dst      the four destination color channels
 * \param factor   a PIPE_BLENDFACTOR_* value
 * \param channel  which channel (0-3) the factor is being computed for
 *
 * The caller multiplies the returned factor with the color being weighted.
 * Unsupported factors (including the dual-source SRC1 ones) print a message
 * to stderr and use a factor of 1.0.
 */
static nir_ssa_def *
vc4_blend_channel(nir_builder *b,
                  nir_ssa_def **src,
                  nir_ssa_def **dst,
                  unsigned factor,
                  int channel)
{
        switch (factor) {
        case PIPE_BLENDFACTOR_ZERO:
                return nir_imm_float(b, 0.0);
        case PIPE_BLENDFACTOR_ONE:
                return nir_imm_float(b, 1.0);

        case PIPE_BLENDFACTOR_SRC_COLOR:
                return src[channel];
        case PIPE_BLENDFACTOR_INV_SRC_COLOR:
                return vc4_nir_one_minus(b, src[channel]);

        case PIPE_BLENDFACTOR_SRC_ALPHA:
                return src[3];
        case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
                return vc4_nir_one_minus(b, src[3]);

        case PIPE_BLENDFACTOR_DST_COLOR:
                return dst[channel];
        case PIPE_BLENDFACTOR_INV_DST_COLOR:
                return vc4_nir_one_minus(b, dst[channel]);

        case PIPE_BLENDFACTOR_DST_ALPHA:
                return dst[3];
        case PIPE_BLENDFACTOR_INV_DST_ALPHA:
                return vc4_nir_one_minus(b, dst[3]);

        case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
                /* The alpha channel uses 1.0; the color channels use
                 * min(src alpha, 1 - dst alpha).
                 */
                if (channel == 3)
                        return nir_imm_float(b, 1.0);
                return nir_fmin(b, src[3], vc4_nir_one_minus(b, dst[3]));

        case PIPE_BLENDFACTOR_CONST_COLOR:
                return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_X + channel);
        case PIPE_BLENDFACTOR_INV_CONST_COLOR:
                return vc4_nir_one_minus(b,
                                         vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_X + channel));

        case PIPE_BLENDFACTOR_CONST_ALPHA:
                return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_W);
        case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
                return vc4_nir_one_minus(b,
                                         vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_W));

        case PIPE_BLENDFACTOR_SRC1_COLOR:
        case PIPE_BLENDFACTOR_SRC1_ALPHA:
        case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
        case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
        default:
                /* Unsupported. */
                fprintf(stderr, "Unknown blend factor %d\n", factor);
                return nir_imm_float(b, 1.0);
        }
}
/**
 * Combines the weighted source and destination terms of one channel
 * according to a PIPE_BLEND_* equation.
 *
 * An unrecognized equation prints a message to stderr and yields the source
 * term unchanged.
 */
static nir_ssa_def *
vc4_blend_func(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
               unsigned func)
{
        nir_ssa_def *blended = src;

        switch (func) {
        case PIPE_BLEND_ADD:
                blended = nir_fadd(b, src, dst);
                break;
        case PIPE_BLEND_SUBTRACT:
                blended = nir_fsub(b, src, dst);
                break;
        case PIPE_BLEND_REVERSE_SUBTRACT:
                blended = nir_fsub(b, dst, src);
                break;
        case PIPE_BLEND_MIN:
                blended = nir_fmin(b, src, dst);
                break;
        case PIPE_BLEND_MAX:
                blended = nir_fmax(b, src, dst);
                break;
        default:
                /* Unsupported. */
                fprintf(stderr, "Unknown blend func %d\n", func);
                break;
        }

        return blended;
}
/**
 * Emits the blend equation for all four channels.
 *
 * \param c          compile context; the blend state comes from c->fs_key
 * \param b          builder used to emit instructions
 * \param result     receives the four blended channels
 * \param src_color  the shader's output color; overwritten with its
 *                   saturated value when blending is enabled
 * \param dst_color  the current destination color
 *
 * With blending disabled the source color is passed through untouched.
 */
static void
vc4_do_blending(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result,
                nir_ssa_def **src_color, nir_ssa_def **dst_color)
{
        struct pipe_rt_blend_state *state = &c->fs_key->blend;
        int chan;

        if (!state->blend_enable) {
                for (chan = 0; chan < 4; chan++)
                        result[chan] = src_color[chan];
                return;
        }

        /* Clamp the src color to [0, 1].  Dest is already clamped. */
        for (chan = 0; chan < 4; chan++)
                src_color[chan] = nir_fsat(b, src_color[chan]);

        /* Weight each channel by its factor: RGB and alpha have separate
         * factor selections.
         */
        nir_ssa_def *src_term[4], *dst_term[4];
        for (chan = 0; chan < 4; chan++) {
                const int is_alpha = (chan == 3);
                int src_factor = is_alpha ? state->alpha_src_factor :
                                            state->rgb_src_factor;
                int dst_factor = is_alpha ? state->alpha_dst_factor :
                                            state->rgb_dst_factor;

                nir_ssa_def *src_weight =
                        vc4_blend_channel(b, src_color, dst_color,
                                          src_factor, chan);
                src_term[chan] = nir_fmul(b, src_color[chan], src_weight);

                nir_ssa_def *dst_weight =
                        vc4_blend_channel(b, src_color, dst_color,
                                          dst_factor, chan);
                dst_term[chan] = nir_fmul(b, dst_color[chan], dst_weight);
        }

        /* Combine the weighted terms with the per-RGB/per-alpha equation. */
        for (chan = 0; chan < 4; chan++) {
                const int is_alpha = (chan == 3);

                result[chan] = vc4_blend_func(b, src_term[chan],
                                              dst_term[chan],
                                              is_alpha ? state->alpha_func :
                                                         state->rgb_func);
        }
}
/**
 * Applies a PIPE_LOGICOP_* bitwise operation to the packed source and
 * destination colors and returns the packed result.
 *
 * An unrecognized operation prints a message to stderr and behaves like
 * PIPE_LOGICOP_COPY (the source is returned unchanged).
 */
static nir_ssa_def *
vc4_logicop(nir_builder *b, int logicop_func,
            nir_ssa_def *src, nir_ssa_def *dst)
{
        switch (logicop_func) {
        /* Constant results. */
        case PIPE_LOGICOP_CLEAR:
                return nir_imm_int(b, 0);
        case PIPE_LOGICOP_SET:
                return nir_imm_int(b, ~0);

        /* A single operand, possibly inverted. */
        case PIPE_LOGICOP_NOOP:
                return dst;
        case PIPE_LOGICOP_INVERT:
                return nir_inot(b, dst);
        case PIPE_LOGICOP_COPY_INVERTED:
                return nir_inot(b, src);

        /* AND family. */
        case PIPE_LOGICOP_AND:
                return nir_iand(b, src, dst);
        case PIPE_LOGICOP_NAND:
                return nir_inot(b, nir_iand(b, src, dst));
        case PIPE_LOGICOP_AND_INVERTED:
                return nir_iand(b, nir_inot(b, src), dst);
        case PIPE_LOGICOP_AND_REVERSE:
                return nir_iand(b, src, nir_inot(b, dst));

        /* OR family. */
        case PIPE_LOGICOP_OR:
                return nir_ior(b, src, dst);
        case PIPE_LOGICOP_NOR:
                return nir_inot(b, nir_ior(b, src, dst));
        case PIPE_LOGICOP_OR_INVERTED:
                return nir_ior(b, nir_inot(b, src), dst);
        case PIPE_LOGICOP_OR_REVERSE:
                return nir_ior(b, src, nir_inot(b, dst));

        /* XOR family. */
        case PIPE_LOGICOP_XOR:
                return nir_ixor(b, src, dst);
        case PIPE_LOGICOP_EQUIV:
                return nir_inot(b, nir_ixor(b, src, dst));

        default:
                fprintf(stderr, "Unknown logic op %d\n", logicop_func);
                /* FALLTHROUGH */
        case PIPE_LOGICOP_COPY:
                return src;
        }
}
/**
 * Emits the float comparison "src0 <func> src1" for a PIPE_FUNC_* value.
 *
 * PIPE_FUNC_ALWAYS yields the constant ~0 and PIPE_FUNC_NEVER the constant
 * 0.  GREATER and LEQUAL are expressed with flt/fge by swapping the
 * operands.  An unrecognized function prints a message to stderr and is
 * treated as PIPE_FUNC_NEVER.
 */
static nir_ssa_def *
vc4_nir_pipe_compare_func(nir_builder *b, int func,
                          nir_ssa_def *src0, nir_ssa_def *src1)
{
        switch (func) {
        case PIPE_FUNC_ALWAYS:
                return nir_imm_int(b, ~0);

        case PIPE_FUNC_EQUAL:
                return nir_feq(b, src0, src1);
        case PIPE_FUNC_NOTEQUAL:
                return nir_fne(b, src0, src1);

        case PIPE_FUNC_LESS:
                return nir_flt(b, src0, src1);
        case PIPE_FUNC_LEQUAL:
                return nir_fge(b, src1, src0);
        case PIPE_FUNC_GREATER:
                return nir_flt(b, src1, src0);
        case PIPE_FUNC_GEQUAL:
                return nir_fge(b, src0, src1);

        default:
                fprintf(stderr, "Unknown compare func %d\n", func);
                /* FALLTHROUGH */
        case PIPE_FUNC_NEVER:
                return nir_imm_int(b, 0);
        }
}
/**
 * Emits the alpha test as a conditional discard.
 *
 * Does nothing unless c->fs_key->alpha_test is set.  Otherwise the given
 * alpha value is compared against the QUNIFORM_ALPHA_REF state uniform using
 * the key's alpha_test_func, and a discard_if is emitted on the inverse of
 * that comparison, so fragments failing the test are discarded.
 */
static void
vc4_nir_emit_alpha_test_discard(struct vc4_compile *c, nir_builder *b,
                                nir_ssa_def *alpha)
{
        if (c->fs_key->alpha_test) {
                nir_ssa_def *ref =
                        vc4_nir_get_state_uniform(b, QUNIFORM_ALPHA_REF);
                nir_ssa_def *passes =
                        vc4_nir_pipe_compare_func(b,
                                                  c->fs_key->alpha_test_func,
                                                  alpha, ref);

                nir_intrinsic_instr *kill =
                        nir_intrinsic_instr_create(b->shader,
                                                   nir_intrinsic_discard_if);
                kill->num_components = 1;

                nir_ssa_def *fails = nir_inot(b, passes);
                kill->src[0] = nir_src_for_ssa(fails);

                nir_builder_instr_insert(b, &kill->instr);
        }
}
/**
 * Rewrites one color store_output so that it stores the final packed 8888
 * color after alpha test, blending, sRGB conversion, logic op and color
 * mask have been applied in shader code.
 *
 * \param c     compile context; all fixed-function state comes from
 *              c->fs_key
 * \param b     builder already positioned before \p intr
 * \param intr  the store_output intrinsic whose vec4 source is the shader's
 *              color; on return its source is the packed color and its
 *              num_components is 1
 */
static void
vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
                          nir_intrinsic_instr *intr)
{
        enum pipe_format color_format = c->fs_key->color_format;
        const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);

        /* Pull out the float src/dst color components. */
        nir_ssa_def *packed_dst_color = vc4_nir_get_dst_color(b);
        nir_ssa_def *dst_vec4 = nir_unpack_unorm_4x8(b, packed_dst_color);
        nir_ssa_def *src_color[4], *unpacked_dst_color[4];
        for (unsigned i = 0; i < 4; i++) {
                src_color[i] = nir_swizzle(b, intr->src[0].ssa, &i, 1, false);
                unpacked_dst_color[i] = nir_swizzle(b, dst_vec4, &i, 1, false);
        }

        /* Unswizzle the destination color. */
        nir_ssa_def *dst_color[4];
        for (unsigned i = 0; i < 4; i++) {
                dst_color[i] = vc4_nir_get_swizzled_channel(b,
                                                            unpacked_dst_color,
                                                            format_swiz[i]);
        }

        /* The alpha test uses the unblended source alpha. */
        vc4_nir_emit_alpha_test_discard(c, b, src_color[3]);

        /* Turn dst color to linear.  Alpha is never sRGB-encoded. */
        if (util_format_is_srgb(color_format)) {
                for (int i = 0; i < 3; i++)
                        dst_color[i] = vc4_nir_srgb_decode(b, dst_color[i]);
        }

        nir_ssa_def *blend_color[4];
        vc4_do_blending(c, b, blend_color, src_color, dst_color);

        /* sRGB encode the output color */
        if (util_format_is_srgb(color_format)) {
                for (int i = 0; i < 3; i++)
                        blend_color[i] = vc4_nir_srgb_encode(b, blend_color[i]);
        }

        /* Put the channels back into the format's byte order and pack. */
        nir_ssa_def *swizzled_outputs[4];
        for (int i = 0; i < 4; i++) {
                swizzled_outputs[i] =
                        vc4_nir_get_swizzled_channel(b, blend_color,
                                                     format_swiz[i]);
        }

        nir_ssa_def *packed_color =
                nir_pack_unorm_4x8(b,
                                   nir_vec4(b,
                                            swizzled_outputs[0],
                                            swizzled_outputs[1],
                                            swizzled_outputs[2],
                                            swizzled_outputs[3]));

        packed_color = vc4_logicop(b, c->fs_key->logicop_func,
                                   packed_color, packed_dst_color);

        /* If the bit isn't set in the color mask, then just return the
         * original dst color, instead.
         */
        uint32_t colormask = 0xffffffff;
        for (int i = 0; i < 4; i++) {
                if (format_swiz[i] < 4 &&
                    !(c->fs_key->blend.colormask & (1 << format_swiz[i]))) {
                        /* The shifted operand must be unsigned: shifting the
                         * signed int 0xff left by 24 is not representable in
                         * int and is undefined behaviour.
                         */
                        colormask &= ~(0xffu << (i * 8));
                }
        }
        packed_color = nir_ior(b,
                               nir_iand(b, packed_color,
                                        nir_imm_int(b, colormask)),
                               nir_iand(b, packed_dst_color,
                                        nir_imm_int(b, ~colormask)));

        /* Turn the old vec4 output into a store of the packed color. */
        nir_instr_rewrite_src(&intr->instr, &intr->src[0],
                              nir_src_for_ssa(packed_color));
        intr->num_components = 1;
}
static bool
vc4_nir_lower_blend_block(nir_block *block, void *state)
{
struct vc4_compile *c = state;
nir_foreach_instr(block, instr) {
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
if (intr->intrinsic != nir_intrinsic_store_output)
continue;
nir_variable *output_var = NULL;
foreach_list_typed(nir_variable, var, node, &c->s->outputs) {
if (var->data.driver_location == intr->const_index[0]) {
output_var = var;
break;
}
}
assert(output_var);
unsigned semantic_name = output_var->data.location;
if (semantic_name != TGSI_SEMANTIC_COLOR)
continue;
nir_function_impl *impl =
nir_cf_node_get_function(&block->cf_node);
nir_builder b;
nir_builder_init(&b, impl);
nir_builder_insert_before_instr(&b, &intr->instr);
vc4_nir_lower_blend_instr(c, &b, intr);
}
return true;
}
/**
 * Entry point of the pass: runs the blend lowering over every block of
 * every function overload in c->s that has an implementation, then marks
 * block-index and dominance metadata as preserved for that implementation.
 */
void
vc4_nir_lower_blend(struct vc4_compile *c)
{
        nir_foreach_overload(c->s, overload) {
                nir_function_impl *impl = overload->impl;

                if (!impl)
                        continue;

                nir_foreach_block(impl, vc4_nir_lower_blend_block, c);
                nir_metadata_preserve(impl,
                                      nir_metadata_block_index |
                                      nir_metadata_dominance);
        }
}

View file

@ -56,11 +56,14 @@ static void
vc4_nir_lower_input(struct vc4_compile *c, nir_builder *b,
nir_intrinsic_instr *intr)
{
/* All TGSI-to-NIR inputs are vec4. */
assert(intr->num_components == 4);
nir_builder_insert_before_instr(b, &intr->instr);
if (c->stage == QSTAGE_FRAG && intr->const_index[0] ==
VC4_NIR_TLB_COLOR_READ_INPUT) {
/* This doesn't need any lowering. */
return;
}
nir_variable *input_var = NULL;
foreach_list_typed(nir_variable, var, node, &c->s->inputs) {
if (var->data.driver_location == intr->const_index[0]) {
@ -72,6 +75,9 @@ vc4_nir_lower_input(struct vc4_compile *c, nir_builder *b,
int semantic_name = input_var->data.location;
int semantic_index = input_var->data.index;
/* All TGSI-to-NIR inputs are vec4. */
assert(intr->num_components == 4);
/* Generate scalar loads equivalent to the original VEC4. */
nir_ssa_def *dests[4];
for (unsigned i = 0; i < intr->num_components; i++) {
@ -145,6 +151,12 @@ vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b,
return;
}
/* Color output is lowered by vc4_nir_lower_blend(). */
if (c->stage == QSTAGE_FRAG && semantic_name == TGSI_SEMANTIC_COLOR) {
intr->const_index[0] *= 4;
return;
}
/* All TGSI-to-NIR outputs are VEC4. */
assert(intr->num_components == 4);
@ -170,7 +182,11 @@ static void
vc4_nir_lower_uniform(struct vc4_compile *c, nir_builder *b,
nir_intrinsic_instr *intr)
{
/* All TGSI-to-NIR uniform loads are vec4. */
/* All TGSI-to-NIR uniform loads are vec4, but we may create dword
* loads in our lowering passes.
*/
if (intr->num_components == 1)
return;
assert(intr->num_components == 4);
nir_builder_insert_before_instr(b, &intr->instr);

View file

@ -123,6 +123,26 @@ nir_ssa_def *vc4_nir_get_state_uniform(struct nir_builder *b,
return &intr->dest.ssa;
}
/**
 * Resolves one UTIL_FORMAT_SWIZZLE_* selector against an array of channels.
 *
 * X/Y/Z/W select srcs[swiz]; SWIZZLE_0 and SWIZZLE_1 yield the float
 * constants 0.0 and 1.0.  SWIZZLE_NONE and any unrecognized value print a
 * warning to stderr and yield 0.0.
 */
nir_ssa_def *
vc4_nir_get_swizzled_channel(nir_builder *b, nir_ssa_def **srcs, int swiz)
{
        switch (swiz) {
        case UTIL_FORMAT_SWIZZLE_X:
        case UTIL_FORMAT_SWIZZLE_Y:
        case UTIL_FORMAT_SWIZZLE_Z:
        case UTIL_FORMAT_SWIZZLE_W:
                return srcs[swiz];

        case UTIL_FORMAT_SWIZZLE_1:
                return nir_imm_float(b, 1.0);

        case UTIL_FORMAT_SWIZZLE_NONE:
        default:
                fprintf(stderr, "warning: unknown swizzle\n");
                /* FALLTHROUGH */
        case UTIL_FORMAT_SWIZZLE_0:
                return nir_imm_float(b, 0.0);
        }
}
static struct qreg *
ntq_init_ssa_def(struct vc4_compile *c, nir_ssa_def *def)
{
@ -258,22 +278,6 @@ qir_srgb_decode(struct vc4_compile *c, struct qreg srgb)
return qir_SEL_X_Y_NS(c, low, high);
}
/**
 * Emits QIR converting one linear channel to its sRGB encoding.
 *
 * Two candidate values are computed: "low" (linear * 12.92) and "high"
 * (1.055 * linear ^ 0.41666 - 0.055).  The final select is driven by the
 * flags set from (linear - 0.0031308).
 *
 * NOTE(review): presumably SEL_X_Y_NS picks "low" when that difference is
 * negative, i.e. below the sRGB threshold - confirm against the QIR select
 * definitions.
 */
static struct qreg
qir_srgb_encode(struct vc4_compile *c, struct qreg linear)
{
/* Linear segment of the curve. */
struct qreg low = qir_FMUL(c, linear, qir_uniform_f(c, 12.92));
/* Power segment; the exponent 0.41666 approximates 1 / 2.4. */
struct qreg high = qir_FSUB(c,
qir_FMUL(c,
qir_uniform_f(c, 1.055),
qir_POW(c,
linear,
qir_uniform_f(c, 0.41666))),
qir_uniform_f(c, 0.055));
/* Set flags from the threshold comparison right before the select that
 * consumes them.
 */
qir_SF(c, qir_FSUB(c, linear, qir_uniform_f(c, 0.0031308)));
return qir_SEL_X_Y_NS(c, low, high);
}
static struct qreg
ntq_umul(struct vc4_compile *c, struct qreg src0, struct qreg src1)
{
@ -834,6 +838,32 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
return;
}
if (instr->op == nir_op_pack_unorm_4x8) {
struct qreg result;
for (int i = 0; i < 4; i++) {
struct qreg src = ntq_get_src(c, instr->src[0].src,
instr->src[0].swizzle[i]);
if (i == 0)
result = qir_PACK_8888_F(c, src);
else
result = qir_PACK_8_F(c, result, src, i);
}
struct qreg *dest = ntq_get_dest(c, &instr->dest.dest);
*dest = result;
return;
}
if (instr->op == nir_op_unpack_unorm_4x8) {
struct qreg src = ntq_get_src(c, instr->src[0].src,
instr->src[0].swizzle[0]);
struct qreg *dest = ntq_get_dest(c, &instr->dest.dest);
for (int i = 0; i < 4; i++) {
if (instr->dest.write_mask & (1 << i))
dest[i] = qir_UNPACK_8_F(c, src, i);
}
return;
}
/* General case: We can just grab the one used channel per src. */
struct qreg src[nir_op_infos[instr->op].num_inputs];
for (int i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
@ -1036,161 +1066,6 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
}
}
/**
 * QIR version of the blend factor application: returns \p val weighted by
 * the blend factor selected for one channel.
 *
 * \param c        compile context used to emit instructions
 * \param dst      the four destination color channels
 * \param src      the four source color channels
 * \param val      the value being weighted (a source or destination channel)
 * \param factor   a PIPE_BLENDFACTOR_* value
 * \param channel  which channel (0-3) is being computed
 *
 * PIPE_BLENDFACTOR_ONE returns \p val without emitting a multiply and
 * PIPE_BLENDFACTOR_ZERO returns a constant 0.0.  Unsupported factors
 * (including the dual-source SRC1 ones) print a message to stderr and return
 * \p val unchanged.
 */
static struct qreg
vc4_blend_channel(struct vc4_compile *c,
struct qreg *dst,
struct qreg *src,
struct qreg val,
unsigned factor,
int channel)
{
switch(factor) {
case PIPE_BLENDFACTOR_ONE:
return val;
case PIPE_BLENDFACTOR_SRC_COLOR:
return qir_FMUL(c, val, src[channel]);
case PIPE_BLENDFACTOR_SRC_ALPHA:
return qir_FMUL(c, val, src[3]);
case PIPE_BLENDFACTOR_DST_ALPHA:
return qir_FMUL(c, val, dst[3]);
case PIPE_BLENDFACTOR_DST_COLOR:
return qir_FMUL(c, val, dst[channel]);
case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
/* Color channels use min(src alpha, 1 - dst alpha); the alpha channel
 * uses a factor of one, so val is returned as is.
 */
if (channel != 3) {
return qir_FMUL(c,
val,
qir_FMIN(c,
src[3],
qir_FSUB(c,
qir_uniform_f(c, 1.0),
dst[3])));
} else {
return val;
}
case PIPE_BLENDFACTOR_CONST_COLOR:
/* The uniform's data word selects the blend constant's channel. */
return qir_FMUL(c, val,
qir_uniform(c, QUNIFORM_BLEND_CONST_COLOR,
channel));
case PIPE_BLENDFACTOR_CONST_ALPHA:
return qir_FMUL(c, val,
qir_uniform(c, QUNIFORM_BLEND_CONST_COLOR, 3));
case PIPE_BLENDFACTOR_ZERO:
return qir_uniform_f(c, 0.0);
/* The INV_* factors below all weight val by (1.0 - factor). */
case PIPE_BLENDFACTOR_INV_SRC_COLOR:
return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
src[channel]));
case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
src[3]));
case PIPE_BLENDFACTOR_INV_DST_ALPHA:
return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
dst[3]));
case PIPE_BLENDFACTOR_INV_DST_COLOR:
return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
dst[channel]));
case PIPE_BLENDFACTOR_INV_CONST_COLOR:
return qir_FMUL(c, val,
qir_FSUB(c, qir_uniform_f(c, 1.0),
qir_uniform(c,
QUNIFORM_BLEND_CONST_COLOR,
channel)));
case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
return qir_FMUL(c, val,
qir_FSUB(c, qir_uniform_f(c, 1.0),
qir_uniform(c,
QUNIFORM_BLEND_CONST_COLOR,
3)));
default:
case PIPE_BLENDFACTOR_SRC1_COLOR:
case PIPE_BLENDFACTOR_SRC1_ALPHA:
case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
/* Unsupported. */
fprintf(stderr, "Unknown blend factor %d\n", factor);
return val;
}
}
static struct qreg
vc4_blend_func(struct vc4_compile *c,
struct qreg src, struct qreg dst,
unsigned func)
{
switch (func) {
case PIPE_BLEND_ADD:
return qir_FADD(c, src, dst);
case PIPE_BLEND_SUBTRACT:
return qir_FSUB(c, src, dst);
case PIPE_BLEND_REVERSE_SUBTRACT:
return qir_FSUB(c, dst, src);
case PIPE_BLEND_MIN:
return qir_FMIN(c, src, dst);
case PIPE_BLEND_MAX:
return qir_FMAX(c, src, dst);
default:
/* Unsupported. */
fprintf(stderr, "Unknown blend func %d\n", func);
return src;
}
}
/**
 * Implements fixed function blending in shader code.
 *
 * VC4 doesn't have any hardware support for blending.  Instead, you read the
 * current contents of the destination from the tile buffer after having
 * waited for the scoreboard (which is handled by vc4_qpu_emit.c), then do
 * math using your output color and that destination value, and update the
 * output color appropriately.
 *
 * \param c          compile context; the blend state comes from c->fs_key
 * \param result     receives the four blended channels
 * \param dst_color  the current destination color
 * \param src_color  the shader's output color; overwritten with its
 *                   saturated value when blending is enabled
 */
static void
vc4_blend(struct vc4_compile *c, struct qreg *result,
          struct qreg *dst_color, struct qreg *src_color)
{
        struct pipe_rt_blend_state *state = &c->fs_key->blend;
        int chan;

        /* Blending disabled: pass the source color straight through. */
        if (!state->blend_enable) {
                for (chan = 0; chan < 4; chan++)
                        result[chan] = src_color[chan];
                return;
        }

        for (chan = 0; chan < 4; chan++)
                src_color[chan] = qir_SAT(c, src_color[chan]);

        /* Weight each channel; RGB and alpha have separate factors. */
        struct qreg src_term[4], dst_term[4];
        for (chan = 0; chan < 4; chan++) {
                unsigned src_factor, dst_factor;

                if (chan < 3) {
                        src_factor = state->rgb_src_factor;
                        dst_factor = state->rgb_dst_factor;
                } else {
                        src_factor = state->alpha_src_factor;
                        dst_factor = state->alpha_dst_factor;
                }

                src_term[chan] = vc4_blend_channel(c,
                                                   dst_color, src_color,
                                                   src_color[chan],
                                                   src_factor, chan);
                dst_term[chan] = vc4_blend_channel(c,
                                                   dst_color, src_color,
                                                   dst_color[chan],
                                                   dst_factor, chan);
        }

        /* Combine the weighted terms; RGB and alpha have separate
         * equations.
         */
        for (chan = 0; chan < 4; chan++) {
                unsigned func = (chan < 3) ? state->rgb_func :
                                             state->alpha_func;

                result[chan] = vc4_blend_func(c,
                                              src_term[chan], dst_term[chan],
                                              func);
        }
}
static void
clip_distance_discard(struct vc4_compile *c)
{
@ -1213,217 +1088,17 @@ clip_distance_discard(struct vc4_compile *c)
}
}
/**
 * QIR version of the alpha test: folds the result of comparing the source
 * alpha against the QUNIFORM_ALPHA_REF uniform into c->discard.
 *
 * Does nothing to c->discard unless c->fs_key->alpha_test is set.  Note that
 * the alpha reference uniform is requested before that check is made.
 *
 * NOTE(review): the mapping of each compare function onto a particular
 * qir_SEL_X_Y_* condition relies on the flag semantics of qir_SF, which are
 * not visible here - confirm before changing any of the cases.
 */
static void
alpha_test_discard(struct vc4_compile *c)
{
struct qreg src_alpha;
struct qreg alpha_ref = qir_uniform(c, QUNIFORM_ALPHA_REF, 0);
if (!c->fs_key->alpha_test)
return;
/* Use the alpha of the color output, or 1.0 if the shader has none. */
if (c->output_color_index != -1)
src_alpha = c->outputs[c->output_color_index + 3];
else
src_alpha = qir_uniform_f(c, 1.0);
/* Start from "nothing discarded" if no discard value exists yet. */
if (c->discard.file == QFILE_NULL)
c->discard = qir_uniform_ui(c, 0);
/* There is no default case: an unrecognized compare function leaves
 * c->discard as it is.
 */
switch (c->fs_key->alpha_test_func) {
case PIPE_FUNC_NEVER:
/* The test never passes: discard unconditionally. */
c->discard = qir_uniform_ui(c, ~0);
break;
case PIPE_FUNC_ALWAYS:
/* The test always passes: leave c->discard untouched. */
break;
/* Each remaining case sets the flags from a subtraction of the two alpha
 * values, then selects between the existing c->discard and ~0.
 */
case PIPE_FUNC_EQUAL:
qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
c->discard = qir_SEL_X_Y_ZS(c, c->discard,
qir_uniform_ui(c, ~0));
break;
case PIPE_FUNC_NOTEQUAL:
qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
c->discard = qir_SEL_X_Y_ZC(c, c->discard,
qir_uniform_ui(c, ~0));
break;
case PIPE_FUNC_GREATER:
qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
c->discard = qir_SEL_X_Y_NC(c, c->discard,
qir_uniform_ui(c, ~0));
break;
case PIPE_FUNC_GEQUAL:
qir_SF(c, qir_FSUB(c, alpha_ref, src_alpha));
c->discard = qir_SEL_X_Y_NS(c, c->discard,
qir_uniform_ui(c, ~0));
break;
case PIPE_FUNC_LESS:
qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
c->discard = qir_SEL_X_Y_NS(c, c->discard,
qir_uniform_ui(c, ~0));
break;
case PIPE_FUNC_LEQUAL:
qir_SF(c, qir_FSUB(c, alpha_ref, src_alpha));
c->discard = qir_SEL_X_Y_NC(c, c->discard,
qir_uniform_ui(c, ~0));
break;
}
}
static struct qreg
vc4_logicop(struct vc4_compile *c, struct qreg src, struct qreg dst)
{
switch (c->fs_key->logicop_func) {
case PIPE_LOGICOP_CLEAR:
return qir_uniform_f(c, 0.0);
case PIPE_LOGICOP_NOR:
return qir_NOT(c, qir_OR(c, src, dst));
case PIPE_LOGICOP_AND_INVERTED:
return qir_AND(c, qir_NOT(c, src), dst);
case PIPE_LOGICOP_COPY_INVERTED:
return qir_NOT(c, src);
case PIPE_LOGICOP_AND_REVERSE:
return qir_AND(c, src, qir_NOT(c, dst));
case PIPE_LOGICOP_INVERT:
return qir_NOT(c, dst);
case PIPE_LOGICOP_XOR:
return qir_XOR(c, src, dst);
case PIPE_LOGICOP_NAND:
return qir_NOT(c, qir_AND(c, src, dst));
case PIPE_LOGICOP_AND:
return qir_AND(c, src, dst);
case PIPE_LOGICOP_EQUIV:
return qir_NOT(c, qir_XOR(c, src, dst));
case PIPE_LOGICOP_NOOP:
return dst;
case PIPE_LOGICOP_OR_INVERTED:
return qir_OR(c, qir_NOT(c, src), dst);
case PIPE_LOGICOP_OR_REVERSE:
return qir_OR(c, src, qir_NOT(c, dst));
case PIPE_LOGICOP_OR:
return qir_OR(c, src, dst);
case PIPE_LOGICOP_SET:
return qir_uniform_ui(c, ~0);
case PIPE_LOGICOP_COPY:
default:
return src;
}
}
/**
 * Applies the GL blending pipeline and returns the packed (8888) output
 * color.
 *
 * The destination color is only read from the tile buffer when blending is
 * enabled, the color mask is not 0xf, or the logic op is not COPY.  The
 * stages emitted are, in order: destination unpack/unswizzle (with sRGB
 * decode of RGB for sRGB formats), blending, sRGB encode, swizzle to the
 * format's channel order, packing, logic op and color masking.
 */
static struct qreg
blend_pipeline(struct vc4_compile *c)
{
        enum pipe_format color_format = c->fs_key->color_format;
        const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
        struct qreg tlb_read_color[4] = { c->undef, c->undef, c->undef, c->undef };
        struct qreg dst_color[4] = { c->undef, c->undef, c->undef, c->undef };
        struct qreg linear_dst_color[4] = { c->undef, c->undef, c->undef, c->undef };
        struct qreg packed_dst_color = c->undef;

        /* Only read the destination when some stage actually needs it. */
        if (c->fs_key->blend.blend_enable ||
            c->fs_key->blend.colormask != 0xf ||
            c->fs_key->logicop_func != PIPE_LOGICOP_COPY) {
                packed_dst_color = qir_TLB_COLOR_READ(c);
                for (int i = 0; i < 4; i++)
                        tlb_read_color[i] = qir_UNPACK_8_F(c,
                                                           packed_dst_color, i);
                for (int i = 0; i < 4; i++) {
                        dst_color[i] = get_swizzled_channel(c,
                                                            tlb_read_color,
                                                            format_swiz[i]);
                        /* Alpha is never sRGB-encoded. */
                        if (util_format_is_srgb(color_format) && i != 3) {
                                linear_dst_color[i] =
                                        qir_srgb_decode(c, dst_color[i]);
                        } else {
                                linear_dst_color[i] = dst_color[i];
                        }
                }
        }

        /* Without a color output, blend from undefined source values. */
        struct qreg undef_array[4] = { c->undef, c->undef, c->undef, c->undef };
        const struct qreg *output_colors = (c->output_color_index != -1 ?
                                            c->outputs + c->output_color_index :
                                            undef_array);
        struct qreg blend_src_color[4];
        for (int i = 0; i < 4; i++)
                blend_src_color[i] = output_colors[i];

        struct qreg blend_color[4];
        vc4_blend(c, blend_color, linear_dst_color, blend_src_color);

        if (util_format_is_srgb(color_format)) {
                for (int i = 0; i < 3; i++)
                        blend_color[i] = qir_srgb_encode(c, blend_color[i]);
        }

        /* Debug: Sometimes you're getting a black output and just want to see
         * if the FS is getting executed at all.  Spam magenta into the color
         * output.
         */
        if (0) {
                blend_color[0] = qir_uniform_f(c, 1.0);
                blend_color[1] = qir_uniform_f(c, 0.0);
                blend_color[2] = qir_uniform_f(c, 1.0);
                blend_color[3] = qir_uniform_f(c, 0.5);
        }

        struct qreg swizzled_outputs[4];
        for (int i = 0; i < 4; i++) {
                swizzled_outputs[i] = get_swizzled_channel(c, blend_color,
                                                           format_swiz[i]);
        }

        /* Pack the defined channels; the first one seeds all four bytes and
         * the later ones overwrite their own byte.
         */
        struct qreg packed_color = c->undef;
        for (int i = 0; i < 4; i++) {
                if (swizzled_outputs[i].file == QFILE_NULL)
                        continue;
                if (packed_color.file == QFILE_NULL) {
                        packed_color = qir_PACK_8888_F(c, swizzled_outputs[i]);
                } else {
                        packed_color = qir_PACK_8_F(c,
                                                    packed_color,
                                                    swizzled_outputs[i],
                                                    i);
                }
        }

        if (packed_color.file == QFILE_NULL)
                packed_color = qir_uniform_ui(c, 0);

        if (c->fs_key->logicop_func != PIPE_LOGICOP_COPY) {
                packed_color = vc4_logicop(c, packed_color, packed_dst_color);
        }

        /* If the bit isn't set in the color mask, then just return the
         * original dst color, instead.
         */
        uint32_t colormask = 0xffffffff;
        for (int i = 0; i < 4; i++) {
                if (format_swiz[i] < 4 &&
                    !(c->fs_key->blend.colormask & (1 << format_swiz[i]))) {
                        /* The shifted operand must be unsigned: shifting the
                         * signed int 0xff left by 24 is not representable in
                         * int and is undefined behaviour.
                         */
                        colormask &= ~(0xffu << (i * 8));
                }
        }
        if (colormask != 0xffffffff) {
                packed_color = qir_OR(c,
                                      qir_AND(c, packed_color,
                                              qir_uniform_ui(c, colormask)),
                                      qir_AND(c, packed_dst_color,
                                              qir_uniform_ui(c, ~colormask)));
        }

        return packed_color;
}
static void
emit_frag_end(struct vc4_compile *c)
{
clip_distance_discard(c);
alpha_test_discard(c);
struct qreg color = blend_pipeline(c);
struct qreg color;
if (c->output_color_index != -1) {
color = c->outputs[c->output_color_index];
} else {
color = qir_uniform_ui(c, 0);
}
if (c->discard.file != QFILE_NULL)
qir_TLB_DISCARD_SETUP(c, c->discard);
@ -1839,8 +1514,11 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
case nir_intrinsic_load_input:
assert(instr->num_components == 1);
*dest = c->inputs[instr->const_index[0]];
if (instr->const_index[0] == VC4_NIR_TLB_COLOR_READ_INPUT) {
*dest = qir_TLB_COLOR_READ(c);
} else {
*dest = c->inputs[instr->const_index[0]];
}
break;
case nir_intrinsic_store_output:
@ -2052,6 +1730,8 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
c->s = tgsi_to_nir(tokens, &nir_options);
nir_opt_global_to_local(c->s);
nir_convert_to_ssa(c->s);
if (stage == QSTAGE_FRAG)
vc4_nir_lower_blend(c);
vc4_nir_lower_io(c);
nir_lower_idiv(c->s);
nir_lower_load_const_to_scalar(c->s);

View file

@ -39,6 +39,8 @@
#include "vc4_screen.h"
#include "pipe/p_state.h"
struct nir_builder;
enum qfile {
QFILE_NULL,
QFILE_TEMP,
@ -242,7 +244,11 @@ enum quniform_contents {
QUNIFORM_TEXTURE_BORDER_COLOR,
QUNIFORM_BLEND_CONST_COLOR,
QUNIFORM_BLEND_CONST_COLOR_X,
QUNIFORM_BLEND_CONST_COLOR_Y,
QUNIFORM_BLEND_CONST_COLOR_Z,
QUNIFORM_BLEND_CONST_COLOR_W,
QUNIFORM_STENCIL,
QUNIFORM_ALPHA_REF,
@ -414,6 +420,11 @@ struct vc4_compile {
uint32_t variant_id;
};
/* Special nir_load_input intrinsic index for loading the current TLB
* destination color.
*/
#define VC4_NIR_TLB_COLOR_READ_INPUT 2000000000
/* Special offset for nir_load_uniform values to get a QUNIFORM_*
* state-dependent value.
*/
@ -458,9 +469,12 @@ bool qir_opt_cse(struct vc4_compile *c);
bool qir_opt_dead_code(struct vc4_compile *c);
bool qir_opt_small_immediates(struct vc4_compile *c);
bool qir_opt_vpm_writes(struct vc4_compile *c);
void vc4_nir_lower_blend(struct vc4_compile *c);
void vc4_nir_lower_io(struct vc4_compile *c);
nir_ssa_def *vc4_nir_get_state_uniform(struct nir_builder *b,
enum quniform_contents contents);
nir_ssa_def *vc4_nir_get_swizzled_channel(struct nir_builder *b,
nir_ssa_def **srcs, int swiz);
void qir_lower_uniforms(struct vc4_compile *c);
void qpu_schedule_instructions(struct vc4_compile *c);

View file

@ -257,9 +257,14 @@ vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
uinfo->data[i]));
break;
case QUNIFORM_BLEND_CONST_COLOR:
case QUNIFORM_BLEND_CONST_COLOR_X:
case QUNIFORM_BLEND_CONST_COLOR_Y:
case QUNIFORM_BLEND_CONST_COLOR_Z:
case QUNIFORM_BLEND_CONST_COLOR_W:
cl_aligned_f(&uniforms,
CLAMP(vc4->blend_color.color[uinfo->data[i]], 0, 1));
CLAMP(vc4->blend_color.color[uinfo->contents[i] -
QUNIFORM_BLEND_CONST_COLOR_X],
0, 1));
break;
case QUNIFORM_STENCIL:
@ -321,7 +326,10 @@ vc4_set_shader_uniform_dirty_flags(struct vc4_compiled_shader *shader)
dirty |= VC4_DIRTY_TEXSTATE;
break;
case QUNIFORM_BLEND_CONST_COLOR:
case QUNIFORM_BLEND_CONST_COLOR_X:
case QUNIFORM_BLEND_CONST_COLOR_Y:
case QUNIFORM_BLEND_CONST_COLOR_Z:
case QUNIFORM_BLEND_CONST_COLOR_W:
dirty |= VC4_DIRTY_BLEND_COLOR;
break;