gallivm/llvmpipe: add support for NIR to the linear/aos paths.

When the AOS/linear code was added, it only worked with TGSI, which
meant nothing in mesa upstream was really using it.

This adds support to analyse NIR shaders, and adds aos support
to the backend.

AOS support is limited to mov, vec, fmul and tex sampling in order to
accelerate mostly compositing operations. I've verified that weston uses
the fast path. gnome-shell can't use it yet as we can't optimise
the depth test paths.

Acked-by: Jose Fonseca <jfonseca@vmware.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15140>
This commit is contained in:
Dave Airlie 2022-02-25 10:00:25 +10:00 committed by Marge Bot
parent 6efd489ac9
commit 34379a937f
9 changed files with 609 additions and 20 deletions

View file

@ -39,6 +39,11 @@
#include "nir_deref.h"
#include "nir_search_helpers.h"
static bool is_aos(const struct lp_build_nir_context *bld_base)
{
return bld_base->base.type.length == 16 && bld_base->base.type.width == 8;
}
static void visit_cf_list(struct lp_build_nir_context *bld_base,
struct exec_list *list);
@ -169,7 +174,7 @@ static void assign_ssa(struct lp_build_nir_context *bld_base, int idx, LLVMValue
static void assign_ssa_dest(struct lp_build_nir_context *bld_base, const nir_ssa_def *ssa,
LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
{
assign_ssa(bld_base, ssa->index, ssa->num_components == 1 ? vals[0] : lp_nir_array_build_gather_values(bld_base->base.gallivm->builder, vals, ssa->num_components));
assign_ssa(bld_base, ssa->index, (ssa->num_components == 1 || is_aos(bld_base)) ? vals[0] : lp_nir_array_build_gather_values(bld_base->base.gallivm->builder, vals, ssa->num_components));
}
static void assign_reg(struct lp_build_nir_context *bld_base, const nir_reg_dest *reg,
@ -276,6 +281,10 @@ static LLVMValueRef get_alu_src(struct lp_build_nir_context *bld_base,
bool need_swizzle = false;
assert(value);
if (is_aos(bld_base))
return value;
unsigned src_components = nir_src_num_components(src.src);
for (unsigned i = 0; i < num_components; ++i) {
assert(src.swizzle[i] < src_components);
@ -1110,6 +1119,15 @@ static void visit_alu(struct lp_build_nir_context *bld_base, const nir_alu_instr
src_bit_size[i] = nir_src_bit_size(instr->src[i].src);
}
if (instr->op == nir_op_mov && is_aos(bld_base) && !instr->dest.dest.is_ssa) {
for (unsigned i = 0; i < 4; i++) {
if (instr->dest.write_mask & (1 << i)) {
assign_reg(bld_base, &instr->dest.dest.reg, (1 << i), src);
}
}
return;
}
LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];
if (instr->op == nir_op_vec4 || instr->op == nir_op_vec3 || instr->op == nir_op_vec2 || instr->op == nir_op_vec8 || instr->op == nir_op_vec16) {
for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
@ -1122,7 +1140,15 @@ static void visit_alu(struct lp_build_nir_context *bld_base, const nir_alu_instr
temp_chan = cast_type(bld_base, temp_chan, nir_op_infos[instr->op].input_types[0], src_bit_size[0]);
result[0] = (c == 0) ? temp_chan : lp_build_add(get_flt_bld(bld_base, src_bit_size[0]), result[0], temp_chan);
}
} else {
} else if (is_aos(bld_base)) {
if (instr->op == nir_op_fmul) {
if (LLVMIsConstant(src[0]))
src[0] = lp_nir_aos_conv_const(gallivm, src[0], 1);
if (LLVMIsConstant(src[1]))
src[1] = lp_nir_aos_conv_const(gallivm, src[1], 1);
}
result[0] = do_alu_action(bld_base, instr, src_bit_size, src);
} else {
for (unsigned c = 0; c < num_components; c++) {
LLVMValueRef src_chan[NIR_MAX_VEC_COMPONENTS];
@ -2019,7 +2045,7 @@ static void visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *inst
LLVMValueRef texel[NIR_MAX_VEC_COMPONENTS];
unsigned lod_src = 0;
LLVMValueRef coord_undef = LLVMGetUndef(bld_base->base.int_vec_type);
unsigned coord_vals = is_aos(bld_base) ? 1 : instr->coord_components;
memset(&params, 0, sizeof(params));
enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
@ -2038,14 +2064,14 @@ static void visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *inst
switch (instr->src[i].src_type) {
case nir_tex_src_coord: {
LLVMValueRef coord = get_src(bld_base, instr->src[i].src);
if (instr->coord_components == 1)
if (coord_vals == 1)
coords[0] = coord;
else {
for (unsigned chan = 0; chan < instr->coord_components; ++chan)
coords[chan] = LLVMBuildExtractValue(builder, coord,
chan, "");
}
for (unsigned chan = instr->coord_components; chan < 5; chan++)
for (unsigned chan = coord_vals; chan < 5; chan++)
coords[chan] = coord_undef;
break;
@ -2144,7 +2170,7 @@ static void visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *inst
if (instr->op == nir_texop_tex || instr->op == nir_texop_tg4 || instr->op == nir_texop_txb ||
instr->op == nir_texop_txl || instr->op == nir_texop_txd || instr->op == nir_texop_lod)
for (unsigned chan = 0; chan < instr->coord_components; ++chan)
for (unsigned chan = 0; chan < coord_vals; ++chan)
coords[chan] = cast_type(bld_base, coords[chan], nir_type_float, 32);
else if (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms)
for (unsigned chan = 0; chan < instr->coord_components; ++chan)
@ -2375,6 +2401,9 @@ handle_shader_output_decl(struct lp_build_nir_context *bld_base,
static LLVMTypeRef get_register_type(struct lp_build_nir_context *bld_base,
nir_register *reg)
{
if (is_aos(bld_base))
return bld_base->base.int_vec_type;
struct lp_build_context *int_bld = get_int_bld(bld_base, true, reg->bit_size == 1 ? 32 : reg->bit_size);
LLVMTypeRef type = int_bld->vec_type;
@ -2398,6 +2427,11 @@ bool lp_build_nir_llvm(
nir_remove_dead_derefs(nir);
nir_remove_dead_variables(nir, nir_var_function_temp, NULL);
if (is_aos(bld_base)) {
nir_move_vec_src_uses_to_dest(nir);
nir_lower_vec_to_movs(nir, NULL, NULL);
}
nir_foreach_shader_out_variable(variable, nir)
handle_shader_output_decl(bld_base, nir, variable);

View file

@ -40,6 +40,16 @@ void lp_build_nir_soa(struct gallivm_state *gallivm,
const struct lp_build_tgsi_params *params,
LLVMValueRef (*outputs)[4]);
void lp_build_nir_aos(struct gallivm_state *gallivm,
struct nir_shader *shader,
struct lp_type type,
const unsigned char swizzles[4],
LLVMValueRef consts_ptr,
const LLVMValueRef *inputs,
LLVMValueRef *outputs,
const struct lp_build_sampler_aos *sampler,
const struct tgsi_shader_info *info);
struct lp_build_nir_context
{
struct lp_build_context base;
@ -266,6 +276,30 @@ struct lp_build_nir_soa_context
unsigned gs_vertex_streams;
};
/*
 * Per-shader state for the AoS (packed unorm8) NIR translation path.
 * bld_base must remain the first member so lp_nir_aos_context() can
 * downcast from the generic context pointer.
 */
struct lp_build_nir_aos_context
{
   struct lp_build_nir_context bld_base;

   /* Builder for integer masks and indices */
   struct lp_build_context int_bld;

   /*
    * AoS swizzle used:
    * - swizzles[0] = red index
    * - swizzles[1] = green index
    * - swizzles[2] = blue index
    * - swizzles[3] = alpha index
    */
   unsigned char swizzles[4];
   /* Inverse of swizzles[]: physical position -> logical channel. */
   unsigned char inv_swizzles[4];

   /* Pointer to constant buffer 0 contents (supplied by the caller). */
   LLVMValueRef consts_ptr;
   /* Packed input values, indexed by driver_location. */
   const LLVMValueRef *inputs;
   /* Output allocas, indexed by driver_location; may be NULL. */
   LLVMValueRef *outputs;
   /* Callbacks used to emit AoS texture fetches. */
   const struct lp_build_sampler_aos *sampler;
};
bool
lp_build_nir_llvm(struct lp_build_nir_context *bld_base,
struct nir_shader *nir);
@ -332,4 +366,11 @@ static inline struct lp_build_context *get_int_bld(struct lp_build_nir_context *
}
}
/* Downcast from the generic NIR build context to the AoS context.
 * Safe because bld_base is the first member of
 * struct lp_build_nir_aos_context. */
static inline struct lp_build_nir_aos_context *
lp_nir_aos_context(struct lp_build_nir_context *bld_base)
{
   return (struct lp_build_nir_aos_context *)bld_base;
}
LLVMValueRef lp_nir_aos_conv_const(struct gallivm_state *gallivm, LLVMValueRef constval, int nc);
#endif

View file

@ -0,0 +1,339 @@
/**************************************************************************
*
* Copyright 2022 Red Hat
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**************************************************************************/
#include "lp_bld_nir.h"
#include "lp_bld_init.h"
#include "lp_bld_const.h"
#include "lp_bld_flow.h"
#include "lp_bld_struct.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_debug.h"
#include "util/u_math.h"
/* Apply a logical channel selection (swizzle_x..swizzle_w) to an AoS
 * value, taking the context's physical channel ordering into account. */
static LLVMValueRef
swizzle_aos(struct lp_build_nir_context *bld_base,
            LLVMValueRef a,
            unsigned swizzle_x,
            unsigned swizzle_y,
            unsigned swizzle_z,
            unsigned swizzle_w)
{
   struct lp_build_nir_aos_context *bld = lp_nir_aos_context(bld_base);
   const unsigned sel[4] = { swizzle_x, swizzle_y, swizzle_z, swizzle_w };
   unsigned char chan_swiz[4];

   /* Translate each logical selection into the physical channel order. */
   for (unsigned c = 0; c < 4; c++) {
      assert(sel[c] < 4);
      chan_swiz[bld->inv_swizzles[c]] = bld->swizzles[sel[c]];
   }

   return lp_build_swizzle_aos(&bld->bld_base.base, a, chan_swiz);
}
/* Convert a constant from SoA float form (1..4 x f32, stored as raw bit
 * patterns in i32 lanes) to the AoS 16 x unorm8 layout, replicating each
 * channel across the 4 pixels of the vector.  Channels beyond nc are
 * padded with the last converted channel value.
 */
LLVMValueRef lp_nir_aos_conv_const(struct gallivm_state *gallivm, LLVMValueRef constval, int nc)
{
   LLVMValueRef elems[16];
   uint8_t val = 0;
   /* convert from 1..4 x f32 to 16 x unorm8 */
   for (unsigned i = 0; i < nc; i++) {
      /* The i32 lane holds the float's bit pattern (see emit_load_const);
       * pull the bits out and reinterpret before converting to unorm8. */
      LLVMValueRef value = LLVMBuildExtractElement(gallivm->builder, constval, lp_build_const_int32(gallivm, i), "");
      assert(LLVMIsConstant(value));
      unsigned uval = LLVMConstIntGetZExtValue(value);
      float f = uif(uval);
      val = float_to_ubyte(f);
      /* Broadcast channel i into all 4 pixels (stride-4 AoS layout). */
      for (unsigned j = 0; j < 4; j++) {
         elems[j * 4 + i] = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), val, 0);
      }
   }
   /* Pad any missing channels with the last converted value. */
   for (unsigned i = nc; i < 4; i++) {
      for (unsigned j = 0; j < 4; j++) {
         elems[j * 4 + i] = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), val, 0);
      }
   }
   return LLVMConstVector(elems, 16);
}
/* Allocate the output slot (an alloca) for one shader output variable. */
static void init_var_slots(struct lp_build_nir_context *bld_base,
                           nir_variable *var)
{
   struct lp_build_nir_aos_context *bld = lp_nir_aos_context(bld_base);

   /* outputs may be NULL when the caller doesn't collect them. */
   if (bld->outputs) {
      bld->outputs[var->data.driver_location] =
         lp_build_alloca(bld_base->base.gallivm,
                         bld_base->base.vec_type, "output");
   }
}
/* Declare a shader variable; only outputs need storage in the AoS path,
 * everything else is ignored. */
static void emit_var_decl(struct lp_build_nir_context *bld_base,
                          nir_variable *var)
{
   if (var->data.mode == nir_var_shader_out)
      init_var_slots(bld_base, var);
}
/* Load a shader variable.  Only nir_var_shader_in is supported on the
 * AoS path; inputs arrive pre-packed, one LLVM value per driver
 * location.  Other modes leave result[] untouched.
 */
static void emit_load_var(struct lp_build_nir_context *bld_base,
                          nir_variable_mode deref_mode,
                          unsigned num_components,
                          unsigned bit_size,
                          nir_variable *var,
                          unsigned vertex_index,
                          LLVMValueRef indir_vertex_index,
                          unsigned const_index,
                          LLVMValueRef indir_index,
                          LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct lp_build_nir_aos_context *bld = lp_nir_aos_context(bld_base);

   if (deref_mode == nir_var_shader_in)
      result[0] = bld->inputs[var->data.driver_location];
}
/* Store a shader variable.  Only nir_var_shader_out is handled: the
 * packed value is written to the output alloca for its location.
 */
static void emit_store_var(struct lp_build_nir_context *bld_base,
                           nir_variable_mode deref_mode,
                           unsigned num_components,
                           unsigned bit_size,
                           nir_variable *var,
                           unsigned writemask,
                           LLVMValueRef indir_vertex_index,
                           unsigned const_index,
                           LLVMValueRef indir_index,
                           LLVMValueRef dst)
{
   struct lp_build_nir_aos_context *bld = lp_nir_aos_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;

   /* Constants still in f32 form must be packed to AoS unorm8 first. */
   if (LLVMIsConstant(dst))
      dst = lp_nir_aos_conv_const(gallivm, dst, num_components);

   if (deref_mode == nir_var_shader_out)
      LLVMBuildStore(gallivm->builder, dst,
                     bld->outputs[var->data.driver_location]);
}
/* Load a NIR register.  Registers are stored whole in AoS form, so this
 * is a single LLVM load; indirect addressing is not supported here. */
static LLVMValueRef emit_load_reg(struct lp_build_nir_context *bld_base,
                                  struct lp_build_context *reg_bld,
                                  const nir_reg_src *reg,
                                  LLVMValueRef indir_src,
                                  LLVMValueRef reg_storage)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;

   return LLVMBuildLoad(builder, reg_storage, "");
}
/* Store to a NIR register in AoS form.
 *
 * dst[0] carries all channels interleaved (16 x unorm8, stride-4 layout
 * as produced by lp_nir_aos_conv_const).  A full 0xf writemask is a
 * plain store; a partial mask merges the written channels into the
 * current register contents with one shuffle per enabled channel.
 */
static void emit_store_reg(struct lp_build_nir_context *bld_base,
                           struct lp_build_context *reg_bld,
                           const nir_reg_dest *reg,
                           unsigned writemask,
                           LLVMValueRef indir_src,
                           LLVMValueRef reg_storage,
                           LLVMValueRef dst[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   /* Constants still in f32 form must be packed to unorm8 first. */
   if (LLVMIsConstant(dst[0]))
      dst[0] = lp_nir_aos_conv_const(gallivm, dst[0], 1);
   if (writemask == 0xf) {
      /* All channels written: no read-modify-write needed. */
      LLVMBuildStore(gallivm->builder, dst[0], reg_storage);
      return;
   }
   LLVMValueRef cur = LLVMBuildLoad(gallivm->builder, reg_storage, "");
   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
   for (unsigned i = 0; i < 4; i++) {
      if (writemask & (1 << i)) {
         /* Shuffle indices 0..15 select from cur, 16..31 from dst[0]:
          * pick lane j from dst when it carries channel i (j % 4 == i),
          * otherwise keep the existing lane. */
         LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH] = { 0 };
         for (unsigned j = 0; j < 16; j++){
            if (j % 4 == i)
               shuffles[j] = LLVMConstInt(i32t, 16 + j, 0);
            else
               shuffles[j] = LLVMConstInt(i32t, j, 0);
         }
         cur = LLVMBuildShuffleVector(gallivm->builder, cur, dst[0],
                                      LLVMConstVector(shuffles, 16), "");
      }
   }
   LLVMBuildStore(gallivm->builder, cur, reg_storage);
}
/* Load nc channels from constant buffer 0 at a compile-time-constant
 * offset, placing each channel at its physical (swizzled) lane and
 * replicating the loaded lanes across wider vectors.
 *
 * Fix vs. original: the original ended with
 *    if (nc == 4) swizzle_aos(bld_base, res, 0, 1, 2, 3);
 * whose return value was discarded, making it dead code; channels are
 * already inserted at their swizzled positions below, so the call has
 * been removed.
 */
static void emit_load_ubo(struct lp_build_nir_context *bld_base,
                          unsigned nc,
                          unsigned bit_size,
                          bool offset_is_uniform,
                          LLVMValueRef index,
                          LLVMValueRef offset,
                          LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct lp_build_nir_aos_context *bld = lp_nir_aos_context(bld_base);
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   struct lp_type type = bld_base->base.type;
   LLVMValueRef res = bld->bld_base.base.undef;

   /* Only constant offsets are supported on this path (enforced by the
    * linear shader scanner); fold to an immediate dword index. */
   offset = LLVMBuildExtractElement(builder, offset,
                                    lp_build_const_int32(gallivm, 0), "");
   assert(LLVMIsConstant(offset));
   unsigned offset_val = LLVMConstIntGetZExtValue(offset) >> 2;

   for (unsigned chan = 0; chan < nc; ++chan) {
      LLVMValueRef this_offset = lp_build_const_int32(gallivm, offset_val + chan);
      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &this_offset, 1, "");
      LLVMValueRef scalar = LLVMBuildLoad(builder, scalar_ptr, "");
      lp_build_name(scalar, "const[%u].%c", offset_val, "xyzw"[chan]);
      /* Insert the channel at its physical (swizzled) position so no
       * further swizzling of the result is needed. */
      LLVMValueRef swizzle = lp_build_const_int32(gallivm,
                                                  nc == 1 ? 0 : bld->swizzles[chan]);
      res = LLVMBuildInsertElement(builder, res, scalar, swizzle, "");
   }

   if (type.length > 4) {
      /* Replicate the loaded lanes across the rest of the vector.
       * NOTE(review): for 1 < nc < 4 this reads lanes [0, nc) which may
       * not be the lanes inserted above (they were swizzled) — verify
       * callers only use nc == 1 or nc == 4. */
      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
      for (unsigned chan = 0; chan < nc; ++chan)
         shuffles[chan] = lp_build_const_int32(gallivm, chan);
      for (unsigned i = nc; i < type.length; ++i)
         shuffles[i] = shuffles[i % nc];
      res = LLVMBuildShuffleVector(builder,
                                   res, bld->bld_base.base.undef,
                                   LLVMConstVector(shuffles, type.length),
                                   "");
   }

   result[0] = res;
}
/* Sample a texture in AoS mode.
 *
 * The linear-path scanner only admits plain 2D nir_texop_tex sampling,
 * so the target is hard-coded to PIPE_TEXTURE_2D and only coords[0]
 * (the packed coordinate value) is passed through.
 */
static void emit_tex(struct lp_build_nir_context *bld_base,
                     struct lp_sampler_params *params)
{
   struct lp_build_nir_aos_context *bld = (struct lp_build_nir_aos_context *)bld_base;
   /* Zeroed derivatives used when the caller supplies none. */
   struct lp_derivatives derivs = { 0 };
   params->type = bld_base->base.type;
   params->texel[0] = bld->sampler->emit_fetch_texel(bld->sampler,
                                                     &bld->bld_base.base,
                                                     PIPE_TEXTURE_2D,
                                                     params->texture_index,
                                                     params->coords[0],
                                                     params->derivs ? params->derivs[0] : derivs,
                                                     0);
}
/* Materialize a NIR load_const as an i32 vector of raw float bits.
 * Full vec4 constants are stored pre-swizzled into the physical channel
 * order; smaller constants keep their logical order.
 */
static void
emit_load_const(struct lp_build_nir_context *bld_base,
                const nir_load_const_instr *instr,
                LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
{
   struct lp_build_nir_aos_context *bld = lp_nir_aos_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
   const int nc = instr->def.num_components;
   const bool swizzle_result = (nc == 4);
   LLVMValueRef elems[4];

   for (int i = 0; i < nc; i++) {
      const int idx = swizzle_result ? bld->swizzles[i] : i;
      elems[idx] = LLVMConstInt(i32t, instr->value[i].u32,
                                bld_base->base.type.sign ? 1 : 0);
   }

   outval[0] = LLVMConstVector(elems, nc);
}
/* Translate a NIR fragment shader on the AoS (packed unorm8) path.
 * Sets up the AoS context, records the channel swizzle and its inverse,
 * wires up the AoS backend callbacks and runs the generic NIR walker.
 */
void
lp_build_nir_aos(struct gallivm_state *gallivm,
                 struct nir_shader *shader,
                 struct lp_type type,
                 const unsigned char swizzles[4],
                 LLVMValueRef consts_ptr,
                 const LLVMValueRef *inputs,
                 LLVMValueRef *outputs,
                 const struct lp_build_sampler_aos *sampler,
                 const struct tgsi_shader_info *info)
{
   struct lp_build_nir_aos_context bld;

   memset(&bld, 0, sizeof bld);

   /* One build context for the AoS type, plus uint/int views of it. */
   lp_build_context_init(&bld.bld_base.base, gallivm, type);
   lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
   lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));

   /* Record the channel ordering and its inverse for swizzling. */
   for (unsigned i = 0; i < 4; ++i) {
      bld.swizzles[i] = swizzles[i];
      bld.inv_swizzles[swizzles[i]] = i;
   }

   bld.sampler = sampler;
   bld.bld_base.shader = shader;
   bld.inputs = inputs;
   bld.outputs = outputs;
   bld.consts_ptr = consts_ptr;

   /* Hook up the AoS backend callbacks. */
   bld.bld_base.load_var = emit_load_var;
   bld.bld_base.store_var = emit_store_var;
   bld.bld_base.load_reg = emit_load_reg;
   bld.bld_base.store_reg = emit_store_reg;
   bld.bld_base.load_ubo = emit_load_ubo;
   bld.bld_base.load_const = emit_load_const;
   bld.bld_base.tex = emit_tex;
   bld.bld_base.emit_var_decl = emit_var_decl;

   lp_build_nir_llvm(&bld.bld_base, shader);
}

View file

@ -383,6 +383,7 @@ if draw_with_llvm
'gallivm/lp_bld_misc.h',
'gallivm/lp_bld_nir.h',
'gallivm/lp_bld_nir.c',
'gallivm/lp_bld_nir_aos.c',
'gallivm/lp_bld_nir_soa.c',
'gallivm/lp_bld_pack.c',
'gallivm/lp_bld_pack.h',

View file

@ -87,7 +87,6 @@ lp_fs_linear_run(const struct lp_rast_state *state,
struct lp_linear_sampler samp[LP_MAX_LINEAR_TEXTURES];
struct lp_linear_interp interp[LP_MAX_LINEAR_INPUTS];
uint8_t constants[LP_MAX_LINEAR_CONSTANTS][4];
const float w0 = a0[0][3];
float oow = 1.0f/w0;
@ -110,18 +109,35 @@ lp_fs_linear_run(const struct lp_rast_state *state,
/* XXX: Per statechange:
*/
for (i = 0; i < nr_consts; i++) {
for (j = 0; j < 4; j++) {
float val = state->jit_context.constants[0][i*4+j];
if (variant->shader->base.type == PIPE_SHADER_IR_TGSI) {
uint8_t constants[LP_MAX_LINEAR_CONSTANTS][4];
for (i = 0; i < nr_consts; i++) {
for (j = 0; j < 4; j++) {
float val = state->jit_context.constants[0][i*4+j];
if (val < 0.0f || val > 1.0f) {
if (LP_DEBUG & DEBUG_LINEAR2)
debug_printf(" -- const[%d] out of range %f\n", i, val);
goto fail;
}
constants[i][j] = (uint8_t)(val * 255.0f);
}
}
jit.constants = (const uint8_t (*)[4])constants;
} else {
uint8_t nir_constants[LP_MAX_LINEAR_CONSTANTS * 4];
for (i = 0; i < state->jit_context.num_constants[0]; i++){
float val = state->jit_context.constants[0][i];
if (val < 0.0f || val > 1.0f) {
if (LP_DEBUG & DEBUG_LINEAR2)
debug_printf(" -- const[%d] out of range\n", i);
debug_printf(" -- const[%d] out of range %f\n", i, val);
goto fail;
}
constants[i][j] = (uint8_t)(val * 255.0f);
nir_constants[i] = (uint8_t)(val * 255.0f);
}
jit.constants = (const uint8_t (*)[4])nir_constants;
}
jit.constants = (const uint8_t (*)[4])constants;
/* We assume BGRA ordering */
assert(variant->key.cbuf_format[0] == PIPE_FORMAT_B8G8R8X8_UNORM ||

View file

@ -3878,7 +3878,7 @@ llvmpipe_create_fs_state(struct pipe_context *pipe,
if (templ->type == PIPE_SHADER_IR_TGSI)
llvmpipe_fs_analyse(shader, templ->tokens);
else
shader->kind = LP_FS_KIND_GENERAL;
llvmpipe_fs_analyse_nir(shader);
return shader;
}

View file

@ -233,6 +233,8 @@ struct lp_fragment_shader
};
void
llvmpipe_fs_analyse_nir(struct lp_fragment_shader *shader);
void
llvmpipe_fs_analyse(struct lp_fragment_shader *shader,
const struct tgsi_token *tokens);

View file

@ -34,7 +34,7 @@
#include "tgsi/tgsi_dump.h"
#include "lp_debug.h"
#include "lp_state.h"
#include "nir.h"
/*
* Detect Aero minification shaders.
@ -173,6 +173,150 @@ finished:
return TRUE;
}
/*
 * Scan one NIR function for compatibility with the linear/AoS fast
 * path, filling per-texture info into @info as it goes.
 *
 * Permitted: load/store_deref, UBO-0 loads at constant offsets, plain
 * 2D nir_texop_tex sampling whose coordinates come straight from shader
 * inputs, and mov/vec2/vec4/fmul ALU ops (fmul immediates must be
 * 32-bit and already within [0,1]).  Anything else fails the scan.
 *
 * Fixes vs. original: stray double semicolon removed, and a bounds
 * check added before writing info->tex[info->num_texs] so a shader
 * with many texture ops cannot index past the supported count.
 */
static bool
llvmpipe_nir_fn_is_linear_compat(struct nir_shader *shader,
                                 nir_function_impl *impl,
                                 struct lp_tgsi_info *info)
{
   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         switch (instr->type) {
         case nir_instr_type_deref:
         case nir_instr_type_load_const:
            break;
         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            if (intrin->intrinsic != nir_intrinsic_load_deref &&
                intrin->intrinsic != nir_intrinsic_store_deref &&
                intrin->intrinsic != nir_intrinsic_load_ubo)
               return false;
            if (intrin->intrinsic == nir_intrinsic_load_ubo) {
               /* Only constant buffer 0 with a constant index works. */
               if (!nir_src_is_const(intrin->src[0]))
                  return false;
               nir_load_const_instr *load =
                  nir_instr_as_load_const(intrin->src[0].ssa->parent_instr);
               if (load->value[0].u32 != 0)
                  return false;
            }
            break;
         }
         case nir_instr_type_tex: {
            nir_tex_instr *tex = nir_instr_as_tex(instr);
            struct lp_tgsi_texture_info *tex_info;

            /* Bail before overflowing info->tex[]; the caller rejects
             * shaders exceeding LP_MAX_LINEAR_TEXTURES anyway. */
            if (info->num_texs >= LP_MAX_LINEAR_TEXTURES)
               return false;
            tex_info = &info->tex[info->num_texs];

            for (unsigned i = 0; i < tex->num_srcs; i++) {
               switch (tex->src[i].src_type) {
               case nir_tex_src_coord: {
                  /* Coordinates must trace back to a plain shader-input
                   * load. */
                  nir_ssa_scalar scalar = nir_ssa_scalar_resolved(tex->src[i].src.ssa, 0);
                  if (scalar.def->parent_instr->type != nir_instr_type_intrinsic)
                     return false;
                  nir_intrinsic_instr *intrin =
                     nir_instr_as_intrinsic(scalar.def->parent_instr);
                  if (intrin->intrinsic != nir_intrinsic_load_deref)
                     return false;
                  nir_deref_instr *deref =
                     nir_instr_as_deref(intrin->src[0].ssa->parent_instr);
                  nir_variable *var = nir_deref_instr_get_variable(deref);
                  if (var->data.mode != nir_var_shader_in)
                     return false;
                  break;
               }
               default:
                  continue;
               }
            }

            switch (tex->op) {
            case nir_texop_tex:
               tex_info->modifier = LP_BLD_TEX_MODIFIER_NONE;
               break;
            default:
               /* inaccurate but sufficient. */
               tex_info->modifier = LP_BLD_TEX_MODIFIER_EXPLICIT_LOD;
               return false;
            }

            switch (tex->sampler_dim) {
            case GLSL_SAMPLER_DIM_2D:
               tex_info->target = TGSI_TEXTURE_2D;
               break;
            default:
               /* inaccurate but sufficient. */
               tex_info->target = TGSI_TEXTURE_1D;
               return false;
            }

            tex_info->sampler_unit = tex->sampler_index;
            /* this is enforced in the scanner previously. */
            tex_info->coord[0].file = TGSI_FILE_INPUT;
            tex_info->coord[1].file = TGSI_FILE_INPUT;
            tex_info->coord[1].swizzle = 1;
            info->num_texs++;
            break;
         }
         case nir_instr_type_alu: {
            nir_alu_instr *alu = nir_instr_as_alu(instr);
            if (alu->op != nir_op_mov &&
                alu->op != nir_op_vec2 &&
                alu->op != nir_op_vec4 &&
                alu->op != nir_op_fmul)
               return false;
            if (alu->op == nir_op_fmul) {
               unsigned num_src = nir_op_infos[alu->op].num_inputs;
               for (unsigned s = 0; s < num_src; s++) {
                  if (nir_src_is_const(alu->src[s].src)) {
                     nir_load_const_instr *load =
                        nir_instr_as_load_const(alu->src[s].src.ssa->parent_instr);
                     /* Only 32-bit immediates already in unorm range. */
                     if (load->def.bit_size != 32)
                        return false;
                     for (unsigned c = 0; c < load->def.num_components; c++) {
                        if (load->value[c].f32 < 0.0 || load->value[c].f32 > 1.0) {
                           info->unclamped_immediates = true;
                           return false;
                        }
                     }
                  }
               }
            }
            break;
         }
         default:
            return false;
         }
      }
   }
   return true;
}
/* A shader is linear-compatible only if every function body passes the
 * per-function scan. */
static bool
llvmpipe_nir_is_linear_compat(struct nir_shader *shader,
                              struct lp_tgsi_info *info)
{
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;
      if (!llvmpipe_nir_fn_is_linear_compat(shader, function->impl, info))
         return false;
   }
   return true;
}
/* Classify a NIR fragment shader: LP_FS_KIND_LLVM_LINEAR when it meets
 * all the linear fast-path limits, LP_FS_KIND_GENERAL otherwise. */
void
llvmpipe_fs_analyse_nir(struct lp_fragment_shader *shader)
{
   const bool linear =
      shader->info.base.num_inputs <= LP_MAX_LINEAR_INPUTS &&
      shader->info.base.num_outputs == 1 &&
      !shader->info.indirect_textures &&
      !shader->info.sampler_texture_units_different &&
      !shader->info.unclamped_immediates &&
      shader->info.num_texs <= LP_MAX_LINEAR_TEXTURES &&
      llvmpipe_nir_is_linear_compat(shader->base.ir.nir, &shader->info);

   shader->kind = linear ? LP_FS_KIND_LLVM_LINEAR : LP_FS_KIND_GENERAL;
}
void
llvmpipe_fs_analyse(struct lp_fragment_shader *shader,

View file

@ -52,6 +52,7 @@
#include "gallivm/lp_bld_flow.h"
#include "gallivm/lp_bld_printf.h"
#include "gallivm/lp_bld_debug.h"
#include "gallivm/lp_bld_nir.h"
#include "lp_bld_alpha.h"
#include "lp_bld_blend.h"
@ -186,11 +187,22 @@ llvm_fragment_body(struct lp_build_context *bld,
outputs[i] = bld->undef;
}
lp_build_tgsi_aos(gallivm, shader->base.tokens, fs_type,
bgra_swizzles,
consts_ptr, inputs, outputs,
&sampler->base,
&shader->info.base);
if (shader->base.type == PIPE_SHADER_IR_TGSI)
lp_build_tgsi_aos(gallivm, shader->base.tokens, fs_type,
bgra_swizzles,
consts_ptr, inputs, outputs,
&sampler->base,
&shader->info.base);
else {
nir_shader *clone = nir_shader_clone(NULL, shader->base.ir.nir);
lp_build_nir_aos(gallivm, clone, fs_type,
bgra_swizzles,
consts_ptr, inputs, outputs,
&sampler->base,
&shader->info.base);
ralloc_free(clone);
}
/*
* Blend output color