mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 13:38:06 +02:00
gallivm/llvmpipe: add support for NIR to the linear/aos paths.
When the AOS/linear code was added it only worked with TGSI, which meant nothing in upstream Mesa was really using it. This adds support for analysing NIR shaders, and adds AOS support to the NIR backend. AOS support is limited to mov, vec, fmul and texture sampling, in order to accelerate mostly compositing operations. I've verified that weston uses the fast path. gnome-shell can't use it yet, as we can't optimise the depth-test paths. Acked-by: Jose Fonseca <jfonseca@vmware.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15140>
This commit is contained in:
parent
6efd489ac9
commit
34379a937f
9 changed files with 609 additions and 20 deletions
|
|
@ -39,6 +39,11 @@
|
|||
#include "nir_deref.h"
|
||||
#include "nir_search_helpers.h"
|
||||
|
||||
static bool is_aos(const struct lp_build_nir_context *bld_base)
|
||||
{
|
||||
return bld_base->base.type.length == 16 && bld_base->base.type.width == 8;
|
||||
}
|
||||
|
||||
static void visit_cf_list(struct lp_build_nir_context *bld_base,
|
||||
struct exec_list *list);
|
||||
|
||||
|
|
@ -169,7 +174,7 @@ static void assign_ssa(struct lp_build_nir_context *bld_base, int idx, LLVMValue
|
|||
static void assign_ssa_dest(struct lp_build_nir_context *bld_base, const nir_ssa_def *ssa,
|
||||
LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
|
||||
{
|
||||
assign_ssa(bld_base, ssa->index, ssa->num_components == 1 ? vals[0] : lp_nir_array_build_gather_values(bld_base->base.gallivm->builder, vals, ssa->num_components));
|
||||
assign_ssa(bld_base, ssa->index, (ssa->num_components == 1 || is_aos(bld_base)) ? vals[0] : lp_nir_array_build_gather_values(bld_base->base.gallivm->builder, vals, ssa->num_components));
|
||||
}
|
||||
|
||||
static void assign_reg(struct lp_build_nir_context *bld_base, const nir_reg_dest *reg,
|
||||
|
|
@ -276,6 +281,10 @@ static LLVMValueRef get_alu_src(struct lp_build_nir_context *bld_base,
|
|||
bool need_swizzle = false;
|
||||
|
||||
assert(value);
|
||||
|
||||
if (is_aos(bld_base))
|
||||
return value;
|
||||
|
||||
unsigned src_components = nir_src_num_components(src.src);
|
||||
for (unsigned i = 0; i < num_components; ++i) {
|
||||
assert(src.swizzle[i] < src_components);
|
||||
|
|
@ -1110,6 +1119,15 @@ static void visit_alu(struct lp_build_nir_context *bld_base, const nir_alu_instr
|
|||
src_bit_size[i] = nir_src_bit_size(instr->src[i].src);
|
||||
}
|
||||
|
||||
if (instr->op == nir_op_mov && is_aos(bld_base) && !instr->dest.dest.is_ssa) {
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
if (instr->dest.write_mask & (1 << i)) {
|
||||
assign_reg(bld_base, &instr->dest.dest.reg, (1 << i), src);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];
|
||||
if (instr->op == nir_op_vec4 || instr->op == nir_op_vec3 || instr->op == nir_op_vec2 || instr->op == nir_op_vec8 || instr->op == nir_op_vec16) {
|
||||
for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
|
||||
|
|
@ -1122,7 +1140,15 @@ static void visit_alu(struct lp_build_nir_context *bld_base, const nir_alu_instr
|
|||
temp_chan = cast_type(bld_base, temp_chan, nir_op_infos[instr->op].input_types[0], src_bit_size[0]);
|
||||
result[0] = (c == 0) ? temp_chan : lp_build_add(get_flt_bld(bld_base, src_bit_size[0]), result[0], temp_chan);
|
||||
}
|
||||
} else {
|
||||
} else if (is_aos(bld_base)) {
|
||||
if (instr->op == nir_op_fmul) {
|
||||
if (LLVMIsConstant(src[0]))
|
||||
src[0] = lp_nir_aos_conv_const(gallivm, src[0], 1);
|
||||
if (LLVMIsConstant(src[1]))
|
||||
src[1] = lp_nir_aos_conv_const(gallivm, src[1], 1);
|
||||
}
|
||||
result[0] = do_alu_action(bld_base, instr, src_bit_size, src);
|
||||
} else {
|
||||
for (unsigned c = 0; c < num_components; c++) {
|
||||
LLVMValueRef src_chan[NIR_MAX_VEC_COMPONENTS];
|
||||
|
||||
|
|
@ -2019,7 +2045,7 @@ static void visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *inst
|
|||
LLVMValueRef texel[NIR_MAX_VEC_COMPONENTS];
|
||||
unsigned lod_src = 0;
|
||||
LLVMValueRef coord_undef = LLVMGetUndef(bld_base->base.int_vec_type);
|
||||
|
||||
unsigned coord_vals = is_aos(bld_base) ? 1 : instr->coord_components;
|
||||
memset(¶ms, 0, sizeof(params));
|
||||
enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
|
||||
|
||||
|
|
@ -2038,14 +2064,14 @@ static void visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *inst
|
|||
switch (instr->src[i].src_type) {
|
||||
case nir_tex_src_coord: {
|
||||
LLVMValueRef coord = get_src(bld_base, instr->src[i].src);
|
||||
if (instr->coord_components == 1)
|
||||
if (coord_vals == 1)
|
||||
coords[0] = coord;
|
||||
else {
|
||||
for (unsigned chan = 0; chan < instr->coord_components; ++chan)
|
||||
coords[chan] = LLVMBuildExtractValue(builder, coord,
|
||||
chan, "");
|
||||
}
|
||||
for (unsigned chan = instr->coord_components; chan < 5; chan++)
|
||||
for (unsigned chan = coord_vals; chan < 5; chan++)
|
||||
coords[chan] = coord_undef;
|
||||
|
||||
break;
|
||||
|
|
@ -2144,7 +2170,7 @@ static void visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *inst
|
|||
|
||||
if (instr->op == nir_texop_tex || instr->op == nir_texop_tg4 || instr->op == nir_texop_txb ||
|
||||
instr->op == nir_texop_txl || instr->op == nir_texop_txd || instr->op == nir_texop_lod)
|
||||
for (unsigned chan = 0; chan < instr->coord_components; ++chan)
|
||||
for (unsigned chan = 0; chan < coord_vals; ++chan)
|
||||
coords[chan] = cast_type(bld_base, coords[chan], nir_type_float, 32);
|
||||
else if (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms)
|
||||
for (unsigned chan = 0; chan < instr->coord_components; ++chan)
|
||||
|
|
@ -2375,6 +2401,9 @@ handle_shader_output_decl(struct lp_build_nir_context *bld_base,
|
|||
static LLVMTypeRef get_register_type(struct lp_build_nir_context *bld_base,
|
||||
nir_register *reg)
|
||||
{
|
||||
if (is_aos(bld_base))
|
||||
return bld_base->base.int_vec_type;
|
||||
|
||||
struct lp_build_context *int_bld = get_int_bld(bld_base, true, reg->bit_size == 1 ? 32 : reg->bit_size);
|
||||
|
||||
LLVMTypeRef type = int_bld->vec_type;
|
||||
|
|
@ -2398,6 +2427,11 @@ bool lp_build_nir_llvm(
|
|||
nir_remove_dead_derefs(nir);
|
||||
nir_remove_dead_variables(nir, nir_var_function_temp, NULL);
|
||||
|
||||
if (is_aos(bld_base)) {
|
||||
nir_move_vec_src_uses_to_dest(nir);
|
||||
nir_lower_vec_to_movs(nir, NULL, NULL);
|
||||
}
|
||||
|
||||
nir_foreach_shader_out_variable(variable, nir)
|
||||
handle_shader_output_decl(bld_base, nir, variable);
|
||||
|
||||
|
|
|
|||
|
|
@ -40,6 +40,16 @@ void lp_build_nir_soa(struct gallivm_state *gallivm,
|
|||
const struct lp_build_tgsi_params *params,
|
||||
LLVMValueRef (*outputs)[4]);
|
||||
|
||||
void lp_build_nir_aos(struct gallivm_state *gallivm,
|
||||
struct nir_shader *shader,
|
||||
struct lp_type type,
|
||||
const unsigned char swizzles[4],
|
||||
LLVMValueRef consts_ptr,
|
||||
const LLVMValueRef *inputs,
|
||||
LLVMValueRef *outputs,
|
||||
const struct lp_build_sampler_aos *sampler,
|
||||
const struct tgsi_shader_info *info);
|
||||
|
||||
struct lp_build_nir_context
|
||||
{
|
||||
struct lp_build_context base;
|
||||
|
|
@ -266,6 +276,30 @@ struct lp_build_nir_soa_context
|
|||
unsigned gs_vertex_streams;
|
||||
};
|
||||
|
||||
struct lp_build_nir_aos_context
|
||||
{
|
||||
struct lp_build_nir_context bld_base;
|
||||
|
||||
/* Builder for integer masks and indices */
|
||||
struct lp_build_context int_bld;
|
||||
|
||||
/*
|
||||
* AoS swizzle used:
|
||||
* - swizzles[0] = red index
|
||||
* - swizzles[1] = green index
|
||||
* - swizzles[2] = blue index
|
||||
* - swizzles[3] = alpha index
|
||||
*/
|
||||
unsigned char swizzles[4];
|
||||
unsigned char inv_swizzles[4];
|
||||
|
||||
LLVMValueRef consts_ptr;
|
||||
const LLVMValueRef *inputs;
|
||||
LLVMValueRef *outputs;
|
||||
|
||||
const struct lp_build_sampler_aos *sampler;
|
||||
};
|
||||
|
||||
bool
|
||||
lp_build_nir_llvm(struct lp_build_nir_context *bld_base,
|
||||
struct nir_shader *nir);
|
||||
|
|
@ -332,4 +366,11 @@ static inline struct lp_build_context *get_int_bld(struct lp_build_nir_context *
|
|||
}
|
||||
}
|
||||
|
||||
/*
 * Downcast a generic NIR build context to the AoS context that embeds it.
 * No checking is done; the caller must know bld_base really is the
 * bld_base member of a struct lp_build_nir_aos_context.
 */
static inline struct lp_build_nir_aos_context *
lp_nir_aos_context(struct lp_build_nir_context *bld_base)
{
   struct lp_build_nir_aos_context *aos =
      (struct lp_build_nir_aos_context *)bld_base;

   return aos;
}
|
||||
|
||||
LLVMValueRef lp_nir_aos_conv_const(struct gallivm_state *gallivm, LLVMValueRef constval, int nc);
|
||||
#endif
|
||||
|
|
|
|||
339
src/gallium/auxiliary/gallivm/lp_bld_nir_aos.c
Normal file
339
src/gallium/auxiliary/gallivm/lp_bld_nir_aos.c
Normal file
|
|
@ -0,0 +1,339 @@
|
|||
/**************************************************************************
|
||||
*
|
||||
* Copyright 2022 Red Hat
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
#include "lp_bld_nir.h"
|
||||
#include "lp_bld_init.h"
|
||||
#include "lp_bld_const.h"
|
||||
#include "lp_bld_flow.h"
|
||||
#include "lp_bld_struct.h"
|
||||
#include "lp_bld_swizzle.h"
|
||||
#include "lp_bld_debug.h"
|
||||
#include "util/u_math.h"
|
||||
|
||||
static LLVMValueRef
|
||||
swizzle_aos(struct lp_build_nir_context *bld_base,
|
||||
LLVMValueRef a,
|
||||
unsigned swizzle_x,
|
||||
unsigned swizzle_y,
|
||||
unsigned swizzle_z,
|
||||
unsigned swizzle_w)
|
||||
{
|
||||
unsigned char swizzles[4];
|
||||
struct lp_build_nir_aos_context *bld = lp_nir_aos_context(bld_base);
|
||||
|
||||
assert(swizzle_x < 4);
|
||||
assert(swizzle_y < 4);
|
||||
assert(swizzle_z < 4);
|
||||
assert(swizzle_w < 4);
|
||||
|
||||
swizzles[bld->inv_swizzles[0]] = bld->swizzles[swizzle_x];
|
||||
swizzles[bld->inv_swizzles[1]] = bld->swizzles[swizzle_y];
|
||||
swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z];
|
||||
swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w];
|
||||
|
||||
return lp_build_swizzle_aos(&bld->bld_base.base, a, swizzles);
|
||||
}
|
||||
|
||||
/*
 * Convert a constant vector of 1..4 float bit patterns (held in integer
 * lanes) into a constant 16 x unorm8 AoS vector.
 *
 * Each of the first nc components is extracted, reinterpreted as a float
 * with uif() and converted with float_to_ubyte().  Component i fills byte
 * i of all four 4-byte groups.  Channels at or beyond nc reuse the byte of
 * the last converted component (0 when nc is 0).
 *
 * nc must be in the range 0..4.  The loop is bounded by the four channels
 * rather than by nc, so an out-of-range nc can no longer write past the end
 * of elems[] in builds where the assert is compiled out.
 */
LLVMValueRef lp_nir_aos_conv_const(struct gallivm_state *gallivm, LLVMValueRef constval, int nc)
{
   LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);
   LLVMValueRef elems[16];
   uint8_t val = 0;

   assert(nc >= 0 && nc <= 4);

   /* convert from 1..4 x f32 to 16 x unorm8 */
   for (int i = 0; i < 4; i++) {
      if (i < nc) {
         LLVMValueRef value = LLVMBuildExtractElement(gallivm->builder, constval,
                                                      lp_build_const_int32(gallivm, i), "");
         assert(LLVMIsConstant(value));
         unsigned uval = LLVMConstIntGetZExtValue(value);
         float f = uif(uval);
         val = float_to_ubyte(f);
      }

      /* replicate this channel's byte into each of the four groups */
      LLVMValueRef byte = LLVMConstInt(i8t, val, 0);
      for (unsigned j = 0; j < 4; j++) {
         elems[j * 4 + i] = byte;
      }
   }
   return LLVMConstVector(elems, 16);
}
|
||||
|
||||
static void init_var_slots(struct lp_build_nir_context *bld_base,
|
||||
nir_variable *var)
|
||||
{
|
||||
struct lp_build_nir_aos_context *bld = (struct lp_build_nir_aos_context *)bld_base;
|
||||
|
||||
if (!bld->outputs)
|
||||
return;
|
||||
unsigned this_loc = var->data.driver_location;
|
||||
|
||||
bld->outputs[this_loc] = lp_build_alloca(bld_base->base.gallivm,
|
||||
bld_base->base.vec_type, "output");
|
||||
}
|
||||
|
||||
static void emit_var_decl(struct lp_build_nir_context *bld_base,
|
||||
nir_variable *var)
|
||||
{
|
||||
switch (var->data.mode) {
|
||||
case nir_var_shader_out: {
|
||||
init_var_slots(bld_base, var);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void emit_load_var(struct lp_build_nir_context *bld_base,
|
||||
nir_variable_mode deref_mode,
|
||||
unsigned num_components,
|
||||
unsigned bit_size,
|
||||
nir_variable *var,
|
||||
unsigned vertex_index,
|
||||
LLVMValueRef indir_vertex_index,
|
||||
unsigned const_index,
|
||||
LLVMValueRef indir_index,
|
||||
LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
|
||||
{
|
||||
struct lp_build_nir_aos_context *bld = (struct lp_build_nir_aos_context *)bld_base;
|
||||
unsigned location = var->data.driver_location;
|
||||
|
||||
switch (deref_mode) {
|
||||
case nir_var_shader_in:
|
||||
result[0] = bld->inputs[location];
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void emit_store_var(struct lp_build_nir_context *bld_base,
|
||||
nir_variable_mode deref_mode,
|
||||
unsigned num_components,
|
||||
unsigned bit_size,
|
||||
nir_variable *var,
|
||||
unsigned writemask,
|
||||
LLVMValueRef indir_vertex_index,
|
||||
unsigned const_index,
|
||||
LLVMValueRef indir_index,
|
||||
LLVMValueRef dst)
|
||||
{
|
||||
struct lp_build_nir_aos_context *bld = (struct lp_build_nir_aos_context *)bld_base;
|
||||
struct gallivm_state *gallivm = bld_base->base.gallivm;
|
||||
unsigned location = var->data.driver_location;
|
||||
|
||||
if (LLVMIsConstant(dst)) {
|
||||
dst = lp_nir_aos_conv_const(gallivm, dst, num_components);
|
||||
}
|
||||
|
||||
switch (deref_mode) {
|
||||
case nir_var_shader_out:
|
||||
LLVMBuildStore(gallivm->builder, dst, bld->outputs[location]);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static LLVMValueRef emit_load_reg(struct lp_build_nir_context *bld_base,
|
||||
struct lp_build_context *reg_bld,
|
||||
const nir_reg_src *reg,
|
||||
LLVMValueRef indir_src,
|
||||
LLVMValueRef reg_storage)
|
||||
{
|
||||
struct gallivm_state *gallivm = bld_base->base.gallivm;
|
||||
|
||||
return LLVMBuildLoad(gallivm->builder, reg_storage, "");
|
||||
}
|
||||
|
||||
static void emit_store_reg(struct lp_build_nir_context *bld_base,
|
||||
struct lp_build_context *reg_bld,
|
||||
const nir_reg_dest *reg,
|
||||
unsigned writemask,
|
||||
LLVMValueRef indir_src,
|
||||
LLVMValueRef reg_storage,
|
||||
LLVMValueRef dst[NIR_MAX_VEC_COMPONENTS])
|
||||
{
|
||||
struct gallivm_state *gallivm = bld_base->base.gallivm;
|
||||
|
||||
if (LLVMIsConstant(dst[0]))
|
||||
dst[0] = lp_nir_aos_conv_const(gallivm, dst[0], 1);
|
||||
|
||||
if (writemask == 0xf) {
|
||||
LLVMBuildStore(gallivm->builder, dst[0], reg_storage);
|
||||
return;
|
||||
}
|
||||
|
||||
LLVMValueRef cur = LLVMBuildLoad(gallivm->builder, reg_storage, "");
|
||||
LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
if (writemask & (1 << i)) {
|
||||
LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH] = { 0 };
|
||||
for (unsigned j = 0; j < 16; j++){
|
||||
if (j % 4 == i)
|
||||
shuffles[j] = LLVMConstInt(i32t, 16 + j, 0);
|
||||
else
|
||||
shuffles[j] = LLVMConstInt(i32t, j, 0);
|
||||
}
|
||||
|
||||
cur = LLVMBuildShuffleVector(gallivm->builder, cur, dst[0],
|
||||
LLVMConstVector(shuffles, 16), "");
|
||||
}
|
||||
}
|
||||
LLVMBuildStore(gallivm->builder, cur, reg_storage);
|
||||
}
|
||||
|
||||
static void emit_load_ubo(struct lp_build_nir_context *bld_base,
|
||||
unsigned nc,
|
||||
unsigned bit_size,
|
||||
bool offset_is_uniform,
|
||||
LLVMValueRef index,
|
||||
LLVMValueRef offset,
|
||||
LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
|
||||
{
|
||||
struct lp_build_nir_aos_context *bld = (struct lp_build_nir_aos_context *)bld_base;
|
||||
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
|
||||
struct gallivm_state *gallivm = bld_base->base.gallivm;
|
||||
struct lp_type type = bld_base->base.type;
|
||||
LLVMValueRef res;
|
||||
|
||||
res = bld->bld_base.base.undef;
|
||||
offset = LLVMBuildExtractElement(builder, offset, lp_build_const_int32(gallivm, 0), "");
|
||||
assert(LLVMIsConstant(offset));
|
||||
unsigned offset_val = LLVMConstIntGetZExtValue(offset) >> 2;
|
||||
for (unsigned chan = 0; chan < nc; ++chan) {
|
||||
LLVMValueRef this_offset = lp_build_const_int32(gallivm, offset_val + chan);
|
||||
|
||||
LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &this_offset, 1, "");
|
||||
|
||||
LLVMValueRef scalar = LLVMBuildLoad(builder, scalar_ptr, "");
|
||||
|
||||
lp_build_name(scalar, "const[%u].%c", offset_val, "xyzw"[chan]);
|
||||
|
||||
LLVMValueRef swizzle = lp_build_const_int32(bld->bld_base.base.gallivm,
|
||||
nc == 1 ? 0 : bld->swizzles[chan]);
|
||||
|
||||
res = LLVMBuildInsertElement(builder, res, scalar, swizzle, "");
|
||||
}
|
||||
if (type.length > 4) {
|
||||
LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
|
||||
|
||||
for (unsigned chan = 0; chan < nc; ++chan) {
|
||||
shuffles[chan] = lp_build_const_int32(bld->bld_base.base.gallivm, chan);
|
||||
}
|
||||
|
||||
for (unsigned i = nc; i < type.length; ++i) {
|
||||
shuffles[i] = shuffles[i % nc];
|
||||
}
|
||||
|
||||
res = LLVMBuildShuffleVector(builder,
|
||||
res, bld->bld_base.base.undef,
|
||||
LLVMConstVector(shuffles, type.length),
|
||||
"");
|
||||
}
|
||||
|
||||
if (nc == 4)
|
||||
swizzle_aos(bld_base, res, 0, 1, 2, 3);
|
||||
|
||||
result[0] = res;
|
||||
}
|
||||
|
||||
static void emit_tex(struct lp_build_nir_context *bld_base,
|
||||
struct lp_sampler_params *params)
|
||||
{
|
||||
struct lp_build_nir_aos_context *bld = (struct lp_build_nir_aos_context *)bld_base;
|
||||
struct lp_derivatives derivs = { 0 };
|
||||
params->type = bld_base->base.type;
|
||||
params->texel[0] = bld->sampler->emit_fetch_texel(bld->sampler,
|
||||
&bld->bld_base.base,
|
||||
PIPE_TEXTURE_2D,
|
||||
params->texture_index,
|
||||
params->coords[0],
|
||||
params->derivs ? params->derivs[0] : derivs,
|
||||
0);
|
||||
}
|
||||
|
||||
static void
|
||||
emit_load_const(struct lp_build_nir_context *bld_base,
|
||||
const nir_load_const_instr *instr,
|
||||
LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
|
||||
{
|
||||
struct lp_build_nir_aos_context *bld = lp_nir_aos_context(bld_base);
|
||||
struct gallivm_state *gallivm = bld_base->base.gallivm;
|
||||
LLVMValueRef elems[4];
|
||||
int nc = instr->def.num_components;
|
||||
bool do_swizzle = false;
|
||||
|
||||
if (nc == 4)
|
||||
do_swizzle = true;
|
||||
|
||||
for (unsigned i = 0; i < nc; i++) {
|
||||
int idx = do_swizzle ? bld->swizzles[i] : i;
|
||||
elems[idx] = LLVMConstInt(LLVMInt32TypeInContext(gallivm->context), instr->value[i].u32, bld_base->base.type.sign ? 1 : 0);
|
||||
}
|
||||
outval[0] = LLVMConstVector(elems, nc);
|
||||
}
|
||||
|
||||
void
|
||||
lp_build_nir_aos(struct gallivm_state *gallivm,
|
||||
struct nir_shader *shader,
|
||||
struct lp_type type,
|
||||
const unsigned char swizzles[4],
|
||||
LLVMValueRef consts_ptr,
|
||||
const LLVMValueRef *inputs,
|
||||
LLVMValueRef *outputs,
|
||||
const struct lp_build_sampler_aos *sampler,
|
||||
const struct tgsi_shader_info *info)
|
||||
{
|
||||
struct lp_build_nir_aos_context bld;
|
||||
|
||||
memset(&bld, 0, sizeof bld);
|
||||
lp_build_context_init(&bld.bld_base.base, gallivm, type);
|
||||
lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
|
||||
lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
|
||||
|
||||
for (unsigned chan = 0; chan < 4; ++chan) {
|
||||
bld.swizzles[chan] = swizzles[chan];
|
||||
bld.inv_swizzles[swizzles[chan]] = chan;
|
||||
}
|
||||
bld.sampler = sampler;
|
||||
|
||||
bld.bld_base.shader = shader;
|
||||
|
||||
bld.inputs = inputs;
|
||||
bld.outputs = outputs;
|
||||
bld.consts_ptr = consts_ptr;
|
||||
|
||||
bld.bld_base.load_var = emit_load_var;
|
||||
bld.bld_base.store_var = emit_store_var;
|
||||
bld.bld_base.load_reg = emit_load_reg;
|
||||
bld.bld_base.store_reg = emit_store_reg;
|
||||
bld.bld_base.load_ubo = emit_load_ubo;
|
||||
bld.bld_base.load_const = emit_load_const;
|
||||
|
||||
bld.bld_base.tex = emit_tex;
|
||||
bld.bld_base.emit_var_decl = emit_var_decl;
|
||||
|
||||
lp_build_nir_llvm(&bld.bld_base, shader);
|
||||
}
|
||||
|
|
@ -383,6 +383,7 @@ if draw_with_llvm
|
|||
'gallivm/lp_bld_misc.h',
|
||||
'gallivm/lp_bld_nir.h',
|
||||
'gallivm/lp_bld_nir.c',
|
||||
'gallivm/lp_bld_nir_aos.c',
|
||||
'gallivm/lp_bld_nir_soa.c',
|
||||
'gallivm/lp_bld_pack.c',
|
||||
'gallivm/lp_bld_pack.h',
|
||||
|
|
|
|||
|
|
@ -87,7 +87,6 @@ lp_fs_linear_run(const struct lp_rast_state *state,
|
|||
|
||||
struct lp_linear_sampler samp[LP_MAX_LINEAR_TEXTURES];
|
||||
struct lp_linear_interp interp[LP_MAX_LINEAR_INPUTS];
|
||||
uint8_t constants[LP_MAX_LINEAR_CONSTANTS][4];
|
||||
|
||||
const float w0 = a0[0][3];
|
||||
float oow = 1.0f/w0;
|
||||
|
|
@ -110,18 +109,35 @@ lp_fs_linear_run(const struct lp_rast_state *state,
|
|||
|
||||
/* XXX: Per statechange:
|
||||
*/
|
||||
for (i = 0; i < nr_consts; i++) {
|
||||
for (j = 0; j < 4; j++) {
|
||||
float val = state->jit_context.constants[0][i*4+j];
|
||||
if (variant->shader->base.type == PIPE_SHADER_IR_TGSI) {
|
||||
uint8_t constants[LP_MAX_LINEAR_CONSTANTS][4];
|
||||
|
||||
for (i = 0; i < nr_consts; i++) {
|
||||
for (j = 0; j < 4; j++) {
|
||||
float val = state->jit_context.constants[0][i*4+j];
|
||||
if (val < 0.0f || val > 1.0f) {
|
||||
if (LP_DEBUG & DEBUG_LINEAR2)
|
||||
debug_printf(" -- const[%d] out of range %f\n", i, val);
|
||||
goto fail;
|
||||
}
|
||||
constants[i][j] = (uint8_t)(val * 255.0f);
|
||||
}
|
||||
}
|
||||
jit.constants = (const uint8_t (*)[4])constants;
|
||||
} else {
|
||||
uint8_t nir_constants[LP_MAX_LINEAR_CONSTANTS * 4];
|
||||
|
||||
for (i = 0; i < state->jit_context.num_constants[0]; i++){
|
||||
float val = state->jit_context.constants[0][i];
|
||||
if (val < 0.0f || val > 1.0f) {
|
||||
if (LP_DEBUG & DEBUG_LINEAR2)
|
||||
debug_printf(" -- const[%d] out of range\n", i);
|
||||
debug_printf(" -- const[%d] out of range %f\n", i, val);
|
||||
goto fail;
|
||||
}
|
||||
constants[i][j] = (uint8_t)(val * 255.0f);
|
||||
nir_constants[i] = (uint8_t)(val * 255.0f);
|
||||
}
|
||||
jit.constants = (const uint8_t (*)[4])nir_constants;
|
||||
}
|
||||
jit.constants = (const uint8_t (*)[4])constants;
|
||||
|
||||
/* We assume BGRA ordering */
|
||||
assert(variant->key.cbuf_format[0] == PIPE_FORMAT_B8G8R8X8_UNORM ||
|
||||
|
|
|
|||
|
|
@ -3878,7 +3878,7 @@ llvmpipe_create_fs_state(struct pipe_context *pipe,
|
|||
if (templ->type == PIPE_SHADER_IR_TGSI)
|
||||
llvmpipe_fs_analyse(shader, templ->tokens);
|
||||
else
|
||||
shader->kind = LP_FS_KIND_GENERAL;
|
||||
llvmpipe_fs_analyse_nir(shader);
|
||||
|
||||
return shader;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -233,6 +233,8 @@ struct lp_fragment_shader
|
|||
};
|
||||
|
||||
|
||||
void
|
||||
llvmpipe_fs_analyse_nir(struct lp_fragment_shader *shader);
|
||||
void
|
||||
llvmpipe_fs_analyse(struct lp_fragment_shader *shader,
|
||||
const struct tgsi_token *tokens);
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@
|
|||
#include "tgsi/tgsi_dump.h"
|
||||
#include "lp_debug.h"
|
||||
#include "lp_state.h"
|
||||
|
||||
#include "nir.h"
|
||||
|
||||
/*
|
||||
* Detect Aero minification shaders.
|
||||
|
|
@ -173,6 +173,150 @@ finished:
|
|||
return TRUE;
|
||||
}
|
||||
|
||||
static bool
|
||||
llvmpipe_nir_fn_is_linear_compat(struct nir_shader *shader,
|
||||
nir_function_impl *impl,
|
||||
struct lp_tgsi_info *info)
|
||||
{
|
||||
nir_foreach_block(block, impl) {
|
||||
nir_foreach_instr_safe(instr, block) {
|
||||
switch (instr->type) {
|
||||
case nir_instr_type_deref:
|
||||
case nir_instr_type_load_const:
|
||||
break;
|
||||
case nir_instr_type_intrinsic: {
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
if (intrin->intrinsic != nir_intrinsic_load_deref &&
|
||||
intrin->intrinsic != nir_intrinsic_store_deref &&
|
||||
intrin->intrinsic != nir_intrinsic_load_ubo)
|
||||
return false;
|
||||
|
||||
if (intrin->intrinsic == nir_intrinsic_load_ubo) {
|
||||
if (!nir_src_is_const(intrin->src[0]))
|
||||
return false;
|
||||
nir_load_const_instr *load =
|
||||
nir_instr_as_load_const(intrin->src[0].ssa->parent_instr);
|
||||
if (load->value[0].u32 != 0)
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case nir_instr_type_tex: {
|
||||
nir_tex_instr *tex = nir_instr_as_tex(instr);
|
||||
struct lp_tgsi_texture_info *tex_info = &info->tex[info->num_texs];
|
||||
|
||||
for (unsigned i = 0; i < tex->num_srcs; i++) {
|
||||
switch (tex->src[i].src_type) {
|
||||
case nir_tex_src_coord: {
|
||||
nir_ssa_scalar scalar = nir_ssa_scalar_resolved(tex->src[i].src.ssa, 0);
|
||||
if (scalar.def->parent_instr->type != nir_instr_type_intrinsic)
|
||||
return false;
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(scalar.def->parent_instr);
|
||||
if (intrin->intrinsic != nir_intrinsic_load_deref)
|
||||
return false;
|
||||
nir_deref_instr *deref = nir_instr_as_deref(intrin->src[0].ssa->parent_instr);
|
||||
nir_variable *var = nir_deref_instr_get_variable(deref);
|
||||
if (var->data.mode != nir_var_shader_in)
|
||||
return false;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
switch (tex->op) {
|
||||
case nir_texop_tex:
|
||||
tex_info->modifier = LP_BLD_TEX_MODIFIER_NONE;
|
||||
break;
|
||||
default:
|
||||
/* inaccurate but sufficient. */
|
||||
tex_info->modifier = LP_BLD_TEX_MODIFIER_EXPLICIT_LOD;
|
||||
return false;
|
||||
}
|
||||
switch (tex->sampler_dim) {
|
||||
case GLSL_SAMPLER_DIM_2D:
|
||||
tex_info->target = TGSI_TEXTURE_2D;
|
||||
break;
|
||||
default:
|
||||
/* inaccurate but sufficient. */
|
||||
tex_info->target = TGSI_TEXTURE_1D;
|
||||
return false;
|
||||
}
|
||||
|
||||
tex_info->sampler_unit = tex->sampler_index;
|
||||
|
||||
/* this is enforced in the scanner previously. */
|
||||
tex_info->coord[0].file = TGSI_FILE_INPUT;
|
||||
tex_info->coord[1].file = TGSI_FILE_INPUT;
|
||||
tex_info->coord[1].swizzle = 1;
|
||||
info->num_texs++;
|
||||
break;
|
||||
}
|
||||
case nir_instr_type_alu: {
|
||||
nir_alu_instr *alu = nir_instr_as_alu(instr);
|
||||
if (alu->op != nir_op_mov &&
|
||||
alu->op != nir_op_vec2 &&
|
||||
alu->op != nir_op_vec4 &&
|
||||
alu->op != nir_op_fmul)
|
||||
return false;
|
||||
|
||||
if (alu->op == nir_op_fmul) {
|
||||
unsigned num_src = nir_op_infos[alu->op].num_inputs;;
|
||||
for (unsigned s = 0; s < num_src; s++) {
|
||||
if (nir_src_is_const(alu->src[s].src)) {
|
||||
nir_load_const_instr *load =
|
||||
nir_instr_as_load_const(alu->src[s].src.ssa->parent_instr);
|
||||
|
||||
if (load->def.bit_size != 32)
|
||||
return false;
|
||||
for (unsigned c = 0; c < load->def.num_components; c++) {
|
||||
if (load->value[c].f32 < 0.0 || load->value[c].f32 > 1.0) {
|
||||
info->unclamped_immediates = true;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
llvmpipe_nir_is_linear_compat(struct nir_shader *shader,
|
||||
struct lp_tgsi_info *info)
|
||||
{
|
||||
nir_foreach_function(function, shader) {
|
||||
if (function->impl) {
|
||||
if (!llvmpipe_nir_fn_is_linear_compat(shader, function->impl, info))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
llvmpipe_fs_analyse_nir(struct lp_fragment_shader *shader)
|
||||
{
|
||||
shader->kind = LP_FS_KIND_GENERAL;
|
||||
|
||||
if (shader->info.base.num_inputs <= LP_MAX_LINEAR_INPUTS &&
|
||||
shader->info.base.num_outputs == 1 &&
|
||||
!shader->info.indirect_textures &&
|
||||
!shader->info.sampler_texture_units_different &&
|
||||
!shader->info.unclamped_immediates &&
|
||||
shader->info.num_texs <= LP_MAX_LINEAR_TEXTURES &&
|
||||
llvmpipe_nir_is_linear_compat(shader->base.ir.nir, &shader->info)) {
|
||||
shader->kind = LP_FS_KIND_LLVM_LINEAR;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
llvmpipe_fs_analyse(struct lp_fragment_shader *shader,
|
||||
|
|
|
|||
|
|
@ -52,6 +52,7 @@
|
|||
#include "gallivm/lp_bld_flow.h"
|
||||
#include "gallivm/lp_bld_printf.h"
|
||||
#include "gallivm/lp_bld_debug.h"
|
||||
#include "gallivm/lp_bld_nir.h"
|
||||
|
||||
#include "lp_bld_alpha.h"
|
||||
#include "lp_bld_blend.h"
|
||||
|
|
@ -186,11 +187,22 @@ llvm_fragment_body(struct lp_build_context *bld,
|
|||
outputs[i] = bld->undef;
|
||||
}
|
||||
|
||||
lp_build_tgsi_aos(gallivm, shader->base.tokens, fs_type,
|
||||
bgra_swizzles,
|
||||
consts_ptr, inputs, outputs,
|
||||
&sampler->base,
|
||||
&shader->info.base);
|
||||
if (shader->base.type == PIPE_SHADER_IR_TGSI)
|
||||
lp_build_tgsi_aos(gallivm, shader->base.tokens, fs_type,
|
||||
bgra_swizzles,
|
||||
consts_ptr, inputs, outputs,
|
||||
&sampler->base,
|
||||
&shader->info.base);
|
||||
else {
|
||||
nir_shader *clone = nir_shader_clone(NULL, shader->base.ir.nir);
|
||||
lp_build_nir_aos(gallivm, clone, fs_type,
|
||||
bgra_swizzles,
|
||||
consts_ptr, inputs, outputs,
|
||||
&sampler->base,
|
||||
&shader->info.base);
|
||||
ralloc_free(clone);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Blend output color
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue