diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir.c b/src/gallium/auxiliary/gallivm/lp_bld_nir.c index 2fbd4561c6e..de8e81dbc26 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_nir.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_nir.c @@ -39,6 +39,11 @@ #include "nir_deref.h" #include "nir_search_helpers.h" +static bool is_aos(const struct lp_build_nir_context *bld_base) +{ + return bld_base->base.type.length == 16 && bld_base->base.type.width == 8; +} + static void visit_cf_list(struct lp_build_nir_context *bld_base, struct exec_list *list); @@ -169,7 +174,7 @@ static void assign_ssa(struct lp_build_nir_context *bld_base, int idx, LLVMValue static void assign_ssa_dest(struct lp_build_nir_context *bld_base, const nir_ssa_def *ssa, LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS]) { - assign_ssa(bld_base, ssa->index, ssa->num_components == 1 ? vals[0] : lp_nir_array_build_gather_values(bld_base->base.gallivm->builder, vals, ssa->num_components)); + assign_ssa(bld_base, ssa->index, (ssa->num_components == 1 || is_aos(bld_base)) ? 
vals[0] : lp_nir_array_build_gather_values(bld_base->base.gallivm->builder, vals, ssa->num_components)); } static void assign_reg(struct lp_build_nir_context *bld_base, const nir_reg_dest *reg, @@ -276,6 +281,10 @@ static LLVMValueRef get_alu_src(struct lp_build_nir_context *bld_base, bool need_swizzle = false; assert(value); + + if (is_aos(bld_base)) + return value; + unsigned src_components = nir_src_num_components(src.src); for (unsigned i = 0; i < num_components; ++i) { assert(src.swizzle[i] < src_components); @@ -1110,6 +1119,15 @@ static void visit_alu(struct lp_build_nir_context *bld_base, const nir_alu_instr src_bit_size[i] = nir_src_bit_size(instr->src[i].src); } + if (instr->op == nir_op_mov && is_aos(bld_base) && !instr->dest.dest.is_ssa) { + for (unsigned i = 0; i < 4; i++) { + if (instr->dest.write_mask & (1 << i)) { + assign_reg(bld_base, &instr->dest.dest.reg, (1 << i), src); + } + } + return; + } + LLVMValueRef result[NIR_MAX_VEC_COMPONENTS]; if (instr->op == nir_op_vec4 || instr->op == nir_op_vec3 || instr->op == nir_op_vec2 || instr->op == nir_op_vec8 || instr->op == nir_op_vec16) { for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { @@ -1122,7 +1140,15 @@ static void visit_alu(struct lp_build_nir_context *bld_base, const nir_alu_instr temp_chan = cast_type(bld_base, temp_chan, nir_op_infos[instr->op].input_types[0], src_bit_size[0]); result[0] = (c == 0) ? 
temp_chan : lp_build_add(get_flt_bld(bld_base, src_bit_size[0]), result[0], temp_chan); } - } else { + } else if (is_aos(bld_base)) { + if (instr->op == nir_op_fmul) { + if (LLVMIsConstant(src[0])) + src[0] = lp_nir_aos_conv_const(gallivm, src[0], 1); + if (LLVMIsConstant(src[1])) + src[1] = lp_nir_aos_conv_const(gallivm, src[1], 1); + } + result[0] = do_alu_action(bld_base, instr, src_bit_size, src); + } else { for (unsigned c = 0; c < num_components; c++) { LLVMValueRef src_chan[NIR_MAX_VEC_COMPONENTS]; @@ -2019,7 +2045,7 @@ static void visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *inst LLVMValueRef texel[NIR_MAX_VEC_COMPONENTS]; unsigned lod_src = 0; LLVMValueRef coord_undef = LLVMGetUndef(bld_base->base.int_vec_type); - + unsigned coord_vals = is_aos(bld_base) ? 1 : instr->coord_components; memset(&params, 0, sizeof(params)); enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR; @@ -2038,14 +2064,14 @@ static void visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *inst switch (instr->src[i].src_type) { case nir_tex_src_coord: { LLVMValueRef coord = get_src(bld_base, instr->src[i].src); - if (instr->coord_components == 1) + if (coord_vals == 1) coords[0] = coord; else { for (unsigned chan = 0; chan < instr->coord_components; ++chan) coords[chan] = LLVMBuildExtractValue(builder, coord, chan, ""); } - for (unsigned chan = instr->coord_components; chan < 5; chan++) + for (unsigned chan = coord_vals; chan < 5; chan++) coords[chan] = coord_undef; break; @@ -2144,7 +2170,7 @@ static void visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *inst if (instr->op == nir_texop_tex || instr->op == nir_texop_tg4 || instr->op == nir_texop_txb || instr->op == nir_texop_txl || instr->op == nir_texop_txd || instr->op == nir_texop_lod) - for (unsigned chan = 0; chan < instr->coord_components; ++chan) + for (unsigned chan = 0; chan < coord_vals; ++chan) coords[chan] = cast_type(bld_base, coords[chan], nir_type_float, 32); else if 
(instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms) for (unsigned chan = 0; chan < instr->coord_components; ++chan) @@ -2375,6 +2401,9 @@ handle_shader_output_decl(struct lp_build_nir_context *bld_base, static LLVMTypeRef get_register_type(struct lp_build_nir_context *bld_base, nir_register *reg) { + if (is_aos(bld_base)) + return bld_base->base.int_vec_type; + struct lp_build_context *int_bld = get_int_bld(bld_base, true, reg->bit_size == 1 ? 32 : reg->bit_size); LLVMTypeRef type = int_bld->vec_type; @@ -2398,6 +2427,11 @@ bool lp_build_nir_llvm( nir_remove_dead_derefs(nir); nir_remove_dead_variables(nir, nir_var_function_temp, NULL); + if (is_aos(bld_base)) { + nir_move_vec_src_uses_to_dest(nir); + nir_lower_vec_to_movs(nir, NULL, NULL); + } + nir_foreach_shader_out_variable(variable, nir) handle_shader_output_decl(bld_base, nir, variable); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir.h b/src/gallium/auxiliary/gallivm/lp_bld_nir.h index f2469a6c6f5..b68bf073111 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_nir.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_nir.h @@ -40,6 +40,16 @@ void lp_build_nir_soa(struct gallivm_state *gallivm, const struct lp_build_tgsi_params *params, LLVMValueRef (*outputs)[4]); +void lp_build_nir_aos(struct gallivm_state *gallivm, + struct nir_shader *shader, + struct lp_type type, + const unsigned char swizzles[4], + LLVMValueRef consts_ptr, + const LLVMValueRef *inputs, + LLVMValueRef *outputs, + const struct lp_build_sampler_aos *sampler, + const struct tgsi_shader_info *info); + struct lp_build_nir_context { struct lp_build_context base; @@ -266,6 +276,30 @@ struct lp_build_nir_soa_context unsigned gs_vertex_streams; }; +struct lp_build_nir_aos_context +{ + struct lp_build_nir_context bld_base; + + /* Builder for integer masks and indices */ + struct lp_build_context int_bld; + + /* + * AoS swizzle used: + * - swizzles[0] = red index + * - swizzles[1] = green index + * - swizzles[2] = blue index + * - swizzles[3] 
= alpha index + */ + unsigned char swizzles[4]; + unsigned char inv_swizzles[4]; + + LLVMValueRef consts_ptr; + const LLVMValueRef *inputs; + LLVMValueRef *outputs; + + const struct lp_build_sampler_aos *sampler; +}; + bool lp_build_nir_llvm(struct lp_build_nir_context *bld_base, struct nir_shader *nir); @@ -332,4 +366,11 @@ static inline struct lp_build_context *get_int_bld(struct lp_build_nir_context * } } +static inline struct lp_build_nir_aos_context * +lp_nir_aos_context(struct lp_build_nir_context *bld_base) +{ + return (struct lp_build_nir_aos_context *)bld_base; +} + +LLVMValueRef lp_nir_aos_conv_const(struct gallivm_state *gallivm, LLVMValueRef constval, int nc); #endif diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_nir_aos.c new file mode 100644 index 00000000000..75c4eaed388 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_nir_aos.c @@ -0,0 +1,339 @@ +/************************************************************************** + * + * Copyright 2022 Red Hat + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **************************************************************************/ + +#include "lp_bld_nir.h" +#include "lp_bld_init.h" +#include "lp_bld_const.h" +#include "lp_bld_flow.h" +#include "lp_bld_struct.h" +#include "lp_bld_swizzle.h" +#include "lp_bld_debug.h" +#include "util/u_math.h" + +static LLVMValueRef +swizzle_aos(struct lp_build_nir_context *bld_base, + LLVMValueRef a, + unsigned swizzle_x, + unsigned swizzle_y, + unsigned swizzle_z, + unsigned swizzle_w) +{ + unsigned char swizzles[4]; + struct lp_build_nir_aos_context *bld = lp_nir_aos_context(bld_base); + + assert(swizzle_x < 4); + assert(swizzle_y < 4); + assert(swizzle_z < 4); + assert(swizzle_w < 4); + + swizzles[bld->inv_swizzles[0]] = bld->swizzles[swizzle_x]; + swizzles[bld->inv_swizzles[1]] = bld->swizzles[swizzle_y]; + swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z]; + swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w]; + + return lp_build_swizzle_aos(&bld->bld_base.base, a, swizzles); +} + +LLVMValueRef lp_nir_aos_conv_const(struct gallivm_state *gallivm, LLVMValueRef constval, int nc) +{ + LLVMValueRef elems[16]; + uint8_t val = 0; + /* convert from 1..4 x f32 to 16 x unorm8 */ + for (unsigned i = 0; i < nc; i++) { + LLVMValueRef value = LLVMBuildExtractElement(gallivm->builder, constval, lp_build_const_int32(gallivm, i), ""); + assert(LLVMIsConstant(value)); + unsigned uval = LLVMConstIntGetZExtValue(value); + float f = uif(uval); + val = float_to_ubyte(f); + for (unsigned j = 0; j < 4; j++) { + elems[j * 4 + i] = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), val, 0); + } + } + for (unsigned i = nc; i < 4; i++) { + for (unsigned j = 0; j < 4; j++) { + elems[j * 4 + i] = 
LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), val, 0); + } + } + return LLVMConstVector(elems, 16); +} + +static void init_var_slots(struct lp_build_nir_context *bld_base, + nir_variable *var) +{ + struct lp_build_nir_aos_context *bld = (struct lp_build_nir_aos_context *)bld_base; + + if (!bld->outputs) + return; + unsigned this_loc = var->data.driver_location; + + bld->outputs[this_loc] = lp_build_alloca(bld_base->base.gallivm, + bld_base->base.vec_type, "output"); +} + +static void emit_var_decl(struct lp_build_nir_context *bld_base, + nir_variable *var) +{ + switch (var->data.mode) { + case nir_var_shader_out: { + init_var_slots(bld_base, var); + break; + } + default: + break; + } +} + +static void emit_load_var(struct lp_build_nir_context *bld_base, + nir_variable_mode deref_mode, + unsigned num_components, + unsigned bit_size, + nir_variable *var, + unsigned vertex_index, + LLVMValueRef indir_vertex_index, + unsigned const_index, + LLVMValueRef indir_index, + LLVMValueRef result[NIR_MAX_VEC_COMPONENTS]) +{ + struct lp_build_nir_aos_context *bld = (struct lp_build_nir_aos_context *)bld_base; + unsigned location = var->data.driver_location; + + switch (deref_mode) { + case nir_var_shader_in: + result[0] = bld->inputs[location]; + break; + default: + break; + } +} + +static void emit_store_var(struct lp_build_nir_context *bld_base, + nir_variable_mode deref_mode, + unsigned num_components, + unsigned bit_size, + nir_variable *var, + unsigned writemask, + LLVMValueRef indir_vertex_index, + unsigned const_index, + LLVMValueRef indir_index, + LLVMValueRef dst) +{ + struct lp_build_nir_aos_context *bld = (struct lp_build_nir_aos_context *)bld_base; + struct gallivm_state *gallivm = bld_base->base.gallivm; + unsigned location = var->data.driver_location; + + if (LLVMIsConstant(dst)) { + dst = lp_nir_aos_conv_const(gallivm, dst, num_components); + } + + switch (deref_mode) { + case nir_var_shader_out: + LLVMBuildStore(gallivm->builder, dst, 
bld->outputs[location]); + break; + default: + break; + } +} + +static LLVMValueRef emit_load_reg(struct lp_build_nir_context *bld_base, + struct lp_build_context *reg_bld, + const nir_reg_src *reg, + LLVMValueRef indir_src, + LLVMValueRef reg_storage) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + + return LLVMBuildLoad(gallivm->builder, reg_storage, ""); +} + +static void emit_store_reg(struct lp_build_nir_context *bld_base, + struct lp_build_context *reg_bld, + const nir_reg_dest *reg, + unsigned writemask, + LLVMValueRef indir_src, + LLVMValueRef reg_storage, + LLVMValueRef dst[NIR_MAX_VEC_COMPONENTS]) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + + if (LLVMIsConstant(dst[0])) + dst[0] = lp_nir_aos_conv_const(gallivm, dst[0], 1); + + if (writemask == 0xf) { + LLVMBuildStore(gallivm->builder, dst[0], reg_storage); + return; + } + + LLVMValueRef cur = LLVMBuildLoad(gallivm->builder, reg_storage, ""); + LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); + for (unsigned i = 0; i < 4; i++) { + if (writemask & (1 << i)) { + LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH] = { 0 }; + for (unsigned j = 0; j < 16; j++){ + if (j % 4 == i) + shuffles[j] = LLVMConstInt(i32t, 16 + j, 0); + else + shuffles[j] = LLVMConstInt(i32t, j, 0); + } + + cur = LLVMBuildShuffleVector(gallivm->builder, cur, dst[0], + LLVMConstVector(shuffles, 16), ""); + } + } + LLVMBuildStore(gallivm->builder, cur, reg_storage); +} + +static void emit_load_ubo(struct lp_build_nir_context *bld_base, + unsigned nc, + unsigned bit_size, + bool offset_is_uniform, + LLVMValueRef index, + LLVMValueRef offset, + LLVMValueRef result[NIR_MAX_VEC_COMPONENTS]) +{ + struct lp_build_nir_aos_context *bld = (struct lp_build_nir_aos_context *)bld_base; + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + struct gallivm_state *gallivm = bld_base->base.gallivm; + struct lp_type type = bld_base->base.type; + LLVMValueRef res; + + res = bld->bld_base.base.undef; + offset = 
LLVMBuildExtractElement(builder, offset, lp_build_const_int32(gallivm, 0), ""); + assert(LLVMIsConstant(offset)); + unsigned offset_val = LLVMConstIntGetZExtValue(offset) >> 2; + for (unsigned chan = 0; chan < nc; ++chan) { + LLVMValueRef this_offset = lp_build_const_int32(gallivm, offset_val + chan); + + LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &this_offset, 1, ""); + + LLVMValueRef scalar = LLVMBuildLoad(builder, scalar_ptr, ""); + + lp_build_name(scalar, "const[%u].%c", offset_val, "xyzw"[chan]); + + LLVMValueRef swizzle = lp_build_const_int32(bld->bld_base.base.gallivm, + nc == 1 ? 0 : bld->swizzles[chan]); + + res = LLVMBuildInsertElement(builder, res, scalar, swizzle, ""); + } + if (type.length > 4) { + LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; + + for (unsigned chan = 0; chan < nc; ++chan) { + shuffles[chan] = lp_build_const_int32(bld->bld_base.base.gallivm, chan); + } + + for (unsigned i = nc; i < type.length; ++i) { + shuffles[i] = shuffles[i % nc]; + } + + res = LLVMBuildShuffleVector(builder, + res, bld->bld_base.base.undef, + LLVMConstVector(shuffles, type.length), + ""); + } + + if (nc == 4) /* swizzle_aos() returns its result rather than updating res */ + res = swizzle_aos(bld_base, res, 0, 1, 2, 3); + + result[0] = res; +} + +static void emit_tex(struct lp_build_nir_context *bld_base, + struct lp_sampler_params *params) +{ + struct lp_build_nir_aos_context *bld = (struct lp_build_nir_aos_context *)bld_base; + struct lp_derivatives derivs = { 0 }; + params->type = bld_base->base.type; + params->texel[0] = bld->sampler->emit_fetch_texel(bld->sampler, + &bld->bld_base.base, + PIPE_TEXTURE_2D, + params->texture_index, + params->coords[0], + params->derivs ? 
params->derivs[0] : derivs, + 0); +} + +static void +emit_load_const(struct lp_build_nir_context *bld_base, + const nir_load_const_instr *instr, + LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS]) +{ + struct lp_build_nir_aos_context *bld = lp_nir_aos_context(bld_base); + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMValueRef elems[4]; + int nc = instr->def.num_components; + bool do_swizzle = false; + + if (nc == 4) + do_swizzle = true; + + for (unsigned i = 0; i < nc; i++) { + int idx = do_swizzle ? bld->swizzles[i] : i; + elems[idx] = LLVMConstInt(LLVMInt32TypeInContext(gallivm->context), instr->value[i].u32, bld_base->base.type.sign ? 1 : 0); + } + outval[0] = LLVMConstVector(elems, nc); +} + +void +lp_build_nir_aos(struct gallivm_state *gallivm, + struct nir_shader *shader, + struct lp_type type, + const unsigned char swizzles[4], + LLVMValueRef consts_ptr, + const LLVMValueRef *inputs, + LLVMValueRef *outputs, + const struct lp_build_sampler_aos *sampler, + const struct tgsi_shader_info *info) +{ + struct lp_build_nir_aos_context bld; + + memset(&bld, 0, sizeof bld); + lp_build_context_init(&bld.bld_base.base, gallivm, type); + lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type)); + lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type)); + + for (unsigned chan = 0; chan < 4; ++chan) { + bld.swizzles[chan] = swizzles[chan]; + bld.inv_swizzles[swizzles[chan]] = chan; + } + bld.sampler = sampler; + + bld.bld_base.shader = shader; + + bld.inputs = inputs; + bld.outputs = outputs; + bld.consts_ptr = consts_ptr; + + bld.bld_base.load_var = emit_load_var; + bld.bld_base.store_var = emit_store_var; + bld.bld_base.load_reg = emit_load_reg; + bld.bld_base.store_reg = emit_store_reg; + bld.bld_base.load_ubo = emit_load_ubo; + bld.bld_base.load_const = emit_load_const; + + bld.bld_base.tex = emit_tex; + bld.bld_base.emit_var_decl = emit_var_decl; + + lp_build_nir_llvm(&bld.bld_base, shader); +} diff --git 
a/src/gallium/auxiliary/meson.build b/src/gallium/auxiliary/meson.build index d0b3fabb5c1..d6cb65e2b28 100644 --- a/src/gallium/auxiliary/meson.build +++ b/src/gallium/auxiliary/meson.build @@ -383,6 +383,7 @@ if draw_with_llvm 'gallivm/lp_bld_misc.h', 'gallivm/lp_bld_nir.h', 'gallivm/lp_bld_nir.c', + 'gallivm/lp_bld_nir_aos.c', 'gallivm/lp_bld_nir_soa.c', 'gallivm/lp_bld_pack.c', 'gallivm/lp_bld_pack.h', diff --git a/src/gallium/drivers/llvmpipe/lp_linear.c b/src/gallium/drivers/llvmpipe/lp_linear.c index c069f04486e..9777061ff8e 100644 --- a/src/gallium/drivers/llvmpipe/lp_linear.c +++ b/src/gallium/drivers/llvmpipe/lp_linear.c @@ -87,7 +87,13 @@ lp_fs_linear_run(const struct lp_rast_state *state, struct lp_linear_sampler samp[LP_MAX_LINEAR_TEXTURES]; struct lp_linear_interp interp[LP_MAX_LINEAR_INPUTS]; + /* + * Both converted constant arrays are declared at function scope: + * jit.constants stores a pointer to one of them, so the storage has to + * outlive the conversion block below. + */ uint8_t constants[LP_MAX_LINEAR_CONSTANTS][4]; + uint8_t nir_constants[LP_MAX_LINEAR_CONSTANTS * 4]; const float w0 = a0[0][3]; float oow = 1.0f/w0; @@ -110,18 +116,38 @@ lp_fs_linear_run(const struct lp_rast_state *state, /* XXX: Per statechange: */ - for (i = 0; i < nr_consts; i++) { - for (j = 0; j < 4; j++) { - float val = state->jit_context.constants[0][i*4+j]; + if (variant->shader->base.type == PIPE_SHADER_IR_TGSI) { + for (i = 0; i < nr_consts; i++) { + for (j = 0; j < 4; j++) { + float val = state->jit_context.constants[0][i*4+j]; + if (val < 0.0f || val > 1.0f) { + if (LP_DEBUG & DEBUG_LINEAR2) + debug_printf(" -- const[%d] out of range %f\n", i, val); + goto fail; + } + constants[i][j] = (uint8_t)(val * 255.0f); + } + } + jit.constants = (const uint8_t (*)[4])constants; + } else { + /* Refuse to convert more scalars than nir_constants can hold. */ + if (state->jit_context.num_constants[0] > LP_MAX_LINEAR_CONSTANTS * 4) { + if (LP_DEBUG & DEBUG_LINEAR2) + debug_printf(" -- too many constants\n"); + goto fail; + } + + for (i = 0; i < state->jit_context.num_constants[0]; i++){ + float val = state->jit_context.constants[0][i]; if (val < 0.0f || val > 1.0f) { if (LP_DEBUG & DEBUG_LINEAR2) - debug_printf(" -- const[%d] out of range\n", i); + debug_printf(" -- const[%d] out of range %f\n", i, val); goto fail; } - constants[i][j] = 
(uint8_t)(val * 255.0f); + nir_constants[i] = (uint8_t)(val * 255.0f); } + jit.constants = (const uint8_t (*)[4])nir_constants; } - jit.constants = (const uint8_t (*)[4])constants; /* We assume BGRA ordering */ assert(variant->key.cbuf_format[0] == PIPE_FORMAT_B8G8R8X8_UNORM || diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index c55b939b985..3771d1b5262 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -3878,7 +3878,7 @@ llvmpipe_create_fs_state(struct pipe_context *pipe, if (templ->type == PIPE_SHADER_IR_TGSI) llvmpipe_fs_analyse(shader, templ->tokens); else - shader->kind = LP_FS_KIND_GENERAL; + llvmpipe_fs_analyse_nir(shader); return shader; } diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.h b/src/gallium/drivers/llvmpipe/lp_state_fs.h index f88ef70dbd2..847efb62154 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.h +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.h @@ -233,6 +233,8 @@ struct lp_fragment_shader }; +void +llvmpipe_fs_analyse_nir(struct lp_fragment_shader *shader); void llvmpipe_fs_analyse(struct lp_fragment_shader *shader, const struct tgsi_token *tokens); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs_analysis.c b/src/gallium/drivers/llvmpipe/lp_state_fs_analysis.c index cd397e10824..6383219c36e 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs_analysis.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs_analysis.c @@ -34,7 +34,7 @@ #include "tgsi/tgsi_dump.h" #include "lp_debug.h" #include "lp_state.h" - +#include "nir.h" /* * Detect Aero minification shaders. 
@@ -173,6 +173,169 @@ finished: return TRUE; } +/* + * Walk one function and return false as soon as an instruction is found + * that the linear path cannot handle. Texture fetches are recorded in + * info->tex / info->num_texs along the way. + */ +static bool +llvmpipe_nir_fn_is_linear_compat(struct nir_shader *shader, + nir_function_impl *impl, + struct lp_tgsi_info *info) +{ + nir_foreach_block(block, impl) { + nir_foreach_instr_safe(instr, block) { + switch (instr->type) { + case nir_instr_type_deref: + case nir_instr_type_load_const: + break; + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + if (intrin->intrinsic != nir_intrinsic_load_deref && + intrin->intrinsic != nir_intrinsic_store_deref && + intrin->intrinsic != nir_intrinsic_load_ubo) + return false; + + if (intrin->intrinsic == nir_intrinsic_load_ubo) { + /* Only constant buffer 0 read at a constant offset is accepted. */ + if (!nir_src_is_const(intrin->src[0]) || + !nir_src_is_const(intrin->src[1])) + return false; + nir_load_const_instr *load = + nir_instr_as_load_const(intrin->src[0].ssa->parent_instr); + if (load->value[0].u32 != 0) + return false; + } + break; + } + case nir_instr_type_tex: { + nir_tex_instr *tex = nir_instr_as_tex(instr); + /* Stop before recording more textures than the linear path takes. */ + if (info->num_texs >= LP_MAX_LINEAR_TEXTURES) + return false; + struct lp_tgsi_texture_info *tex_info = &info->tex[info->num_texs]; + + for (unsigned i = 0; i < tex->num_srcs; i++) { + switch (tex->src[i].src_type) { + case nir_tex_src_coord: { + nir_ssa_scalar scalar = nir_ssa_scalar_resolved(tex->src[i].src.ssa, 0); + if (scalar.def->parent_instr->type != nir_instr_type_intrinsic) + return false; + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(scalar.def->parent_instr); + if (intrin->intrinsic != nir_intrinsic_load_deref) + return false; + nir_deref_instr *deref = nir_instr_as_deref(intrin->src[0].ssa->parent_instr); + nir_variable *var = nir_deref_instr_get_variable(deref); + if (!var || var->data.mode != nir_var_shader_in) + return false; + break; + } + default: + continue; + } + } + + switch (tex->op) { + case nir_texop_tex: + tex_info->modifier = LP_BLD_TEX_MODIFIER_NONE; + break; + default: + /* inaccurate but sufficient. 
*/ + tex_info->modifier = LP_BLD_TEX_MODIFIER_EXPLICIT_LOD; + return false; + } + switch (tex->sampler_dim) { + case GLSL_SAMPLER_DIM_2D: + tex_info->target = TGSI_TEXTURE_2D; + break; + default: + /* inaccurate but sufficient. */ + tex_info->target = TGSI_TEXTURE_1D; + return false; + } + + tex_info->sampler_unit = tex->sampler_index; + + /* this is enforced in the scanner previously. */ + tex_info->coord[0].file = TGSI_FILE_INPUT; + tex_info->coord[1].file = TGSI_FILE_INPUT; + tex_info->coord[1].swizzle = 1; + info->num_texs++; + break; + } + case nir_instr_type_alu: { + nir_alu_instr *alu = nir_instr_as_alu(instr); + if (alu->op != nir_op_mov && + alu->op != nir_op_vec2 && + alu->op != nir_op_vec4 && + alu->op != nir_op_fmul) + return false; + + if (alu->op == nir_op_fmul) { + unsigned num_src = nir_op_infos[alu->op].num_inputs; + for (unsigned s = 0; s < num_src; s++) { + if (nir_src_is_const(alu->src[s].src)) { + nir_load_const_instr *load = + nir_instr_as_load_const(alu->src[s].src.ssa->parent_instr); + + if (load->def.bit_size != 32) + return false; + for (unsigned c = 0; c < load->def.num_components; c++) { + if (load->value[c].f32 < 0.0 || load->value[c].f32 > 1.0) { + info->unclamped_immediates = true; + return false; + } + } + } + } + } + break; + } + default: + return false; + } + } + } + return true; +} + +/* A shader is linear-compatible only if every function with a body is. */ +static bool +llvmpipe_nir_is_linear_compat(struct nir_shader *shader, + struct lp_tgsi_info *info) +{ + nir_foreach_function(function, shader) { + if (function->impl) { + if (!llvmpipe_nir_fn_is_linear_compat(shader, function->impl, info)) + return false; + } + } + return true; +} + +/* + * Classify a NIR shader: LP_FS_KIND_LLVM_LINEAR if every check below + * passes, LP_FS_KIND_GENERAL otherwise. + */ +void +llvmpipe_fs_analyse_nir(struct lp_fragment_shader *shader) +{ + shader->kind = LP_FS_KIND_GENERAL; + + /* + * llvmpipe_nir_is_linear_compat() updates num_texs and + * unclamped_immediates, so it must run before those two are tested. + */ + if (shader->info.base.num_inputs <= LP_MAX_LINEAR_INPUTS && + shader->info.base.num_outputs == 1 && + !shader->info.indirect_textures && + !shader->info.sampler_texture_units_different && + llvmpipe_nir_is_linear_compat(shader->base.ir.nir, &shader->info) && + !shader->info.unclamped_immediates && + shader->info.num_texs <= 
LP_MAX_LINEAR_TEXTURES) { + shader->kind = LP_FS_KIND_LLVM_LINEAR; + } +} void llvmpipe_fs_analyse(struct lp_fragment_shader *shader, diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs_linear_llvm.c b/src/gallium/drivers/llvmpipe/lp_state_fs_linear_llvm.c index 28339ca44a0..f3b447da74c 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs_linear_llvm.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs_linear_llvm.c @@ -52,6 +52,7 @@ #include "gallivm/lp_bld_flow.h" #include "gallivm/lp_bld_printf.h" #include "gallivm/lp_bld_debug.h" +#include "gallivm/lp_bld_nir.h" #include "lp_bld_alpha.h" #include "lp_bld_blend.h" @@ -186,11 +187,22 @@ llvm_fragment_body(struct lp_build_context *bld, outputs[i] = bld->undef; } - lp_build_tgsi_aos(gallivm, shader->base.tokens, fs_type, - bgra_swizzles, - consts_ptr, inputs, outputs, - &sampler->base, - &shader->info.base); + if (shader->base.type == PIPE_SHADER_IR_TGSI) + lp_build_tgsi_aos(gallivm, shader->base.tokens, fs_type, + bgra_swizzles, + consts_ptr, inputs, outputs, + &sampler->base, + &shader->info.base); + else { + nir_shader *clone = nir_shader_clone(NULL, shader->base.ir.nir); + lp_build_nir_aos(gallivm, clone, fs_type, + bgra_swizzles, + consts_ptr, inputs, outputs, + &sampler->base, + &shader->info.base); + ralloc_free(clone); + } + /* * Blend output color