mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-04-20 03:30:36 +02:00
etnaviv: add alternative NIR compiler
enable with ETNA_MESA_DEBUG=nir Signed-off-by: Jonathan Marek <jonathan@marek.ca> Reviewed-by: Philipp Zabel <p.zabel@pengutronix.de>
This commit is contained in:
parent
ee1ed59458
commit
ed7a27719a
12 changed files with 2388 additions and 37 deletions
|
|
@ -18,6 +18,8 @@ C_SOURCES := \
|
|||
etnaviv_clear_blit.h \
|
||||
etnaviv_compiler.c \
|
||||
etnaviv_compiler.h \
|
||||
etnaviv_compiler_nir.c \
|
||||
etnaviv_compiler_nir_emit.h \
|
||||
etnaviv_context.c \
|
||||
etnaviv_context.h \
|
||||
etnaviv_debug.h \
|
||||
|
|
|
|||
|
|
@ -2286,6 +2286,9 @@ copy_uniform_state_to_shader(struct etna_compile *c, struct etna_shader_variant
|
|||
bool
|
||||
etna_compile_shader(struct etna_shader_variant *v)
|
||||
{
|
||||
if (DBG_ENABLED(ETNA_DBG_NIR))
|
||||
return etna_compile_shader_nir(v);
|
||||
|
||||
/* Create scratch space that may be too large to fit on stack
|
||||
*/
|
||||
bool ret;
|
||||
|
|
@ -2449,11 +2452,12 @@ etna_compile_shader(struct etna_shader_variant *v)
|
|||
etna_compile_fill_in_labels(c);
|
||||
|
||||
/* fill in output structure */
|
||||
v->processor = c->info.processor;
|
||||
v->stage = c->info.processor == PIPE_SHADER_FRAGMENT ? MESA_SHADER_FRAGMENT : MESA_SHADER_VERTEX;
|
||||
v->code_size = c->inst_ptr * 4;
|
||||
v->code = mem_dup(c->code, c->inst_ptr * 16);
|
||||
v->num_loops = c->num_loops;
|
||||
v->num_temps = c->next_free_native;
|
||||
v->vs_id_in_reg = -1;
|
||||
v->vs_pos_out_reg = -1;
|
||||
v->vs_pointsize_out_reg = -1;
|
||||
v->ps_color_out_reg = -1;
|
||||
|
|
@ -2483,7 +2487,7 @@ extern const char *tgsi_swizzle_names[];
|
|||
void
|
||||
etna_dump_shader(const struct etna_shader_variant *shader)
|
||||
{
|
||||
if (shader->processor == PIPE_SHADER_VERTEX)
|
||||
if (shader->stage == MESA_SHADER_VERTEX)
|
||||
printf("VERT\n");
|
||||
else
|
||||
printf("FRAG\n");
|
||||
|
|
@ -2502,22 +2506,42 @@ etna_dump_shader(const struct etna_shader_variant *shader)
|
|||
shader->uniforms.imm_data[idx],
|
||||
shader->uniforms.imm_contents[idx]);
|
||||
}
|
||||
printf("inputs:\n");
|
||||
for (int idx = 0; idx < shader->infile.num_reg; ++idx) {
|
||||
printf(" [%i] name=%s index=%i comps=%i\n", shader->infile.reg[idx].reg,
|
||||
tgsi_semantic_names[shader->infile.reg[idx].semantic.Name],
|
||||
shader->infile.reg[idx].semantic.Index,
|
||||
shader->infile.reg[idx].num_components);
|
||||
}
|
||||
printf("outputs:\n");
|
||||
for (int idx = 0; idx < shader->outfile.num_reg; ++idx) {
|
||||
printf(" [%i] name=%s index=%i comps=%i\n", shader->outfile.reg[idx].reg,
|
||||
tgsi_semantic_names[shader->outfile.reg[idx].semantic.Name],
|
||||
shader->outfile.reg[idx].semantic.Index,
|
||||
shader->outfile.reg[idx].num_components);
|
||||
|
||||
if (DBG_ENABLED(ETNA_DBG_NIR)) {
|
||||
printf("inputs:\n");
|
||||
for (int idx = 0; idx < shader->infile.num_reg; ++idx) {
|
||||
printf(" [%i] name=%s comps=%i\n", shader->infile.reg[idx].reg,
|
||||
(shader->stage == MESA_SHADER_VERTEX) ?
|
||||
gl_vert_attrib_name(shader->infile.reg[idx].slot) :
|
||||
gl_varying_slot_name(shader->infile.reg[idx].slot),
|
||||
shader->infile.reg[idx].num_components);
|
||||
}
|
||||
printf("outputs:\n");
|
||||
for (int idx = 0; idx < shader->outfile.num_reg; ++idx) {
|
||||
printf(" [%i] name=%s comps=%i\n", shader->outfile.reg[idx].reg,
|
||||
(shader->stage == MESA_SHADER_VERTEX) ?
|
||||
gl_varying_slot_name(shader->outfile.reg[idx].slot) :
|
||||
gl_frag_result_name(shader->outfile.reg[idx].slot),
|
||||
shader->outfile.reg[idx].num_components);
|
||||
}
|
||||
} else {
|
||||
printf("inputs:\n");
|
||||
for (int idx = 0; idx < shader->infile.num_reg; ++idx) {
|
||||
printf(" [%i] name=%s index=%i comps=%i\n", shader->infile.reg[idx].reg,
|
||||
tgsi_semantic_names[shader->infile.reg[idx].semantic.Name],
|
||||
shader->infile.reg[idx].semantic.Index,
|
||||
shader->infile.reg[idx].num_components);
|
||||
}
|
||||
printf("outputs:\n");
|
||||
for (int idx = 0; idx < shader->outfile.num_reg; ++idx) {
|
||||
printf(" [%i] name=%s index=%i comps=%i\n", shader->outfile.reg[idx].reg,
|
||||
tgsi_semantic_names[shader->outfile.reg[idx].semantic.Name],
|
||||
shader->outfile.reg[idx].semantic.Index,
|
||||
shader->outfile.reg[idx].num_components);
|
||||
}
|
||||
}
|
||||
printf("special:\n");
|
||||
if (shader->processor == PIPE_SHADER_VERTEX) {
|
||||
if (shader->stage == MESA_SHADER_VERTEX) {
|
||||
printf(" vs_pos_out_reg=%i\n", shader->vs_pos_out_reg);
|
||||
printf(" vs_pointsize_out_reg=%i\n", shader->vs_pointsize_out_reg);
|
||||
printf(" vs_load_balancing=0x%08x\n", shader->vs_load_balancing);
|
||||
|
|
@ -2531,6 +2555,9 @@ etna_dump_shader(const struct etna_shader_variant *shader)
|
|||
void
|
||||
etna_destroy_shader(struct etna_shader_variant *shader)
|
||||
{
|
||||
if (DBG_ENABLED(ETNA_DBG_NIR))
|
||||
return etna_destroy_shader_nir(shader);
|
||||
|
||||
assert(shader);
|
||||
|
||||
FREE(shader->code);
|
||||
|
|
@ -2554,6 +2581,9 @@ bool
|
|||
etna_link_shader(struct etna_shader_link_info *info,
|
||||
const struct etna_shader_variant *vs, const struct etna_shader_variant *fs)
|
||||
{
|
||||
if (DBG_ENABLED(ETNA_DBG_NIR))
|
||||
return etna_link_shader_nir(info, vs, fs);
|
||||
|
||||
int comp_ofs = 0;
|
||||
/* For each fragment input we need to find the associated vertex shader
|
||||
* output, which can be found by matching on semantic name and index. A
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@
|
|||
#include "etnaviv_shader.h"
|
||||
#include "pipe/p_compiler.h"
|
||||
#include "pipe/p_shader_tokens.h"
|
||||
#include "compiler/shader_enums.h"
|
||||
|
||||
/* XXX some of these are pretty arbitrary limits, may be better to switch
|
||||
* to dynamic allocation at some point.
|
||||
|
|
@ -47,6 +48,7 @@
|
|||
struct etna_shader_inout {
|
||||
int reg; /* native register */
|
||||
struct tgsi_declaration_semantic semantic; /* tgsi semantic name and index */
|
||||
int slot; /* nir: gl_varying_slot or gl_vert_attrib */
|
||||
int num_components;
|
||||
};
|
||||
|
||||
|
|
@ -59,7 +61,7 @@ struct etna_shader_io_file {
|
|||
struct etna_shader_variant {
|
||||
uint32_t id; /* for debug */
|
||||
|
||||
uint processor; /* TGSI_PROCESSOR_... */
|
||||
gl_shader_stage stage;
|
||||
uint32_t code_size; /* code size in uint32 words */
|
||||
uint32_t *code;
|
||||
unsigned num_loops;
|
||||
|
|
@ -77,12 +79,13 @@ struct etna_shader_variant {
|
|||
/* outputs (for linking) */
|
||||
struct etna_shader_io_file outfile;
|
||||
|
||||
/* index into outputs (for linking) */
|
||||
/* index into outputs (for linking) - only for TGSI compiler */
|
||||
int output_count_per_semantic[TGSI_SEMANTIC_COUNT];
|
||||
struct etna_shader_inout * *output_per_semantic_list; /* list of pointers to outputs */
|
||||
struct etna_shader_inout **output_per_semantic[TGSI_SEMANTIC_COUNT];
|
||||
|
||||
/* special outputs (vs only) */
|
||||
/* special inputs/outputs (vs only) */
|
||||
int vs_id_in_reg; /* vertexid+instanceid input */
|
||||
int vs_pos_out_reg; /* VS position output */
|
||||
int vs_pointsize_out_reg; /* VS point size output */
|
||||
uint32_t vs_load_balancing;
|
||||
|
|
@ -134,4 +137,20 @@ etna_link_shader(struct etna_shader_link_info *info,
|
|||
void
|
||||
etna_destroy_shader(struct etna_shader_variant *shader);
|
||||
|
||||
/* NIR compiler */
|
||||
|
||||
bool
|
||||
etna_compile_shader_nir(struct etna_shader_variant *shader);
|
||||
|
||||
void
|
||||
etna_dump_shader_nir(const struct etna_shader_variant *shader);
|
||||
|
||||
bool
|
||||
etna_link_shader_nir(struct etna_shader_link_info *info,
|
||||
const struct etna_shader_variant *vs,
|
||||
const struct etna_shader_variant *fs);
|
||||
|
||||
void
|
||||
etna_destroy_shader_nir(struct etna_shader_variant *shader);
|
||||
|
||||
#endif
|
||||
|
|
|
|||
853
src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c
Normal file
853
src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c
Normal file
|
|
@ -0,0 +1,853 @@
|
|||
/*
|
||||
* Copyright (c) 2012-2019 Etnaviv Project
|
||||
* Copyright (c) 2019 Zodiac Inflight Innovations
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sub license,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Jonathan Marek <jonathan@marek.ca>
|
||||
* Wladimir J. van der Laan <laanwj@gmail.com>
|
||||
*/
|
||||
|
||||
#include "etnaviv_compiler.h"
|
||||
#include "etnaviv_asm.h"
|
||||
#include "etnaviv_context.h"
|
||||
#include "etnaviv_debug.h"
|
||||
#include "etnaviv_disasm.h"
|
||||
#include "etnaviv_uniforms.h"
|
||||
#include "etnaviv_util.h"
|
||||
|
||||
#include <math.h>
|
||||
#include "util/u_memory.h"
|
||||
#include "util/register_allocate.h"
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
#include "compiler/nir/nir_worklist.h"
|
||||
|
||||
#include "util/u_half.h"
|
||||
|
||||
struct etna_compile {
|
||||
nir_shader *nir;
|
||||
#define is_fs(c) ((c)->nir->info.stage == MESA_SHADER_FRAGMENT)
|
||||
const struct etna_specs *specs;
|
||||
struct etna_shader_variant *variant;
|
||||
|
||||
/* register assigned to each output, indexed by driver_location */
|
||||
unsigned output_reg[ETNA_NUM_INPUTS];
|
||||
|
||||
/* block # to instr index */
|
||||
unsigned *block_ptr;
|
||||
|
||||
/* Code generation */
|
||||
int inst_ptr; /* current instruction pointer */
|
||||
struct etna_inst code[ETNA_MAX_INSTRUCTIONS * ETNA_INST_SIZE];
|
||||
|
||||
/* There was an error during compilation */
|
||||
bool error;
|
||||
};
|
||||
|
||||
/* Report a compilation error.
 *
 * Prints the printf-style message, marks the compile context as failed
 * (ctx->error = true) and trips an assert so that debug builds stop at the
 * point of failure.
 *
 * ctx is parenthesized so that any pointer expression (e.g. &obj) can be
 * passed, not only a plain identifier.
 */
#define compile_error(ctx, ...) ({ \
   printf(__VA_ARGS__); \
   (ctx)->error = true; \
   assert(0); \
})
|
||||
|
||||
/* io related lowering
|
||||
* run after lower_int_to_float because it adds i2f/f2i ops
|
||||
*/
|
||||
static void
|
||||
etna_lower_io(nir_shader *shader, struct etna_shader_variant *v)
|
||||
{
|
||||
bool rb_swap = shader->info.stage == MESA_SHADER_FRAGMENT && v->key.frag_rb_swap;
|
||||
|
||||
unsigned color_location = 0;
|
||||
nir_foreach_variable(var, &shader->outputs) {
|
||||
switch (var->data.location) {
|
||||
case FRAG_RESULT_COLOR:
|
||||
case FRAG_RESULT_DATA0:
|
||||
color_location = var->data.driver_location;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
nir_foreach_function(function, shader) {
|
||||
nir_builder b;
|
||||
nir_builder_init(&b, function->impl);
|
||||
|
||||
nir_foreach_block(block, function->impl) {
|
||||
nir_foreach_instr_safe(instr, block) {
|
||||
if (instr->type == nir_instr_type_intrinsic) {
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_load_front_face: {
|
||||
/* front face inverted (run after int_to_float, so invert as float) */
|
||||
b.cursor = nir_after_instr(instr);
|
||||
|
||||
nir_ssa_def *ssa = nir_seq(&b, &intr->dest.ssa, nir_imm_float(&b, 0.0));
|
||||
nir_ssa_def_rewrite_uses_after(&intr->dest.ssa,
|
||||
nir_src_for_ssa(ssa),
|
||||
ssa->parent_instr);
|
||||
} break;
|
||||
case nir_intrinsic_store_output: {
|
||||
if (!rb_swap || nir_intrinsic_base(intr) != color_location)
|
||||
break;
|
||||
b.cursor = nir_before_instr(instr);
|
||||
|
||||
nir_ssa_def *ssa = nir_mov(&b, intr->src[0].ssa);
|
||||
nir_alu_instr *alu = nir_instr_as_alu(ssa->parent_instr);
|
||||
alu->src[0].swizzle[0] = 2;
|
||||
alu->src[0].swizzle[2] = 0;
|
||||
nir_instr_rewrite_src(instr, &intr->src[0], nir_src_for_ssa(ssa));
|
||||
} break;
|
||||
case nir_intrinsic_load_instance_id: {
|
||||
b.cursor = nir_after_instr(instr);
|
||||
nir_ssa_def *ssa = nir_i2f32(&b, &intr->dest.ssa);
|
||||
nir_ssa_def_rewrite_uses_after(&intr->dest.ssa,
|
||||
nir_src_for_ssa(ssa),
|
||||
ssa->parent_instr);
|
||||
} break;
|
||||
case nir_intrinsic_load_uniform: {
|
||||
/* multiply by 16 and convert to int */
|
||||
b.cursor = nir_before_instr(instr);
|
||||
nir_ssa_def *ssa = nir_f2u32(&b, nir_fmul(&b, intr->src[0].ssa,
|
||||
nir_imm_float(&b, 16.0f)));
|
||||
nir_instr_rewrite_src(instr, &intr->src[0], nir_src_for_ssa(ssa));
|
||||
} break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (instr->type != nir_instr_type_tex)
|
||||
continue;
|
||||
|
||||
nir_tex_instr *tex = nir_instr_as_tex(instr);
|
||||
nir_src *coord = NULL;
|
||||
nir_src *lod_bias = NULL;
|
||||
unsigned lod_bias_idx;
|
||||
|
||||
assert(tex->sampler_index == tex->texture_index);
|
||||
|
||||
for (unsigned i = 0; i < tex->num_srcs; i++) {
|
||||
switch (tex->src[i].src_type) {
|
||||
case nir_tex_src_coord:
|
||||
coord = &tex->src[i].src;
|
||||
break;
|
||||
case nir_tex_src_bias:
|
||||
case nir_tex_src_lod:
|
||||
assert(!lod_bias);
|
||||
lod_bias = &tex->src[i].src;
|
||||
lod_bias_idx = i;
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
|
||||
/* use a dummy load_uniform here to represent texcoord scale */
|
||||
b.cursor = nir_before_instr(instr);
|
||||
nir_intrinsic_instr *load =
|
||||
nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_uniform);
|
||||
nir_intrinsic_set_base(load, ~tex->sampler_index);
|
||||
load->num_components = 2;
|
||||
load->src[0] = nir_src_for_ssa(nir_imm_float(&b, 0.0f));
|
||||
nir_ssa_dest_init(&load->instr, &load->dest, 2, 32, NULL);
|
||||
nir_intrinsic_set_type(load, nir_type_float);
|
||||
|
||||
nir_builder_instr_insert(&b, &load->instr);
|
||||
|
||||
nir_ssa_def *new_coord = nir_fmul(&b, coord->ssa, &load->dest.ssa);
|
||||
nir_instr_rewrite_src(&tex->instr, coord, nir_src_for_ssa(new_coord));
|
||||
}
|
||||
|
||||
/* pre HALTI5 needs texture sources in a single source */
|
||||
|
||||
if (!lod_bias || v->shader->specs->halti >= 5)
|
||||
continue;
|
||||
|
||||
assert(coord && lod_bias && tex->coord_components < 4);
|
||||
|
||||
nir_alu_instr *vec = nir_alu_instr_create(shader, nir_op_vec4);
|
||||
for (unsigned i = 0; i < tex->coord_components; i++) {
|
||||
vec->src[i].src = nir_src_for_ssa(coord->ssa);
|
||||
vec->src[i].swizzle[0] = i;
|
||||
}
|
||||
for (unsigned i = tex->coord_components; i < 4; i++)
|
||||
vec->src[i].src = nir_src_for_ssa(lod_bias->ssa);
|
||||
|
||||
vec->dest.write_mask = 0xf;
|
||||
nir_ssa_dest_init(&vec->instr, &vec->dest.dest, 4, 32, NULL);
|
||||
|
||||
nir_tex_instr_remove_src(tex, lod_bias_idx);
|
||||
nir_instr_rewrite_src(&tex->instr, coord, nir_src_for_ssa(&vec->dest.dest.ssa));
|
||||
tex->coord_components = 4;
|
||||
|
||||
nir_instr_insert_before(&tex->instr, &vec->instr);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
etna_lower_alu_to_scalar(nir_shader *shader, const struct etna_specs *specs)
|
||||
{
|
||||
BITSET_DECLARE(scalar_ops, nir_num_opcodes);
|
||||
BITSET_ZERO(scalar_ops);
|
||||
|
||||
BITSET_SET(scalar_ops, nir_op_frsq);
|
||||
BITSET_SET(scalar_ops, nir_op_frcp);
|
||||
BITSET_SET(scalar_ops, nir_op_flog2);
|
||||
BITSET_SET(scalar_ops, nir_op_fexp2);
|
||||
BITSET_SET(scalar_ops, nir_op_fsqrt);
|
||||
BITSET_SET(scalar_ops, nir_op_fcos);
|
||||
BITSET_SET(scalar_ops, nir_op_fsin);
|
||||
BITSET_SET(scalar_ops, nir_op_fdiv);
|
||||
|
||||
if (!specs->has_halti2_instructions)
|
||||
BITSET_SET(scalar_ops, nir_op_fdot2);
|
||||
|
||||
nir_lower_alu_to_scalar(shader, scalar_ops);
|
||||
}
|
||||
|
||||
static void
|
||||
etna_lower_alu_impl(nir_function_impl *impl, struct etna_compile *c)
|
||||
{
|
||||
nir_shader *shader = impl->function->shader;
|
||||
|
||||
nir_builder b;
|
||||
nir_builder_init(&b, impl);
|
||||
|
||||
/* in a seperate loop so we can apply the multiple-uniform logic to the new fmul */
|
||||
nir_foreach_block(block, impl) {
|
||||
nir_foreach_instr_safe(instr, block) {
|
||||
if (instr->type != nir_instr_type_alu)
|
||||
continue;
|
||||
|
||||
nir_alu_instr *alu = nir_instr_as_alu(instr);
|
||||
/* multiply sin/cos src by constant
|
||||
* TODO: do this earlier (but it breaks const_prop opt)
|
||||
*/
|
||||
if (alu->op == nir_op_fsin || alu->op == nir_op_fcos) {
|
||||
b.cursor = nir_before_instr(instr);
|
||||
|
||||
nir_ssa_def *imm = c->specs->has_new_transcendentals ?
|
||||
nir_imm_float(&b, 1.0 / M_PI) :
|
||||
nir_imm_float(&b, 2.0 / M_PI);
|
||||
|
||||
nir_instr_rewrite_src(instr, &alu->src[0].src,
|
||||
nir_src_for_ssa(nir_fmul(&b, alu->src[0].src.ssa, imm)));
|
||||
}
|
||||
|
||||
/* change transcendental ops to vec2 and insert vec1 mul for the result
|
||||
* TODO: do this earlier (but it breaks with optimizations)
|
||||
*/
|
||||
if (c->specs->has_new_transcendentals && (
|
||||
alu->op == nir_op_fdiv || alu->op == nir_op_flog2 ||
|
||||
alu->op == nir_op_fsin || alu->op == nir_op_fcos)) {
|
||||
nir_ssa_def *ssa = &alu->dest.dest.ssa;
|
||||
|
||||
assert(ssa->num_components == 1);
|
||||
|
||||
nir_alu_instr *mul = nir_alu_instr_create(shader, nir_op_fmul);
|
||||
mul->src[0].src = mul->src[1].src = nir_src_for_ssa(ssa);
|
||||
mul->src[1].swizzle[0] = 1;
|
||||
|
||||
mul->dest.write_mask = 1;
|
||||
nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, 32, NULL);
|
||||
|
||||
ssa->num_components = 2;
|
||||
|
||||
mul->dest.saturate = alu->dest.saturate;
|
||||
alu->dest.saturate = 0;
|
||||
|
||||
nir_instr_insert_after(instr, &mul->instr);
|
||||
|
||||
nir_ssa_def_rewrite_uses_after(ssa, nir_src_for_ssa(&mul->dest.dest.ssa), &mul->instr);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Apply etna_lower_alu_impl to every function of the shader that has an
 * implementation.
 */
static void etna_lower_alu(nir_shader *shader, struct etna_compile *c)
{
   nir_foreach_function(func, shader) {
      if (!func->impl)
         continue;

      etna_lower_alu_impl(func->impl, c);
   }
}
|
||||
|
||||
static void
|
||||
emit_inst(struct etna_compile *c, struct etna_inst *inst)
|
||||
{
|
||||
c->code[c->inst_ptr++] = *inst;
|
||||
}
|
||||
|
||||
/* Maps nir sources to etna_inst sources.
 *
 * Each etna_inst source slot j owns the 2-bit field at bit position 2*j.
 * The field holds the index of the nir source placed in that slot; the
 * value 3 (written X in the names) means the slot is left unused.
 */
enum {
   SRC_0_1_2 = 0 | (1 << 2) | (2 << 4),
   SRC_0_1_X = 0 | (1 << 2) | (3 << 4),
   SRC_0_X_X = 0 | (3 << 2) | (3 << 4),
   SRC_0_X_1 = 0 | (3 << 2) | (1 << 4),
   SRC_0_1_0 = 0 | (1 << 2) | (0 << 4),
   SRC_X_X_0 = 3 | (3 << 2) | (0 << 4),
   SRC_0_X_0 = 0 | (3 << 2) | (0 << 4),
};
|
||||
|
||||
/* Everything needed to turn one nir ALU opcode into an etna_inst. */
struct etna_op_info {
   /* hardware opcode: one of INST_OPCODE_ */
   uint8_t opcode;
   /* source routing: one of the SRC_ enum values */
   uint8_t src;
   /* condition code: one of INST_CONDITION_ */
   uint8_t cond;
   /* operand type: one of INST_TYPE_ */
   uint8_t type;
};
|
||||
|
||||
static const struct etna_op_info etna_ops[] = {
|
||||
[0 ... nir_num_opcodes - 1] = {0xff},
|
||||
#undef TRUE
|
||||
#undef FALSE
|
||||
#define OPCT(nir, op, src, cond, type) [nir_op_##nir] = { \
|
||||
INST_OPCODE_##op, \
|
||||
SRC_##src, \
|
||||
INST_CONDITION_##cond, \
|
||||
INST_TYPE_##type \
|
||||
}
|
||||
#define OPC(nir, op, src, cond) OPCT(nir, op, src, cond, F32)
|
||||
#define OP(nir, op, src) OPC(nir, op, src, TRUE)
|
||||
OP(mov, MOV, X_X_0), OP(fneg, MOV, X_X_0), OP(fabs, MOV, X_X_0), OP(fsat, MOV, X_X_0),
|
||||
OP(fmul, MUL, 0_1_X), OP(fadd, ADD, 0_X_1), OP(ffma, MAD, 0_1_2),
|
||||
OP(fdot2, DP2, 0_1_X), OP(fdot3, DP3, 0_1_X), OP(fdot4, DP4, 0_1_X),
|
||||
OPC(fmin, SELECT, 0_1_0, GT), OPC(fmax, SELECT, 0_1_0, LT),
|
||||
OP(ffract, FRC, X_X_0), OP(frcp, RCP, X_X_0), OP(frsq, RSQ, X_X_0),
|
||||
OP(fsqrt, SQRT, X_X_0), OP(fsin, SIN, X_X_0), OP(fcos, COS, X_X_0),
|
||||
OP(fsign, SIGN, X_X_0), OP(ffloor, FLOOR, X_X_0), OP(fceil, CEIL, X_X_0),
|
||||
OP(flog2, LOG, X_X_0), OP(fexp2, EXP, X_X_0),
|
||||
OPC(seq, SET, 0_1_X, EQ), OPC(sne, SET, 0_1_X, NE), OPC(sge, SET, 0_1_X, GE), OPC(slt, SET, 0_1_X, LT),
|
||||
OPC(fcsel, SELECT, 0_1_2, NZ),
|
||||
OP(fdiv, DIV, 0_1_X),
|
||||
OP(fddx, DSX, 0_X_0), OP(fddy, DSY, 0_X_0),
|
||||
|
||||
/* integer opcodes */
|
||||
OPCT(i2f32, I2F, 0_X_X, TRUE, S32),
|
||||
OPCT(f2u32, F2I, 0_X_X, TRUE, U32),
|
||||
};
|
||||
|
||||
static void
|
||||
etna_emit_block_start(struct etna_compile *c, unsigned block)
|
||||
{
|
||||
c->block_ptr[block] = c->inst_ptr;
|
||||
}
|
||||
|
||||
static void
|
||||
etna_emit_alu(struct etna_compile *c, nir_op op, struct etna_inst_dst dst,
|
||||
struct etna_inst_src src[3], bool saturate)
|
||||
{
|
||||
struct etna_op_info ei = etna_ops[op];
|
||||
|
||||
assert(ei.opcode != 0xff);
|
||||
|
||||
struct etna_inst inst = {
|
||||
.opcode = ei.opcode,
|
||||
.type = ei.type,
|
||||
.cond = ei.cond,
|
||||
.dst = dst,
|
||||
.sat = saturate,
|
||||
};
|
||||
|
||||
switch (op) {
|
||||
case nir_op_fdiv:
|
||||
case nir_op_flog2:
|
||||
case nir_op_fsin:
|
||||
case nir_op_fcos:
|
||||
if (c->specs->has_new_transcendentals)
|
||||
inst.tex.amode = 1;
|
||||
/* fall through */
|
||||
case nir_op_frsq:
|
||||
case nir_op_frcp:
|
||||
case nir_op_fexp2:
|
||||
case nir_op_fsqrt:
|
||||
case nir_op_i2f32:
|
||||
case nir_op_f2u32:
|
||||
/* for these instructions we want src to be in x component
|
||||
* note: on HALTI2+ i2f/f2u are not scalar but we only use them this way currently
|
||||
*/
|
||||
src[0].swiz = inst_swiz_compose(src[0].swiz,
|
||||
INST_SWIZ_BROADCAST(ffs(inst.dst.write_mask)-1));
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
for (unsigned j = 0; j < 3; j++) {
|
||||
unsigned i = ((ei.src >> j*2) & 3);
|
||||
if (i < 3)
|
||||
inst.src[j] = src[i];
|
||||
}
|
||||
|
||||
emit_inst(c, &inst);
|
||||
}
|
||||
|
||||
static void
|
||||
etna_emit_tex(struct etna_compile *c, nir_texop op, unsigned texid, unsigned dst_swiz,
|
||||
struct etna_inst_dst dst, struct etna_inst_src coord,
|
||||
struct etna_inst_src lod_bias)
|
||||
{
|
||||
struct etna_inst inst = {
|
||||
.dst = dst,
|
||||
.tex.id = texid + (is_fs(c) ? 0 : c->specs->vertex_sampler_offset),
|
||||
.tex.swiz = dst_swiz,
|
||||
.src[0] = coord,
|
||||
};
|
||||
|
||||
if (lod_bias.use)
|
||||
inst.src[1] = lod_bias;
|
||||
|
||||
switch (op) {
|
||||
case nir_texop_tex: inst.opcode = INST_OPCODE_TEXLD; break;
|
||||
case nir_texop_txb: inst.opcode = INST_OPCODE_TEXLDB; break;
|
||||
case nir_texop_txl: inst.opcode = INST_OPCODE_TEXLDL; break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
|
||||
emit_inst(c, &inst);
|
||||
}
|
||||
|
||||
static void
|
||||
etna_emit_jump(struct etna_compile *c, unsigned block, struct etna_inst_src condition)
|
||||
{
|
||||
if (!condition.use) {
|
||||
emit_inst(c, &(struct etna_inst) {.opcode = INST_OPCODE_BRANCH, .imm = block });
|
||||
return;
|
||||
}
|
||||
|
||||
struct etna_inst inst = {
|
||||
.opcode = INST_OPCODE_BRANCH,
|
||||
.cond = INST_CONDITION_NOT,
|
||||
.type = INST_TYPE_U32,
|
||||
.src[0] = condition,
|
||||
.imm = block,
|
||||
};
|
||||
inst.src[0].swiz = INST_SWIZ_BROADCAST(inst.src[0].swiz & 3);
|
||||
emit_inst(c, &inst);
|
||||
}
|
||||
|
||||
static void
|
||||
etna_emit_discard(struct etna_compile *c, struct etna_inst_src condition)
|
||||
{
|
||||
if (!condition.use) {
|
||||
emit_inst(c, &(struct etna_inst) { .opcode = INST_OPCODE_TEXKILL });
|
||||
return;
|
||||
}
|
||||
|
||||
struct etna_inst inst = {
|
||||
.opcode = INST_OPCODE_TEXKILL,
|
||||
.cond = INST_CONDITION_GZ,
|
||||
.src[0] = condition,
|
||||
};
|
||||
inst.src[0].swiz = INST_SWIZ_BROADCAST(inst.src[0].swiz & 3);
|
||||
emit_inst(c, &inst);
|
||||
}
|
||||
|
||||
static void
|
||||
etna_emit_output(struct etna_compile *c, unsigned index, struct etna_inst_src src)
|
||||
{
|
||||
c->output_reg[index] = src.reg;
|
||||
}
|
||||
|
||||
static void
|
||||
etna_emit_load_ubo(struct etna_compile *c, struct etna_inst_dst dst,
|
||||
struct etna_inst_src src, struct etna_inst_src base)
|
||||
{
|
||||
emit_inst(c, &(struct etna_inst) {
|
||||
.opcode = INST_OPCODE_LOAD,
|
||||
.type = INST_TYPE_U32,
|
||||
.dst = dst,
|
||||
.src[0] = src,
|
||||
.src[1] = base,
|
||||
});
|
||||
}
|
||||
|
||||
/* OPT runs a nir pass through NIR_PASS and evaluates to true when the pass
 * reported progress.
 */
#define OPT(nir, pass, ...) ({                          \
   bool this_progress = false;                          \
   NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__);   \
   this_progress;                                       \
})

/* OPT_V runs a nir pass through NIR_PASS_V; no progress value is produced. */
#define OPT_V(nir, pass, ...) NIR_PASS_V(nir, pass, ##__VA_ARGS__)
|
||||
|
||||
static void
|
||||
etna_optimize_loop(nir_shader *s)
|
||||
{
|
||||
bool progress;
|
||||
do {
|
||||
progress = false;
|
||||
|
||||
OPT_V(s, nir_lower_vars_to_ssa);
|
||||
progress |= OPT(s, nir_opt_copy_prop_vars);
|
||||
progress |= OPT(s, nir_copy_prop);
|
||||
progress |= OPT(s, nir_opt_dce);
|
||||
progress |= OPT(s, nir_opt_cse);
|
||||
progress |= OPT(s, nir_opt_peephole_select, 16, true, true);
|
||||
progress |= OPT(s, nir_opt_intrinsics);
|
||||
progress |= OPT(s, nir_opt_algebraic);
|
||||
progress |= OPT(s, nir_opt_constant_folding);
|
||||
progress |= OPT(s, nir_opt_dead_cf);
|
||||
if (OPT(s, nir_opt_trivial_continues)) {
|
||||
progress = true;
|
||||
/* If nir_opt_trivial_continues makes progress, then we need to clean
|
||||
* things up if we want any hope of nir_opt_if or nir_opt_loop_unroll
|
||||
* to make progress.
|
||||
*/
|
||||
OPT(s, nir_copy_prop);
|
||||
OPT(s, nir_opt_dce);
|
||||
}
|
||||
progress |= OPT(s, nir_opt_loop_unroll, nir_var_all);
|
||||
progress |= OPT(s, nir_opt_if, false);
|
||||
progress |= OPT(s, nir_opt_remove_phis);
|
||||
progress |= OPT(s, nir_opt_undef);
|
||||
}
|
||||
while (progress);
|
||||
}
|
||||
|
||||
/* Type-size callback handed to nir_lower_io: the size of a type is its
 * attribute slot count. The bindless argument is not used.
 */
static int
etna_glsl_type_size(const struct glsl_type *type, bool bindless)
{
   int slots = glsl_count_attribute_slots(type, false);

   return slots;
}
|
||||
|
||||
static void
|
||||
copy_uniform_state_to_shader(struct etna_shader_variant *sobj, uint64_t *consts, unsigned count)
|
||||
{
|
||||
struct etna_shader_uniform_info *uinfo = &sobj->uniforms;
|
||||
|
||||
uinfo->imm_count = count * 4;
|
||||
uinfo->imm_data = MALLOC(uinfo->imm_count * sizeof(*uinfo->imm_data));
|
||||
uinfo->imm_contents = MALLOC(uinfo->imm_count * sizeof(*uinfo->imm_contents));
|
||||
|
||||
for (unsigned i = 0; i < uinfo->imm_count; i++) {
|
||||
uinfo->imm_data[i] = consts[i];
|
||||
uinfo->imm_contents[i] = consts[i] >> 32;
|
||||
}
|
||||
|
||||
etna_set_shader_uniforms_dirty_flags(sobj);
|
||||
}
|
||||
|
||||
#include "etnaviv_compiler_nir_emit.h"
|
||||
|
||||
bool
|
||||
etna_compile_shader_nir(struct etna_shader_variant *v)
|
||||
{
|
||||
if (unlikely(!v))
|
||||
return false;
|
||||
|
||||
struct etna_compile *c = CALLOC_STRUCT(etna_compile);
|
||||
if (!c)
|
||||
return false;
|
||||
|
||||
c->variant = v;
|
||||
c->specs = v->shader->specs;
|
||||
c->nir = nir_shader_clone(NULL, v->shader->nir);
|
||||
|
||||
nir_shader *s = c->nir;
|
||||
const struct etna_specs *specs = c->specs;
|
||||
|
||||
v->stage = s->info.stage;
|
||||
v->num_loops = 0; /* TODO */
|
||||
v->vs_id_in_reg = -1;
|
||||
v->vs_pos_out_reg = -1;
|
||||
v->vs_pointsize_out_reg = -1;
|
||||
v->ps_color_out_reg = 0; /* 0 for shader that doesn't write fragcolor.. */
|
||||
v->ps_depth_out_reg = -1;
|
||||
|
||||
/* setup input linking */
|
||||
struct etna_shader_io_file *sf = &v->infile;
|
||||
if (s->info.stage == MESA_SHADER_VERTEX) {
|
||||
nir_foreach_variable(var, &s->inputs) {
|
||||
unsigned idx = var->data.driver_location;
|
||||
sf->reg[idx].reg = idx;
|
||||
sf->reg[idx].slot = var->data.location;
|
||||
sf->reg[idx].num_components = 4; /* TODO */
|
||||
sf->num_reg = MAX2(sf->num_reg, idx+1);
|
||||
}
|
||||
} else {
|
||||
unsigned count = 0;
|
||||
nir_foreach_variable(var, &s->inputs) {
|
||||
unsigned idx = var->data.driver_location;
|
||||
sf->reg[idx].reg = idx + 1;
|
||||
sf->reg[idx].slot = var->data.location;
|
||||
sf->reg[idx].num_components = 4; /* TODO */
|
||||
sf->num_reg = MAX2(sf->num_reg, idx+1);
|
||||
count++;
|
||||
}
|
||||
assert(sf->num_reg == count);
|
||||
}
|
||||
|
||||
NIR_PASS_V(s, nir_lower_io, nir_var_all, etna_glsl_type_size,
|
||||
(nir_lower_io_options)0);
|
||||
|
||||
OPT_V(s, nir_lower_regs_to_ssa);
|
||||
OPT_V(s, nir_lower_vars_to_ssa);
|
||||
OPT_V(s, nir_lower_indirect_derefs, nir_var_all);
|
||||
OPT_V(s, nir_lower_tex, &(struct nir_lower_tex_options) { .lower_txp = ~0u });
|
||||
OPT_V(s, etna_lower_alu_to_scalar, specs);
|
||||
|
||||
etna_optimize_loop(s);
|
||||
|
||||
/* use opt_algebraic between int_to_float and bool_to_float because
|
||||
* int_to_float emits ftrunc, and ftrunc lowering generates bool ops
|
||||
*/
|
||||
OPT_V(s, nir_lower_int_to_float);
|
||||
OPT_V(s, nir_opt_algebraic);
|
||||
OPT_V(s, nir_lower_bool_to_float);
|
||||
|
||||
/* after int to float because insert i2f for instance_id */
|
||||
OPT_V(s, etna_lower_io, v);
|
||||
|
||||
etna_optimize_loop(s);
|
||||
|
||||
if (DBG_ENABLED(ETNA_DBG_DUMP_SHADERS))
|
||||
nir_print_shader(s, stdout);
|
||||
|
||||
while( OPT(s, nir_opt_vectorize) );
|
||||
OPT_V(s, etna_lower_alu_to_scalar, specs);
|
||||
|
||||
NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp);
|
||||
NIR_PASS_V(s, nir_opt_algebraic_late);
|
||||
|
||||
NIR_PASS_V(s, nir_move_vec_src_uses_to_dest);
|
||||
NIR_PASS_V(s, nir_copy_prop);
|
||||
NIR_PASS_V(s, nir_lower_to_source_mods, ~nir_lower_int_source_mods);
|
||||
/* need copy prop after uses_to_dest, and before src mods: see
|
||||
* dEQP-GLES2.functional.shaders.random.all_features.fragment.95
|
||||
*/
|
||||
|
||||
NIR_PASS_V(s, nir_opt_dce);
|
||||
|
||||
NIR_PASS_V(s, etna_lower_alu, c);
|
||||
|
||||
if (DBG_ENABLED(ETNA_DBG_DUMP_SHADERS))
|
||||
nir_print_shader(s, stdout);
|
||||
|
||||
uint64_t consts[ETNA_MAX_IMM] = {};
|
||||
|
||||
unsigned block_ptr[nir_shader_get_entrypoint(s)->num_blocks];
|
||||
c->block_ptr = block_ptr;
|
||||
struct emit_options options = {
|
||||
.max_temps = ETNA_MAX_TEMPS,
|
||||
.max_consts = ETNA_MAX_IMM / 4,
|
||||
.id_reg = sf->num_reg,
|
||||
.single_const_src = c->specs->halti < 5,
|
||||
.etna_new_transcendentals = c->specs->has_new_transcendentals,
|
||||
.user = c,
|
||||
.consts = consts,
|
||||
};
|
||||
|
||||
unsigned num_consts;
|
||||
bool ok = emit_shader(c->nir, &options, &v->num_temps, &num_consts);
|
||||
assert(ok);
|
||||
|
||||
/* empty shader, emit NOP */
|
||||
if (!c->inst_ptr)
|
||||
emit_inst(c, &(struct etna_inst) { .opcode = INST_OPCODE_NOP });
|
||||
|
||||
/* assemble instructions, fixing up labels */
|
||||
uint32_t *code = MALLOC(c->inst_ptr * 16 + 1024);
|
||||
for (unsigned i = 0; i < c->inst_ptr; i++) {
|
||||
struct etna_inst *inst = &c->code[i];
|
||||
if (inst->opcode == INST_OPCODE_BRANCH)
|
||||
inst->imm = block_ptr[inst->imm];
|
||||
|
||||
inst->halti5 = specs->halti >= 5;
|
||||
etna_assemble(&code[i * 4], inst);
|
||||
}
|
||||
|
||||
v->code_size = c->inst_ptr * 4;
|
||||
v->code = code;
|
||||
v->needs_icache = c->inst_ptr > specs->max_instructions;
|
||||
|
||||
copy_uniform_state_to_shader(v, consts, num_consts);
|
||||
|
||||
if (s->info.stage == MESA_SHADER_FRAGMENT) {
|
||||
v->input_count_unk8 = 31; /* XXX what is this */
|
||||
|
||||
nir_foreach_variable(var, &s->outputs) {
|
||||
unsigned reg = c->output_reg[var->data.driver_location];
|
||||
switch (var->data.location) {
|
||||
case FRAG_RESULT_COLOR:
|
||||
case FRAG_RESULT_DATA0: /* DATA0 is used by gallium shaders for color */
|
||||
v->ps_color_out_reg = reg;
|
||||
break;
|
||||
case FRAG_RESULT_DEPTH:
|
||||
v->ps_depth_out_reg = reg;
|
||||
break;
|
||||
default:
|
||||
compile_error(c, "Unsupported fs output %s\n", gl_frag_result_name(var->data.location));
|
||||
}
|
||||
}
|
||||
assert(v->ps_depth_out_reg <= 0);
|
||||
v->outfile.num_reg = 0;
|
||||
ralloc_free(c->nir);
|
||||
FREE(c);
|
||||
return true;
|
||||
}
|
||||
|
||||
v->input_count_unk8 = DIV_ROUND_UP(v->infile.num_reg + 4, 16); /* XXX what is this */
|
||||
|
||||
sf = &v->outfile;
|
||||
sf->num_reg = 0;
|
||||
nir_foreach_variable(var, &s->outputs) {
|
||||
unsigned native = c->output_reg[var->data.driver_location];
|
||||
|
||||
if (var->data.location == VARYING_SLOT_POS) {
|
||||
v->vs_pos_out_reg = native;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (var->data.location == VARYING_SLOT_PSIZ) {
|
||||
v->vs_pointsize_out_reg = native;
|
||||
continue;
|
||||
}
|
||||
|
||||
sf->reg[sf->num_reg].reg = native;
|
||||
sf->reg[sf->num_reg].slot = var->data.location;
|
||||
sf->reg[sf->num_reg].num_components = 4; /* TODO */
|
||||
sf->num_reg++;
|
||||
}
|
||||
|
||||
/* fill in "mystery meat" load balancing value. This value determines how
|
||||
* work is scheduled between VS and PS
|
||||
* in the unified shader architecture. More precisely, it is determined from
|
||||
* the number of VS outputs, as well as chip-specific
|
||||
* vertex output buffer size, vertex cache size, and the number of shader
|
||||
* cores.
|
||||
*
|
||||
* XXX this is a conservative estimate, the "optimal" value is only known for
|
||||
* sure at link time because some
|
||||
* outputs may be unused and thus unmapped. Then again, in the general use
|
||||
* case with GLSL the vertex and fragment
|
||||
* shaders are linked already before submitting to Gallium, thus all outputs
|
||||
* are used.
|
||||
*
|
||||
* note: TGSI compiler counts all outputs (including position and pointsize), here
|
||||
* v->outfile.num_reg only counts varyings, +1 to compensate for the position output
|
||||
* TODO: might have a problem that we don't count pointsize when it is used
|
||||
*/
|
||||
|
||||
int half_out = v->outfile.num_reg / 2 + 1;
|
||||
assert(half_out);
|
||||
|
||||
uint32_t b = ((20480 / (specs->vertex_output_buffer_size -
|
||||
2 * half_out * specs->vertex_cache_size)) +
|
||||
9) /
|
||||
10;
|
||||
uint32_t a = (b + 256 / (specs->shader_core_count * half_out)) / 2;
|
||||
v->vs_load_balancing = VIVS_VS_LOAD_BALANCING_A(MIN2(a, 255)) |
|
||||
VIVS_VS_LOAD_BALANCING_B(MIN2(b, 255)) |
|
||||
VIVS_VS_LOAD_BALANCING_C(0x3f) |
|
||||
VIVS_VS_LOAD_BALANCING_D(0x0f);
|
||||
|
||||
ralloc_free(c->nir);
|
||||
FREE(c);
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
etna_destroy_shader_nir(struct etna_shader_variant *shader)
|
||||
{
|
||||
assert(shader);
|
||||
|
||||
FREE(shader->code);
|
||||
FREE(shader->uniforms.imm_data);
|
||||
FREE(shader->uniforms.imm_contents);
|
||||
FREE(shader);
|
||||
}
|
||||
|
||||
static const struct etna_shader_inout *
|
||||
etna_shader_vs_lookup(const struct etna_shader_variant *sobj,
|
||||
const struct etna_shader_inout *in)
|
||||
{
|
||||
for (int i = 0; i < sobj->outfile.num_reg; i++)
|
||||
if (sobj->outfile.reg[i].slot == in->slot)
|
||||
return &sobj->outfile.reg[i];
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bool
|
||||
etna_link_shader_nir(struct etna_shader_link_info *info,
|
||||
const struct etna_shader_variant *vs,
|
||||
const struct etna_shader_variant *fs)
|
||||
{
|
||||
int comp_ofs = 0;
|
||||
/* For each fragment input we need to find the associated vertex shader
|
||||
* output, which can be found by matching on semantic name and index. A
|
||||
* binary search could be used because the vs outputs are sorted by their
|
||||
* semantic index and grouped by semantic type by fill_in_vs_outputs.
|
||||
*/
|
||||
assert(fs->infile.num_reg < ETNA_NUM_INPUTS);
|
||||
info->pcoord_varying_comp_ofs = -1;
|
||||
|
||||
for (int idx = 0; idx < fs->infile.num_reg; ++idx) {
|
||||
const struct etna_shader_inout *fsio = &fs->infile.reg[idx];
|
||||
const struct etna_shader_inout *vsio = etna_shader_vs_lookup(vs, fsio);
|
||||
struct etna_varying *varying;
|
||||
bool interpolate_always = true;
|
||||
|
||||
assert(fsio->reg > 0 && fsio->reg <= ARRAY_SIZE(info->varyings));
|
||||
|
||||
if (fsio->reg > info->num_varyings)
|
||||
info->num_varyings = fsio->reg;
|
||||
|
||||
varying = &info->varyings[fsio->reg - 1];
|
||||
varying->num_components = fsio->num_components;
|
||||
|
||||
if (!interpolate_always) /* colors affected by flat shading */
|
||||
varying->pa_attributes = 0x200;
|
||||
else /* texture coord or other bypasses flat shading */
|
||||
varying->pa_attributes = 0x2f1;
|
||||
|
||||
varying->use[0] = VARYING_COMPONENT_USE_UNUSED;
|
||||
varying->use[1] = VARYING_COMPONENT_USE_UNUSED;
|
||||
varying->use[2] = VARYING_COMPONENT_USE_UNUSED;
|
||||
varying->use[3] = VARYING_COMPONENT_USE_UNUSED;
|
||||
|
||||
/* point coord is an input to the PS without matching VS output,
|
||||
* so it gets a varying slot without being assigned a VS register.
|
||||
*/
|
||||
if (fsio->slot == VARYING_SLOT_PNTC) {
|
||||
varying->use[0] = VARYING_COMPONENT_USE_POINTCOORD_X;
|
||||
varying->use[1] = VARYING_COMPONENT_USE_POINTCOORD_Y;
|
||||
|
||||
info->pcoord_varying_comp_ofs = comp_ofs;
|
||||
} else {
|
||||
if (vsio == NULL) { /* not found -- link error */
|
||||
BUG("Semantic value not found in vertex shader outputs\n");
|
||||
return true;
|
||||
}
|
||||
varying->reg = vsio->reg;
|
||||
}
|
||||
|
||||
comp_ofs += varying->num_components;
|
||||
}
|
||||
|
||||
assert(info->num_varyings == fs->infile.num_reg);
|
||||
|
||||
return false;
|
||||
}
|
||||
1396
src/gallium/drivers/etnaviv/etnaviv_compiler_nir_emit.h
Normal file
1396
src/gallium/drivers/etnaviv/etnaviv_compiler_nir_emit.h
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -53,6 +53,7 @@
|
|||
#define ETNA_DBG_DRAW_STALL 0x400000 /* Stall FE/PE after every draw op */
|
||||
#define ETNA_DBG_SHADERDB 0x800000 /* dump program compile information */
|
||||
#define ETNA_DBG_NO_SINGLEBUF 0x1000000 /* disable single buffer feature */
|
||||
#define ETNA_DBG_NIR 0x2000000 /* use new NIR compiler */
|
||||
|
||||
extern int etna_mesa_debug; /* set in etna_screen.c from ETNA_DEBUG */
|
||||
|
||||
|
|
|
|||
|
|
@ -72,6 +72,7 @@ static const struct debug_named_value debug_options[] = {
|
|||
{"draw_stall", ETNA_DBG_DRAW_STALL, "Stall FE/PE after each rendered primitive"},
|
||||
{"shaderdb", ETNA_DBG_SHADERDB, "Enable shaderdb output"},
|
||||
{"no_singlebuffer",ETNA_DBG_NO_SINGLEBUF, "Disable single buffer feature"},
|
||||
{"nir", ETNA_DBG_NIR, "use new NIR compiler"},
|
||||
DEBUG_NAMED_VALUE_END
|
||||
};
|
||||
|
||||
|
|
@ -154,6 +155,11 @@ etna_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
return 1;
|
||||
case PIPE_CAP_NATIVE_FENCE_FD:
|
||||
return screen->drm_version >= ETNA_DRM_VERSION_FENCE_FD;
|
||||
case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
|
||||
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL: /* note: not integer */
|
||||
return DBG_ENABLED(ETNA_DBG_NIR);
|
||||
case PIPE_CAP_TGSI_FS_POINT_IS_SYSVAL:
|
||||
return 0;
|
||||
|
||||
/* Memory */
|
||||
case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
|
||||
|
|
@ -322,7 +328,7 @@ etna_screen_get_shader_param(struct pipe_screen *pscreen,
|
|||
? screen->specs.fragment_sampler_count
|
||||
: screen->specs.vertex_sampler_count;
|
||||
case PIPE_SHADER_CAP_PREFERRED_IR:
|
||||
return PIPE_SHADER_IR_TGSI;
|
||||
return DBG_ENABLED(ETNA_DBG_NIR) ? PIPE_SHADER_IR_NIR : PIPE_SHADER_IR_TGSI;
|
||||
case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
|
||||
return 4096;
|
||||
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
|
||||
|
|
@ -743,6 +749,13 @@ etna_screen_bo_from_handle(struct pipe_screen *pscreen,
|
|||
return bo;
|
||||
}
|
||||
|
||||
static const void *
|
||||
etna_get_compiler_options(struct pipe_screen *pscreen,
|
||||
enum pipe_shader_ir ir, unsigned shader)
|
||||
{
|
||||
return &etna_screen(pscreen)->options;
|
||||
}
|
||||
|
||||
struct pipe_screen *
|
||||
etna_screen_create(struct etna_device *dev, struct etna_gpu *gpu,
|
||||
struct renderonly *ro)
|
||||
|
|
@ -845,6 +858,26 @@ etna_screen_create(struct etna_device *dev, struct etna_gpu *gpu,
|
|||
if (!etna_get_specs(screen))
|
||||
goto fail;
|
||||
|
||||
screen->options = (nir_shader_compiler_options) {
|
||||
.lower_fpow = true,
|
||||
.lower_sub = true,
|
||||
.lower_ftrunc = true,
|
||||
.fuse_ffma = true,
|
||||
.lower_bitops = true,
|
||||
.lower_all_io_to_temps = true,
|
||||
.vertex_id_zero_based = true,
|
||||
.lower_flrp32 = true,
|
||||
.lower_fmod = true,
|
||||
.lower_vector_cmp = true,
|
||||
.lower_fdph = true,
|
||||
.lower_fdiv = true, /* !screen->specs.has_new_transcendentals */
|
||||
.lower_fsign = !screen->specs.has_sign_floor_ceil,
|
||||
.lower_ffloor = !screen->specs.has_sign_floor_ceil,
|
||||
.lower_fceil = !screen->specs.has_sign_floor_ceil,
|
||||
.lower_fsqrt = !screen->specs.has_sin_cos_sqrt,
|
||||
.lower_sincos = !screen->specs.has_sin_cos_sqrt,
|
||||
};
|
||||
|
||||
/* apply debug options that disable individual features */
|
||||
if (DBG_ENABLED(ETNA_DBG_NO_EARLY_Z))
|
||||
screen->features[viv_chipFeatures] |= chipFeatures_NO_EARLY_Z;
|
||||
|
|
@ -861,6 +894,7 @@ etna_screen_create(struct etna_device *dev, struct etna_gpu *gpu,
|
|||
pscreen->get_param = etna_screen_get_param;
|
||||
pscreen->get_paramf = etna_screen_get_paramf;
|
||||
pscreen->get_shader_param = etna_screen_get_shader_param;
|
||||
pscreen->get_compiler_options = etna_get_compiler_options;
|
||||
|
||||
pscreen->get_name = etna_screen_get_name;
|
||||
pscreen->get_vendor = etna_screen_get_vendor;
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@
|
|||
#include "util/slab.h"
|
||||
#include "util/u_dynarray.h"
|
||||
#include "util/u_helpers.h"
|
||||
#include "compiler/nir/nir.h"
|
||||
|
||||
struct etna_bo;
|
||||
|
||||
|
|
@ -87,6 +88,8 @@ struct etna_screen {
|
|||
/* set of resources used by currently-unsubmitted renders */
|
||||
mtx_t lock;
|
||||
struct set *used_resources;
|
||||
|
||||
nir_shader_compiler_options options;
|
||||
};
|
||||
|
||||
static inline struct etna_screen *
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@
|
|||
#include "etnaviv_util.h"
|
||||
|
||||
#include "tgsi/tgsi_parse.h"
|
||||
#include "nir/tgsi_to_nir.h"
|
||||
#include "util/u_math.h"
|
||||
#include "util/u_memory.h"
|
||||
|
||||
|
|
@ -49,7 +50,7 @@ static bool etna_icache_upload_shader(struct etna_context *ctx, struct etna_shad
|
|||
etna_bo_cpu_prep(v->bo, DRM_ETNA_PREP_WRITE);
|
||||
memcpy(buf, v->code, v->code_size*4);
|
||||
etna_bo_cpu_fini(v->bo);
|
||||
DBG("Uploaded %s of %u words to bo %p", v->processor == PIPE_SHADER_FRAGMENT ? "fs":"vs", v->code_size, v->bo);
|
||||
DBG("Uploaded %s of %u words to bo %p", v->stage == MESA_SHADER_FRAGMENT ? "fs":"vs", v->code_size, v->bo);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -67,8 +68,8 @@ etna_link_shaders(struct etna_context *ctx, struct compiled_shader_state *cs,
|
|||
{
|
||||
struct etna_shader_link_info link = { };
|
||||
|
||||
assert(vs->processor == PIPE_SHADER_VERTEX);
|
||||
assert(fs->processor == PIPE_SHADER_FRAGMENT);
|
||||
assert(vs->stage == MESA_SHADER_VERTEX);
|
||||
assert(fs->stage == MESA_SHADER_FRAGMENT);
|
||||
|
||||
#ifdef DEBUG
|
||||
if (DBG_ENABLED(ETNA_DBG_DUMP_SHADERS)) {
|
||||
|
|
@ -275,10 +276,10 @@ etna_shader_update_vs_inputs(struct compiled_shader_state *cs,
|
|||
static inline const char *
|
||||
etna_shader_stage(struct etna_shader_variant *shader)
|
||||
{
|
||||
switch (shader->processor) {
|
||||
case PIPE_SHADER_VERTEX: return "VERT";
|
||||
case PIPE_SHADER_FRAGMENT: return "FRAG";
|
||||
case PIPE_SHADER_COMPUTE: return "CL";
|
||||
switch (shader->stage) {
|
||||
case MESA_SHADER_VERTEX: return "VERT";
|
||||
case MESA_SHADER_FRAGMENT: return "FRAG";
|
||||
case MESA_SHADER_COMPUTE: return "CL";
|
||||
default:
|
||||
unreachable("invalid type");
|
||||
return NULL;
|
||||
|
|
@ -372,7 +373,14 @@ etna_create_shader_state(struct pipe_context *pctx,
|
|||
static uint32_t id;
|
||||
shader->id = id++;
|
||||
shader->specs = &ctx->specs;
|
||||
shader->tokens = tgsi_dup_tokens(pss->tokens);
|
||||
|
||||
if (DBG_ENABLED(ETNA_DBG_NIR))
|
||||
shader->nir = (pss->type == PIPE_SHADER_IR_NIR) ? pss->ir.nir :
|
||||
tgsi_to_nir(pss->tokens, pctx->screen);
|
||||
else
|
||||
shader->tokens = tgsi_dup_tokens(pss->tokens);
|
||||
|
||||
|
||||
|
||||
if (etna_mesa_debug & ETNA_DBG_SHADERDB) {
|
||||
/* if shader-db run, create a standard variant immediately
|
||||
|
|
@ -401,7 +409,7 @@ etna_delete_shader_state(struct pipe_context *pctx, void *ss)
|
|||
etna_destroy_shader(t);
|
||||
}
|
||||
|
||||
FREE(shader->tokens);
|
||||
ralloc_free(shader->nir);
|
||||
FREE(shader);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@
|
|||
|
||||
struct etna_context;
|
||||
struct etna_shader_variant;
|
||||
struct nir_shader;
|
||||
|
||||
struct etna_shader_key
|
||||
{
|
||||
|
|
@ -56,14 +57,15 @@ etna_shader_key_equal(struct etna_shader_key *a, struct etna_shader_key *b)
|
|||
}
|
||||
|
||||
struct etna_shader {
|
||||
/* shader id (for debug): */
|
||||
uint32_t id;
|
||||
uint32_t variant_count;
|
||||
/* shader id (for debug): */
|
||||
uint32_t id;
|
||||
uint32_t variant_count;
|
||||
|
||||
struct tgsi_token *tokens;
|
||||
const struct etna_specs *specs;
|
||||
struct tgsi_token *tokens;
|
||||
struct nir_shader *nir;
|
||||
const struct etna_specs *specs;
|
||||
|
||||
struct etna_shader_variant *variants;
|
||||
struct etna_shader_variant *variants;
|
||||
};
|
||||
|
||||
bool
|
||||
|
|
|
|||
|
|
@ -37,6 +37,8 @@ files_etnaviv = files(
|
|||
'etnaviv_clear_blit.h',
|
||||
'etnaviv_compiler.c',
|
||||
'etnaviv_compiler.h',
|
||||
'etnaviv_compiler_nir.c',
|
||||
'etnaviv_compiler_nir_emit.h',
|
||||
'etnaviv_context.c',
|
||||
'etnaviv_context.h',
|
||||
'etnaviv_debug.h',
|
||||
|
|
@ -97,7 +99,7 @@ libetnaviv = static_library(
|
|||
inc_include, inc_src, inc_gallium, inc_gallium_aux, inc_etnaviv,
|
||||
],
|
||||
link_with: libetnaviv_drm,
|
||||
dependencies : dep_libdrm,
|
||||
dependencies : [dep_libdrm, idep_nir_headers],
|
||||
)
|
||||
|
||||
etnaviv_compiler = executable(
|
||||
|
|
@ -115,4 +117,5 @@ etnaviv_compiler = executable(
|
|||
driver_etnaviv = declare_dependency(
|
||||
compile_args : '-DGALLIUM_ETNAVIV',
|
||||
link_with : [libetnaviv, libetnavivdrm],
|
||||
dependencies : idep_nir,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -26,5 +26,5 @@ libetnavivdrm = static_library(
|
|||
inc_etnaviv,
|
||||
],
|
||||
link_with: libetnaviv_drm,
|
||||
dependencies : [dep_libdrm],
|
||||
dependencies : [dep_libdrm, idep_nir_headers],
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue