mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 09:38:07 +02:00
freedreno: a2xx: NIR backend
This patch replaces the a2xx TGSI compiler with a NIR compiler. It also adds several new features: -gl_FrontFacing, gl_FragCoord, gl_PointCoord, gl_PointSize -control flow (including loops) -texture related features (LOD/bias, cubemaps) -filling scalar ALU slot when possible Signed-off-by: Jonathan Marek <jonathan@marek.ca>
This commit is contained in:
parent
da3ca69afa
commit
67610a0323
21 changed files with 3035 additions and 2531 deletions
|
|
@ -42,8 +42,6 @@ a2xx_SOURCES := \
|
|||
a2xx/disasm-a2xx.c \
|
||||
a2xx/fd2_blend.c \
|
||||
a2xx/fd2_blend.h \
|
||||
a2xx/fd2_compiler.c \
|
||||
a2xx/fd2_compiler.h \
|
||||
a2xx/fd2_context.c \
|
||||
a2xx/fd2_context.h \
|
||||
a2xx/fd2_draw.c \
|
||||
|
|
@ -67,8 +65,12 @@ a2xx_SOURCES := \
|
|||
a2xx/fd2_zsa.c \
|
||||
a2xx/fd2_zsa.h \
|
||||
a2xx/instr-a2xx.h \
|
||||
a2xx/ir-a2xx.c \
|
||||
a2xx/ir-a2xx.h
|
||||
a2xx/ir2.c \
|
||||
a2xx/ir2.h \
|
||||
a2xx/ir2_assemble.c \
|
||||
a2xx/ir2_nir.c \
|
||||
a2xx/ir2_private.h \
|
||||
a2xx/ir2_ra.c
|
||||
|
||||
a3xx_SOURCES := \
|
||||
a3xx/fd3_blend.c \
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -1,36 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Rob Clark <robclark@freedesktop.org>
|
||||
*/
|
||||
|
||||
#ifndef FD2_COMPILER_H_
|
||||
#define FD2_COMPILER_H_
|
||||
|
||||
#include "fd2_program.h"
|
||||
#include "fd2_util.h"
|
||||
|
||||
int fd2_compile_shader(struct fd_program_stateobj *prog,
|
||||
struct fd2_shader_stateobj *so);
|
||||
|
||||
#endif /* FD2_COMPILER_H_ */
|
||||
|
|
@ -216,7 +216,7 @@ fd2_clear(struct fd_context *ctx, unsigned buffers,
|
|||
OUT_RING(ring, 0x0000028f);
|
||||
}
|
||||
|
||||
fd2_program_emit(ring, &ctx->solid_prog);
|
||||
fd2_program_emit(ctx, ring, &ctx->solid_prog);
|
||||
|
||||
OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
|
||||
OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);
|
||||
|
|
|
|||
|
|
@ -272,12 +272,24 @@ fd2_emit_state(struct fd_context *ctx, const enum fd_dirty_3d_state dirty)
|
|||
OUT_RING(ring, fui(ctx->viewport.translate[1])); /* PA_CL_VPORT_YOFFSET */
|
||||
OUT_RING(ring, fui(ctx->viewport.scale[2])); /* PA_CL_VPORT_ZSCALE */
|
||||
OUT_RING(ring, fui(ctx->viewport.translate[2])); /* PA_CL_VPORT_ZOFFSET */
|
||||
|
||||
/* set viewport in C65/C66, for a20x hw binning and fragcoord.z */
|
||||
OUT_PKT3(ring, CP_SET_CONSTANT, 9);
|
||||
OUT_RING(ring, 0x00000184);
|
||||
|
||||
OUT_RING(ring, fui(ctx->viewport.translate[0]));
|
||||
OUT_RING(ring, fui(ctx->viewport.translate[1]));
|
||||
OUT_RING(ring, fui(ctx->viewport.translate[2]));
|
||||
OUT_RING(ring, fui(0.0f));
|
||||
|
||||
OUT_RING(ring, fui(ctx->viewport.scale[0]));
|
||||
OUT_RING(ring, fui(ctx->viewport.scale[1]));
|
||||
OUT_RING(ring, fui(ctx->viewport.scale[2]));
|
||||
OUT_RING(ring, fui(0.0f));
|
||||
}
|
||||
|
||||
if (dirty & (FD_DIRTY_PROG | FD_DIRTY_VTXSTATE | FD_DIRTY_TEXSTATE)) {
|
||||
fd2_program_validate(ctx);
|
||||
fd2_program_emit(ring, &ctx->prog);
|
||||
}
|
||||
if (dirty & (FD_DIRTY_PROG | FD_DIRTY_VTXSTATE | FD_DIRTY_TEXSTATE))
|
||||
fd2_program_emit(ctx, ring, &ctx->prog);
|
||||
|
||||
if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONST)) {
|
||||
emit_constants(ring, VS_CONST_BASE * 4,
|
||||
|
|
|
|||
|
|
@ -137,7 +137,7 @@ fd2_emit_tile_gmem2mem(struct fd_batch *batch, struct fd_tile *tile)
|
|||
OUT_RING(ring, 0x0000028f);
|
||||
}
|
||||
|
||||
fd2_program_emit(ring, &ctx->solid_prog);
|
||||
fd2_program_emit(ctx, ring, &ctx->solid_prog);
|
||||
|
||||
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
|
||||
OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
|
||||
|
|
@ -285,7 +285,7 @@ fd2_emit_tile_mem2gmem(struct fd_batch *batch, struct fd_tile *tile)
|
|||
OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
|
||||
OUT_RING(ring, 0);
|
||||
|
||||
fd2_program_emit(ring, &ctx->blit_prog[0]);
|
||||
fd2_program_emit(ctx, ring, &ctx->blit_prog[0]);
|
||||
|
||||
OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
|
||||
OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);
|
||||
|
|
@ -476,6 +476,16 @@ fd2_emit_tile_renderprep(struct fd_batch *batch, struct fd_tile *tile)
|
|||
OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
|
||||
OUT_RING(ring, A2XX_PA_SC_WINDOW_OFFSET_X(-tile->xoff) |
|
||||
A2XX_PA_SC_WINDOW_OFFSET_Y(-tile->yoff));
|
||||
|
||||
/* tile offset for gl_FragCoord on a20x (C64 in fragment shader) */
|
||||
if (is_a20x(batch->ctx->screen)) {
|
||||
OUT_PKT3(ring, CP_SET_CONSTANT, 5);
|
||||
OUT_RING(ring, 0x00000580);
|
||||
OUT_RING(ring, fui(tile->xoff));
|
||||
OUT_RING(ring, fui(tile->yoff));
|
||||
OUT_RING(ring, fui(0.0f));
|
||||
OUT_RING(ring, fui(0.0f));
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@
|
|||
*
|
||||
* Authors:
|
||||
* Rob Clark <robclark@freedesktop.org>
|
||||
* Jonathan Marek <jonathan@marek.ca>
|
||||
*/
|
||||
|
||||
#include "pipe/p_state.h"
|
||||
|
|
@ -34,18 +35,20 @@
|
|||
|
||||
#include "freedreno_program.h"
|
||||
|
||||
#include "ir2.h"
|
||||
#include "fd2_program.h"
|
||||
#include "fd2_compiler.h"
|
||||
#include "fd2_texture.h"
|
||||
#include "fd2_util.h"
|
||||
#include "instr-a2xx.h"
|
||||
|
||||
static struct fd2_shader_stateobj *
|
||||
create_shader(gl_shader_stage type)
|
||||
create_shader(struct pipe_context *pctx, gl_shader_stage type)
|
||||
{
|
||||
struct fd2_shader_stateobj *so = CALLOC_STRUCT(fd2_shader_stateobj);
|
||||
if (!so)
|
||||
return NULL;
|
||||
so->type = type;
|
||||
so->is_a20x = is_a20x(fd_context(pctx)->screen);
|
||||
return so;
|
||||
}
|
||||
|
||||
|
|
@ -54,86 +57,64 @@ delete_shader(struct fd2_shader_stateobj *so)
|
|||
{
|
||||
if (!so)
|
||||
return;
|
||||
ir2_shader_destroy(so->ir);
|
||||
free(so->tokens);
|
||||
free(so->bin);
|
||||
ralloc_free(so->nir);
|
||||
for (int i = 0; i < ARRAY_SIZE(so->variant); i++)
|
||||
free(so->variant[i].info.dwords);
|
||||
free(so);
|
||||
}
|
||||
|
||||
static struct fd2_shader_stateobj *
|
||||
assemble(struct fd2_shader_stateobj *so)
|
||||
{
|
||||
free(so->bin);
|
||||
so->bin = ir2_shader_assemble(so->ir, &so->info);
|
||||
if (!so->bin)
|
||||
goto fail;
|
||||
|
||||
if (fd_mesa_debug & FD_DBG_DISASM) {
|
||||
DBG("disassemble: type=%d", so->type);
|
||||
disasm_a2xx(so->bin, so->info.sizedwords, 0, so->type);
|
||||
}
|
||||
|
||||
return so;
|
||||
|
||||
fail:
|
||||
debug_error("assemble failed!");
|
||||
delete_shader(so);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct fd2_shader_stateobj *
|
||||
compile(struct fd_program_stateobj *prog, struct fd2_shader_stateobj *so)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (fd_mesa_debug & FD_DBG_DISASM) {
|
||||
DBG("dump tgsi: type=%d", so->type);
|
||||
tgsi_dump(so->tokens, 0);
|
||||
}
|
||||
|
||||
ret = fd2_compile_shader(prog, so);
|
||||
if (ret)
|
||||
goto fail;
|
||||
|
||||
/* NOTE: we don't assemble yet because for VS we don't know the
|
||||
* type information for vertex fetch yet.. so those need to be
|
||||
* patched up later before assembling.
|
||||
*/
|
||||
|
||||
so->info.sizedwords = 0;
|
||||
|
||||
return so;
|
||||
|
||||
fail:
|
||||
debug_error("compile failed!");
|
||||
delete_shader(so);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
emit(struct fd_ringbuffer *ring, struct fd2_shader_stateobj *so)
|
||||
emit(struct fd_ringbuffer *ring, gl_shader_stage type,
|
||||
struct ir2_shader_info *info)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
if (so->info.sizedwords == 0)
|
||||
assemble(so);
|
||||
assert(info->sizedwords);
|
||||
|
||||
OUT_PKT3(ring, CP_IM_LOAD_IMMEDIATE, 2 + so->info.sizedwords);
|
||||
OUT_RING(ring, (so->type == MESA_SHADER_VERTEX) ? 0 : 1);
|
||||
OUT_RING(ring, so->info.sizedwords);
|
||||
for (i = 0; i < so->info.sizedwords; i++)
|
||||
OUT_RING(ring, so->bin[i]);
|
||||
OUT_PKT3(ring, CP_IM_LOAD_IMMEDIATE, 2 + info->sizedwords);
|
||||
OUT_RING(ring, type == MESA_SHADER_FRAGMENT);
|
||||
OUT_RING(ring, info->sizedwords);
|
||||
for (i = 0; i < info->sizedwords; i++)
|
||||
OUT_RING(ring, info->dwords[i]);
|
||||
}
|
||||
|
||||
static int
|
||||
ir2_glsl_type_size(const struct glsl_type *type)
|
||||
{
|
||||
return glsl_count_attribute_slots(type, false);
|
||||
}
|
||||
|
||||
static void *
|
||||
fd2_fp_state_create(struct pipe_context *pctx,
|
||||
const struct pipe_shader_state *cso)
|
||||
{
|
||||
struct fd2_shader_stateobj *so = create_shader(MESA_SHADER_FRAGMENT);
|
||||
struct fd2_shader_stateobj *so = create_shader(pctx, MESA_SHADER_FRAGMENT);
|
||||
if (!so)
|
||||
return NULL;
|
||||
so->tokens = tgsi_dup_tokens(cso->tokens);
|
||||
|
||||
if (cso->type == PIPE_SHADER_IR_NIR) {
|
||||
so->nir = cso->ir.nir;
|
||||
NIR_PASS_V(so->nir, nir_lower_io, nir_var_all, ir2_glsl_type_size,
|
||||
(nir_lower_io_options)0);
|
||||
} else {
|
||||
assert(cso->type == PIPE_SHADER_IR_TGSI);
|
||||
so->nir = ir2_tgsi_to_nir(cso->tokens);
|
||||
}
|
||||
|
||||
if (ir2_optimize_nir(so->nir, true))
|
||||
goto fail;
|
||||
|
||||
so->first_immediate = so->nir->num_uniforms;
|
||||
|
||||
ir2_compile(so, 0, NULL);
|
||||
|
||||
ralloc_free(so->nir);
|
||||
so->nir = NULL;
|
||||
return so;
|
||||
|
||||
fail:
|
||||
delete_shader(so);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -147,11 +128,32 @@ static void *
|
|||
fd2_vp_state_create(struct pipe_context *pctx,
|
||||
const struct pipe_shader_state *cso)
|
||||
{
|
||||
struct fd2_shader_stateobj *so = create_shader(MESA_SHADER_VERTEX);
|
||||
struct fd2_shader_stateobj *so = create_shader(pctx, MESA_SHADER_VERTEX);
|
||||
if (!so)
|
||||
return NULL;
|
||||
so->tokens = tgsi_dup_tokens(cso->tokens);
|
||||
|
||||
if (cso->type == PIPE_SHADER_IR_NIR) {
|
||||
so->nir = cso->ir.nir;
|
||||
NIR_PASS_V(so->nir, nir_lower_io, nir_var_all, ir2_glsl_type_size,
|
||||
(nir_lower_io_options)0);
|
||||
} else {
|
||||
assert(cso->type == PIPE_SHADER_IR_TGSI);
|
||||
so->nir = ir2_tgsi_to_nir(cso->tokens);
|
||||
}
|
||||
|
||||
if (ir2_optimize_nir(so->nir, true))
|
||||
goto fail;
|
||||
|
||||
so->first_immediate = so->nir->num_uniforms;
|
||||
|
||||
/* compile binning variant now */
|
||||
ir2_compile(so, 0, NULL);
|
||||
|
||||
return so;
|
||||
|
||||
fail:
|
||||
delete_shader(so);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -162,277 +164,145 @@ fd2_vp_state_delete(struct pipe_context *pctx, void *hwcso)
|
|||
}
|
||||
|
||||
static void
|
||||
patch_vtx_fetches(struct fd_context *ctx, struct fd2_shader_stateobj *so,
|
||||
struct fd_vertex_stateobj *vtx)
|
||||
patch_vtx_fetch(struct fd_context *ctx, struct pipe_vertex_element *elem,
|
||||
instr_fetch_vtx_t *instr, uint16_t dst_swiz)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
assert(so->num_vfetch_instrs == vtx->num_elements);
|
||||
|
||||
/* update vtx fetch instructions: */
|
||||
for (i = 0; i < so->num_vfetch_instrs; i++) {
|
||||
struct ir2_instruction *instr = so->vfetch_instrs[i];
|
||||
struct pipe_vertex_element *elem = &vtx->pipe[i];
|
||||
struct pipe_vertex_buffer *vb =
|
||||
struct pipe_vertex_buffer *vb =
|
||||
&ctx->vtx.vertexbuf.vb[elem->vertex_buffer_index];
|
||||
enum pipe_format format = elem->src_format;
|
||||
const struct util_format_description *desc =
|
||||
util_format_description(format);
|
||||
unsigned j;
|
||||
enum pipe_format format = elem->src_format;
|
||||
const struct util_format_description *desc =
|
||||
util_format_description(format);
|
||||
unsigned j;
|
||||
|
||||
/* Find the first non-VOID channel. */
|
||||
for (j = 0; j < 4; j++)
|
||||
if (desc->channel[j].type != UTIL_FORMAT_TYPE_VOID)
|
||||
break;
|
||||
/* Find the first non-VOID channel. */
|
||||
for (j = 0; j < 4; j++)
|
||||
if (desc->channel[j].type != UTIL_FORMAT_TYPE_VOID)
|
||||
break;
|
||||
|
||||
/* CI/CIS can probably be set in compiler instead: */
|
||||
instr->fetch.const_idx = 20 + (i / 3);
|
||||
instr->fetch.const_idx_sel = i % 3;
|
||||
instr->format = fd2_pipe2surface(format);
|
||||
instr->num_format_all = !desc->channel[j].normalized;
|
||||
instr->format_comp_all = desc->channel[j].type == UTIL_FORMAT_TYPE_SIGNED;
|
||||
instr->stride = vb->stride;
|
||||
instr->offset = elem->src_offset;
|
||||
|
||||
instr->fetch.fmt = fd2_pipe2surface(format);
|
||||
instr->fetch.is_normalized = desc->channel[j].normalized;
|
||||
instr->fetch.is_signed =
|
||||
desc->channel[j].type == UTIL_FORMAT_TYPE_SIGNED;
|
||||
instr->fetch.stride = vb->stride ? : 1;
|
||||
instr->fetch.offset = elem->src_offset;
|
||||
|
||||
for (j = 0; j < 4; j++)
|
||||
instr->dst_reg.swizzle[j] = "xyzw01__"[desc->swizzle[j]];
|
||||
|
||||
assert(instr->fetch.fmt != ~0);
|
||||
|
||||
DBG("vtx[%d]: %s (%d), ci=%d, cis=%d, id=%d, swizzle=%s, "
|
||||
"stride=%d, offset=%d",
|
||||
i, util_format_name(format),
|
||||
instr->fetch.fmt,
|
||||
instr->fetch.const_idx,
|
||||
instr->fetch.const_idx_sel,
|
||||
elem->instance_divisor,
|
||||
instr->dst_reg.swizzle,
|
||||
instr->fetch.stride,
|
||||
instr->fetch.offset);
|
||||
unsigned swiz = 0;
|
||||
for (int i = 0; i < 4; i++) {
|
||||
unsigned s = dst_swiz >> i*3 & 7;
|
||||
swiz |= (s >= 4 ? s : desc->swizzle[s]) << i*3;
|
||||
}
|
||||
|
||||
/* trigger re-assemble: */
|
||||
so->info.sizedwords = 0;
|
||||
instr->dst_swiz = swiz;
|
||||
}
|
||||
|
||||
static void
|
||||
patch_tex_fetches(struct fd_context *ctx, struct fd2_shader_stateobj *so,
|
||||
struct fd_texture_stateobj *tex)
|
||||
patch_fetches(struct fd_context *ctx, struct ir2_shader_info *info,
|
||||
struct fd_vertex_stateobj *vtx, struct fd_texture_stateobj *tex)
|
||||
{
|
||||
unsigned i;
|
||||
for (int i = 0; i < info->num_fetch_instrs; i++) {
|
||||
struct ir2_fetch_info *fi = &info->fetch_info[i];
|
||||
|
||||
/* update tex fetch instructions: */
|
||||
for (i = 0; i < so->num_tfetch_instrs; i++) {
|
||||
struct ir2_instruction *instr = so->tfetch_instrs[i].instr;
|
||||
unsigned samp_id = so->tfetch_instrs[i].samp_id;
|
||||
unsigned const_idx = fd2_get_const_idx(ctx, tex, samp_id);
|
||||
instr_fetch_t *instr = (instr_fetch_t*) &info->dwords[fi->offset];
|
||||
if (instr->opc == VTX_FETCH) {
|
||||
unsigned idx = (instr->vtx.const_index - 20) * 3 +
|
||||
instr->vtx.const_index_sel;
|
||||
patch_vtx_fetch(ctx, &vtx->pipe[idx], &instr->vtx, fi->vtx.dst_swiz);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (const_idx != instr->fetch.const_idx) {
|
||||
instr->fetch.const_idx = const_idx;
|
||||
/* trigger re-assemble: */
|
||||
so->info.sizedwords = 0;
|
||||
assert(instr->opc == TEX_FETCH);
|
||||
instr->tex.const_idx = fd2_get_const_idx(ctx, tex, fi->tex.samp_id);
|
||||
instr->tex.src_swiz = fi->tex.src_swiz;
|
||||
if (fd2_texture_swap_xy(tex, fi->tex.samp_id)) {
|
||||
unsigned x = instr->tex.src_swiz;
|
||||
instr->tex.src_swiz = (x & 0x30) | (x & 3) << 2 | (x >> 2 & 3);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
fd2_program_validate(struct fd_context *ctx)
|
||||
{
|
||||
struct fd_program_stateobj *prog = &ctx->prog;
|
||||
bool dirty_fp = !!(ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_PROG);
|
||||
bool dirty_vp = !!(ctx->dirty_shader[PIPE_SHADER_VERTEX] & FD_DIRTY_SHADER_PROG);
|
||||
|
||||
/* if vertex or frag shader is dirty, we may need to recompile. Compile
|
||||
* frag shader first, as that assigns the register slots for exports
|
||||
* from the vertex shader. And therefore if frag shader has changed we
|
||||
* need to recompile both vert and frag shader.
|
||||
*/
|
||||
if (dirty_fp)
|
||||
compile(prog, prog->fp);
|
||||
|
||||
if (dirty_fp || dirty_vp)
|
||||
compile(prog, prog->vp);
|
||||
|
||||
/* if necessary, fix up vertex fetch instructions: */
|
||||
if (ctx->dirty & (FD_DIRTY_VTXSTATE | FD_DIRTY_PROG))
|
||||
patch_vtx_fetches(ctx, prog->vp, ctx->vtx.vtx);
|
||||
|
||||
/* if necessary, fix up texture fetch instructions: */
|
||||
if (ctx->dirty & (FD_DIRTY_TEXSTATE | FD_DIRTY_PROG)) {
|
||||
patch_tex_fetches(ctx, prog->vp, &ctx->tex[PIPE_SHADER_VERTEX]);
|
||||
patch_tex_fetches(ctx, prog->fp, &ctx->tex[PIPE_SHADER_FRAGMENT]);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
fd2_program_emit(struct fd_ringbuffer *ring,
|
||||
fd2_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
struct fd_program_stateobj *prog)
|
||||
{
|
||||
struct ir2_shader_info *vsi =
|
||||
&((struct fd2_shader_stateobj *)prog->vp)->info;
|
||||
struct ir2_shader_info *fsi =
|
||||
&((struct fd2_shader_stateobj *)prog->fp)->info;
|
||||
uint8_t vs_gprs, fs_gprs, vs_export;
|
||||
struct fd2_shader_stateobj *fp = NULL, *vp;
|
||||
struct ir2_shader_info *fpi, *vpi;
|
||||
struct ir2_frag_linkage *f;
|
||||
uint8_t vs_gprs, fs_gprs = 0, vs_export = 0;
|
||||
enum a2xx_sq_ps_vtx_mode mode = POSITION_1_VECTOR;
|
||||
bool binning = (ctx->batch && ring == ctx->batch->binning);
|
||||
unsigned variant = 0;
|
||||
|
||||
emit(ring, prog->vp);
|
||||
emit(ring, prog->fp);
|
||||
vp = prog->vp;
|
||||
|
||||
vs_gprs = (vsi->max_reg < 0) ? 0x80 : vsi->max_reg;
|
||||
fs_gprs = (fsi->max_reg < 0) ? 0x80 : fsi->max_reg;
|
||||
vs_export = MAX2(1, prog->num_exports) - 1;
|
||||
/* find variant matching the linked fragment shader */
|
||||
if (!binning) {
|
||||
fp = prog->fp;
|
||||
for (variant = 1; variant < ARRAY_SIZE(vp->variant); variant++) {
|
||||
/* if checked all variants, compile a new variant */
|
||||
if (!vp->variant[variant].info.sizedwords) {
|
||||
ir2_compile(vp, variant, fp);
|
||||
break;
|
||||
}
|
||||
|
||||
/* check if fragment shader linkage matches */
|
||||
if (!memcmp(&vp->variant[variant].f, &fp->variant[0].f,
|
||||
sizeof(struct ir2_frag_linkage)))
|
||||
break;
|
||||
}
|
||||
assert(variant < ARRAY_SIZE(vp->variant));
|
||||
}
|
||||
|
||||
vpi = &vp->variant[variant].info;
|
||||
fpi = &fp->variant[0].info;
|
||||
f = &fp->variant[0].f;
|
||||
|
||||
/* clear/gmem2mem/mem2gmem need to be changed to remove this condition */
|
||||
if (prog != &ctx->solid_prog && prog != &ctx->blit_prog[0]) {
|
||||
patch_fetches(ctx, vpi, ctx->vtx.vtx, &ctx->tex[PIPE_SHADER_VERTEX]);
|
||||
if (fp)
|
||||
patch_fetches(ctx, fpi, NULL, &ctx->tex[PIPE_SHADER_FRAGMENT]);
|
||||
}
|
||||
|
||||
emit(ring, MESA_SHADER_VERTEX, vpi);
|
||||
|
||||
if (fp) {
|
||||
emit(ring, MESA_SHADER_FRAGMENT, fpi);
|
||||
fs_gprs = (fpi->max_reg < 0) ? 0x80 : fpi->max_reg;
|
||||
vs_export = MAX2(1, f->inputs_count) - 1;
|
||||
}
|
||||
|
||||
vs_gprs = (vpi->max_reg < 0) ? 0x80 : vpi->max_reg;
|
||||
|
||||
if (vp->writes_psize && !binning)
|
||||
mode = POSITION_2_VECTORS_SPRITE;
|
||||
|
||||
/* set register to use for param (fragcoord/pointcoord/frontfacing) */
|
||||
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
|
||||
OUT_RING(ring, CP_REG(REG_A2XX_SQ_CONTEXT_MISC));
|
||||
OUT_RING(ring, A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL(CENTERS_ONLY) |
|
||||
COND(fp, A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS(f->inputs_count)) |
|
||||
/* we need SCREEN_XY for both fragcoord and frontfacing */
|
||||
A2XX_SQ_CONTEXT_MISC_SC_OUTPUT_SCREEN_XY);
|
||||
|
||||
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
|
||||
OUT_RING(ring, CP_REG(REG_A2XX_SQ_PROGRAM_CNTL));
|
||||
OUT_RING(ring, A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE(POSITION_2_VECTORS_SPRITE) |
|
||||
OUT_RING(ring, A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE(2) |
|
||||
A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE(mode) |
|
||||
A2XX_SQ_PROGRAM_CNTL_VS_RESOURCE |
|
||||
A2XX_SQ_PROGRAM_CNTL_PS_RESOURCE |
|
||||
A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT(vs_export) |
|
||||
A2XX_SQ_PROGRAM_CNTL_PS_REGS(fs_gprs) |
|
||||
A2XX_SQ_PROGRAM_CNTL_VS_REGS(vs_gprs));
|
||||
}
|
||||
|
||||
/* Creates shader:
|
||||
* EXEC ADDR(0x2) CNT(0x1)
|
||||
* (S)FETCH: SAMPLE R0.xyzw = R0.xyx CONST(0) LOCATION(CENTER)
|
||||
* ALLOC PARAM/PIXEL SIZE(0x0)
|
||||
* EXEC_END ADDR(0x3) CNT(0x1)
|
||||
* ALU: MAXv export0 = R0, R0 ; gl_FragColor
|
||||
* NOP
|
||||
*/
|
||||
static struct fd2_shader_stateobj *
|
||||
create_blit_fp(void)
|
||||
{
|
||||
struct fd2_shader_stateobj *so = create_shader(MESA_SHADER_FRAGMENT);
|
||||
struct ir2_instruction *instr;
|
||||
|
||||
if (!so)
|
||||
return NULL;
|
||||
|
||||
so->ir = ir2_shader_create();
|
||||
|
||||
instr = ir2_instr_create_tex_fetch(so->ir, 0);
|
||||
ir2_dst_create(instr, 0, "xyzw", 0);
|
||||
ir2_reg_create(instr, 0, "xyx", IR2_REG_INPUT);
|
||||
instr->sync = true;
|
||||
|
||||
instr = ir2_instr_create_alu_v(so->ir, MAXv);
|
||||
ir2_dst_create(instr, 0, NULL, IR2_REG_EXPORT);
|
||||
ir2_reg_create(instr, 0, NULL, 0);
|
||||
ir2_reg_create(instr, 0, NULL, 0);
|
||||
|
||||
return assemble(so);
|
||||
}
|
||||
|
||||
/* Creates shader:
|
||||
* EXEC ADDR(0x3) CNT(0x2)
|
||||
* FETCH: VERTEX R1.xy01 = R0.x FMT_32_32_FLOAT UNSIGNED STRIDE(8) CONST(26, 1)
|
||||
* FETCH: VERTEX R2.xyz1 = R0.x FMT_32_32_32_FLOAT UNSIGNED STRIDE(12) CONST(26, 0)
|
||||
* ALLOC POSITION SIZE(0x0)
|
||||
* EXEC ADDR(0x5) CNT(0x1)
|
||||
* ALU: MAXv export62 = R2, R2 ; gl_Position
|
||||
* ALLOC PARAM/PIXEL SIZE(0x0)
|
||||
* EXEC_END ADDR(0x6) CNT(0x1)
|
||||
* ALU: MAXv export0 = R1, R1
|
||||
* NOP
|
||||
*/
|
||||
static struct fd2_shader_stateobj *
|
||||
create_blit_vp(void)
|
||||
{
|
||||
struct fd2_shader_stateobj *so = create_shader(MESA_SHADER_VERTEX);
|
||||
struct ir2_instruction *instr;
|
||||
|
||||
if (!so)
|
||||
return NULL;
|
||||
|
||||
so->ir = ir2_shader_create();
|
||||
|
||||
instr = ir2_instr_create_vtx_fetch(so->ir, 26, 1, FMT_32_32_FLOAT, false, 8);
|
||||
instr->fetch.is_normalized = true;
|
||||
ir2_dst_create(instr, 1, "xy01", 0);
|
||||
ir2_reg_create(instr, 0, "x", IR2_REG_INPUT);
|
||||
|
||||
instr = ir2_instr_create_vtx_fetch(so->ir, 26, 0, FMT_32_32_32_FLOAT, false, 12);
|
||||
instr->fetch.is_normalized = true;
|
||||
ir2_dst_create(instr, 2, "xyz1", 0);
|
||||
ir2_reg_create(instr, 0, "x", IR2_REG_INPUT);
|
||||
|
||||
instr = ir2_instr_create_alu_v(so->ir, MAXv);
|
||||
ir2_dst_create(instr, 62, NULL, IR2_REG_EXPORT);
|
||||
ir2_reg_create(instr, 2, NULL, 0);
|
||||
ir2_reg_create(instr, 2, NULL, 0);
|
||||
|
||||
instr = ir2_instr_create_alu_v(so->ir, MAXv);
|
||||
ir2_dst_create(instr, 0, NULL, IR2_REG_EXPORT);
|
||||
ir2_reg_create(instr, 1, NULL, 0);
|
||||
ir2_reg_create(instr, 1, NULL, 0);
|
||||
|
||||
return assemble(so);
|
||||
}
|
||||
|
||||
/* Creates shader:
|
||||
* ALLOC PARAM/PIXEL SIZE(0x0)
|
||||
* EXEC_END ADDR(0x1) CNT(0x1)
|
||||
* ALU: MAXv export0 = C0, C0 ; gl_FragColor
|
||||
*/
|
||||
static struct fd2_shader_stateobj *
|
||||
create_solid_fp(void)
|
||||
{
|
||||
struct fd2_shader_stateobj *so = create_shader(MESA_SHADER_FRAGMENT);
|
||||
struct ir2_instruction *instr;
|
||||
|
||||
if (!so)
|
||||
return NULL;
|
||||
|
||||
so->ir = ir2_shader_create();
|
||||
|
||||
instr = ir2_instr_create_alu_v(so->ir, MAXv);
|
||||
ir2_dst_create(instr, 0, NULL, IR2_REG_EXPORT);
|
||||
ir2_reg_create(instr, 0, NULL, IR2_REG_CONST);
|
||||
ir2_reg_create(instr, 0, NULL, IR2_REG_CONST);
|
||||
|
||||
return assemble(so);
|
||||
}
|
||||
|
||||
/* Creates shader:
|
||||
* EXEC ADDR(0x3) CNT(0x1)
|
||||
* (S)FETCH: VERTEX R1.xyz1 = R0.x FMT_32_32_32_FLOAT
|
||||
* UNSIGNED STRIDE(12) CONST(26, 0)
|
||||
* ALLOC POSITION SIZE(0x0)
|
||||
* EXEC ADDR(0x4) CNT(0x1)
|
||||
* ALU: MAXv export62 = R1, R1 ; gl_Position
|
||||
* ALLOC PARAM/PIXEL SIZE(0x0)
|
||||
* EXEC_END ADDR(0x5) CNT(0x0)
|
||||
*/
|
||||
static struct fd2_shader_stateobj *
|
||||
create_solid_vp(void)
|
||||
{
|
||||
struct fd2_shader_stateobj *so = create_shader(MESA_SHADER_VERTEX);
|
||||
struct ir2_instruction *instr;
|
||||
|
||||
if (!so)
|
||||
return NULL;
|
||||
|
||||
so->ir = ir2_shader_create();
|
||||
|
||||
instr = ir2_instr_create_vtx_fetch(so->ir, 26, 0, FMT_32_32_32_FLOAT, false, 12);
|
||||
ir2_dst_create(instr, 1, "xyz1", 0);
|
||||
ir2_reg_create(instr, 0, "x", IR2_REG_INPUT);
|
||||
|
||||
instr = ir2_instr_create_alu_v(so->ir, MAXv);
|
||||
ir2_dst_create(instr, 62, NULL, IR2_REG_EXPORT);
|
||||
ir2_reg_create(instr, 1, NULL, 0);
|
||||
ir2_reg_create(instr, 1, NULL, 0);
|
||||
|
||||
|
||||
return assemble(so);
|
||||
A2XX_SQ_PROGRAM_CNTL_VS_REGS(vs_gprs) |
|
||||
COND(fp && fp->need_param, A2XX_SQ_PROGRAM_CNTL_PARAM_GEN) |
|
||||
COND(!fp, A2XX_SQ_PROGRAM_CNTL_GEN_INDEX_VTX));
|
||||
}
|
||||
|
||||
void
|
||||
fd2_prog_init(struct pipe_context *pctx)
|
||||
{
|
||||
struct fd_context *ctx = fd_context(pctx);
|
||||
struct fd_program_stateobj *prog;
|
||||
struct fd2_shader_stateobj *so;
|
||||
struct ir2_shader_info *info;
|
||||
instr_fetch_vtx_t *instr;
|
||||
|
||||
pctx->create_fs_state = fd2_fp_state_create;
|
||||
pctx->delete_fs_state = fd2_fp_state_delete;
|
||||
|
|
@ -442,8 +312,47 @@ fd2_prog_init(struct pipe_context *pctx)
|
|||
|
||||
fd_prog_init(pctx);
|
||||
|
||||
ctx->solid_prog.fp = create_solid_fp();
|
||||
ctx->solid_prog.vp = create_solid_vp();
|
||||
ctx->blit_prog[0].fp = create_blit_fp();
|
||||
ctx->blit_prog[0].vp = create_blit_vp();
|
||||
/* XXX maybe its possible to reuse patch_vtx_fetch somehow? */
|
||||
|
||||
prog = &ctx->solid_prog;
|
||||
so = prog->vp;
|
||||
ir2_compile(prog->vp, 1, prog->fp);
|
||||
|
||||
#define IR2_FETCH_SWIZ_XY01 0xb08
|
||||
#define IR2_FETCH_SWIZ_XYZ1 0xa88
|
||||
|
||||
info = &so->variant[1].info;
|
||||
|
||||
instr = (instr_fetch_vtx_t*) &info->dwords[info->fetch_info[0].offset];
|
||||
instr->const_index = 26;
|
||||
instr->const_index_sel = 0;
|
||||
instr->format = FMT_32_32_32_FLOAT;
|
||||
instr->format_comp_all = false;
|
||||
instr->stride = 12;
|
||||
instr->num_format_all = true;
|
||||
instr->dst_swiz = IR2_FETCH_SWIZ_XYZ1;
|
||||
|
||||
prog = &ctx->blit_prog[0];
|
||||
so = prog->vp;
|
||||
ir2_compile(prog->vp, 1, prog->fp);
|
||||
|
||||
info = &so->variant[1].info;
|
||||
|
||||
instr = (instr_fetch_vtx_t*) &info->dwords[info->fetch_info[0].offset];
|
||||
instr->const_index = 26;
|
||||
instr->const_index_sel = 1;
|
||||
instr->format = FMT_32_32_FLOAT;
|
||||
instr->format_comp_all = false;
|
||||
instr->stride = 8;
|
||||
instr->num_format_all = false;
|
||||
instr->dst_swiz = IR2_FETCH_SWIZ_XY01;
|
||||
|
||||
instr = (instr_fetch_vtx_t*) &info->dwords[info->fetch_info[1].offset];
|
||||
instr->const_index = 26;
|
||||
instr->const_index_sel = 0;
|
||||
instr->format = FMT_32_32_32_FLOAT;
|
||||
instr->format_comp_all = false;
|
||||
instr->stride = 12;
|
||||
instr->num_format_all = false;
|
||||
instr->dst_swiz = IR2_FETCH_SWIZ_XYZ1;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -31,48 +31,38 @@
|
|||
|
||||
#include "freedreno_context.h"
|
||||
|
||||
#include "ir-a2xx.h"
|
||||
#include "ir2.h"
|
||||
#include "disasm.h"
|
||||
|
||||
struct fd2_shader_stateobj {
|
||||
nir_shader *nir;
|
||||
gl_shader_stage type;
|
||||
bool is_a20x;
|
||||
|
||||
uint32_t *bin;
|
||||
|
||||
struct tgsi_token *tokens;
|
||||
|
||||
/* note that we defer compiling shader until we know both vs and ps..
|
||||
* and if one changes, we potentially need to recompile in order to
|
||||
* get varying linkages correct:
|
||||
/* note: using same set of immediates for all variants
|
||||
* it doesn't matter, other than the slightly larger command stream
|
||||
*/
|
||||
struct ir2_shader_info info;
|
||||
struct ir2_shader *ir;
|
||||
|
||||
/* for vertex shaders, the fetch instructions which need to be
|
||||
* patched up before assembly:
|
||||
*/
|
||||
unsigned num_vfetch_instrs;
|
||||
struct ir2_instruction *vfetch_instrs[64];
|
||||
|
||||
/* for all shaders, any tex fetch instructions which need to be
|
||||
* patched before assembly:
|
||||
*/
|
||||
unsigned num_tfetch_instrs;
|
||||
struct {
|
||||
unsigned samp_id;
|
||||
struct ir2_instruction *instr;
|
||||
} tfetch_instrs[64];
|
||||
|
||||
unsigned first_immediate; /* const reg # of first immediate */
|
||||
unsigned num_immediates;
|
||||
struct {
|
||||
uint32_t val[4];
|
||||
unsigned ncomp;
|
||||
} immediates[64];
|
||||
|
||||
bool writes_psize;
|
||||
bool need_param;
|
||||
|
||||
/* note:
|
||||
* fragment shader only has one variant
|
||||
* first vertex shader variant is always binning shader
|
||||
* we should use a dynamic array but in normal case there is
|
||||
* only 2 variants (and 3 sometimes with GALLIUM_HUD)
|
||||
*/
|
||||
struct ir2_shader_variant variant[8];
|
||||
};
|
||||
|
||||
void fd2_program_emit(struct fd_ringbuffer *ring,
|
||||
void fd2_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
struct fd_program_stateobj *prog);
|
||||
void fd2_program_validate(struct fd_context *ctx);
|
||||
|
||||
void fd2_prog_init(struct pipe_context *pctx);
|
||||
|
||||
|
|
|
|||
|
|
@ -87,6 +87,7 @@ typedef enum {
|
|||
SIN = 48,
|
||||
COS = 49,
|
||||
RETAIN_PREV = 50,
|
||||
SCALAR_NONE = 63,
|
||||
} instr_scalar_opc_t;
|
||||
|
||||
typedef enum {
|
||||
|
|
@ -120,6 +121,7 @@ typedef enum {
|
|||
KILLNEv = 27,
|
||||
DSTv = 28,
|
||||
MOVAv = 29,
|
||||
VECTOR_NONE = 31,
|
||||
} instr_vector_opc_t;
|
||||
|
||||
typedef struct PACKED {
|
||||
|
|
@ -161,9 +163,9 @@ typedef struct PACKED {
|
|||
};
|
||||
/* constants have full 8-bit index */
|
||||
struct {
|
||||
uint8_t src3_reg_const : 8;
|
||||
uint8_t src2_reg_const : 8;
|
||||
uint8_t src1_reg_const : 8;
|
||||
uint8_t src3_reg_byte : 8;
|
||||
uint8_t src2_reg_byte : 8;
|
||||
uint8_t src1_reg_byte : 8;
|
||||
};
|
||||
};
|
||||
instr_vector_opc_t vector_opc : 5;
|
||||
|
|
@ -389,10 +391,17 @@ typedef union PACKED {
|
|||
instr_fetch_opc_t opc : 5;
|
||||
uint32_t dummy0 : 27;
|
||||
/* dword1: */
|
||||
uint32_t dummy1 : 32;
|
||||
uint32_t dummy1 : 31;
|
||||
uint8_t pred_select : 1;
|
||||
/* dword2: */
|
||||
uint32_t dummy2 : 32;
|
||||
uint32_t dummy2 : 31;
|
||||
uint8_t pred_condition : 1;
|
||||
};
|
||||
} instr_fetch_t;
|
||||
|
||||
typedef union PACKED {
|
||||
instr_alu_t alu;
|
||||
instr_fetch_t fetch;
|
||||
} instr_t;
|
||||
|
||||
#endif /* INSTR_H_ */
|
||||
|
|
|
|||
|
|
@ -1,809 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "ir-a2xx.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "freedreno_util.h"
|
||||
#include "instr-a2xx.h"
|
||||
|
||||
#define DEBUG_MSG(f, ...) do { if (0) DBG(f, ##__VA_ARGS__); } while (0)
|
||||
#define WARN_MSG(f, ...) DBG("WARN: "f, ##__VA_ARGS__)
|
||||
#define ERROR_MSG(f, ...) DBG("ERROR: "f, ##__VA_ARGS__)
|
||||
|
||||
static int instr_emit(struct ir2_instruction *instr, uint32_t *dwords,
|
||||
uint32_t idx, struct ir2_shader_info *info);
|
||||
|
||||
static uint32_t reg_fetch_src_swiz(struct ir2_src_register *reg, uint32_t n);
|
||||
static uint32_t reg_fetch_dst_swiz(struct ir2_dst_register *reg);
|
||||
static uint32_t reg_alu_dst_swiz(struct ir2_dst_register *reg);
|
||||
static uint32_t reg_alu_src_swiz(struct ir2_src_register *reg);
|
||||
|
||||
/* simple allocator to carve allocations out of an up-front allocated heap,
|
||||
* so that we can free everything easily in one shot.
|
||||
*/
|
||||
static void * ir2_alloc(struct ir2_shader *shader, int sz)
|
||||
{
|
||||
void *ptr = &shader->heap[shader->heap_idx];
|
||||
shader->heap_idx += align(sz, 4) / 4;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
static char * ir2_strdup(struct ir2_shader *shader, const char *str)
|
||||
{
|
||||
char *ptr = NULL;
|
||||
if (str) {
|
||||
int len = strlen(str);
|
||||
ptr = ir2_alloc(shader, len+1);
|
||||
memcpy(ptr, str, len);
|
||||
ptr[len] = '\0';
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
|
||||
struct ir2_shader * ir2_shader_create(void)
|
||||
{
|
||||
DEBUG_MSG("");
|
||||
struct ir2_shader *shader = calloc(1, sizeof(struct ir2_shader));
|
||||
shader->max_reg = -1;
|
||||
return shader;
|
||||
}
|
||||
|
||||
void ir2_shader_destroy(struct ir2_shader *shader)
|
||||
{
|
||||
DEBUG_MSG("");
|
||||
free(shader);
|
||||
}
|
||||
|
||||
/* check if an instruction is a simple MOV
|
||||
*/
|
||||
static struct ir2_instruction * simple_mov(struct ir2_instruction *instr,
|
||||
bool output)
|
||||
{
|
||||
struct ir2_src_register *src_reg = instr->src_reg;
|
||||
struct ir2_dst_register *dst_reg = &instr->dst_reg;
|
||||
struct ir2_register *reg;
|
||||
unsigned i;
|
||||
|
||||
/* MAXv used for MOV */
|
||||
if (instr->instr_type != IR2_ALU_VECTOR ||
|
||||
instr->alu_vector.opc != MAXv)
|
||||
return NULL;
|
||||
|
||||
/* non identical srcs */
|
||||
if (src_reg[0].num != src_reg[1].num)
|
||||
return NULL;
|
||||
|
||||
/* flags */
|
||||
int flags = IR2_REG_NEGATE | IR2_REG_ABS;
|
||||
if (output)
|
||||
flags |= IR2_REG_INPUT | IR2_REG_CONST;
|
||||
if ((src_reg[0].flags & flags) || (src_reg[1].flags & flags))
|
||||
return NULL;
|
||||
|
||||
/* clamping */
|
||||
if (instr->alu_vector.clamp)
|
||||
return NULL;
|
||||
|
||||
/* swizzling */
|
||||
for (i = 0; i < 4; i++) {
|
||||
char swiz = (dst_reg->swizzle ? dst_reg->swizzle : "xyzw")[i];
|
||||
if (swiz == '_')
|
||||
continue;
|
||||
|
||||
if (swiz != (src_reg[0].swizzle ? src_reg[0].swizzle : "xyzw")[i] ||
|
||||
swiz != (src_reg[1].swizzle ? src_reg[1].swizzle : "xyzw")[i])
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (output)
|
||||
reg = &instr->shader->reg[src_reg[0].num];
|
||||
else
|
||||
reg = &instr->shader->reg[dst_reg->num];
|
||||
|
||||
assert(reg->write_idx >= 0);
|
||||
if (reg->write_idx != reg->write_idx2)
|
||||
return NULL;
|
||||
|
||||
if (!output)
|
||||
return instr;
|
||||
|
||||
instr = instr->shader->instr[reg->write_idx];
|
||||
return instr->instr_type != IR2_ALU_VECTOR ? NULL : instr;
|
||||
}
|
||||
|
||||
static int src_to_reg(struct ir2_instruction *instr,
|
||||
struct ir2_src_register *reg)
|
||||
{
|
||||
if (reg->flags & IR2_REG_CONST)
|
||||
return reg->num;
|
||||
|
||||
return instr->shader->reg[reg->num].reg;
|
||||
}
|
||||
|
||||
static int dst_to_reg(struct ir2_instruction *instr,
|
||||
struct ir2_dst_register *reg)
|
||||
{
|
||||
if (reg->flags & IR2_REG_EXPORT)
|
||||
return reg->num;
|
||||
|
||||
return instr->shader->reg[reg->num].reg;
|
||||
}
|
||||
|
||||
static bool mask_get(uint32_t *mask, unsigned index)
|
||||
{
|
||||
return !!(mask[index / 32] & 1 << index % 32);
|
||||
}
|
||||
|
||||
static void mask_set(uint32_t *mask, struct ir2_register *reg, int index)
|
||||
{
|
||||
if (reg) {
|
||||
unsigned i;
|
||||
for (i = 0; i < ARRAY_SIZE(reg->regmask); i++)
|
||||
mask[i] |= reg->regmask[i];
|
||||
}
|
||||
if (index >= 0)
|
||||
mask[index / 32] |= 1 << index % 32;
|
||||
}
|
||||
|
||||
static bool sets_pred(struct ir2_instruction *instr)
|
||||
{
|
||||
return instr->instr_type == IR2_ALU_SCALAR &&
|
||||
instr->alu_scalar.opc >= PRED_SETEs &&
|
||||
instr->alu_scalar.opc <= PRED_SET_RESTOREs;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void* ir2_shader_assemble(struct ir2_shader *shader,
|
||||
struct ir2_shader_info *info)
|
||||
{
|
||||
/* NOTES
|
||||
* blob compiler seems to always puts PRED_* instrs in a CF by
|
||||
* themselves, and wont combine EQ/NE in the same CF
|
||||
* (not doing this - doesn't seem to make a difference)
|
||||
*
|
||||
* TODO: implement scheduling for combining vector+scalar instructions
|
||||
* -some vector instructions can be replaced by scalar
|
||||
*/
|
||||
|
||||
/* first step:
|
||||
* 1. remove "NOP" MOV instructions generated by TGSI for input/output:
|
||||
* 2. track information for register allocation, and to remove
|
||||
* the dead code when some exports are not needed
|
||||
* 3. add additional instructions for a20x hw binning if needed
|
||||
* NOTE: modifies the shader instrs
|
||||
* this step could be done as instructions are added by compiler instead
|
||||
*/
|
||||
|
||||
/* mask of exports that must be generated
|
||||
* used to avoid calculating ps exports with hw binning
|
||||
*/
|
||||
uint64_t export = ~0ull;
|
||||
/* bitmask of variables required for exports defined by "export" */
|
||||
uint32_t export_mask[REG_MASK/32+1] = {};
|
||||
|
||||
unsigned idx, reg_idx;
|
||||
unsigned max_input = 0;
|
||||
int export_size = -1;
|
||||
|
||||
for (idx = 0; idx < shader->instr_count; idx++) {
|
||||
struct ir2_instruction *instr = shader->instr[idx], *prev;
|
||||
struct ir2_dst_register dst_reg = instr->dst_reg;
|
||||
|
||||
if (dst_reg.flags & IR2_REG_EXPORT) {
|
||||
if (dst_reg.num < 32)
|
||||
export_size++;
|
||||
|
||||
if ((prev = simple_mov(instr, true))) {
|
||||
/* copy instruction but keep dst */
|
||||
*instr = *prev;
|
||||
instr->dst_reg = dst_reg;
|
||||
}
|
||||
}
|
||||
|
||||
for (reg_idx = 0; reg_idx < instr->src_reg_count; reg_idx++) {
|
||||
struct ir2_src_register *src_reg = &instr->src_reg[reg_idx];
|
||||
struct ir2_register *reg;
|
||||
int num;
|
||||
|
||||
if (src_reg->flags & IR2_REG_CONST)
|
||||
continue;
|
||||
|
||||
num = src_reg->num;
|
||||
reg = &shader->reg[num];
|
||||
reg->read_idx = idx;
|
||||
|
||||
if (src_reg->flags & IR2_REG_INPUT) {
|
||||
max_input = MAX2(max_input, num);
|
||||
} else {
|
||||
/* bypass simple mov used to set src_reg */
|
||||
assert(reg->write_idx >= 0);
|
||||
prev = shader->instr[reg->write_idx];
|
||||
if (simple_mov(prev, false)) {
|
||||
*src_reg = prev->src_reg[0];
|
||||
/* process same src_reg again */
|
||||
reg_idx -= 1;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
/* update dependencies */
|
||||
uint32_t *mask = (dst_reg.flags & IR2_REG_EXPORT) ?
|
||||
export_mask : shader->reg[dst_reg.num].regmask;
|
||||
mask_set(mask, reg, num);
|
||||
if (sets_pred(instr))
|
||||
mask_set(export_mask, reg, num);
|
||||
}
|
||||
}
|
||||
|
||||
/* second step:
|
||||
* emit instructions (with CFs) + RA
|
||||
*/
|
||||
instr_cf_t cfs[128], *cf = cfs;
|
||||
uint32_t alufetch[3*256], *af = alufetch;
|
||||
|
||||
/* RA is done on write, so inputs must be allocated here */
|
||||
for (reg_idx = 0; reg_idx <= max_input; reg_idx++)
|
||||
shader->reg[reg_idx].reg = reg_idx;
|
||||
info->max_reg = max_input;
|
||||
|
||||
/* CF instr state */
|
||||
instr_cf_exec_t exec = { .opc = EXEC };
|
||||
instr_cf_alloc_t alloc = { .opc = ALLOC };
|
||||
bool need_alloc = 0;
|
||||
bool pos_export = 0;
|
||||
|
||||
export_size = MAX2(export_size, 0);
|
||||
|
||||
for (idx = 0; idx < shader->instr_count; idx++) {
|
||||
struct ir2_instruction *instr = shader->instr[idx];
|
||||
struct ir2_dst_register *dst_reg = &instr->dst_reg;
|
||||
unsigned num = dst_reg->num;
|
||||
struct ir2_register *reg;
|
||||
|
||||
/* a2xx only has 64 registers, so we can use a single 64-bit mask */
|
||||
uint64_t regmask = 0ull;
|
||||
|
||||
/* compute the current regmask */
|
||||
for (reg_idx = 0; (int) reg_idx <= shader->max_reg; reg_idx++) {
|
||||
reg = &shader->reg[reg_idx];
|
||||
if ((int) idx > reg->write_idx && idx < reg->read_idx)
|
||||
regmask |= (1ull << reg->reg);
|
||||
}
|
||||
|
||||
if (dst_reg->flags & IR2_REG_EXPORT) {
|
||||
/* skip if export is not needed */
|
||||
if (!(export & (1ull << num)))
|
||||
continue;
|
||||
|
||||
/* ALLOC CF:
|
||||
* want to alloc all < 32 at once
|
||||
* 32/33 and 62/63 come in pairs
|
||||
* XXX assuming all 3 types are never interleaved
|
||||
*/
|
||||
if (num < 32) {
|
||||
alloc.size = export_size;
|
||||
alloc.buffer_select = SQ_PARAMETER_PIXEL;
|
||||
need_alloc = export_size >= 0;
|
||||
export_size = -1;
|
||||
} else if (num == 32 || num == 33) {
|
||||
alloc.size = 0;
|
||||
alloc.buffer_select = SQ_MEMORY;
|
||||
need_alloc = num != 33;
|
||||
} else {
|
||||
alloc.size = 0;
|
||||
alloc.buffer_select = SQ_POSITION;
|
||||
need_alloc = !pos_export;
|
||||
pos_export = true;
|
||||
}
|
||||
|
||||
} else {
|
||||
/* skip if dst register not needed to compute exports */
|
||||
if (!mask_get(export_mask, num))
|
||||
continue;
|
||||
|
||||
/* RA on first write */
|
||||
reg = &shader->reg[num];
|
||||
if (reg->write_idx == idx) {
|
||||
reg->reg = ffsll(~regmask) - 1;
|
||||
info->max_reg = MAX2(info->max_reg, reg->reg);
|
||||
}
|
||||
}
|
||||
|
||||
if (exec.count == 6 || (exec.count && need_alloc)) {
|
||||
*cf++ = *(instr_cf_t*) &exec;
|
||||
exec.address += exec.count;
|
||||
exec.serialize = 0;
|
||||
exec.count = 0;
|
||||
}
|
||||
|
||||
if (need_alloc) {
|
||||
*cf++ = *(instr_cf_t*) &alloc;
|
||||
need_alloc = false;
|
||||
}
|
||||
|
||||
int ret = instr_emit(instr, af, idx, info); af += 3;
|
||||
assert(!ret);
|
||||
|
||||
if (instr->instr_type == IR2_FETCH)
|
||||
exec.serialize |= 0x1 << exec.count * 2;
|
||||
if (instr->sync)
|
||||
exec.serialize |= 0x2 << exec.count * 2;
|
||||
exec.count += 1;
|
||||
}
|
||||
|
||||
|
||||
exec.opc = !export_size ? EXEC : EXEC_END;
|
||||
*cf++ = *(instr_cf_t*) &exec;
|
||||
exec.address += exec.count;
|
||||
exec.serialize = 0;
|
||||
exec.count = 0;
|
||||
|
||||
/* GPU will hang without at least one pixel alloc */
|
||||
if (!export_size) {
|
||||
alloc.size = 0;
|
||||
alloc.buffer_select = SQ_PARAMETER_PIXEL;
|
||||
*cf++ = *(instr_cf_t*) &alloc;
|
||||
|
||||
exec.opc = EXEC_END;
|
||||
*cf++ = *(instr_cf_t*) &exec;
|
||||
}
|
||||
|
||||
unsigned num_cfs = cf - cfs;
|
||||
|
||||
/* insert nop to get an even # of CFs */
|
||||
if (num_cfs % 2) {
|
||||
*cf++ = (instr_cf_t) { .opc = NOP };
|
||||
num_cfs++;
|
||||
}
|
||||
|
||||
/* offset cf addrs */
|
||||
for (idx = 0; idx < num_cfs; idx++) {
|
||||
switch (cfs[idx].opc) {
|
||||
case EXEC:
|
||||
case EXEC_END:
|
||||
cfs[idx].exec.address += num_cfs / 2;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
/* XXX and any other address using cf that gets implemented */
|
||||
}
|
||||
}
|
||||
|
||||
/* concatenate cfs+alufetchs */
|
||||
uint32_t cfdwords = num_cfs / 2 * 3;
|
||||
uint32_t alufetchdwords = exec.address * 3;
|
||||
info->sizedwords = cfdwords + alufetchdwords;
|
||||
uint32_t *dwords = malloc(info->sizedwords * 4);
|
||||
assert(dwords);
|
||||
memcpy(dwords, cfs, cfdwords * 4);
|
||||
memcpy(&dwords[cfdwords], alufetch, alufetchdwords * 4);
|
||||
return dwords;
|
||||
}
|
||||
|
||||
struct ir2_instruction * ir2_instr_create(struct ir2_shader *shader,
|
||||
int instr_type)
|
||||
{
|
||||
struct ir2_instruction *instr =
|
||||
ir2_alloc(shader, sizeof(struct ir2_instruction));
|
||||
DEBUG_MSG("%d", instr_type);
|
||||
instr->shader = shader;
|
||||
instr->idx = shader->instr_count;
|
||||
instr->pred = shader->pred;
|
||||
instr->instr_type = instr_type;
|
||||
shader->instr[shader->instr_count++] = instr;
|
||||
return instr;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* FETCH instructions:
|
||||
*/
|
||||
|
||||
static int instr_emit_fetch(struct ir2_instruction *instr,
|
||||
uint32_t *dwords, uint32_t idx,
|
||||
struct ir2_shader_info *info)
|
||||
{
|
||||
instr_fetch_t *fetch = (instr_fetch_t *)dwords;
|
||||
struct ir2_dst_register *dst_reg = &instr->dst_reg;
|
||||
struct ir2_src_register *src_reg = &instr->src_reg[0];
|
||||
|
||||
memset(fetch, 0, sizeof(*fetch));
|
||||
|
||||
fetch->opc = instr->fetch.opc;
|
||||
|
||||
if (instr->fetch.opc == VTX_FETCH) {
|
||||
instr_fetch_vtx_t *vtx = &fetch->vtx;
|
||||
|
||||
assert(instr->fetch.stride <= 0xff);
|
||||
assert(instr->fetch.fmt <= 0x3f);
|
||||
assert(instr->fetch.const_idx <= 0x1f);
|
||||
assert(instr->fetch.const_idx_sel <= 0x3);
|
||||
|
||||
vtx->src_reg = src_to_reg(instr, src_reg);
|
||||
vtx->src_swiz = reg_fetch_src_swiz(src_reg, 1);
|
||||
vtx->dst_reg = dst_to_reg(instr, dst_reg);
|
||||
vtx->dst_swiz = reg_fetch_dst_swiz(dst_reg);
|
||||
vtx->must_be_one = 1;
|
||||
vtx->const_index = instr->fetch.const_idx;
|
||||
vtx->const_index_sel = instr->fetch.const_idx_sel;
|
||||
vtx->format_comp_all = !!instr->fetch.is_signed;
|
||||
vtx->num_format_all = !instr->fetch.is_normalized;
|
||||
vtx->format = instr->fetch.fmt;
|
||||
vtx->stride = instr->fetch.stride;
|
||||
vtx->offset = instr->fetch.offset;
|
||||
|
||||
if (instr->pred != IR2_PRED_NONE) {
|
||||
vtx->pred_select = 1;
|
||||
vtx->pred_condition = (instr->pred == IR2_PRED_EQ) ? 1 : 0;
|
||||
}
|
||||
|
||||
/* XXX seems like every FETCH but the first has
|
||||
* this bit set:
|
||||
*/
|
||||
vtx->reserved3 = (idx > 0) ? 0x1 : 0x0;
|
||||
vtx->reserved0 = (idx > 0) ? 0x2 : 0x3;
|
||||
} else if (instr->fetch.opc == TEX_FETCH) {
|
||||
instr_fetch_tex_t *tex = &fetch->tex;
|
||||
|
||||
assert(instr->fetch.const_idx <= 0x1f);
|
||||
|
||||
tex->src_reg = src_to_reg(instr, src_reg);
|
||||
tex->src_swiz = reg_fetch_src_swiz(src_reg, 3);
|
||||
tex->dst_reg = dst_to_reg(instr, dst_reg);
|
||||
tex->dst_swiz = reg_fetch_dst_swiz(dst_reg);
|
||||
tex->const_idx = instr->fetch.const_idx;
|
||||
tex->mag_filter = TEX_FILTER_USE_FETCH_CONST;
|
||||
tex->min_filter = TEX_FILTER_USE_FETCH_CONST;
|
||||
tex->mip_filter = TEX_FILTER_USE_FETCH_CONST;
|
||||
tex->aniso_filter = ANISO_FILTER_USE_FETCH_CONST;
|
||||
tex->arbitrary_filter = ARBITRARY_FILTER_USE_FETCH_CONST;
|
||||
tex->vol_mag_filter = TEX_FILTER_USE_FETCH_CONST;
|
||||
tex->vol_min_filter = TEX_FILTER_USE_FETCH_CONST;
|
||||
tex->use_comp_lod = 1;
|
||||
tex->use_reg_lod = !instr->fetch.is_cube;
|
||||
tex->sample_location = SAMPLE_CENTER;
|
||||
tex->tx_coord_denorm = instr->fetch.is_rect;
|
||||
|
||||
if (instr->pred != IR2_PRED_NONE) {
|
||||
tex->pred_select = 1;
|
||||
tex->pred_condition = (instr->pred == IR2_PRED_EQ) ? 1 : 0;
|
||||
}
|
||||
|
||||
} else {
|
||||
ERROR_MSG("invalid fetch opc: %d\n", instr->fetch.opc);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* ALU instructions:
|
||||
*/
|
||||
|
||||
static int instr_emit_alu(struct ir2_instruction *instr_v,
|
||||
struct ir2_instruction *instr_s, uint32_t *dwords,
|
||||
struct ir2_shader_info *info)
|
||||
{
|
||||
instr_alu_t *alu = (instr_alu_t *)dwords;
|
||||
struct ir2_dst_register *vdst_reg, *sdst_reg;
|
||||
struct ir2_src_register *src1_reg, *src2_reg, *src3_reg;
|
||||
struct ir2_shader *shader = instr_v ? instr_v->shader : instr_s->shader;
|
||||
enum ir2_pred pred = IR2_PRED_NONE;
|
||||
|
||||
memset(alu, 0, sizeof(*alu));
|
||||
|
||||
vdst_reg = NULL;
|
||||
sdst_reg = NULL;
|
||||
src1_reg = NULL;
|
||||
src2_reg = NULL;
|
||||
src3_reg = NULL;
|
||||
|
||||
if (instr_v) {
|
||||
vdst_reg = &instr_v->dst_reg;
|
||||
assert(instr_v->src_reg_count >= 2);
|
||||
src1_reg = &instr_v->src_reg[0];
|
||||
src2_reg = &instr_v->src_reg[1];
|
||||
if (instr_v->src_reg_count > 2)
|
||||
src3_reg = &instr_v->src_reg[2];
|
||||
pred = instr_v->pred;
|
||||
}
|
||||
|
||||
if (instr_s) {
|
||||
sdst_reg = &instr_s->dst_reg;
|
||||
assert(instr_s->src_reg_count == 1);
|
||||
assert(!instr_v || vdst_reg->flags == sdst_reg->flags);
|
||||
assert(!instr_v || pred == instr_s->pred);
|
||||
if (src3_reg) {
|
||||
assert(src3_reg->flags == instr_s->src_reg[0].flags);
|
||||
assert(src3_reg->num == instr_s->src_reg[0].num);
|
||||
assert(!strcmp(src3_reg->swizzle, instr_s->src_reg[0].swizzle));
|
||||
}
|
||||
src3_reg = &instr_s->src_reg[0];
|
||||
pred = instr_s->pred;
|
||||
}
|
||||
|
||||
if (vdst_reg) {
|
||||
assert((vdst_reg->flags & ~IR2_REG_EXPORT) == 0);
|
||||
assert(!vdst_reg->swizzle || (strlen(vdst_reg->swizzle) == 4));
|
||||
alu->vector_opc = instr_v->alu_vector.opc;
|
||||
alu->vector_write_mask = reg_alu_dst_swiz(vdst_reg);
|
||||
alu->vector_dest = dst_to_reg(instr_v, vdst_reg);
|
||||
} else {
|
||||
alu->vector_opc = MAXv;
|
||||
}
|
||||
|
||||
if (sdst_reg) {
|
||||
alu->scalar_opc = instr_s->alu_scalar.opc;
|
||||
alu->scalar_write_mask = reg_alu_dst_swiz(sdst_reg);
|
||||
alu->scalar_dest = dst_to_reg(instr_s, sdst_reg);
|
||||
} else {
|
||||
/* not sure if this is required, but adreno compiler seems
|
||||
* to always set scalar opc to MAXs if it is not used:
|
||||
*/
|
||||
alu->scalar_opc = MAXs;
|
||||
}
|
||||
|
||||
alu->export_data =
|
||||
!!((instr_v ? vdst_reg : sdst_reg)->flags & IR2_REG_EXPORT);
|
||||
|
||||
/* export32 has this bit set.. it seems to do more than just set
|
||||
* the base address of the constants used to zero
|
||||
* TODO make this less of a hack
|
||||
*/
|
||||
if (alu->export_data && alu->vector_dest == 32) {
|
||||
assert(!instr_s);
|
||||
alu->relative_addr = 1;
|
||||
}
|
||||
|
||||
if (src1_reg) {
|
||||
if (src1_reg->flags & IR2_REG_CONST) {
|
||||
assert(!(src1_reg->flags & IR2_REG_ABS));
|
||||
alu->src1_reg_const = src1_reg->num;
|
||||
} else {
|
||||
alu->src1_reg = shader->reg[src1_reg->num].reg;
|
||||
alu->src1_reg_abs = !!(src1_reg->flags & IR2_REG_ABS);
|
||||
}
|
||||
alu->src1_swiz = reg_alu_src_swiz(src1_reg);
|
||||
alu->src1_reg_negate = !!(src1_reg->flags & IR2_REG_NEGATE);
|
||||
alu->src1_sel = !(src1_reg->flags & IR2_REG_CONST);
|
||||
} else {
|
||||
alu->src1_sel = 1;
|
||||
}
|
||||
|
||||
if (src2_reg) {
|
||||
if (src2_reg->flags & IR2_REG_CONST) {
|
||||
assert(!(src2_reg->flags & IR2_REG_ABS));
|
||||
alu->src2_reg_const = src2_reg->num;
|
||||
} else {
|
||||
alu->src2_reg = shader->reg[src2_reg->num].reg;
|
||||
alu->src2_reg_abs = !!(src2_reg->flags & IR2_REG_ABS);
|
||||
}
|
||||
alu->src2_swiz = reg_alu_src_swiz(src2_reg);
|
||||
alu->src2_reg_negate = !!(src2_reg->flags & IR2_REG_NEGATE);
|
||||
alu->src2_sel = !(src2_reg->flags & IR2_REG_CONST);
|
||||
} else {
|
||||
alu->src2_sel = 1;
|
||||
}
|
||||
|
||||
if (src3_reg) {
|
||||
if (src3_reg->flags & IR2_REG_CONST) {
|
||||
assert(!(src3_reg->flags & IR2_REG_ABS));
|
||||
alu->src3_reg_const = src3_reg->num;
|
||||
} else {
|
||||
alu->src3_reg = shader->reg[src3_reg->num].reg;
|
||||
alu->src3_reg_abs = !!(src3_reg->flags & IR2_REG_ABS);
|
||||
}
|
||||
alu->src3_swiz = reg_alu_src_swiz(src3_reg);
|
||||
alu->src3_reg_negate = !!(src3_reg->flags & IR2_REG_NEGATE);
|
||||
alu->src3_sel = !(src3_reg->flags & IR2_REG_CONST);
|
||||
} else {
|
||||
/* not sure if this is required, but adreno compiler seems
|
||||
* to always set register bank for 3rd src if unused:
|
||||
*/
|
||||
alu->src3_sel = 1;
|
||||
}
|
||||
|
||||
alu->vector_clamp = instr_v ? instr_v->alu_vector.clamp : 0;
|
||||
alu->scalar_clamp = instr_s ? instr_s->alu_scalar.clamp : 0;
|
||||
|
||||
if (pred != IR2_PRED_NONE)
|
||||
alu->pred_select = (pred == IR2_PRED_EQ) ? 3 : 2;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int instr_emit(struct ir2_instruction *instr, uint32_t *dwords,
|
||||
uint32_t idx, struct ir2_shader_info *info)
|
||||
{
|
||||
switch (instr->instr_type) {
|
||||
case IR2_FETCH: return instr_emit_fetch(instr, dwords, idx, info);
|
||||
case IR2_ALU_VECTOR: return instr_emit_alu(instr, NULL, dwords, info);
|
||||
case IR2_ALU_SCALAR: return instr_emit_alu(NULL, instr, dwords, info);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
struct ir2_dst_register * ir2_dst_create(struct ir2_instruction *instr,
|
||||
int num, const char *swizzle, int flags)
|
||||
{
|
||||
if (!(flags & IR2_REG_EXPORT)) {
|
||||
struct ir2_register *reg = &instr->shader->reg[num];
|
||||
|
||||
unsigned i;
|
||||
for (i = instr->shader->max_reg + 1; i <= num; i++)
|
||||
instr->shader->reg[i].write_idx = -1;
|
||||
instr->shader->max_reg = i - 1;
|
||||
|
||||
if (reg->write_idx < 0)
|
||||
reg->write_idx = instr->idx;
|
||||
reg->write_idx2 = instr->idx;
|
||||
}
|
||||
|
||||
struct ir2_dst_register *reg = &instr->dst_reg;
|
||||
reg->flags = flags;
|
||||
reg->num = num;
|
||||
reg->swizzle = ir2_strdup(instr->shader, swizzle);
|
||||
return reg;
|
||||
}
|
||||
|
||||
struct ir2_src_register * ir2_reg_create(struct ir2_instruction *instr,
|
||||
int num, const char *swizzle, int flags)
|
||||
{
|
||||
assert(instr->src_reg_count + 1 <= ARRAY_SIZE(instr->src_reg));
|
||||
if (!(flags & IR2_REG_CONST)) {
|
||||
struct ir2_register *reg = &instr->shader->reg[num];
|
||||
|
||||
reg->read_idx = instr->idx;
|
||||
|
||||
unsigned i;
|
||||
for (i = instr->shader->max_reg + 1; i <= num; i++)
|
||||
instr->shader->reg[i].write_idx = -1;
|
||||
instr->shader->max_reg = i - 1;
|
||||
}
|
||||
|
||||
struct ir2_src_register *reg = &instr->src_reg[instr->src_reg_count++];
|
||||
reg->flags = flags;
|
||||
reg->num = num;
|
||||
reg->swizzle = ir2_strdup(instr->shader, swizzle);
|
||||
return reg;
|
||||
}
|
||||
|
||||
static uint32_t reg_fetch_src_swiz(struct ir2_src_register *reg, uint32_t n)
|
||||
{
|
||||
uint32_t swiz = 0;
|
||||
int i;
|
||||
|
||||
assert((reg->flags & ~IR2_REG_INPUT) == 0);
|
||||
assert(reg->swizzle);
|
||||
|
||||
DEBUG_MSG("fetch src R%d.%s", reg->num, reg->swizzle);
|
||||
|
||||
for (i = n-1; i >= 0; i--) {
|
||||
swiz <<= 2;
|
||||
switch (reg->swizzle[i]) {
|
||||
default:
|
||||
ERROR_MSG("invalid fetch src swizzle: %s", reg->swizzle);
|
||||
case 'x': swiz |= 0x0; break;
|
||||
case 'y': swiz |= 0x1; break;
|
||||
case 'z': swiz |= 0x2; break;
|
||||
case 'w': swiz |= 0x3; break;
|
||||
}
|
||||
}
|
||||
|
||||
return swiz;
|
||||
}
|
||||
|
||||
static uint32_t reg_fetch_dst_swiz(struct ir2_dst_register *reg)
|
||||
{
|
||||
uint32_t swiz = 0;
|
||||
int i;
|
||||
|
||||
assert(reg->flags == 0);
|
||||
assert(!reg->swizzle || (strlen(reg->swizzle) == 4));
|
||||
|
||||
DEBUG_MSG("fetch dst R%d.%s", reg->num, reg->swizzle);
|
||||
|
||||
if (reg->swizzle) {
|
||||
for (i = 3; i >= 0; i--) {
|
||||
swiz <<= 3;
|
||||
switch (reg->swizzle[i]) {
|
||||
default:
|
||||
ERROR_MSG("invalid dst swizzle: %s", reg->swizzle);
|
||||
case 'x': swiz |= 0x0; break;
|
||||
case 'y': swiz |= 0x1; break;
|
||||
case 'z': swiz |= 0x2; break;
|
||||
case 'w': swiz |= 0x3; break;
|
||||
case '0': swiz |= 0x4; break;
|
||||
case '1': swiz |= 0x5; break;
|
||||
case '_': swiz |= 0x7; break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
swiz = 0x688;
|
||||
}
|
||||
|
||||
return swiz;
|
||||
}
|
||||
|
||||
/* actually, a write-mask */
|
||||
static uint32_t reg_alu_dst_swiz(struct ir2_dst_register *reg)
|
||||
{
|
||||
uint32_t swiz = 0;
|
||||
int i;
|
||||
|
||||
assert((reg->flags & ~IR2_REG_EXPORT) == 0);
|
||||
assert(!reg->swizzle || (strlen(reg->swizzle) == 4));
|
||||
|
||||
DEBUG_MSG("alu dst R%d.%s", reg->num, reg->swizzle);
|
||||
|
||||
if (reg->swizzle) {
|
||||
for (i = 3; i >= 0; i--) {
|
||||
swiz <<= 1;
|
||||
if (reg->swizzle[i] == "xyzw"[i]) {
|
||||
swiz |= 0x1;
|
||||
} else if (reg->swizzle[i] != '_') {
|
||||
ERROR_MSG("invalid dst swizzle: %s", reg->swizzle);
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
swiz = 0xf;
|
||||
}
|
||||
|
||||
return swiz;
|
||||
}
|
||||
|
||||
static uint32_t reg_alu_src_swiz(struct ir2_src_register *reg)
|
||||
{
|
||||
uint32_t swiz = 0;
|
||||
int i;
|
||||
|
||||
assert(!reg->swizzle || (strlen(reg->swizzle) == 4));
|
||||
|
||||
DEBUG_MSG("vector src R%d.%s", reg->num, reg->swizzle);
|
||||
|
||||
if (reg->swizzle) {
|
||||
for (i = 3; i >= 0; i--) {
|
||||
swiz <<= 2;
|
||||
switch (reg->swizzle[i]) {
|
||||
default:
|
||||
ERROR_MSG("invalid vector src swizzle: %s", reg->swizzle);
|
||||
case 'x': swiz |= (0x0 - i) & 0x3; break;
|
||||
case 'y': swiz |= (0x1 - i) & 0x3; break;
|
||||
case 'z': swiz |= (0x2 - i) & 0x3; break;
|
||||
case 'w': swiz |= (0x3 - i) & 0x3; break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
swiz = 0x0;
|
||||
}
|
||||
|
||||
return swiz;
|
||||
}
|
||||
|
|
@ -1,188 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef IR2_H_
|
||||
#define IR2_H_
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "instr-a2xx.h"
|
||||
|
||||
/* low level intermediate representation of an adreno a2xx shader program */
|
||||
|
||||
struct ir2_shader;
|
||||
|
||||
#define REG_MASK 0xff
|
||||
|
||||
struct ir2_shader_info {
|
||||
uint16_t sizedwords;
|
||||
int8_t max_reg; /* highest GPR # used by shader */
|
||||
};
|
||||
|
||||
struct ir2_register {
|
||||
int16_t write_idx, write_idx2, read_idx, reg;
|
||||
/* bitmask of variables on which this one depends
|
||||
* XXX: use bitmask util?
|
||||
*/
|
||||
uint32_t regmask[REG_MASK/32+1];
|
||||
};
|
||||
|
||||
struct ir2_src_register {
|
||||
enum {
|
||||
IR2_REG_INPUT = 0x1,
|
||||
IR2_REG_CONST = 0x2,
|
||||
IR2_REG_NEGATE = 0x4,
|
||||
IR2_REG_ABS = 0x8,
|
||||
} flags;
|
||||
int num;
|
||||
char *swizzle;
|
||||
};
|
||||
|
||||
struct ir2_dst_register {
|
||||
enum {
|
||||
IR2_REG_EXPORT = 0x1,
|
||||
} flags;
|
||||
int num;
|
||||
char *swizzle;
|
||||
};
|
||||
|
||||
enum ir2_pred {
|
||||
IR2_PRED_NONE = 0,
|
||||
IR2_PRED_EQ = 1,
|
||||
IR2_PRED_NE = 2,
|
||||
};
|
||||
|
||||
struct ir2_instruction {
|
||||
struct ir2_shader *shader;
|
||||
unsigned idx;
|
||||
enum {
|
||||
IR2_FETCH,
|
||||
IR2_ALU_VECTOR,
|
||||
IR2_ALU_SCALAR,
|
||||
} instr_type;
|
||||
enum ir2_pred pred;
|
||||
int sync;
|
||||
unsigned src_reg_count;
|
||||
struct ir2_dst_register dst_reg;
|
||||
struct ir2_src_register src_reg[3];
|
||||
union {
|
||||
/* FETCH specific: */
|
||||
struct {
|
||||
instr_fetch_opc_t opc;
|
||||
unsigned const_idx;
|
||||
/* texture fetch specific: */
|
||||
bool is_cube : 1;
|
||||
bool is_rect : 1;
|
||||
/* vertex fetch specific: */
|
||||
unsigned const_idx_sel;
|
||||
enum a2xx_sq_surfaceformat fmt;
|
||||
bool is_signed : 1;
|
||||
bool is_normalized : 1;
|
||||
uint32_t stride;
|
||||
uint32_t offset;
|
||||
} fetch;
|
||||
/* ALU-Vector specific: */
|
||||
struct {
|
||||
instr_vector_opc_t opc;
|
||||
bool clamp;
|
||||
} alu_vector;
|
||||
/* ALU-Scalar specific: */
|
||||
struct {
|
||||
instr_scalar_opc_t opc;
|
||||
bool clamp;
|
||||
} alu_scalar;
|
||||
};
|
||||
};
|
||||
|
||||
struct ir2_shader {
|
||||
unsigned instr_count;
|
||||
int max_reg;
|
||||
struct ir2_register reg[REG_MASK+1];
|
||||
|
||||
struct ir2_instruction *instr[0x200];
|
||||
uint32_t heap[100 * 4096];
|
||||
unsigned heap_idx;
|
||||
|
||||
enum ir2_pred pred; /* pred inherited by newly created instrs */
|
||||
};
|
||||
|
||||
struct ir2_shader * ir2_shader_create(void);
|
||||
void ir2_shader_destroy(struct ir2_shader *shader);
|
||||
void * ir2_shader_assemble(struct ir2_shader *shader,
|
||||
struct ir2_shader_info *info);
|
||||
|
||||
struct ir2_instruction * ir2_instr_create(struct ir2_shader *shader,
|
||||
int instr_type);
|
||||
|
||||
struct ir2_dst_register * ir2_dst_create(struct ir2_instruction *instr,
|
||||
int num, const char *swizzle, int flags);
|
||||
struct ir2_src_register * ir2_reg_create(struct ir2_instruction *instr,
|
||||
int num, const char *swizzle, int flags);
|
||||
|
||||
/* some helper fxns: */
|
||||
|
||||
static inline struct ir2_instruction *
|
||||
ir2_instr_create_alu_v(struct ir2_shader *shader, instr_vector_opc_t vop)
|
||||
{
|
||||
struct ir2_instruction *instr = ir2_instr_create(shader, IR2_ALU_VECTOR);
|
||||
if (!instr)
|
||||
return instr;
|
||||
instr->alu_vector.opc = vop;
|
||||
return instr;
|
||||
}
|
||||
|
||||
static inline struct ir2_instruction *
|
||||
ir2_instr_create_alu_s(struct ir2_shader *shader, instr_scalar_opc_t sop)
|
||||
{
|
||||
struct ir2_instruction *instr = ir2_instr_create(shader, IR2_ALU_SCALAR);
|
||||
if (!instr)
|
||||
return instr;
|
||||
instr->alu_scalar.opc = sop;
|
||||
return instr;
|
||||
}
|
||||
|
||||
static inline struct ir2_instruction *
|
||||
ir2_instr_create_vtx_fetch(struct ir2_shader *shader, int ci, int cis,
|
||||
enum a2xx_sq_surfaceformat fmt, bool is_signed, int stride)
|
||||
{
|
||||
struct ir2_instruction *instr = ir2_instr_create(shader, IR2_FETCH);
|
||||
instr->fetch.opc = VTX_FETCH;
|
||||
instr->fetch.const_idx = ci;
|
||||
instr->fetch.const_idx_sel = cis;
|
||||
instr->fetch.fmt = fmt;
|
||||
instr->fetch.is_signed = is_signed;
|
||||
instr->fetch.stride = stride;
|
||||
return instr;
|
||||
}
|
||||
static inline struct ir2_instruction *
|
||||
ir2_instr_create_tex_fetch(struct ir2_shader *shader, int ci)
|
||||
{
|
||||
struct ir2_instruction *instr = ir2_instr_create(shader, IR2_FETCH);
|
||||
instr->fetch.opc = TEX_FETCH;
|
||||
instr->fetch.const_idx = ci;
|
||||
return instr;
|
||||
}
|
||||
|
||||
|
||||
#endif /* IR2_H_ */
|
||||
304
src/gallium/drivers/freedreno/a2xx/ir2.c
Normal file
304
src/gallium/drivers/freedreno/a2xx/ir2.c
Normal file
|
|
@ -0,0 +1,304 @@
|
|||
/*
|
||||
* Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Jonathan Marek <jonathan@marek.ca>
|
||||
*/
|
||||
|
||||
#include "ir2_private.h"
|
||||
|
||||
static bool scalar_possible(struct ir2_instr *instr)
|
||||
{
|
||||
if (instr->alu.scalar_opc == SCALAR_NONE)
|
||||
return false;
|
||||
|
||||
return src_ncomp(instr) == 1;
|
||||
}
|
||||
|
||||
/* can instruction b be co-issued in the scalar slot next to vector
 * instruction a? a == NULL means the vector slot is free, so anything fits.
 */
static bool is_alu_compatible(struct ir2_instr *a, struct ir2_instr *b)
{
	if (!a)
		return true;

	/* dont use same instruction twice */
	if (a == b)
		return false;

	/* PRED_SET must be alone */
	if (b->alu.scalar_opc >= PRED_SETEs &&
		b->alu.scalar_opc <= PRED_SET_RESTOREs)
		return false;

	/* must write to same export (issues otherwise?) */
	return a->alu.export == b->alu.export;
}
|
||||
|
||||
/* priority of vector instruction for scheduling (lower=higher prio)
 * ~0u means "cannot be scheduled in the vector slot at all"
 */
static unsigned alu_vector_prio(struct ir2_instr *instr)
{
	if (instr->alu.vector_opc == VECTOR_NONE)
		return ~0u;

	if (is_export(instr))
		return 4;

	/* TODO check src type and ncomps */
	if (instr->src_count == 3)
		return 0;	/* 3-src ops can only ever go in the vector slot */

	if (!scalar_possible(instr))
		return 1;	/* vector-only op */

	/* could also go scalar, so lower priority for the vector slot */
	return instr->src_count == 2 ? 2 : 3;
}
|
||||
|
||||
/* priority of scalar instruction for scheduling (lower=higher prio)
 * ~0u means "cannot be scheduled in the scalar slot"
 */
static unsigned alu_scalar_prio(struct ir2_instr *instr)
{
	if (!scalar_possible(instr))
		return ~0u;

	/* this case is dealt with later */
	if (instr->src_count > 1)
		return ~0u;

	if (is_export(instr))
		return 4;

	/* PRED to end of block */
	if (instr->alu.scalar_opc >= PRED_SETEs &&
		instr->alu.scalar_opc <= PRED_SET_RESTOREs)
		return 5;

	/* scalar-only ops have the highest priority */
	return instr->alu.vector_opc == VECTOR_NONE ? 0 : 3;
}
|
||||
|
||||
/* fill sched with next fetch or (vector and/or scalar) alu instruction
 *
 * Picks the next schedulable instruction(s): either a single FETCH, or a
 * vector ALU instruction optionally paired with a compatible scalar ALU
 * instruction for the co-issue slot.  Also drives register allocation by
 * freeing consumed sources and allocating destinations.
 *
 * Returns the block index of the scheduled instruction(s), or -1 when
 * nothing is left to emit.
 */
static int sched_next(struct ir2_context *ctx, struct ir2_sched_instr *sched)
{
	struct ir2_instr *avail[0x100], *instr_v = NULL, *instr_s = NULL;
	unsigned avail_count = 0;

	instr_alloc_type_t export = ~0u;
	int block_idx = -1;

	/* XXX merge this loop with the other one somehow? */
	/* find the lowest-numbered export buffer still pending, so that all
	 * exports to one buffer are emitted together
	 */
	ir2_foreach_instr(instr, ctx) {
		if (!instr->need_emit)
			continue;
		if (is_export(instr))
			export = MIN2(export, export_buf(instr->alu.export));
	}

	/* collect all instructions that are ready to be emitted */
	ir2_foreach_instr(instr, ctx) {
		if (!instr->need_emit)
			continue;

		/* dont mix exports */
		if (is_export(instr) && export_buf(instr->alu.export) != export)
			continue;

		if (block_idx < 0)
			block_idx = instr->block_idx;
		else if (block_idx != instr->block_idx || /* must be same block */
				instr->type == IR2_CF || /* CF/MEM must be alone */
				(is_export(instr) && export == SQ_MEMORY))
			break;
		/* it works because IR2_CF is always at end of block
		 * and somewhat same idea with MEM exports, which might not be alone
		 * but will end up in-order at least
		 */

		/* check if dependencies are satisfied */
		bool is_ok = true;
		ir2_foreach_src(src, instr) {
			if (src->type == IR2_SRC_REG) {
				/* need to check if all previous instructions in the block
				 * which write the reg have been emitted
				 * slow..
				 * XXX: check components instead of whole register
				 */
				struct ir2_reg *reg = get_reg_src(ctx, src);
				ir2_foreach_instr(p, ctx) {
					if (!p->is_ssa && p->reg == reg && p->idx < instr->idx)
						is_ok &= !p->need_emit;
				}
			} else if (src->type == IR2_SRC_SSA) {
				/* in this case its easy, just check need_emit */
				is_ok &= !ctx->instr[src->num].need_emit;
			}
		}
		if (!is_ok)
			continue;

		avail[avail_count++] = instr;
	}

	if (!avail_count) {
		assert(block_idx == -1);
		return -1;
	}

	/* priority to FETCH instructions */
	ir2_foreach_avail(instr) {
		if (instr->type == IR2_ALU)
			continue;

		ra_src_free(ctx, instr);
		ra_reg(ctx, get_reg(instr), -1, false, 0);

		instr->need_emit = false;
		sched->instr = instr;
		sched->instr_s = NULL;
		return block_idx;
	}

	/* TODO precompute priorities */

	/* pick the best candidate for the vector slot */
	unsigned prio_v = ~0u, prio_s = ~0u, prio;
	ir2_foreach_avail(instr) {
		prio = alu_vector_prio(instr);
		if (prio < prio_v) {
			instr_v = instr;
			prio_v = prio;
		}
	}

	/* TODO can still insert scalar if src_count=3, if smart about it */
	if (!instr_v || instr_v->src_count < 3) {
		ir2_foreach_avail(instr) {
			bool compat = is_alu_compatible(instr_v, instr);

			prio = alu_scalar_prio(instr);
			if (prio >= prio_v && !compat)
				continue;

			if (prio < prio_s) {
				instr_s = instr;
				prio_s = prio;
				/* the scalar pick beats the vector pick but cannot
				 * co-issue with it: drop the vector instruction
				 */
				if (!compat)
					instr_v = NULL;
			}
		}
	}

	assert(instr_v || instr_s);

	/* free src registers */
	if (instr_v) {
		instr_v->need_emit = false;
		ra_src_free(ctx, instr_v);
	}

	if (instr_s) {
		instr_s->need_emit = false;
		ra_src_free(ctx, instr_s);
	}

	/* allocate dst registers */
	if (instr_v)
		ra_reg(ctx, get_reg(instr_v), -1, is_export(instr_v), instr_v->alu.write_mask);

	if (instr_s)
		ra_reg(ctx, get_reg(instr_s), -1, is_export(instr_s), instr_s->alu.write_mask);

	sched->instr = instr_v;
	sched->instr_s = instr_s;
	return block_idx;
}
|
||||
|
||||
/* scheduling: determine order of instructions
 *
 * Repeatedly calls sched_next() to fill ctx->instr_sched in emission
 * order, inserting TEX_SET_TEX_LOD before explicit-LOD texture fetches
 * and freeing a block's registers once all its instructions are emitted.
 */
static void schedule_instrs(struct ir2_context *ctx)
{
	struct ir2_sched_instr *sched;
	int block_idx;

	/* allocate input registers */
	for (unsigned idx = 0; idx < ARRAY_SIZE(ctx->input); idx++)
		if (ctx->input[idx].initialized)
			ra_reg(ctx, &ctx->input[idx], idx, false, 0);

	for (;;) {
		sched = &ctx->instr_sched[ctx->instr_sched_count++];
		block_idx = sched_next(ctx, sched);
		if (block_idx < 0)
			break;
		/* snapshot the register allocator state for this slot */
		memcpy(sched->reg_state, ctx->reg_state, sizeof(ctx->reg_state));

		/* catch texture fetch after scheduling and insert the
		 * SET_TEX_LOD right before it if necessary
		 * TODO clean this up
		 */
		struct ir2_instr *instr = sched->instr, *tex_lod;
		if (instr && instr->type == IR2_FETCH &&
			instr->fetch.opc == TEX_FETCH && instr->src_count == 2) {
			/* generate the SET_LOD instruction */
			tex_lod = &ctx->instr[ctx->instr_count++];
			tex_lod->type = IR2_FETCH;
			tex_lod->block_idx = instr->block_idx;
			tex_lod->pred = instr->pred;
			tex_lod->fetch.opc = TEX_SET_TEX_LOD;
			tex_lod->src[0] = instr->src[1];
			tex_lod->src_count = 1;

			/* shift the fetch into the next slot and place the
			 * SET_LOD in the current one (sched[1] is the next
			 * entry in ctx->instr_sched)
			 */
			sched[1] = sched[0];
			sched->instr = tex_lod;
			ctx->instr_sched_count++;
		}

		/* free the block's registers when nothing in it remains */
		bool free_block = true;
		ir2_foreach_instr(instr, ctx)
			free_block &= instr->block_idx != block_idx;
		if (free_block)
			ra_block_free(ctx, block_idx);
	};
	/* last iteration consumed a slot without filling it */
	ctx->instr_sched_count--;
}
|
||||
|
||||
/* compile one shader variant: NIR -> ir2 -> scheduled -> a2xx bitcode
 *
 * so: the shader to compile; variant: index into so->variant[]
 * fp: for vertex shaders, the paired fragment shader used for input/output
 *     linkage; NULL selects the binning variant (vertex shaders only)
 */
void
ir2_compile(struct fd2_shader_stateobj *so, unsigned variant,
		struct fd2_shader_stateobj *fp)
{
	struct ir2_context ctx = { };
	bool binning = !fp && so->type == MESA_SHADER_VERTEX;

	/* inherit the fragment shader's linkage info for the VS variant */
	if (fp)
		so->variant[variant].f = fp->variant[0].f;

	ctx.so = so;
	ctx.info = &so->variant[variant].info;
	ctx.f = &so->variant[variant].f;
	ctx.info->max_reg = -1;

	/* convert nir to internal representation */
	ir2_nir_compile(&ctx, binning);

	/* get ref_counts and kill non-needed instructions */
	ra_count_refs(&ctx);

	/* instruction order.. and vector->scalar conversions */
	schedule_instrs(&ctx);

	/* finally, assemble to bitcode */
	assemble(&ctx, binning);
}
|
||||
94
src/gallium/drivers/freedreno/a2xx/ir2.h
Normal file
94
src/gallium/drivers/freedreno/a2xx/ir2.h
Normal file
|
|
@ -0,0 +1,94 @@
|
|||
/*
|
||||
* Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Jonathan Marek <jonathan@marek.ca>
|
||||
*/
|
||||
|
||||
#ifndef IR2_H_
#define IR2_H_

#include "compiler/nir/nir.h"

/* per-fetch-instruction info needed to patch the assembled shader at
 * draw time (fetch constants are not known at compile time)
 */
struct ir2_fetch_info {
	/* dword offset of the fetch instruction */
	uint16_t offset;
	union {
		/* swizzle to merge with tgsi swizzle */
		struct {
			uint16_t dst_swiz;
		} vtx;
		/* sampler id to patch const_idx */
		struct {
			uint16_t samp_id;
			uint8_t src_swiz;
		} tex;
	};
};

/* result of compiling one shader variant */
struct ir2_shader_info {
	/* assembled shader bitcode */
	uint32_t *dwords;

	/* size of the compiled shader in dwords */
	uint16_t sizedwords;

	/* highest GPR # used by shader */
	int8_t max_reg;

	/* offset in dwords of first MEMORY export CF (for a20x hw binning) */
	int16_t mem_export_ptr;

	/* fetch instruction info for patching */
	uint16_t num_fetch_instrs;
	struct ir2_fetch_info fetch_info[64];
};

/* vertex->fragment linkage information */
struct ir2_frag_linkage {
	unsigned inputs_count;
	struct {
		uint8_t slot;
		uint8_t ncomp;
	} inputs[16];

	/* driver_location of fragcoord.zw, -1 if not used */
	int fragcoord;
};

struct ir2_shader_variant {
	struct ir2_shader_info info;
	struct ir2_frag_linkage f;
};

struct fd2_shader_stateobj;
struct tgsi_token;

void ir2_compile(struct fd2_shader_stateobj *so, unsigned variant,
		struct fd2_shader_stateobj *fp);

struct nir_shader *ir2_tgsi_to_nir(const struct tgsi_token *tokens);

const nir_shader_compiler_options *ir2_get_compiler_options(void);

int ir2_optimize_nir(nir_shader *s, bool lower);

#endif /* IR2_H_ */
|
||||
548
src/gallium/drivers/freedreno/a2xx/ir2_assemble.c
Normal file
548
src/gallium/drivers/freedreno/a2xx/ir2_assemble.c
Normal file
|
|
@ -0,0 +1,548 @@
|
|||
/*
|
||||
* Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Jonathan Marek <jonathan@marek.ca>
|
||||
*/
|
||||
|
||||
#include "ir2_private.h"
|
||||
|
||||
/* compute the effective source swizzle for src, remapping through the
 * register allocator's per-component placement for SSA/REG sources
 */
static unsigned
src_swizzle(struct ir2_context *ctx, struct ir2_src *src, unsigned ncomp)
{
	struct ir2_reg_component *comps;
	unsigned swiz = 0;

	switch (src->type) {
	case IR2_SRC_SSA:
	case IR2_SRC_REG:
		break;
	default:
		/* inputs/constants keep their swizzle as-is */
		return src->swizzle;
	}
	/* we need to take into account where the components were allocated */
	comps = get_reg_src(ctx, src)->comp;
	for (int i = 0; i < ncomp; i++) {
		swiz |= swiz_set(comps[swiz_get(src->swizzle, i)].c, i);
	}
	return swiz;
}
|
||||
|
||||
/* alu instr need to take into how the output components are allocated */

/* scalar doesn't need to take into account dest swizzle */

static unsigned
alu_swizzle_scalar(struct ir2_context *ctx, struct ir2_src *reg)
{
	/* hardware seems to take from W, but swizzle everywhere just in case */
	return swiz_merge(src_swizzle(ctx, reg, 1), IR2_SWIZZLE_XXXX);
}
|
||||
|
||||
/* compute the source swizzle for a vector ALU instruction, remapping the
 * source components onto the destination components chosen by the
 * register allocator
 */
static unsigned
alu_swizzle(struct ir2_context *ctx, struct ir2_instr *instr, struct ir2_src *src)
{
	struct ir2_reg_component *comp = get_reg(instr)->comp;
	unsigned swiz0 = src_swizzle(ctx, src, src_ncomp(instr));
	unsigned swiz = 0;

	/* non per component special cases */
	switch (instr->alu.vector_opc) {
	case PRED_SETE_PUSHv ... PRED_SETGTE_PUSHv:
		return alu_swizzle_scalar(ctx, src);
	case DOT2ADDv:
	case DOT3v:
	case DOT4v:
	case CUBEv:
		/* reductions: result doesn't follow dest component placement */
		return swiz0;
	default:
		break;
	}

	/* per-component ops: route source component i to wherever dest
	 * component i was allocated (comp[j].c == 7 means unallocated)
	 */
	for (int i = 0, j = 0; i < dst_ncomp(instr); j++) {
		if (instr->alu.write_mask & 1 << j) {
			if (comp[j].c != 7)
				swiz |= swiz_set(i, comp[j].c);
			i++;
		}
	}
	return swiz_merge(swiz0, swiz);
}
|
||||
|
||||
/* swizzle for a two-source scalar op: first operand from src, second
 * operand component index given by s1
 */
static unsigned
alu_swizzle_scalar2(struct ir2_context *ctx, struct ir2_src *src, unsigned s1)
{
	/* hardware seems to take from ZW, but swizzle everywhere (ABAB) */
	unsigned s0 = swiz_get(src_swizzle(ctx, src, 1), 0);
	return swiz_merge(swiz_set(s0, 0) | swiz_set(s1, 1), IR2_SWIZZLE_XYXY);
}
|
||||
|
||||
/* write_mask needs to be transformed by allocation information */
|
||||
|
||||
static unsigned
|
||||
alu_write_mask(struct ir2_context *ctx, struct ir2_instr *instr)
|
||||
{
|
||||
struct ir2_reg_component *comp = get_reg(instr)->comp;
|
||||
unsigned write_mask = 0;
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (instr->alu.write_mask & 1 << i)
|
||||
write_mask |= 1 << comp[i].c;
|
||||
}
|
||||
|
||||
return write_mask;
|
||||
}
|
||||
|
||||
/* fetch instructions can swizzle dest, but src swizzle needs conversion:
 * ALU swizzles use 2 bits per component at stride 8; fetch src swizzles
 * pack the same 2-bit selectors contiguously
 */
static unsigned
fetch_swizzle(struct ir2_context *ctx, struct ir2_src *src, unsigned ncomp)
{
	unsigned full_swiz = src_swizzle(ctx, src, ncomp);
	unsigned fetch_swiz = 0;

	for (unsigned c = 0; c < ncomp; c++)
		fetch_swiz |= swiz_get(full_swiz, c) << c * 2;

	return fetch_swiz;
}
|
||||
|
||||
/* build the fetch dst swizzle from the allocated dest components.
 * 0xfff = all components masked (3 bits per component, 7 = don't write)
 */
static unsigned
fetch_dst_swiz(struct ir2_context *ctx, struct ir2_instr *instr)
{
	struct ir2_reg_component *comp = get_reg(instr)->comp;
	unsigned dst_swiz = 0xfff;
	for (int i = 0; i < dst_ncomp(instr); i++) {
		dst_swiz &= ~(7 << comp[i].c * 3);
		dst_swiz |= i << comp[i].c * 3;
	}
	return dst_swiz;
}
|
||||
|
||||
/* register / export # for instr */
|
||||
static unsigned
|
||||
dst_to_reg(struct ir2_context *ctx, struct ir2_instr *instr)
|
||||
{
|
||||
if (is_export(instr))
|
||||
return instr->alu.export;
|
||||
|
||||
return get_reg(instr)->idx;
|
||||
}
|
||||
|
||||
/* register # for src */
static unsigned src_to_reg(struct ir2_context *ctx, struct ir2_src *src)
{
	return get_reg_src(ctx, src)->idx;
}
|
||||
|
||||
/* encode a source operand as the 8-bit "reg byte": constant index, or
 * GPR index with the abs-value modifier in bit 7
 */
static unsigned src_reg_byte(struct ir2_context *ctx, struct ir2_src *src)
{
	if (src->type == IR2_SRC_CONST) {
		assert(!src->abs); /* no abs bit for const */
		return src->num;
	}
	return src_to_reg(ctx, src) | (src->abs ? 0x80 : 0);
}
|
||||
|
||||
/* produce the 12 byte binary instruction for a given sched_instr */
|
||||
static void
|
||||
fill_instr(struct ir2_context *ctx, struct ir2_sched_instr *sched,
|
||||
instr_t *bc, bool * is_fetch)
|
||||
{
|
||||
struct ir2_instr *instr = sched->instr, *instr_s, *instr_v;
|
||||
|
||||
*bc = (instr_t) {};
|
||||
|
||||
if (instr && instr->type == IR2_FETCH) {
|
||||
*is_fetch = true;
|
||||
|
||||
bc->fetch.opc = instr->fetch.opc;
|
||||
bc->fetch.pred_select = !!instr->pred;
|
||||
bc->fetch.pred_condition = instr->pred & 1;
|
||||
|
||||
struct ir2_src *src = instr->src;
|
||||
|
||||
if (instr->fetch.opc == VTX_FETCH) {
|
||||
instr_fetch_vtx_t *vtx = &bc->fetch.vtx;
|
||||
|
||||
assert(instr->fetch.vtx.const_idx <= 0x1f);
|
||||
assert(instr->fetch.vtx.const_idx_sel <= 0x3);
|
||||
|
||||
vtx->src_reg = src_to_reg(ctx, src);
|
||||
vtx->src_swiz = fetch_swizzle(ctx, src, 1);
|
||||
vtx->dst_reg = dst_to_reg(ctx, instr);
|
||||
vtx->dst_swiz = fetch_dst_swiz(ctx, instr);
|
||||
|
||||
vtx->must_be_one = 1;
|
||||
vtx->const_index = instr->fetch.vtx.const_idx;
|
||||
vtx->const_index_sel = instr->fetch.vtx.const_idx_sel;
|
||||
|
||||
/* other fields will be patched */
|
||||
|
||||
/* XXX seems like every FETCH but the first has
|
||||
* this bit set:
|
||||
*/
|
||||
vtx->reserved3 = instr->idx ? 0x1 : 0x0;
|
||||
vtx->reserved0 = instr->idx ? 0x2 : 0x3;
|
||||
} else if (instr->fetch.opc == TEX_FETCH) {
|
||||
instr_fetch_tex_t *tex = &bc->fetch.tex;
|
||||
|
||||
tex->src_reg = src_to_reg(ctx, src);
|
||||
tex->src_swiz = fetch_swizzle(ctx, src, 3);
|
||||
tex->dst_reg = dst_to_reg(ctx, instr);
|
||||
tex->dst_swiz = fetch_dst_swiz(ctx, instr);
|
||||
/* tex->const_idx = patch_fetches */
|
||||
tex->mag_filter = TEX_FILTER_USE_FETCH_CONST;
|
||||
tex->min_filter = TEX_FILTER_USE_FETCH_CONST;
|
||||
tex->mip_filter = TEX_FILTER_USE_FETCH_CONST;
|
||||
tex->aniso_filter = ANISO_FILTER_USE_FETCH_CONST;
|
||||
tex->arbitrary_filter = ARBITRARY_FILTER_USE_FETCH_CONST;
|
||||
tex->vol_mag_filter = TEX_FILTER_USE_FETCH_CONST;
|
||||
tex->vol_min_filter = TEX_FILTER_USE_FETCH_CONST;
|
||||
tex->use_comp_lod = ctx->so->type == MESA_SHADER_FRAGMENT;
|
||||
tex->use_reg_lod = instr->src_count == 2;
|
||||
tex->sample_location = SAMPLE_CENTER;
|
||||
tex->tx_coord_denorm = instr->fetch.tex.is_rect;
|
||||
} else if (instr->fetch.opc == TEX_SET_TEX_LOD) {
|
||||
instr_fetch_tex_t *tex = &bc->fetch.tex;
|
||||
|
||||
tex->src_reg = src_to_reg(ctx, src);
|
||||
tex->src_swiz = fetch_swizzle(ctx, src, 1);
|
||||
tex->dst_reg = 0;
|
||||
tex->dst_swiz = 0xfff;
|
||||
|
||||
tex->mag_filter = TEX_FILTER_USE_FETCH_CONST;
|
||||
tex->min_filter = TEX_FILTER_USE_FETCH_CONST;
|
||||
tex->mip_filter = TEX_FILTER_USE_FETCH_CONST;
|
||||
tex->aniso_filter = ANISO_FILTER_USE_FETCH_CONST;
|
||||
tex->arbitrary_filter = ARBITRARY_FILTER_USE_FETCH_CONST;
|
||||
tex->vol_mag_filter = TEX_FILTER_USE_FETCH_CONST;
|
||||
tex->vol_min_filter = TEX_FILTER_USE_FETCH_CONST;
|
||||
tex->use_comp_lod = 1;
|
||||
tex->use_reg_lod = 0;
|
||||
tex->sample_location = SAMPLE_CENTER;
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
instr_v = sched->instr;
|
||||
instr_s = sched->instr_s;
|
||||
|
||||
if (instr_v) {
|
||||
struct ir2_src src1, src2, *src3;
|
||||
|
||||
src1 = instr_v->src[0];
|
||||
src2 = instr_v->src[instr_v->src_count > 1];
|
||||
src3 = instr_v->src_count == 3 ? &instr_v->src[2] : NULL;
|
||||
|
||||
bc->alu.vector_opc = instr_v->alu.vector_opc;
|
||||
bc->alu.vector_write_mask = alu_write_mask(ctx, instr_v);
|
||||
bc->alu.vector_dest = dst_to_reg(ctx, instr_v);
|
||||
bc->alu.vector_clamp = instr_v->alu.saturate;
|
||||
bc->alu.export_data = instr_v->alu.export >= 0;
|
||||
|
||||
/* single operand SETEv, use 0.0f as src2 */
|
||||
if (instr_v->src_count == 1 &&
|
||||
(bc->alu.vector_opc == SETEv ||
|
||||
bc->alu.vector_opc == SETNEv ||
|
||||
bc->alu.vector_opc == SETGTv ||
|
||||
bc->alu.vector_opc == SETGTEv))
|
||||
src2 = ir2_zero(ctx);
|
||||
|
||||
/* export32 instr for a20x hw binning has this bit set..
|
||||
* it seems to do more than change the base address of constants
|
||||
* XXX this is a hack
|
||||
*/
|
||||
bc->alu.relative_addr =
|
||||
(bc->alu.export_data && bc->alu.vector_dest == 32);
|
||||
|
||||
bc->alu.src1_reg_byte = src_reg_byte(ctx, &src1);
|
||||
bc->alu.src1_swiz = alu_swizzle(ctx, instr_v, &src1);
|
||||
bc->alu.src1_reg_negate = src1.negate;
|
||||
bc->alu.src1_sel = src1.type != IR2_SRC_CONST;
|
||||
|
||||
bc->alu.src2_reg_byte = src_reg_byte(ctx, &src2);
|
||||
bc->alu.src2_swiz = alu_swizzle(ctx, instr_v, &src2);
|
||||
bc->alu.src2_reg_negate = src2.negate;
|
||||
bc->alu.src2_sel = src2.type != IR2_SRC_CONST;
|
||||
|
||||
if (src3) {
|
||||
bc->alu.src3_reg_byte = src_reg_byte(ctx, src3);
|
||||
bc->alu.src3_swiz = alu_swizzle(ctx, instr_v, src3);
|
||||
bc->alu.src3_reg_negate = src3->negate;
|
||||
bc->alu.src3_sel = src3->type != IR2_SRC_CONST;
|
||||
}
|
||||
|
||||
bc->alu.pred_select = instr_v->pred;
|
||||
}
|
||||
|
||||
if (instr_s) {
|
||||
struct ir2_src *src = instr_s->src;
|
||||
|
||||
bc->alu.scalar_opc = instr_s->alu.scalar_opc;
|
||||
bc->alu.scalar_write_mask = alu_write_mask(ctx, instr_s);
|
||||
bc->alu.scalar_dest = dst_to_reg(ctx, instr_s);
|
||||
bc->alu.scalar_clamp = instr_s->alu.saturate;
|
||||
bc->alu.export_data = instr_s->alu.export >= 0;
|
||||
|
||||
if (instr_s->src_count == 1) {
|
||||
bc->alu.src3_reg_byte = src_reg_byte(ctx, src);
|
||||
bc->alu.src3_swiz = alu_swizzle_scalar(ctx, src);
|
||||
bc->alu.src3_reg_negate = src->negate;
|
||||
bc->alu.src3_sel = src->type != IR2_SRC_CONST;
|
||||
} else {
|
||||
assert(instr_s->src_count == 2);
|
||||
|
||||
bc->alu.src3_reg_byte = src_reg_byte(ctx, src);
|
||||
bc->alu.src3_swiz = alu_swizzle_scalar2(ctx, src, instr_s->alu.src1_swizzle);
|
||||
bc->alu.src3_reg_negate = src->negate;
|
||||
bc->alu.src3_sel = src->type != IR2_SRC_CONST;;
|
||||
}
|
||||
|
||||
if (instr_v)
|
||||
assert(instr_s->pred == instr_v->pred);
|
||||
bc->alu.pred_select = instr_s->pred;
|
||||
}
|
||||
|
||||
*is_fetch = false;
|
||||
return;
|
||||
}
|
||||
|
||||
/* flush the pending ALLOC (if any) and EXEC CF instructions into cfs,
 * then reset the exec state for the next group.
 * Returns the updated CF write index.
 */
static unsigned
write_cfs(struct ir2_context *ctx, instr_cf_t * cfs, unsigned cf_idx,
		instr_cf_alloc_t *alloc, instr_cf_exec_t *exec)
{
	assert(exec->count);

	if (alloc)
		cfs[cf_idx++].alloc = *alloc;

	/* for memory alloc offset for patching */
	if (alloc && alloc->buffer_select == SQ_MEMORY &&
		ctx->info->mem_export_ptr == -1)
		ctx->info->mem_export_ptr = cf_idx / 2 * 3;	/* CFs pack 2-per-3-dwords */

	cfs[cf_idx++].exec = *exec;
	exec->address += exec->count;
	exec->serialize = 0;
	exec->count = 0;

	return cf_idx;
}
|
||||
|
||||
/* assemble the final shader
 *
 * Walks ctx->instr_sched in order, producing the ALU/FETCH bytecode and
 * the CF (control flow) stream, then concatenates [CFs | ALU/FETCH] into
 * a malloc'd dword array stored in ctx->info.
 * NOTE(review): the "binning" parameter is unused here — presumably the
 * binning variant is already reflected in the scheduled instructions.
 */
void assemble(struct ir2_context *ctx, bool binning)
{
	/* hw seems to have a limit of 384 (num_cf/2+num_instr <= 384)
	 * address is 9 bits so could it be 512 ?
	 */
	instr_cf_t cfs[384];
	instr_t bytecode[384], bc;
	unsigned block_addr[128];
	unsigned num_cf = 0;

	/* CF instr state */
	instr_cf_exec_t exec = {.opc = EXEC};
	instr_cf_alloc_t alloc = {.opc = ALLOC};

	int sync_id, sync_id_prev = -1;
	bool is_fetch = false;
	bool need_sync = true;
	bool need_alloc = false;
	unsigned block_idx = 0;

	ctx->info->mem_export_ptr = -1;
	ctx->info->num_fetch_instrs = 0;

	/* vertex shader always needs to allocate at least one parameter
	 * if it will never happen,
	 */
	if (ctx->so->type == MESA_SHADER_VERTEX && ctx->f->inputs_count == 0) {
		alloc.buffer_select = SQ_PARAMETER_PIXEL;
		cfs[num_cf++].alloc = alloc;
	}

	block_addr[0] = 0;

	/* i counts emitted ALU/FETCH instructions, j the sched slots */
	for (int i = 0, j = 0; j < ctx->instr_sched_count; j++) {
		struct ir2_instr *instr = ctx->instr_sched[j].instr;

		/* catch IR2_CF since it isn't a regular instruction */
		if (instr && instr->type == IR2_CF) {
			assert(!need_alloc); /* XXX */

			/* flush any exec cf before inserting jmp */
			if (exec.count)
				num_cf = write_cfs(ctx, cfs, num_cf, NULL, &exec);

			cfs[num_cf++].jmp_call = (instr_cf_jmp_call_t) {
				.opc = COND_JMP,
				.address = instr->cf.block_idx, /* will be fixed later */
				.force_call = !instr->pred,
				.predicated_jmp = 1,
				.direction = instr->cf.block_idx > instr->block_idx,
				.condition = instr->pred & 1,
			};
			continue;
		}

		/* fill the 3 dwords for the instruction */
		fill_instr(ctx, &ctx->instr_sched[j], &bc, &is_fetch);

		/* we need to sync between ALU/VTX_FETCH/TEX_FETCH types */
		sync_id = 0;
		if (is_fetch)
			sync_id = bc.fetch.opc == VTX_FETCH ? 1 : 2;

		need_sync = sync_id != sync_id_prev;
		sync_id_prev = sync_id;

		/* the block of the instruction(s) in this slot */
		unsigned block;
		{

			if (ctx->instr_sched[j].instr)
				block = ctx->instr_sched[j].instr->block_idx;
			else
				block = ctx->instr_sched[j].instr_s->block_idx;

			assert(block_idx <= block);
		}

		/* info for patching */
		if (is_fetch) {
			struct ir2_fetch_info *info =
				&ctx->info->fetch_info[ctx->info->num_fetch_instrs++];
			info->offset = i * 3; /* add cf offset later */

			if (bc.fetch.opc == VTX_FETCH) {
				info->vtx.dst_swiz = bc.fetch.vtx.dst_swiz;
			} else if (bc.fetch.opc == TEX_FETCH) {
				info->tex.samp_id = instr->fetch.tex.samp_id;
				info->tex.src_swiz = bc.fetch.tex.src_swiz;
			} else {
				/* e.g. TEX_SET_TEX_LOD needs no patching */
				ctx->info->num_fetch_instrs--;
			}
		}

		/* exec cf after 6 instr or when switching between fetch / alu */
		if (exec.count == 6 || (exec.count && (need_sync || block != block_idx))) {
			num_cf = write_cfs(ctx, cfs, num_cf, need_alloc ? &alloc : NULL, &exec);
			need_alloc = false;
		}

		/* update block_addrs for jmp patching */
		while (block_idx < block)
			block_addr[++block_idx] = num_cf;

		/* export - fill alloc cf */
		if (!is_fetch && bc.alu.export_data) {
			/* get the export buffer from either vector/scalar dest */
			instr_alloc_type_t buffer =
				export_buf(bc.alu.vector_dest);
			if (bc.alu.scalar_write_mask) {
				if (bc.alu.vector_write_mask)
					assert(buffer == export_buf(bc.alu.scalar_dest));
				buffer = export_buf(bc.alu.scalar_dest);
			}

			/* flush previous alloc if the buffer changes */
			bool need_new_alloc = buffer != alloc.buffer_select;

			/* memory export always in 32/33 pair, new alloc on 32 */
			if (bc.alu.vector_dest == 32)
				need_new_alloc = true;

			if (need_new_alloc && exec.count) {
				num_cf = write_cfs(ctx, cfs, num_cf, need_alloc ? &alloc : NULL, &exec);
				need_alloc = false;
			}

			need_alloc |= need_new_alloc;

			alloc.size = 0;
			alloc.buffer_select = buffer;

			if (buffer == SQ_PARAMETER_PIXEL && ctx->so->type == MESA_SHADER_VERTEX)
				alloc.size = ctx->f->inputs_count - 1;

			if (buffer == SQ_POSITION)
				alloc.size = ctx->so->writes_psize;
		}

		/* serialize has 2 bits per exec slot: fetch flag + sync flag */
		if (is_fetch)
			exec.serialize |= 0x1 << exec.count * 2;
		if (need_sync)
			exec.serialize |= 0x2 << exec.count * 2;

		need_sync = false;
		exec.count += 1;
		bytecode[i++] = bc;
	}

	/* final exec cf */
	exec.opc = EXEC_END;
	num_cf =
		write_cfs(ctx, cfs, num_cf, need_alloc ? &alloc : NULL, &exec);

	/* insert nop to get an even # of CFs */
	if (num_cf % 2)
		cfs[num_cf++] = (instr_cf_t) {
			.opc = NOP};

	/* patch cf addrs */
	for (int idx = 0; idx < num_cf; idx++) {
		switch (cfs[idx].opc) {
		case NOP:
		case ALLOC:
			break;
		case EXEC:
		case EXEC_END:
			/* exec addresses are relative to end of CF stream */
			cfs[idx].exec.address += num_cf / 2;
			break;
		case COND_JMP:
			/* remap block index to the block's first CF address */
			cfs[idx].jmp_call.address = block_addr[cfs[idx].jmp_call.address];
			break;
		default:
			assert(0);
		}
	}

	/* concatenate cfs and alu/fetch */
	uint32_t cfdwords = num_cf / 2 * 3;
	uint32_t alufetchdwords = exec.address * 3;
	uint32_t sizedwords = cfdwords + alufetchdwords;
	uint32_t *dwords = malloc(sizedwords * 4);
	assert(dwords);
	memcpy(dwords, cfs, cfdwords * 4);
	memcpy(&dwords[cfdwords], bytecode, alufetchdwords * 4);

	/* finalize ir2_shader_info */
	ctx->info->dwords = dwords;
	ctx->info->sizedwords = sizedwords;
	for (int i = 0; i < ctx->info->num_fetch_instrs; i++)
		ctx->info->fetch_info[i].offset += cfdwords;

	if (fd_mesa_debug & FD_DBG_DISASM) {
		DBG("disassemble: type=%d", ctx->so->type);
		disasm_a2xx(dwords, sizedwords, 0, ctx->so->type);
	}
}
|
||||
1173
src/gallium/drivers/freedreno/a2xx/ir2_nir.c
Normal file
1173
src/gallium/drivers/freedreno/a2xx/ir2_nir.c
Normal file
File diff suppressed because it is too large
Load diff
392
src/gallium/drivers/freedreno/a2xx/ir2_private.h
Normal file
392
src/gallium/drivers/freedreno/a2xx/ir2_private.h
Normal file
|
|
@ -0,0 +1,392 @@
|
|||
/*
|
||||
* Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Jonathan Marek <jonathan@marek.ca>
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "ir2.h"
|
||||
#include "fd2_program.h"
|
||||
#include "instr-a2xx.h"
|
||||
|
||||
/* Where an ir2_src's value comes from; selects how ir2_src::num is decoded. */
enum ir2_src_type {
   IR2_SRC_SSA,
   IR2_SRC_REG,
   IR2_SRC_INPUT,
   IR2_SRC_CONST,
};

/* One source operand of an IR2 instruction. */
struct ir2_src {
   /* num can mean different things
    *   ssa: index of instruction
    *   reg: index in ctx->reg array
    *   input: index in ctx->input array
    *   const: constant index (C0, C1, etc)
    */
   uint16_t num;
   uint8_t swizzle;             /* 2 bits per channel, relative encoding (see swiz_get/swiz_set) */
   enum ir2_src_type type : 2;
   uint8_t abs : 1;             /* source modifier: absolute value */
   uint8_t negate : 1;          /* source modifier: negation */
   uint8_t : 4;                 /* padding */
};

/* Per-component register-allocation state of an ir2_reg. */
struct ir2_reg_component {
   uint8_t c : 3;               /* assigned x/y/z/w (7=dont write, for fetch instr) */
   bool alloc : 1;              /* is it currently allocated */
   uint8_t ref_count;           /* for ra */
};

/* A virtual register (SSA value or named register). */
struct ir2_reg {
   uint8_t idx;                 /* assigned hardware register */
   uint8_t ncomp;               /* number of components (1-4) */

   uint8_t loop_depth;          /* loop nesting depth at definition */
   bool initialized;
   /* block_idx to free on (-1 = free on ref_count==0) */
   int block_idx_free;
   struct ir2_reg_component comp[4];
};

/* One IR2 instruction: either a fetch, an ALU op, or control flow. */
struct ir2_instr {
   unsigned idx;                /* index in ctx->instr */

   unsigned block_idx;          /* block the instruction belongs to */

   enum {
      IR2_NONE,                 /* killed/skipped instruction */
      IR2_FETCH,
      IR2_ALU,
      IR2_CF,
   } type : 2;

   /* instruction needs to be emitted (for scheduling) */
   bool need_emit : 1;

   /* predicate value - (usually) same for entire block */
   uint8_t pred : 2;

   /* src */
   uint8_t src_count;
   struct ir2_src src[4];

   /* dst */
   bool is_ssa;
   union {
      struct ir2_reg ssa;       /* valid when is_ssa */
      struct ir2_reg *reg;      /* valid when !is_ssa */
   };

   /* type-specific */
   union {
      struct {
         instr_fetch_opc_t opc : 5;
         union {
            struct {
               uint8_t const_idx;
               uint8_t const_idx_sel;
            } vtx;
            struct {
               bool is_cube : 1;
               bool is_rect : 1;
               uint8_t samp_id;
            } tex;
         };
      } fetch;
      struct {
         /* store possible opcs, then we can choose vector/scalar instr */
         instr_scalar_opc_t scalar_opc : 6;
         instr_vector_opc_t vector_opc : 5;
         /* same as nir */
         uint8_t write_mask : 4;
         bool saturate : 1;

         /* export idx (-1 no export) */
         int8_t export;

         /* for scalarized 2 src instruction */
         uint8_t src1_swizzle;
      } alu;
      struct {
         /* jmp dst block_idx */
         uint8_t block_idx;
      } cf;
   };
};

/* One scheduled slot: vector instruction plus optional co-issued scalar. */
struct ir2_sched_instr {
   uint32_t reg_state[8];       /* snapshot of register-allocation bitmask */
   struct ir2_instr *instr, *instr_s;
};

/* Full compiler state for one shader compile. */
struct ir2_context {
   struct fd2_shader_stateobj *so;

   unsigned block_idx, pred_idx;
   uint8_t pred;
   bool block_has_jump[64];

   unsigned loop_last_block[64];
   unsigned loop_depth;

   nir_shader *nir;

   /* ssa index of position output */
   struct ir2_src position;

   /* to translate SSA ids to instruction ids */
   int16_t ssa_map[1024];

   struct ir2_shader_info *info;
   struct ir2_frag_linkage *f;

   int prev_export;

   /* RA state */
   struct ir2_reg* live_regs[64];
   uint32_t reg_state[256/32]; /* 64*4 bits */

   /* inputs */
   struct ir2_reg input[16 + 1]; /* 16 + param */

   /* non-ssa regs */
   struct ir2_reg reg[64];
   unsigned reg_count;

   struct ir2_instr instr[0x300];
   unsigned instr_count;

   struct ir2_sched_instr instr_sched[0x180];
   unsigned instr_sched_count;
};
|
||||
|
||||
void assemble(struct ir2_context *ctx, bool binning);
|
||||
|
||||
void ir2_nir_compile(struct ir2_context *ctx, bool binning);
|
||||
|
||||
void ra_count_refs(struct ir2_context *ctx);
|
||||
void ra_reg(struct ir2_context *ctx, struct ir2_reg *reg, int force_idx,
|
||||
bool export, uint8_t export_writemask);
|
||||
void ra_src_free(struct ir2_context *ctx, struct ir2_instr *instr);
|
||||
void ra_block_free(struct ir2_context *ctx, unsigned block);
|
||||
|
||||
/* utils */
|
||||
/* Swizzle constants, encoded 2 bits per channel *relative* to the channel
 * position (hardware encoding): value v in channel i selects component
 * (v + i) & 3.  See swiz_get/swiz_set.
 */
enum {
   IR2_SWIZZLE_Y = 1 << 0,
   IR2_SWIZZLE_Z = 2 << 0,
   IR2_SWIZZLE_W = 3 << 0,

   IR2_SWIZZLE_ZW = 2 << 0 | 2 << 2,

   IR2_SWIZZLE_XYW = 0 << 0 | 0 << 2 | 1 << 4,

   IR2_SWIZZLE_XXXX = 0 << 0 | 3 << 2 | 2 << 4 | 1 << 6,
   IR2_SWIZZLE_YYYY = 1 << 0 | 0 << 2 | 3 << 4 | 2 << 6,
   IR2_SWIZZLE_ZZZZ = 2 << 0 | 1 << 2 | 0 << 4 | 3 << 6,
   IR2_SWIZZLE_WWWW = 3 << 0 | 2 << 2 | 1 << 4 | 0 << 6,
   IR2_SWIZZLE_WYWW = 3 << 0 | 0 << 2 | 1 << 4 | 0 << 6,
   IR2_SWIZZLE_XYXY = 0 << 0 | 0 << 2 | 2 << 4 | 2 << 6,
   IR2_SWIZZLE_ZZXY = 2 << 0 | 1 << 2 | 2 << 4 | 2 << 6,
   IR2_SWIZZLE_YXZZ = 1 << 0 | 3 << 2 | 0 << 4 | 3 << 6,
};

/* Print a message and abort; compile errors are treated as fatal
 * (uses a GCC/Clang statement expression).
 */
#define compile_error(ctx, args...) ({ \
   printf(args); \
   assert(0); \
})
|
||||
|
||||
static inline struct ir2_src
|
||||
ir2_src(uint16_t num, uint8_t swizzle, enum ir2_src_type type)
|
||||
{
|
||||
return (struct ir2_src) {
|
||||
.num = num,
|
||||
.swizzle = swizzle,
|
||||
.type = type
|
||||
};
|
||||
}
|
||||
|
||||
/* ir2_assemble uses it .. */
|
||||
struct ir2_src ir2_zero(struct ir2_context *ctx);
|
||||
|
||||
/* Iterate all live (non-IR2_NONE) instructions of the context.  The
 * statement expression skips killed instructions before each iteration.
 */
#define ir2_foreach_instr(it, ctx) \
   for (struct ir2_instr *it = (ctx)->instr; ({ \
      while (it != &(ctx)->instr[(ctx)->instr_count] && it->type == IR2_NONE) it++; \
      it != &(ctx)->instr[(ctx)->instr_count]; }); it++)

/* Iterate all currently-live registers (non-NULL entries of live_regs).
 * Fixed: the advance step must move the scan pointer (__ptr), not the
 * value iterator (it) -- advancing "it" never progressed the scan, so a
 * loop body that hit "continue" on a live entry would spin forever.
 */
#define ir2_foreach_live_reg(it, ctx) \
   for (struct ir2_reg **__ptr = (ctx)->live_regs, *it; ({ \
      while (__ptr != &(ctx)->live_regs[64] && *__ptr == NULL) __ptr++; \
      __ptr != &(ctx)->live_regs[64] ? (it=*__ptr) : NULL; }); __ptr++)

/* Iterate the scheduler's "avail" list (avail/avail_count are locals of
 * the caller).  NOTE(review): "it = *__instrp" is evaluated before the
 * bounds check, so the one-past-the-end slot is read (value unused) --
 * confirm the backing array has room for that read.
 */
#define ir2_foreach_avail(it) \
   for (struct ir2_instr **__instrp = avail, *it; \
      it = *__instrp, __instrp != &avail[avail_count]; __instrp++)

/* Iterate the src_count source operands of an instruction. */
#define ir2_foreach_src(it, instr) \
   for (struct ir2_src *it = instr->src; \
      it != &instr->src[instr->src_count]; it++)
|
||||
|
||||
/* mask for register allocation
|
||||
* 64 registers with 4 components each = 256 bits
|
||||
*/
|
||||
/* typedef struct {
|
||||
uint64_t data[4];
|
||||
} regmask_t; */
|
||||
|
||||
/* Test bit `num` in a multi-word bitmask.
 * Uses an unsigned constant for the shift: with the signed literal 1,
 * "1 << 31" (num % 32 == 31, reachable since num goes up to 255) is
 * undefined behavior in C.
 */
static inline bool mask_isset(uint32_t *mask, unsigned num)
{
   return !!(mask[num / 32] & 1u << num % 32);
}
|
||||
|
||||
/* Set bit `num` in a multi-word bitmask.
 * 1u avoids the undefined signed shift "1 << 31" for num % 32 == 31.
 */
static inline void mask_set(uint32_t *mask, unsigned num)
{
   mask[num / 32] |= 1u << num % 32;
}
|
||||
|
||||
/* Clear bit `num` in a multi-word bitmask.
 * 1u avoids the undefined signed shift "1 << 31" for num % 32 == 31.
 */
static inline void mask_unset(uint32_t *mask, unsigned num)
{
   mask[num / 32] &= ~(1u << num % 32);
}
|
||||
|
||||
/* Extract the 4-bit component mask of register `num` from a packed
 * bitmask (8 registers per 32-bit word, one nibble each).
 */
static inline unsigned mask_reg(uint32_t *mask, unsigned num)
{
   unsigned word = num / 8;
   unsigned shift = (num % 8) * 4;
   return (mask[word] >> shift) & 0xf;
}
|
||||
|
||||
static inline bool is_export(struct ir2_instr *instr)
|
||||
{
|
||||
return instr->type == IR2_ALU && instr->alu.export >= 0;
|
||||
}
|
||||
|
||||
static inline instr_alloc_type_t export_buf(unsigned num)
|
||||
{
|
||||
return num < 32 ? SQ_PARAMETER_PIXEL :
|
||||
num >= 62 ? SQ_POSITION : SQ_MEMORY;
|
||||
}
|
||||
|
||||
/* Encode "select component c in channel i" into the 2-bit-per-channel
 * relative swizzle format (stored value is (c - i) mod 4, shifted into
 * channel i's bit position).
 */
static inline unsigned swiz_set(unsigned c, unsigned i)
{
   unsigned rel = (c - i) & 3;
   return rel << (i * 2);
}
|
||||
|
||||
/* Decode which component channel i selects from a relative-encoded
 * swizzle (inverse of swiz_set: component = (stored + i) mod 4).
 */
static inline unsigned swiz_get(unsigned swiz, unsigned i)
{
   unsigned rel = swiz >> (i * 2);
   return (rel + i) & 3;
}
|
||||
|
||||
/* Compose two swizzles: the result applies swiz1 first, then swiz0
 * (channel i of the result selects what swiz0 selects at the channel
 * swiz1 maps i to).
 */
static inline unsigned swiz_merge(unsigned swiz0, unsigned swiz1)
{
   unsigned result = 0;
   for (unsigned chan = 0; chan < 4; chan++) {
      unsigned comp = swiz_get(swiz0, swiz_get(swiz1, chan));
      result |= swiz_set(comp, chan);
   }
   return result;
}
|
||||
|
||||
/* In-place variant of swiz_merge: *swiz0 = merge(*swiz0, swiz1).
 * Previously duplicated swiz_merge's loop verbatim; delegate instead so
 * the composition logic lives in one place.
 */
static inline void swiz_merge_p(uint8_t *swiz0, unsigned swiz1)
{
   *swiz0 = swiz_merge(*swiz0, swiz1);
}
|
||||
|
||||
static inline struct ir2_reg * get_reg(struct ir2_instr *instr)
|
||||
{
|
||||
return instr->is_ssa ? &instr->ssa : instr->reg;
|
||||
}
|
||||
|
||||
static inline struct ir2_reg *
|
||||
get_reg_src(struct ir2_context *ctx, struct ir2_src *src)
|
||||
{
|
||||
switch (src->type) {
|
||||
case IR2_SRC_INPUT:
|
||||
return &ctx->input[src->num];
|
||||
case IR2_SRC_SSA:
|
||||
return &ctx->instr[src->num].ssa;
|
||||
case IR2_SRC_REG:
|
||||
return &ctx->reg[src->num];
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* gets a ncomp value for the dst */
|
||||
static inline unsigned dst_ncomp(struct ir2_instr *instr)
|
||||
{
|
||||
if (instr->is_ssa)
|
||||
return instr->ssa.ncomp;
|
||||
|
||||
if (instr->type == IR2_FETCH)
|
||||
return instr->reg->ncomp;
|
||||
|
||||
assert(instr->type == IR2_ALU);
|
||||
|
||||
unsigned ncomp = 0;
|
||||
for (int i = 0; i < instr->reg->ncomp; i++)
|
||||
ncomp += !!(instr->alu.write_mask & 1 << i);
|
||||
return ncomp;
|
||||
}
|
||||
|
||||
/* gets a ncomp value for the src registers
 * Fetch: vertex fetches read one component, texture fetches 2 (or 3 for
 * cubemaps), LOD setup reads 1.
 * ALU: scalar reduction ops read 1; dot products / CUBE / PRED_SETE_PUSH
 * read a fixed count; everything else reads as many as it writes.
 * Uses GCC case-range extension ("lo ... hi") on the opcode enums.
 */
static inline unsigned src_ncomp(struct ir2_instr *instr)
{
   if (instr->type == IR2_FETCH) {
      switch (instr->fetch.opc) {
      case VTX_FETCH:
         return 1;
      case TEX_FETCH:
         return instr->fetch.tex.is_cube ? 3 : 2;
      case TEX_SET_TEX_LOD:
         return 1;
      default:
         /* NOTE(review): with NDEBUG this assert disappears and control
          * falls through to the ALU switches below on the fetch union --
          * presumably unreachable for valid fetch opcodes; confirm. */
         assert(0);
      }
   }

   /* scalar ops in the PRED_SETEs..KILLONEs range are 1-component */
   switch (instr->alu.scalar_opc) {
   case PRED_SETEs ... KILLONEs:
      return 1;
   default:
      break;
   }

   switch (instr->alu.vector_opc) {
   case DOT2ADDv:
      return 2;
   case DOT3v:
      return 3;
   case DOT4v:
   case CUBEv:
   case PRED_SETE_PUSHv:
      return 4;
   default:
      /* non-reduction ops read the same components they write */
      return dst_ncomp(instr);
   }
}
|
||||
226
src/gallium/drivers/freedreno/a2xx/ir2_ra.c
Normal file
226
src/gallium/drivers/freedreno/a2xx/ir2_ra.c
Normal file
|
|
@ -0,0 +1,226 @@
|
|||
/*
|
||||
* Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Jonathan Marek <jonathan@marek.ca>
|
||||
*/
|
||||
|
||||
#include "ir2_private.h"
|
||||
|
||||
/* if an instruction has side effects, we should never kill it
 * Side effects here: control flow, predicate-writing and kill ALU ops
 * (both scalar and vector forms, matched via GCC case ranges on the
 * opcode enums), and exports.  Fetches have none.
 */
static bool has_side_effects(struct ir2_instr *instr)
{
   if (instr->type == IR2_CF)
      return true;
   else if (instr->type == IR2_FETCH)
      return false;

   /* from here on instr->type == IR2_ALU, so the alu union is valid */
   switch (instr->alu.scalar_opc) {
   case PRED_SETEs ... KILLONEs:
      return true;
   default:
      break;
   }

   switch (instr->alu.vector_opc) {
   case PRED_SETE_PUSHv ... KILLNEv:
      return true;
   default:
      break;
   }

   /* exports must also be kept (alu.export >= 0 means "is an export") */
   return instr->alu.export >= 0;
}
|
||||
|
||||
/* mark an instruction as required, and all its sources recursively */
|
||||
static void set_need_emit(struct ir2_context *ctx, struct ir2_instr *instr)
|
||||
{
|
||||
struct ir2_reg *reg;
|
||||
|
||||
/* don't repeat work already done */
|
||||
if (instr->need_emit)
|
||||
return;
|
||||
|
||||
instr->need_emit = true;
|
||||
|
||||
ir2_foreach_src(src, instr) {
|
||||
switch (src->type) {
|
||||
case IR2_SRC_SSA:
|
||||
set_need_emit(ctx, &ctx->instr[src->num]);
|
||||
break;
|
||||
case IR2_SRC_REG:
|
||||
/* slow .. */
|
||||
reg = get_reg_src(ctx, src);
|
||||
ir2_foreach_instr(instr, ctx) {
|
||||
if (!instr->is_ssa && instr->reg == reg)
|
||||
set_need_emit(ctx, instr);
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* get current bit mask of allocated components for a register */
|
||||
static unsigned reg_mask(struct ir2_context *ctx, unsigned idx)
|
||||
{
|
||||
return ctx->reg_state[idx/8] >> idx%8*4 & 0xf;
|
||||
}
|
||||
|
||||
static void reg_setmask(struct ir2_context *ctx, unsigned idx, unsigned c)
|
||||
{
|
||||
idx = idx * 4 + c;
|
||||
ctx->reg_state[idx/32] |= 1 << idx%32;
|
||||
}
|
||||
|
||||
static void reg_freemask(struct ir2_context *ctx, unsigned idx, unsigned c)
|
||||
{
|
||||
idx = idx * 4 + c;
|
||||
ctx->reg_state[idx/32] &= ~(1 << idx%32);
|
||||
}
|
||||
|
||||
/* Dead-code elimination + reference counting, run before scheduling/RA:
 * marks the live instruction set (starting from side-effecting roots),
 * kills everything else, and counts per-component source references.
 */
void ra_count_refs(struct ir2_context *ctx)
{
   struct ir2_reg *reg;

   /* mark instructions as needed
    * need to do this because "substitutions" pass makes many movs not needed
    */
   ir2_foreach_instr(instr, ctx) {
      if (has_side_effects(instr))
         set_need_emit(ctx, instr);
   }

   /* compute ref_counts */
   ir2_foreach_instr(instr, ctx) {
      /* kill non-needed so they can be skipped */
      if (!instr->need_emit) {
         instr->type = IR2_NONE;
         continue;
      }

      /* each read of a component bumps that component's ref_count;
       * constants are not register-backed and are skipped */
      ir2_foreach_src(src, instr) {
         if (src->type == IR2_SRC_CONST)
            continue;

         reg = get_reg_src(ctx, src);
         for (int i = 0; i < src_ncomp(instr); i++)
            reg->comp[swiz_get(src->swizzle, i)].ref_count++;
      }
   }
}
|
||||
|
||||
/* Allocate a hardware register for `reg`.
 * force_idx >= 0 pins the hardware register index; otherwise the first
 * fully-free register is used.  For exports no allocation happens at all:
 * only the identity component layout is set.
 * NOTE(review): export_writemask is currently unused -- confirm whether
 * partial export masks were intended to affect the layout.
 */
void ra_reg(struct ir2_context *ctx, struct ir2_reg *reg, int force_idx,
      bool export, uint8_t export_writemask)
{
   /* for export, don't allocate anything but set component layout */
   if (export) {
      for (int i = 0; i < 4; i++)
         reg->comp[i].c = i;
      return;
   }

   unsigned idx = force_idx;

   /* TODO: allocate into the same register if theres room
    * note: the blob doesn't do it, so verify that it is indeed better
    * also, doing it would conflict with scalar mov insertion
    */

   /* check if already allocated */
   for (int i = 0; i < reg->ncomp; i++) {
      if (reg->comp[i].alloc)
         return;
   }

   /* no forced index: take the first completely unused register */
   if (force_idx < 0) {
      for (idx = 0; idx < 64; idx++) {
         if (reg_mask(ctx, idx) == 0)
            break;
      }
   }
   assert(idx != 64); /* TODO ran out of register space.. */

   /* update max_reg value */
   ctx->info->max_reg = MAX2(ctx->info->max_reg, (int) idx);

   unsigned mask = reg_mask(ctx, idx);

   for (int i = 0; i < reg->ncomp; i++) {
      /* don't allocate never used values */
      if (reg->comp[i].ref_count == 0) {
         reg->comp[i].c = 7;
         continue;
      }

      /* TODO */
      /* the "1 ?" keeps the identity mapping for now; the disabled
       * alternative would pack into the lowest free component */
      unsigned c = 1 ? i : (ffs(~mask) - 1);
      mask |= 1 << c;
      reg->comp[i].c = c;
      reg_setmask(ctx, idx, c);
      reg->comp[i].alloc = true;
   }

   reg->idx = idx;
   ctx->live_regs[reg->idx] = reg;
}
|
||||
|
||||
/* reduce srcs ref_count and free if needed */
|
||||
void ra_src_free(struct ir2_context *ctx, struct ir2_instr *instr)
|
||||
{
|
||||
struct ir2_reg *reg;
|
||||
struct ir2_reg_component *comp;
|
||||
|
||||
ir2_foreach_src(src, instr) {
|
||||
if (src->type == IR2_SRC_CONST)
|
||||
continue;
|
||||
|
||||
reg = get_reg_src(ctx, src);
|
||||
/* XXX use before write case */
|
||||
|
||||
for (int i = 0; i < src_ncomp(instr); i++) {
|
||||
comp = ®->comp[swiz_get(src->swizzle, i)];
|
||||
if (!--comp->ref_count && reg->block_idx_free < 0) {
|
||||
reg_freemask(ctx, reg->idx, comp->c);
|
||||
comp->alloc = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* free any regs left for a block
 * Releases every live register whose block_idx_free matches `block`
 * and clears its live_regs slot.
 * NOTE(review): block_idx_free is int and block is unsigned -- the
 * comparison promotes to unsigned, fine for the -1 sentinel but worth
 * confirming for other negative values.
 */
void ra_block_free(struct ir2_context *ctx, unsigned block)
{
   ir2_foreach_live_reg(reg, ctx) {
      if (reg->block_idx_free != block)
         continue;

      for (int i = 0; i < reg->ncomp; i++) {
         if (!reg->comp[i].alloc) /* XXX should never be true? */
            continue;

         reg_freemask(ctx, reg->idx, reg->comp[i].c);
         reg->comp[i].alloc = false;
      }
      ctx->live_regs[reg->idx] = NULL;
   }
}
|
||||
|
|
@ -56,14 +56,6 @@ struct fd_texture_stateobj {
|
|||
|
||||
struct fd_program_stateobj {
|
||||
void *vp, *fp;
|
||||
|
||||
/* rest only used by fd2.. split out: */
|
||||
uint8_t num_exports;
|
||||
/* Indexed by semantic name or TGSI_SEMANTIC_COUNT + semantic index
|
||||
* for TGSI_SEMANTIC_GENERIC. Special vs exports (position and point-
|
||||
* size) are not included in this
|
||||
*/
|
||||
uint8_t export_linkage[63];
|
||||
};
|
||||
|
||||
struct fd_constbuf_stateobj {
|
||||
|
|
|
|||
|
|
@ -129,15 +129,14 @@ void fd_prog_init(struct pipe_context *pctx)
|
|||
pctx->bind_fs_state = fd_fp_state_bind;
|
||||
pctx->bind_vs_state = fd_vp_state_bind;
|
||||
|
||||
// XXX for now, let a2xx keep it's own hand-rolled shaders
|
||||
// for solid and blit progs:
|
||||
if (ctx->screen->gpu_id < 300)
|
||||
return;
|
||||
|
||||
ctx->solid_prog.fp = assemble_tgsi(pctx, solid_fp, true);
|
||||
ctx->solid_prog.vp = assemble_tgsi(pctx, solid_vp, false);
|
||||
ctx->blit_prog[0].vp = assemble_tgsi(pctx, blit_vp, false);
|
||||
ctx->blit_prog[0].fp = fd_prog_blit(pctx, 1, false);
|
||||
|
||||
if (ctx->screen->gpu_id < 300)
|
||||
return;
|
||||
|
||||
for (i = 1; i < ctx->screen->max_rts; i++) {
|
||||
ctx->blit_prog[i].vp = ctx->blit_prog[0].vp;
|
||||
ctx->blit_prog[i].fp = fd_prog_blit(pctx, i + 1, false);
|
||||
|
|
|
|||
|
|
@ -58,6 +58,7 @@
|
|||
|
||||
|
||||
#include "ir3/ir3_nir.h"
|
||||
#include "a2xx/ir2.h"
|
||||
|
||||
/* XXX this should go away */
|
||||
#include "state_tracker/drm_driver.h"
|
||||
|
|
@ -496,16 +497,9 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen,
|
|||
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
|
||||
return 16;
|
||||
case PIPE_SHADER_CAP_PREFERRED_IR:
|
||||
if (is_ir3(screen))
|
||||
return PIPE_SHADER_IR_NIR;
|
||||
return PIPE_SHADER_IR_TGSI;
|
||||
return PIPE_SHADER_IR_NIR;
|
||||
case PIPE_SHADER_CAP_SUPPORTED_IRS:
|
||||
if (is_ir3(screen)) {
|
||||
return (1 << PIPE_SHADER_IR_NIR) | (1 << PIPE_SHADER_IR_TGSI);
|
||||
} else {
|
||||
return (1 << PIPE_SHADER_IR_TGSI);
|
||||
}
|
||||
return 0;
|
||||
return (1 << PIPE_SHADER_IR_NIR) | (1 << PIPE_SHADER_IR_TGSI);
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
return 32;
|
||||
case PIPE_SHADER_CAP_SCALAR_ISA:
|
||||
|
|
@ -636,7 +630,7 @@ fd_get_compiler_options(struct pipe_screen *pscreen,
|
|||
if (is_ir3(screen))
|
||||
return ir3_get_compiler_options(screen->compiler);
|
||||
|
||||
return NULL;
|
||||
return ir2_get_compiler_options();
|
||||
}
|
||||
|
||||
boolean
|
||||
|
|
|
|||
|
|
@ -60,8 +60,6 @@ files_libfreedreno = files(
|
|||
'a2xx/disasm-a2xx.c',
|
||||
'a2xx/fd2_blend.c',
|
||||
'a2xx/fd2_blend.h',
|
||||
'a2xx/fd2_compiler.c',
|
||||
'a2xx/fd2_compiler.h',
|
||||
'a2xx/fd2_context.c',
|
||||
'a2xx/fd2_context.h',
|
||||
'a2xx/fd2_draw.c',
|
||||
|
|
@ -85,8 +83,12 @@ files_libfreedreno = files(
|
|||
'a2xx/fd2_zsa.c',
|
||||
'a2xx/fd2_zsa.h',
|
||||
'a2xx/instr-a2xx.h',
|
||||
'a2xx/ir-a2xx.c',
|
||||
'a2xx/ir-a2xx.h',
|
||||
'a2xx/ir2.c',
|
||||
'a2xx/ir2.h',
|
||||
'a2xx/ir2_assemble.c',
|
||||
'a2xx/ir2_nir.c',
|
||||
'a2xx/ir2_private.h',
|
||||
'a2xx/ir2_ra.c',
|
||||
'a3xx/fd3_blend.c',
|
||||
'a3xx/fd3_blend.h',
|
||||
'a3xx/fd3_context.c',
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue