freedreno: Re-indent

clang-format -fallback-style=none --style=file -i src/gallium/drivers/freedreno/*.[ch] src/gallium/drivers/freedreno/*/*.[ch]

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8883>
This commit is contained in:
Rob Clark 2021-04-14 08:04:06 -07:00 committed by Marge Bot
parent fdcae5b5b8
commit 2d439343ea
176 changed files with 30083 additions and 30077 deletions

View file

@ -26,87 +26,90 @@
#include "pipe/p_state.h" #include "pipe/p_state.h"
#include "util/u_blend.h" #include "util/u_blend.h"
#include "util/u_string.h"
#include "util/u_memory.h" #include "util/u_memory.h"
#include "util/u_string.h"
#include "fd2_blend.h" #include "fd2_blend.h"
#include "fd2_context.h" #include "fd2_context.h"
#include "fd2_util.h" #include "fd2_util.h"
static enum a2xx_rb_blend_opcode static enum a2xx_rb_blend_opcode
blend_func(unsigned func) blend_func(unsigned func)
{ {
switch (func) { switch (func) {
case PIPE_BLEND_ADD: case PIPE_BLEND_ADD:
return BLEND2_DST_PLUS_SRC; return BLEND2_DST_PLUS_SRC;
case PIPE_BLEND_MIN: case PIPE_BLEND_MIN:
return BLEND2_MIN_DST_SRC; return BLEND2_MIN_DST_SRC;
case PIPE_BLEND_MAX: case PIPE_BLEND_MAX:
return BLEND2_MAX_DST_SRC; return BLEND2_MAX_DST_SRC;
case PIPE_BLEND_SUBTRACT: case PIPE_BLEND_SUBTRACT:
return BLEND2_SRC_MINUS_DST; return BLEND2_SRC_MINUS_DST;
case PIPE_BLEND_REVERSE_SUBTRACT: case PIPE_BLEND_REVERSE_SUBTRACT:
return BLEND2_DST_MINUS_SRC; return BLEND2_DST_MINUS_SRC;
default: default:
DBG("invalid blend func: %x", func); DBG("invalid blend func: %x", func);
return 0; return 0;
} }
} }
void * void *
fd2_blend_state_create(struct pipe_context *pctx, fd2_blend_state_create(struct pipe_context *pctx,
const struct pipe_blend_state *cso) const struct pipe_blend_state *cso)
{ {
const struct pipe_rt_blend_state *rt = &cso->rt[0]; const struct pipe_rt_blend_state *rt = &cso->rt[0];
struct fd2_blend_stateobj *so; struct fd2_blend_stateobj *so;
unsigned rop = PIPE_LOGICOP_COPY; unsigned rop = PIPE_LOGICOP_COPY;
if (cso->logicop_enable) if (cso->logicop_enable)
rop = cso->logicop_func; /* 1:1 mapping with hw */ rop = cso->logicop_func; /* 1:1 mapping with hw */
if (cso->independent_blend_enable) { if (cso->independent_blend_enable) {
DBG("Unsupported! independent blend state"); DBG("Unsupported! independent blend state");
return NULL; return NULL;
} }
so = CALLOC_STRUCT(fd2_blend_stateobj); so = CALLOC_STRUCT(fd2_blend_stateobj);
if (!so) if (!so)
return NULL; return NULL;
so->base = *cso; so->base = *cso;
so->rb_colorcontrol = A2XX_RB_COLORCONTROL_ROP_CODE(rop); so->rb_colorcontrol = A2XX_RB_COLORCONTROL_ROP_CODE(rop);
so->rb_blendcontrol = so->rb_blendcontrol =
A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND(fd_blend_factor(rt->rgb_src_factor)) | A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND(
A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN(blend_func(rt->rgb_func)) | fd_blend_factor(rt->rgb_src_factor)) |
A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND(fd_blend_factor(rt->rgb_dst_factor)); A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN(blend_func(rt->rgb_func)) |
A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND(
fd_blend_factor(rt->rgb_dst_factor));
/* hardware doesn't support SRC_ALPHA_SATURATE for alpha, but it is equivalent to ONE */ /* hardware doesn't support SRC_ALPHA_SATURATE for alpha, but it is
unsigned alpha_src_factor = rt->alpha_src_factor; * equivalent to ONE */
if (alpha_src_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) unsigned alpha_src_factor = rt->alpha_src_factor;
alpha_src_factor = PIPE_BLENDFACTOR_ONE; if (alpha_src_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)
alpha_src_factor = PIPE_BLENDFACTOR_ONE;
so->rb_blendcontrol |= so->rb_blendcontrol |=
A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND(fd_blend_factor(alpha_src_factor)) | A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND(fd_blend_factor(alpha_src_factor)) |
A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN(blend_func(rt->alpha_func)) | A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN(blend_func(rt->alpha_func)) |
A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND(fd_blend_factor(rt->alpha_dst_factor)); A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND(
fd_blend_factor(rt->alpha_dst_factor));
if (rt->colormask & PIPE_MASK_R) if (rt->colormask & PIPE_MASK_R)
so->rb_colormask |= A2XX_RB_COLOR_MASK_WRITE_RED; so->rb_colormask |= A2XX_RB_COLOR_MASK_WRITE_RED;
if (rt->colormask & PIPE_MASK_G) if (rt->colormask & PIPE_MASK_G)
so->rb_colormask |= A2XX_RB_COLOR_MASK_WRITE_GREEN; so->rb_colormask |= A2XX_RB_COLOR_MASK_WRITE_GREEN;
if (rt->colormask & PIPE_MASK_B) if (rt->colormask & PIPE_MASK_B)
so->rb_colormask |= A2XX_RB_COLOR_MASK_WRITE_BLUE; so->rb_colormask |= A2XX_RB_COLOR_MASK_WRITE_BLUE;
if (rt->colormask & PIPE_MASK_A) if (rt->colormask & PIPE_MASK_A)
so->rb_colormask |= A2XX_RB_COLOR_MASK_WRITE_ALPHA; so->rb_colormask |= A2XX_RB_COLOR_MASK_WRITE_ALPHA;
if (!rt->blend_enable) if (!rt->blend_enable)
so->rb_colorcontrol |= A2XX_RB_COLORCONTROL_BLEND_DISABLE; so->rb_colorcontrol |= A2XX_RB_COLORCONTROL_BLEND_DISABLE;
if (cso->dither) if (cso->dither)
so->rb_colorcontrol |= A2XX_RB_COLORCONTROL_DITHER_MODE(DITHER_ALWAYS); so->rb_colorcontrol |= A2XX_RB_COLORCONTROL_DITHER_MODE(DITHER_ALWAYS);
return so; return so;
} }

View file

@ -27,23 +27,23 @@
#ifndef FD2_BLEND_H_ #ifndef FD2_BLEND_H_
#define FD2_BLEND_H_ #define FD2_BLEND_H_
#include "pipe/p_state.h"
#include "pipe/p_context.h" #include "pipe/p_context.h"
#include "pipe/p_state.h"
struct fd2_blend_stateobj { struct fd2_blend_stateobj {
struct pipe_blend_state base; struct pipe_blend_state base;
uint32_t rb_blendcontrol; uint32_t rb_blendcontrol;
uint32_t rb_colorcontrol; /* must be OR'd w/ zsa->rb_colorcontrol */ uint32_t rb_colorcontrol; /* must be OR'd w/ zsa->rb_colorcontrol */
uint32_t rb_colormask; uint32_t rb_colormask;
}; };
static inline struct fd2_blend_stateobj * static inline struct fd2_blend_stateobj *
fd2_blend_stateobj(struct pipe_blend_state *blend) fd2_blend_stateobj(struct pipe_blend_state *blend)
{ {
return (struct fd2_blend_stateobj *)blend; return (struct fd2_blend_stateobj *)blend;
} }
void * fd2_blend_state_create(struct pipe_context *pctx, void *fd2_blend_state_create(struct pipe_context *pctx,
const struct pipe_blend_state *cso); const struct pipe_blend_state *cso);
#endif /* FD2_BLEND_H_ */ #endif /* FD2_BLEND_H_ */

View file

@ -24,7 +24,6 @@
* Rob Clark <robclark@freedesktop.org> * Rob Clark <robclark@freedesktop.org>
*/ */
#include "fd2_context.h" #include "fd2_context.h"
#include "fd2_blend.h" #include "fd2_blend.h"
#include "fd2_draw.h" #include "fd2_draw.h"
@ -37,11 +36,10 @@
#include "fd2_zsa.h" #include "fd2_zsa.h"
static void static void
fd2_context_destroy(struct pipe_context *pctx) fd2_context_destroy(struct pipe_context *pctx) in_dt
in_dt
{ {
fd_context_destroy(pctx); fd_context_destroy(pctx);
free(pctx); free(pctx);
} }
static struct pipe_resource * static struct pipe_resource *
@ -64,11 +62,12 @@ create_solid_vertexbuf(struct pipe_context *pctx)
}; };
/* clang-format on */ /* clang-format on */
struct pipe_resource *prsc = pipe_buffer_create(pctx->screen, struct pipe_resource *prsc =
PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, sizeof(init_shader_const)); pipe_buffer_create(pctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE,
pipe_buffer_write(pctx, prsc, 0, sizeof(init_shader_const));
sizeof(init_shader_const), init_shader_const); pipe_buffer_write(pctx, prsc, 0, sizeof(init_shader_const),
return prsc; init_shader_const);
return prsc;
} }
/* clang-format off */ /* clang-format off */
@ -95,40 +94,40 @@ static const uint8_t a20x_primtypes[PIPE_PRIM_MAX] = {
struct pipe_context * struct pipe_context *
fd2_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) fd2_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
{ {
struct fd_screen *screen = fd_screen(pscreen); struct fd_screen *screen = fd_screen(pscreen);
struct fd2_context *fd2_ctx = CALLOC_STRUCT(fd2_context); struct fd2_context *fd2_ctx = CALLOC_STRUCT(fd2_context);
struct pipe_context *pctx; struct pipe_context *pctx;
if (!fd2_ctx) if (!fd2_ctx)
return NULL; return NULL;
pctx = &fd2_ctx->base.base; pctx = &fd2_ctx->base.base;
pctx->screen = pscreen; pctx->screen = pscreen;
fd2_ctx->base.dev = fd_device_ref(screen->dev); fd2_ctx->base.dev = fd_device_ref(screen->dev);
fd2_ctx->base.screen = fd_screen(pscreen); fd2_ctx->base.screen = fd_screen(pscreen);
pctx->destroy = fd2_context_destroy; pctx->destroy = fd2_context_destroy;
pctx->create_blend_state = fd2_blend_state_create; pctx->create_blend_state = fd2_blend_state_create;
pctx->create_rasterizer_state = fd2_rasterizer_state_create; pctx->create_rasterizer_state = fd2_rasterizer_state_create;
pctx->create_depth_stencil_alpha_state = fd2_zsa_state_create; pctx->create_depth_stencil_alpha_state = fd2_zsa_state_create;
fd2_draw_init(pctx); fd2_draw_init(pctx);
fd2_gmem_init(pctx); fd2_gmem_init(pctx);
fd2_texture_init(pctx); fd2_texture_init(pctx);
fd2_prog_init(pctx); fd2_prog_init(pctx);
fd2_emit_init(pctx); fd2_emit_init(pctx);
pctx = fd_context_init(&fd2_ctx->base, pscreen, pctx = fd_context_init(
(screen->gpu_id >= 220) ? a22x_primtypes : a20x_primtypes, &fd2_ctx->base, pscreen,
priv, flags); (screen->gpu_id >= 220) ? a22x_primtypes : a20x_primtypes, priv, flags);
if (!pctx) if (!pctx)
return NULL; return NULL;
/* construct vertex state used for solid ops (clear, and gmem<->mem) */ /* construct vertex state used for solid ops (clear, and gmem<->mem) */
fd2_ctx->solid_vertexbuf = create_solid_vertexbuf(pctx); fd2_ctx->solid_vertexbuf = create_solid_vertexbuf(pctx);
fd2_query_context_init(pctx); fd2_query_context_init(pctx);
return pctx; return pctx;
} }

View file

@ -30,21 +30,21 @@
#include "freedreno_context.h" #include "freedreno_context.h"
struct fd2_context { struct fd2_context {
struct fd_context base; struct fd_context base;
/* vertex buf used for clear/gmem->mem vertices, and mem->gmem /* vertex buf used for clear/gmem->mem vertices, and mem->gmem
* vertices and tex coords: * vertices and tex coords:
*/ */
struct pipe_resource *solid_vertexbuf; struct pipe_resource *solid_vertexbuf;
}; };
static inline struct fd2_context * static inline struct fd2_context *
fd2_context(struct fd_context *ctx) fd2_context(struct fd_context *ctx)
{ {
return (struct fd2_context *)ctx; return (struct fd2_context *)ctx;
} }
struct pipe_context * struct pipe_context *fd2_context_create(struct pipe_screen *pscreen, void *priv,
fd2_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags); unsigned flags);
#endif /* FD2_CONTEXT_H_ */ #endif /* FD2_CONTEXT_H_ */

File diff suppressed because it is too large Load diff

View file

@ -34,10 +34,10 @@
void fd2_draw_init(struct pipe_context *pctx); void fd2_draw_init(struct pipe_context *pctx);
enum { enum {
GMEM_PATCH_FASTCLEAR_COLOR, GMEM_PATCH_FASTCLEAR_COLOR,
GMEM_PATCH_FASTCLEAR_DEPTH, GMEM_PATCH_FASTCLEAR_DEPTH,
GMEM_PATCH_FASTCLEAR_COLOR_DEPTH, GMEM_PATCH_FASTCLEAR_COLOR_DEPTH,
GMEM_PATCH_RESTORE_INFO, GMEM_PATCH_RESTORE_INFO,
}; };
#endif /* FD2_DRAW_H_ */ #endif /* FD2_DRAW_H_ */

View file

@ -25,15 +25,15 @@
*/ */
#include "pipe/p_state.h" #include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_helpers.h" #include "util/u_helpers.h"
#include "util/u_memory.h"
#include "util/u_string.h"
#include "freedreno_resource.h" #include "freedreno_resource.h"
#include "fd2_emit.h"
#include "fd2_blend.h" #include "fd2_blend.h"
#include "fd2_context.h" #include "fd2_context.h"
#include "fd2_emit.h"
#include "fd2_program.h" #include "fd2_program.h"
#include "fd2_rasterizer.h" #include "fd2_rasterizer.h"
#include "fd2_texture.h" #include "fd2_texture.h"
@ -49,347 +49,353 @@
static void static void
emit_constants(struct fd_ringbuffer *ring, uint32_t base, emit_constants(struct fd_ringbuffer *ring, uint32_t base,
struct fd_constbuf_stateobj *constbuf, struct fd_constbuf_stateobj *constbuf,
struct fd2_shader_stateobj *shader) struct fd2_shader_stateobj *shader)
{ {
uint32_t enabled_mask = constbuf->enabled_mask; uint32_t enabled_mask = constbuf->enabled_mask;
uint32_t start_base = base; uint32_t start_base = base;
unsigned i; unsigned i;
/* emit user constants: */ /* emit user constants: */
while (enabled_mask) { while (enabled_mask) {
unsigned index = ffs(enabled_mask) - 1; unsigned index = ffs(enabled_mask) - 1;
struct pipe_constant_buffer *cb = &constbuf->cb[index]; struct pipe_constant_buffer *cb = &constbuf->cb[index];
unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */ unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */
// I expect that size should be a multiple of vec4's: // I expect that size should be a multiple of vec4's:
assert(size == align(size, 4)); assert(size == align(size, 4));
/* hmm, sometimes we still seem to end up with consts bound, /* hmm, sometimes we still seem to end up with consts bound,
* even if shader isn't using them, which ends up overwriting * even if shader isn't using them, which ends up overwriting
* const reg's used for immediates.. this is a hack to work * const reg's used for immediates.. this is a hack to work
* around that: * around that:
*/ */
if (shader && ((base - start_base) >= (shader->first_immediate * 4))) if (shader && ((base - start_base) >= (shader->first_immediate * 4)))
break; break;
const uint32_t *dwords; const uint32_t *dwords;
if (cb->user_buffer) { if (cb->user_buffer) {
dwords = cb->user_buffer; dwords = cb->user_buffer;
} else { } else {
struct fd_resource *rsc = fd_resource(cb->buffer); struct fd_resource *rsc = fd_resource(cb->buffer);
dwords = fd_bo_map(rsc->bo); dwords = fd_bo_map(rsc->bo);
} }
dwords = (uint32_t *)(((uint8_t *)dwords) + cb->buffer_offset); dwords = (uint32_t *)(((uint8_t *)dwords) + cb->buffer_offset);
OUT_PKT3(ring, CP_SET_CONSTANT, size + 1); OUT_PKT3(ring, CP_SET_CONSTANT, size + 1);
OUT_RING(ring, base); OUT_RING(ring, base);
for (i = 0; i < size; i++) for (i = 0; i < size; i++)
OUT_RING(ring, *(dwords++)); OUT_RING(ring, *(dwords++));
base += size; base += size;
enabled_mask &= ~(1 << index); enabled_mask &= ~(1 << index);
} }
/* emit shader immediates: */ /* emit shader immediates: */
if (shader) { if (shader) {
for (i = 0; i < shader->num_immediates; i++) { for (i = 0; i < shader->num_immediates; i++) {
OUT_PKT3(ring, CP_SET_CONSTANT, 5); OUT_PKT3(ring, CP_SET_CONSTANT, 5);
OUT_RING(ring, start_base + (4 * (shader->first_immediate + i))); OUT_RING(ring, start_base + (4 * (shader->first_immediate + i)));
OUT_RING(ring, shader->immediates[i].val[0]); OUT_RING(ring, shader->immediates[i].val[0]);
OUT_RING(ring, shader->immediates[i].val[1]); OUT_RING(ring, shader->immediates[i].val[1]);
OUT_RING(ring, shader->immediates[i].val[2]); OUT_RING(ring, shader->immediates[i].val[2]);
OUT_RING(ring, shader->immediates[i].val[3]); OUT_RING(ring, shader->immediates[i].val[3]);
base += 4; base += 4;
} }
} }
} }
typedef uint32_t texmask; typedef uint32_t texmask;
static texmask static texmask
emit_texture(struct fd_ringbuffer *ring, struct fd_context *ctx, emit_texture(struct fd_ringbuffer *ring, struct fd_context *ctx,
struct fd_texture_stateobj *tex, unsigned samp_id, texmask emitted) struct fd_texture_stateobj *tex, unsigned samp_id, texmask emitted)
{ {
unsigned const_idx = fd2_get_const_idx(ctx, tex, samp_id); unsigned const_idx = fd2_get_const_idx(ctx, tex, samp_id);
static const struct fd2_sampler_stateobj dummy_sampler = {}; static const struct fd2_sampler_stateobj dummy_sampler = {};
static const struct fd2_pipe_sampler_view dummy_view = {}; static const struct fd2_pipe_sampler_view dummy_view = {};
const struct fd2_sampler_stateobj *sampler; const struct fd2_sampler_stateobj *sampler;
const struct fd2_pipe_sampler_view *view; const struct fd2_pipe_sampler_view *view;
struct fd_resource *rsc; struct fd_resource *rsc;
if (emitted & (1 << const_idx)) if (emitted & (1 << const_idx))
return 0; return 0;
sampler = tex->samplers[samp_id] ? sampler = tex->samplers[samp_id]
fd2_sampler_stateobj(tex->samplers[samp_id]) : ? fd2_sampler_stateobj(tex->samplers[samp_id])
&dummy_sampler; : &dummy_sampler;
view = tex->textures[samp_id] ? view = tex->textures[samp_id] ? fd2_pipe_sampler_view(tex->textures[samp_id])
fd2_pipe_sampler_view(tex->textures[samp_id]) : : &dummy_view;
&dummy_view;
rsc = view->base.texture ? fd_resource(view->base.texture) : NULL; rsc = view->base.texture ? fd_resource(view->base.texture) : NULL;
OUT_PKT3(ring, CP_SET_CONSTANT, 7); OUT_PKT3(ring, CP_SET_CONSTANT, 7);
OUT_RING(ring, 0x00010000 + (0x6 * const_idx)); OUT_RING(ring, 0x00010000 + (0x6 * const_idx));
OUT_RING(ring, sampler->tex0 | view->tex0); OUT_RING(ring, sampler->tex0 | view->tex0);
if (rsc) if (rsc)
OUT_RELOC(ring, rsc->bo, fd_resource_offset(rsc, 0, 0), view->tex1, 0); OUT_RELOC(ring, rsc->bo, fd_resource_offset(rsc, 0, 0), view->tex1, 0);
else else
OUT_RING(ring, 0); OUT_RING(ring, 0);
OUT_RING(ring, view->tex2); OUT_RING(ring, view->tex2);
OUT_RING(ring, sampler->tex3 | view->tex3); OUT_RING(ring, sampler->tex3 | view->tex3);
OUT_RING(ring, sampler->tex4 | view->tex4); OUT_RING(ring, sampler->tex4 | view->tex4);
if (rsc && rsc->b.b.last_level) if (rsc && rsc->b.b.last_level)
OUT_RELOC(ring, rsc->bo, fd_resource_offset(rsc, 1, 0), view->tex5, 0); OUT_RELOC(ring, rsc->bo, fd_resource_offset(rsc, 1, 0), view->tex5, 0);
else else
OUT_RING(ring, view->tex5); OUT_RING(ring, view->tex5);
return (1 << const_idx); return (1 << const_idx);
} }
static void static void
emit_textures(struct fd_ringbuffer *ring, struct fd_context *ctx) emit_textures(struct fd_ringbuffer *ring, struct fd_context *ctx)
{ {
struct fd_texture_stateobj *fragtex = &ctx->tex[PIPE_SHADER_FRAGMENT]; struct fd_texture_stateobj *fragtex = &ctx->tex[PIPE_SHADER_FRAGMENT];
struct fd_texture_stateobj *verttex = &ctx->tex[PIPE_SHADER_VERTEX]; struct fd_texture_stateobj *verttex = &ctx->tex[PIPE_SHADER_VERTEX];
texmask emitted = 0; texmask emitted = 0;
unsigned i; unsigned i;
for (i = 0; i < verttex->num_samplers; i++) for (i = 0; i < verttex->num_samplers; i++)
if (verttex->samplers[i]) if (verttex->samplers[i])
emitted |= emit_texture(ring, ctx, verttex, i, emitted); emitted |= emit_texture(ring, ctx, verttex, i, emitted);
for (i = 0; i < fragtex->num_samplers; i++) for (i = 0; i < fragtex->num_samplers; i++)
if (fragtex->samplers[i]) if (fragtex->samplers[i])
emitted |= emit_texture(ring, ctx, fragtex, i, emitted); emitted |= emit_texture(ring, ctx, fragtex, i, emitted);
} }
void void
fd2_emit_vertex_bufs(struct fd_ringbuffer *ring, uint32_t val, fd2_emit_vertex_bufs(struct fd_ringbuffer *ring, uint32_t val,
struct fd2_vertex_buf *vbufs, uint32_t n) struct fd2_vertex_buf *vbufs, uint32_t n)
{ {
unsigned i; unsigned i;
OUT_PKT3(ring, CP_SET_CONSTANT, 1 + (2 * n)); OUT_PKT3(ring, CP_SET_CONSTANT, 1 + (2 * n));
OUT_RING(ring, (0x1 << 16) | (val & 0xffff)); OUT_RING(ring, (0x1 << 16) | (val & 0xffff));
for (i = 0; i < n; i++) { for (i = 0; i < n; i++) {
struct fd_resource *rsc = fd_resource(vbufs[i].prsc); struct fd_resource *rsc = fd_resource(vbufs[i].prsc);
OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 3, 0); OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 3, 0);
OUT_RING (ring, vbufs[i].size); OUT_RING(ring, vbufs[i].size);
} }
} }
void void
fd2_emit_state_binning(struct fd_context *ctx, const enum fd_dirty_3d_state dirty) fd2_emit_state_binning(struct fd_context *ctx,
const enum fd_dirty_3d_state dirty)
{ {
struct fd2_blend_stateobj *blend = fd2_blend_stateobj(ctx->blend); struct fd2_blend_stateobj *blend = fd2_blend_stateobj(ctx->blend);
struct fd_ringbuffer *ring = ctx->batch->binning; struct fd_ringbuffer *ring = ctx->batch->binning;
/* subset of fd2_emit_state needed for hw binning on a20x */ /* subset of fd2_emit_state needed for hw binning on a20x */
if (dirty & (FD_DIRTY_PROG | FD_DIRTY_VTXSTATE)) if (dirty & (FD_DIRTY_PROG | FD_DIRTY_VTXSTATE))
fd2_program_emit(ctx, ring, &ctx->prog); fd2_program_emit(ctx, ring, &ctx->prog);
if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONST)) { if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONST)) {
emit_constants(ring, VS_CONST_BASE * 4, emit_constants(ring, VS_CONST_BASE * 4,
&ctx->constbuf[PIPE_SHADER_VERTEX], &ctx->constbuf[PIPE_SHADER_VERTEX],
(dirty & FD_DIRTY_PROG) ? ctx->prog.vs : NULL); (dirty & FD_DIRTY_PROG) ? ctx->prog.vs : NULL);
} }
if (dirty & FD_DIRTY_VIEWPORT) { if (dirty & FD_DIRTY_VIEWPORT) {
OUT_PKT3(ring, CP_SET_CONSTANT, 9); OUT_PKT3(ring, CP_SET_CONSTANT, 9);
OUT_RING(ring, 0x00000184); OUT_RING(ring, 0x00000184);
OUT_RING(ring, fui(ctx->viewport.translate[0])); OUT_RING(ring, fui(ctx->viewport.translate[0]));
OUT_RING(ring, fui(ctx->viewport.translate[1])); OUT_RING(ring, fui(ctx->viewport.translate[1]));
OUT_RING(ring, fui(ctx->viewport.translate[2])); OUT_RING(ring, fui(ctx->viewport.translate[2]));
OUT_RING(ring, fui(0.0f)); OUT_RING(ring, fui(0.0f));
OUT_RING(ring, fui(ctx->viewport.scale[0])); OUT_RING(ring, fui(ctx->viewport.scale[0]));
OUT_RING(ring, fui(ctx->viewport.scale[1])); OUT_RING(ring, fui(ctx->viewport.scale[1]));
OUT_RING(ring, fui(ctx->viewport.scale[2])); OUT_RING(ring, fui(ctx->viewport.scale[2]));
OUT_RING(ring, fui(0.0f)); OUT_RING(ring, fui(0.0f));
} }
/* not sure why this is needed */ /* not sure why this is needed */
if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) { if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) {
OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL)); OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL));
OUT_RING(ring, blend->rb_blendcontrol); OUT_RING(ring, blend->rb_blendcontrol);
OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK)); OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK));
OUT_RING(ring, blend->rb_colormask); OUT_RING(ring, blend->rb_colormask);
} }
OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_SC_MODE_CNTL)); OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_SC_MODE_CNTL));
OUT_RING(ring, A2XX_PA_SU_SC_MODE_CNTL_FACE_KILL_ENABLE); OUT_RING(ring, A2XX_PA_SU_SC_MODE_CNTL_FACE_KILL_ENABLE);
} }
void void
fd2_emit_state(struct fd_context *ctx, const enum fd_dirty_3d_state dirty) fd2_emit_state(struct fd_context *ctx, const enum fd_dirty_3d_state dirty)
{ {
struct fd2_blend_stateobj *blend = fd2_blend_stateobj(ctx->blend); struct fd2_blend_stateobj *blend = fd2_blend_stateobj(ctx->blend);
struct fd2_zsa_stateobj *zsa = fd2_zsa_stateobj(ctx->zsa); struct fd2_zsa_stateobj *zsa = fd2_zsa_stateobj(ctx->zsa);
struct fd2_shader_stateobj *fs = ctx->prog.fs; struct fd2_shader_stateobj *fs = ctx->prog.fs;
struct fd_ringbuffer *ring = ctx->batch->draw; struct fd_ringbuffer *ring = ctx->batch->draw;
/* NOTE: we probably want to eventually refactor this so each state /* NOTE: we probably want to eventually refactor this so each state
* object handles emitting it's own state.. although the mapping of * object handles emitting it's own state.. although the mapping of
* state to registers is not always orthogonal, sometimes a single * state to registers is not always orthogonal, sometimes a single
* register contains bitfields coming from multiple state objects, * register contains bitfields coming from multiple state objects,
* so not sure the best way to deal with that yet. * so not sure the best way to deal with that yet.
*/ */
if (dirty & FD_DIRTY_SAMPLE_MASK) { if (dirty & FD_DIRTY_SAMPLE_MASK) {
OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK)); OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
OUT_RING(ring, ctx->sample_mask); OUT_RING(ring, ctx->sample_mask);
} }
if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF | FD_DIRTY_PROG)) { if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF | FD_DIRTY_PROG)) {
struct pipe_stencil_ref *sr = &ctx->stencil_ref; struct pipe_stencil_ref *sr = &ctx->stencil_ref;
uint32_t val = zsa->rb_depthcontrol; uint32_t val = zsa->rb_depthcontrol;
if (fs->has_kill) if (fs->has_kill)
val &= ~A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE; val &= ~A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE;
OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL)); OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
OUT_RING(ring, val); OUT_RING(ring, val);
OUT_PKT3(ring, CP_SET_CONSTANT, 4); OUT_PKT3(ring, CP_SET_CONSTANT, 4);
OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF)); OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));
OUT_RING(ring, zsa->rb_stencilrefmask_bf | OUT_RING(ring, zsa->rb_stencilrefmask_bf |
A2XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[1])); A2XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[1]));
OUT_RING(ring, zsa->rb_stencilrefmask | OUT_RING(ring, zsa->rb_stencilrefmask |
A2XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[0])); A2XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[0]));
OUT_RING(ring, zsa->rb_alpha_ref); OUT_RING(ring, zsa->rb_alpha_ref);
} }
if (ctx->rasterizer && dirty & FD_DIRTY_RASTERIZER) { if (ctx->rasterizer && dirty & FD_DIRTY_RASTERIZER) {
struct fd2_rasterizer_stateobj *rasterizer = struct fd2_rasterizer_stateobj *rasterizer =
fd2_rasterizer_stateobj(ctx->rasterizer); fd2_rasterizer_stateobj(ctx->rasterizer);
OUT_PKT3(ring, CP_SET_CONSTANT, 3); OUT_PKT3(ring, CP_SET_CONSTANT, 3);
OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL)); OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
OUT_RING(ring, rasterizer->pa_cl_clip_cntl); OUT_RING(ring, rasterizer->pa_cl_clip_cntl);
OUT_RING(ring, rasterizer->pa_su_sc_mode_cntl | OUT_RING(ring, rasterizer->pa_su_sc_mode_cntl |
A2XX_PA_SU_SC_MODE_CNTL_VTX_WINDOW_OFFSET_ENABLE); A2XX_PA_SU_SC_MODE_CNTL_VTX_WINDOW_OFFSET_ENABLE);
OUT_PKT3(ring, CP_SET_CONSTANT, 5); OUT_PKT3(ring, CP_SET_CONSTANT, 5);
OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_POINT_SIZE)); OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_POINT_SIZE));
OUT_RING(ring, rasterizer->pa_su_point_size); OUT_RING(ring, rasterizer->pa_su_point_size);
OUT_RING(ring, rasterizer->pa_su_point_minmax); OUT_RING(ring, rasterizer->pa_su_point_minmax);
OUT_RING(ring, rasterizer->pa_su_line_cntl); OUT_RING(ring, rasterizer->pa_su_line_cntl);
OUT_RING(ring, rasterizer->pa_sc_line_stipple); OUT_RING(ring, rasterizer->pa_sc_line_stipple);
OUT_PKT3(ring, CP_SET_CONSTANT, 6); OUT_PKT3(ring, CP_SET_CONSTANT, 6);
OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_VTX_CNTL)); OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_VTX_CNTL));
OUT_RING(ring, rasterizer->pa_su_vtx_cntl); OUT_RING(ring, rasterizer->pa_su_vtx_cntl);
OUT_RING(ring, fui(1.0)); /* PA_CL_GB_VERT_CLIP_ADJ */ OUT_RING(ring, fui(1.0)); /* PA_CL_GB_VERT_CLIP_ADJ */
OUT_RING(ring, fui(1.0)); /* PA_CL_GB_VERT_DISC_ADJ */ OUT_RING(ring, fui(1.0)); /* PA_CL_GB_VERT_DISC_ADJ */
OUT_RING(ring, fui(1.0)); /* PA_CL_GB_HORZ_CLIP_ADJ */ OUT_RING(ring, fui(1.0)); /* PA_CL_GB_HORZ_CLIP_ADJ */
OUT_RING(ring, fui(1.0)); /* PA_CL_GB_HORZ_DISC_ADJ */ OUT_RING(ring, fui(1.0)); /* PA_CL_GB_HORZ_DISC_ADJ */
if (rasterizer->base.offset_tri) { if (rasterizer->base.offset_tri) {
/* TODO: why multiply scale by 2 ? without it deqp test fails /* TODO: why multiply scale by 2 ? without it deqp test fails
* deqp/piglit tests aren't very precise * deqp/piglit tests aren't very precise
*/ */
OUT_PKT3(ring, CP_SET_CONSTANT, 5); OUT_PKT3(ring, CP_SET_CONSTANT, 5);
OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_POLY_OFFSET_FRONT_SCALE)); OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_POLY_OFFSET_FRONT_SCALE));
OUT_RING(ring, fui(rasterizer->base.offset_scale * 2.0f)); /* FRONT_SCALE */ OUT_RING(ring,
OUT_RING(ring, fui(rasterizer->base.offset_units)); /* FRONT_OFFSET */ fui(rasterizer->base.offset_scale * 2.0f)); /* FRONT_SCALE */
OUT_RING(ring, fui(rasterizer->base.offset_scale * 2.0f)); /* BACK_SCALE */ OUT_RING(ring, fui(rasterizer->base.offset_units)); /* FRONT_OFFSET */
OUT_RING(ring, fui(rasterizer->base.offset_units)); /* BACK_OFFSET */ OUT_RING(ring,
} fui(rasterizer->base.offset_scale * 2.0f)); /* BACK_SCALE */
} OUT_RING(ring, fui(rasterizer->base.offset_units)); /* BACK_OFFSET */
}
}
/* NOTE: scissor enabled bit is part of rasterizer state: */ /* NOTE: scissor enabled bit is part of rasterizer state: */
if (dirty & (FD_DIRTY_SCISSOR | FD_DIRTY_RASTERIZER)) { if (dirty & (FD_DIRTY_SCISSOR | FD_DIRTY_RASTERIZER)) {
struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx); struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
OUT_PKT3(ring, CP_SET_CONSTANT, 3); OUT_PKT3(ring, CP_SET_CONSTANT, 3);
OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL)); OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
OUT_RING(ring, xy2d(scissor->minx, /* PA_SC_WINDOW_SCISSOR_TL */ OUT_RING(ring, xy2d(scissor->minx, /* PA_SC_WINDOW_SCISSOR_TL */
scissor->miny)); scissor->miny));
OUT_RING(ring, xy2d(scissor->maxx, /* PA_SC_WINDOW_SCISSOR_BR */ OUT_RING(ring, xy2d(scissor->maxx, /* PA_SC_WINDOW_SCISSOR_BR */
scissor->maxy)); scissor->maxy));
ctx->batch->max_scissor.minx = MIN2(ctx->batch->max_scissor.minx, scissor->minx); ctx->batch->max_scissor.minx =
ctx->batch->max_scissor.miny = MIN2(ctx->batch->max_scissor.miny, scissor->miny); MIN2(ctx->batch->max_scissor.minx, scissor->minx);
ctx->batch->max_scissor.maxx = MAX2(ctx->batch->max_scissor.maxx, scissor->maxx); ctx->batch->max_scissor.miny =
ctx->batch->max_scissor.maxy = MAX2(ctx->batch->max_scissor.maxy, scissor->maxy); MIN2(ctx->batch->max_scissor.miny, scissor->miny);
} ctx->batch->max_scissor.maxx =
MAX2(ctx->batch->max_scissor.maxx, scissor->maxx);
ctx->batch->max_scissor.maxy =
MAX2(ctx->batch->max_scissor.maxy, scissor->maxy);
}
if (dirty & FD_DIRTY_VIEWPORT) { if (dirty & FD_DIRTY_VIEWPORT) {
OUT_PKT3(ring, CP_SET_CONSTANT, 7); OUT_PKT3(ring, CP_SET_CONSTANT, 7);
OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE)); OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
OUT_RING(ring, fui(ctx->viewport.scale[0])); /* PA_CL_VPORT_XSCALE */ OUT_RING(ring, fui(ctx->viewport.scale[0])); /* PA_CL_VPORT_XSCALE */
OUT_RING(ring, fui(ctx->viewport.translate[0])); /* PA_CL_VPORT_XOFFSET */ OUT_RING(ring, fui(ctx->viewport.translate[0])); /* PA_CL_VPORT_XOFFSET */
OUT_RING(ring, fui(ctx->viewport.scale[1])); /* PA_CL_VPORT_YSCALE */ OUT_RING(ring, fui(ctx->viewport.scale[1])); /* PA_CL_VPORT_YSCALE */
OUT_RING(ring, fui(ctx->viewport.translate[1])); /* PA_CL_VPORT_YOFFSET */ OUT_RING(ring, fui(ctx->viewport.translate[1])); /* PA_CL_VPORT_YOFFSET */
OUT_RING(ring, fui(ctx->viewport.scale[2])); /* PA_CL_VPORT_ZSCALE */ OUT_RING(ring, fui(ctx->viewport.scale[2])); /* PA_CL_VPORT_ZSCALE */
OUT_RING(ring, fui(ctx->viewport.translate[2])); /* PA_CL_VPORT_ZOFFSET */ OUT_RING(ring, fui(ctx->viewport.translate[2])); /* PA_CL_VPORT_ZOFFSET */
/* set viewport in C65/C66, for a20x hw binning and fragcoord.z */ /* set viewport in C65/C66, for a20x hw binning and fragcoord.z */
OUT_PKT3(ring, CP_SET_CONSTANT, 9); OUT_PKT3(ring, CP_SET_CONSTANT, 9);
OUT_RING(ring, 0x00000184); OUT_RING(ring, 0x00000184);
OUT_RING(ring, fui(ctx->viewport.translate[0])); OUT_RING(ring, fui(ctx->viewport.translate[0]));
OUT_RING(ring, fui(ctx->viewport.translate[1])); OUT_RING(ring, fui(ctx->viewport.translate[1]));
OUT_RING(ring, fui(ctx->viewport.translate[2])); OUT_RING(ring, fui(ctx->viewport.translate[2]));
OUT_RING(ring, fui(0.0f)); OUT_RING(ring, fui(0.0f));
OUT_RING(ring, fui(ctx->viewport.scale[0])); OUT_RING(ring, fui(ctx->viewport.scale[0]));
OUT_RING(ring, fui(ctx->viewport.scale[1])); OUT_RING(ring, fui(ctx->viewport.scale[1]));
OUT_RING(ring, fui(ctx->viewport.scale[2])); OUT_RING(ring, fui(ctx->viewport.scale[2]));
OUT_RING(ring, fui(0.0f)); OUT_RING(ring, fui(0.0f));
} }
if (dirty & (FD_DIRTY_PROG | FD_DIRTY_VTXSTATE | FD_DIRTY_TEXSTATE)) if (dirty & (FD_DIRTY_PROG | FD_DIRTY_VTXSTATE | FD_DIRTY_TEXSTATE))
fd2_program_emit(ctx, ring, &ctx->prog); fd2_program_emit(ctx, ring, &ctx->prog);
if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONST)) { if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONST)) {
emit_constants(ring, VS_CONST_BASE * 4, emit_constants(ring, VS_CONST_BASE * 4,
&ctx->constbuf[PIPE_SHADER_VERTEX], &ctx->constbuf[PIPE_SHADER_VERTEX],
(dirty & FD_DIRTY_PROG) ? ctx->prog.vs : NULL); (dirty & FD_DIRTY_PROG) ? ctx->prog.vs : NULL);
emit_constants(ring, PS_CONST_BASE * 4, emit_constants(ring, PS_CONST_BASE * 4,
&ctx->constbuf[PIPE_SHADER_FRAGMENT], &ctx->constbuf[PIPE_SHADER_FRAGMENT],
(dirty & FD_DIRTY_PROG) ? ctx->prog.fs : NULL); (dirty & FD_DIRTY_PROG) ? ctx->prog.fs : NULL);
} }
if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_ZSA)) { if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_ZSA)) {
OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL)); OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL));
OUT_RING(ring, zsa->rb_colorcontrol | blend->rb_colorcontrol); OUT_RING(ring, zsa->rb_colorcontrol | blend->rb_colorcontrol);
} }
if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) { if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) {
OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL)); OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL));
OUT_RING(ring, blend->rb_blendcontrol); OUT_RING(ring, blend->rb_blendcontrol);
OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK)); OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK));
OUT_RING(ring, blend->rb_colormask); OUT_RING(ring, blend->rb_colormask);
} }
if (dirty & FD_DIRTY_BLEND_COLOR) { if (dirty & FD_DIRTY_BLEND_COLOR) {
OUT_PKT3(ring, CP_SET_CONSTANT, 5); OUT_PKT3(ring, CP_SET_CONSTANT, 5);
OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_RED)); OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_RED));
OUT_RING(ring, float_to_ubyte(ctx->blend_color.color[0])); OUT_RING(ring, float_to_ubyte(ctx->blend_color.color[0]));
OUT_RING(ring, float_to_ubyte(ctx->blend_color.color[1])); OUT_RING(ring, float_to_ubyte(ctx->blend_color.color[1]));
OUT_RING(ring, float_to_ubyte(ctx->blend_color.color[2])); OUT_RING(ring, float_to_ubyte(ctx->blend_color.color[2]));
OUT_RING(ring, float_to_ubyte(ctx->blend_color.color[3])); OUT_RING(ring, float_to_ubyte(ctx->blend_color.color[3]));
} }
if (dirty & (FD_DIRTY_TEX | FD_DIRTY_PROG)) if (dirty & (FD_DIRTY_TEX | FD_DIRTY_PROG))
emit_textures(ring, ctx); emit_textures(ring, ctx);
} }
/* emit per-context initialization: /* emit per-context initialization:
@ -397,177 +403,175 @@ fd2_emit_state(struct fd_context *ctx, const enum fd_dirty_3d_state dirty)
void void
fd2_emit_restore(struct fd_context *ctx, struct fd_ringbuffer *ring) fd2_emit_restore(struct fd_context *ctx, struct fd_ringbuffer *ring)
{ {
if (is_a20x(ctx->screen)) { if (is_a20x(ctx->screen)) {
OUT_PKT0(ring, REG_A2XX_RB_BC_CONTROL, 1); OUT_PKT0(ring, REG_A2XX_RB_BC_CONTROL, 1);
OUT_RING(ring, OUT_RING(ring, A2XX_RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT(3) |
A2XX_RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT(3) | A2XX_RB_BC_CONTROL_DISABLE_LZ_NULL_ZCMD_DROP |
A2XX_RB_BC_CONTROL_DISABLE_LZ_NULL_ZCMD_DROP | A2XX_RB_BC_CONTROL_ENABLE_CRC_UPDATE |
A2XX_RB_BC_CONTROL_ENABLE_CRC_UPDATE | A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT(8) |
A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT(8) | A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT(3));
A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT(3));
/* not sure why this is required */ /* not sure why this is required */
OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_VIZ_QUERY)); OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_VIZ_QUERY));
OUT_RING(ring, A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ID(16)); OUT_RING(ring, A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ID(16));
OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL)); OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
OUT_RING(ring, 0x00000002); OUT_RING(ring, 0x00000002);
OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_VGT_OUT_DEALLOC_CNTL)); OUT_RING(ring, CP_REG(REG_A2XX_VGT_OUT_DEALLOC_CNTL));
OUT_RING(ring, 0x00000002); OUT_RING(ring, 0x00000002);
} else { } else {
OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL)); OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
OUT_RING(ring, 0x0000003b); OUT_RING(ring, 0x0000003b);
} }
/* enable perfcntrs */ /* enable perfcntrs */
OUT_PKT0(ring, REG_A2XX_CP_PERFMON_CNTL, 1); OUT_PKT0(ring, REG_A2XX_CP_PERFMON_CNTL, 1);
OUT_RING(ring, COND(FD_DBG(PERFC), 1)); OUT_RING(ring, COND(FD_DBG(PERFC), 1));
/* note: perfcntrs don't work without the PM_OVERRIDE bit */ /* note: perfcntrs don't work without the PM_OVERRIDE bit */
OUT_PKT0(ring, REG_A2XX_RBBM_PM_OVERRIDE1, 2); OUT_PKT0(ring, REG_A2XX_RBBM_PM_OVERRIDE1, 2);
OUT_RING(ring, 0xffffffff); OUT_RING(ring, 0xffffffff);
OUT_RING(ring, 0x00000fff); OUT_RING(ring, 0x00000fff);
OUT_PKT0(ring, REG_A2XX_TP0_CHICKEN, 1); OUT_PKT0(ring, REG_A2XX_TP0_CHICKEN, 1);
OUT_RING(ring, 0x00000002); OUT_RING(ring, 0x00000002);
OUT_PKT3(ring, CP_INVALIDATE_STATE, 1); OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
OUT_RING(ring, 0x00007fff); OUT_RING(ring, 0x00007fff);
OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_SQ_VS_CONST)); OUT_RING(ring, CP_REG(REG_A2XX_SQ_VS_CONST));
OUT_RING(ring, A2XX_SQ_VS_CONST_BASE(VS_CONST_BASE) | OUT_RING(ring, A2XX_SQ_VS_CONST_BASE(VS_CONST_BASE) |
A2XX_SQ_VS_CONST_SIZE(0x100)); A2XX_SQ_VS_CONST_SIZE(0x100));
OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_SQ_PS_CONST)); OUT_RING(ring, CP_REG(REG_A2XX_SQ_PS_CONST));
OUT_RING(ring, A2XX_SQ_PS_CONST_BASE(PS_CONST_BASE) | OUT_RING(ring,
A2XX_SQ_PS_CONST_SIZE(0xe0)); A2XX_SQ_PS_CONST_BASE(PS_CONST_BASE) | A2XX_SQ_PS_CONST_SIZE(0xe0));
OUT_PKT3(ring, CP_SET_CONSTANT, 3); OUT_PKT3(ring, CP_SET_CONSTANT, 3);
OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX)); OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
OUT_RING(ring, 0xffffffff); /* VGT_MAX_VTX_INDX */ OUT_RING(ring, 0xffffffff); /* VGT_MAX_VTX_INDX */
OUT_RING(ring, 0x00000000); /* VGT_MIN_VTX_INDX */ OUT_RING(ring, 0x00000000); /* VGT_MIN_VTX_INDX */
OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET)); OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_SQ_CONTEXT_MISC)); OUT_RING(ring, CP_REG(REG_A2XX_SQ_CONTEXT_MISC));
OUT_RING(ring, A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL(CENTERS_ONLY)); OUT_RING(ring, A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL(CENTERS_ONLY));
OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_SQ_INTERPOLATOR_CNTL)); OUT_RING(ring, CP_REG(REG_A2XX_SQ_INTERPOLATOR_CNTL));
OUT_RING(ring, 0xffffffff); OUT_RING(ring, 0xffffffff);
OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_CONFIG)); OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_CONFIG));
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_LINE_CNTL)); OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_LINE_CNTL));
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET)); OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
// XXX we change this dynamically for draw/clear.. vs gmem<->mem.. // XXX we change this dynamically for draw/clear.. vs gmem<->mem..
OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL)); OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL));
OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(COLOR_DEPTH)); OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(COLOR_DEPTH));
OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_SAMPLE_POS)); OUT_RING(ring, CP_REG(REG_A2XX_RB_SAMPLE_POS));
OUT_RING(ring, 0x88888888); OUT_RING(ring, 0x88888888);
OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_DEST_MASK)); OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_DEST_MASK));
OUT_RING(ring, 0xffffffff); OUT_RING(ring, 0xffffffff);
OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_DEST_INFO)); OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_DEST_INFO));
OUT_RING(ring, A2XX_RB_COPY_DEST_INFO_FORMAT(COLORX_4_4_4_4) | OUT_RING(ring, A2XX_RB_COPY_DEST_INFO_FORMAT(COLORX_4_4_4_4) |
A2XX_RB_COPY_DEST_INFO_WRITE_RED | A2XX_RB_COPY_DEST_INFO_WRITE_RED |
A2XX_RB_COPY_DEST_INFO_WRITE_GREEN | A2XX_RB_COPY_DEST_INFO_WRITE_GREEN |
A2XX_RB_COPY_DEST_INFO_WRITE_BLUE | A2XX_RB_COPY_DEST_INFO_WRITE_BLUE |
A2XX_RB_COPY_DEST_INFO_WRITE_ALPHA); A2XX_RB_COPY_DEST_INFO_WRITE_ALPHA);
OUT_PKT3(ring, CP_SET_CONSTANT, 3); OUT_PKT3(ring, CP_SET_CONSTANT, 3);
OUT_RING(ring, CP_REG(REG_A2XX_SQ_WRAPPING_0)); OUT_RING(ring, CP_REG(REG_A2XX_SQ_WRAPPING_0));
OUT_RING(ring, 0x00000000); /* SQ_WRAPPING_0 */ OUT_RING(ring, 0x00000000); /* SQ_WRAPPING_0 */
OUT_RING(ring, 0x00000000); /* SQ_WRAPPING_1 */ OUT_RING(ring, 0x00000000); /* SQ_WRAPPING_1 */
OUT_PKT3(ring, CP_SET_DRAW_INIT_FLAGS, 1); OUT_PKT3(ring, CP_SET_DRAW_INIT_FLAGS, 1);
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_PKT3(ring, CP_WAIT_REG_EQ, 4); OUT_PKT3(ring, CP_WAIT_REG_EQ, 4);
OUT_RING(ring, 0x000005d0); OUT_RING(ring, 0x000005d0);
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x5f601000); OUT_RING(ring, 0x5f601000);
OUT_RING(ring, 0x00000001); OUT_RING(ring, 0x00000001);
OUT_PKT0(ring, REG_A2XX_SQ_INST_STORE_MANAGMENT, 1); OUT_PKT0(ring, REG_A2XX_SQ_INST_STORE_MANAGMENT, 1);
OUT_RING(ring, 0x00000180); OUT_RING(ring, 0x00000180);
OUT_PKT3(ring, CP_INVALIDATE_STATE, 1); OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
OUT_RING(ring, 0x00000300); OUT_RING(ring, 0x00000300);
OUT_PKT3(ring, CP_SET_SHADER_BASES, 1); OUT_PKT3(ring, CP_SET_SHADER_BASES, 1);
OUT_RING(ring, 0x80000180); OUT_RING(ring, 0x80000180);
/* not sure what this form of CP_SET_CONSTANT is.. */ /* not sure what this form of CP_SET_CONSTANT is.. */
OUT_PKT3(ring, CP_SET_CONSTANT, 13); OUT_PKT3(ring, CP_SET_CONSTANT, 13);
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x469c4000); OUT_RING(ring, 0x469c4000);
OUT_RING(ring, 0x3f800000); OUT_RING(ring, 0x3f800000);
OUT_RING(ring, 0x3f000000); OUT_RING(ring, 0x3f000000);
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x40000000); OUT_RING(ring, 0x40000000);
OUT_RING(ring, 0x3f400000); OUT_RING(ring, 0x3f400000);
OUT_RING(ring, 0x3ec00000); OUT_RING(ring, 0x3ec00000);
OUT_RING(ring, 0x3e800000); OUT_RING(ring, 0x3e800000);
OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK)); OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK));
OUT_RING(ring, A2XX_RB_COLOR_MASK_WRITE_RED | OUT_RING(ring,
A2XX_RB_COLOR_MASK_WRITE_GREEN | A2XX_RB_COLOR_MASK_WRITE_RED | A2XX_RB_COLOR_MASK_WRITE_GREEN |
A2XX_RB_COLOR_MASK_WRITE_BLUE | A2XX_RB_COLOR_MASK_WRITE_BLUE | A2XX_RB_COLOR_MASK_WRITE_ALPHA);
A2XX_RB_COLOR_MASK_WRITE_ALPHA);
OUT_PKT3(ring, CP_SET_CONSTANT, 5); OUT_PKT3(ring, CP_SET_CONSTANT, 5);
OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_RED)); OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_RED));
OUT_RING(ring, 0x00000000); /* RB_BLEND_RED */ OUT_RING(ring, 0x00000000); /* RB_BLEND_RED */
OUT_RING(ring, 0x00000000); /* RB_BLEND_GREEN */ OUT_RING(ring, 0x00000000); /* RB_BLEND_GREEN */
OUT_RING(ring, 0x00000000); /* RB_BLEND_BLUE */ OUT_RING(ring, 0x00000000); /* RB_BLEND_BLUE */
OUT_RING(ring, 0x000000ff); /* RB_BLEND_ALPHA */ OUT_RING(ring, 0x000000ff); /* RB_BLEND_ALPHA */
OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL)); OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL));
OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_W0_FMT | OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_W0_FMT |
A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA | A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA |
A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA | A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA |
A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA | A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA |
A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA | A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA |
A2XX_PA_CL_VTE_CNTL_VPORT_Z_SCALE_ENA | A2XX_PA_CL_VTE_CNTL_VPORT_Z_SCALE_ENA |
A2XX_PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA); A2XX_PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA);
} }
void void
fd2_emit_init_screen(struct pipe_screen *pscreen) fd2_emit_init_screen(struct pipe_screen *pscreen)
{ {
struct fd_screen *screen = fd_screen(pscreen); struct fd_screen *screen = fd_screen(pscreen);
screen->emit_ib = fd2_emit_ib; screen->emit_ib = fd2_emit_ib;
} }
void void

View file

@ -34,14 +34,16 @@
struct fd_ringbuffer; struct fd_ringbuffer;
/* One vertex buffer binding as passed to fd2_emit_vertex_bufs(). */
struct fd2_vertex_buf {
   unsigned offset; /* presumably a byte offset into prsc -- TODO confirm */
   unsigned size;
   struct pipe_resource *prsc; /* resource backing the buffer */
};
void fd2_emit_vertex_bufs(struct fd_ringbuffer *ring, uint32_t val, void fd2_emit_vertex_bufs(struct fd_ringbuffer *ring, uint32_t val,
struct fd2_vertex_buf *vbufs, uint32_t n); struct fd2_vertex_buf *vbufs, uint32_t n);
void fd2_emit_state_binning(struct fd_context *ctx, const enum fd_dirty_3d_state dirty) assert_dt; void fd2_emit_state_binning(struct fd_context *ctx,
void fd2_emit_state(struct fd_context *ctx, const enum fd_dirty_3d_state dirty) assert_dt; const enum fd_dirty_3d_state dirty) assert_dt;
void fd2_emit_state(struct fd_context *ctx,
const enum fd_dirty_3d_state dirty) assert_dt;
void fd2_emit_restore(struct fd_context *ctx, struct fd_ringbuffer *ring); void fd2_emit_restore(struct fd_context *ctx, struct fd_ringbuffer *ring);
void fd2_emit_init_screen(struct pipe_screen *pscreen); void fd2_emit_init_screen(struct pipe_screen *pscreen);
@ -50,7 +52,7 @@ void fd2_emit_init(struct pipe_context *pctx);
static inline void static inline void
fd2_emit_ib(struct fd_ringbuffer *ring, struct fd_ringbuffer *target) fd2_emit_ib(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
{ {
__OUT_IB(ring, false, target); __OUT_IB(ring, false, target);
} }
#endif /* FD2_EMIT_H */ #endif /* FD2_EMIT_H */

File diff suppressed because it is too large Load diff

View file

@ -25,319 +25,321 @@
* Jonathan Marek <jonathan@marek.ca> * Jonathan Marek <jonathan@marek.ca>
*/ */
#include "nir/tgsi_to_nir.h"
#include "pipe/p_state.h" #include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/format/u_format.h"
#include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_parse.h"
#include "nir/tgsi_to_nir.h" #include "util/format/u_format.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_string.h"
#include "freedreno_program.h" #include "freedreno_program.h"
#include "ir2.h" #include "ir2/instr-a2xx.h"
#include "fd2_program.h" #include "fd2_program.h"
#include "fd2_texture.h" #include "fd2_texture.h"
#include "fd2_util.h" #include "fd2_util.h"
#include "ir2/instr-a2xx.h" #include "ir2.h"
static struct fd2_shader_stateobj * static struct fd2_shader_stateobj *
create_shader(struct pipe_context *pctx, gl_shader_stage type) create_shader(struct pipe_context *pctx, gl_shader_stage type)
{ {
struct fd2_shader_stateobj *so = CALLOC_STRUCT(fd2_shader_stateobj); struct fd2_shader_stateobj *so = CALLOC_STRUCT(fd2_shader_stateobj);
if (!so) if (!so)
return NULL; return NULL;
so->type = type; so->type = type;
so->is_a20x = is_a20x(fd_context(pctx)->screen); so->is_a20x = is_a20x(fd_context(pctx)->screen);
return so; return so;
} }
static void static void
delete_shader(struct fd2_shader_stateobj *so) delete_shader(struct fd2_shader_stateobj *so)
{ {
if (!so) if (!so)
return; return;
ralloc_free(so->nir); ralloc_free(so->nir);
for (int i = 0; i < ARRAY_SIZE(so->variant); i++) for (int i = 0; i < ARRAY_SIZE(so->variant); i++)
free(so->variant[i].info.dwords); free(so->variant[i].info.dwords);
free(so); free(so);
} }
/*
 * Emit one compiled shader into `ring` as a CP_IM_LOAD_IMMEDIATE packet:
 * a stage word (1 for fragment, 0 otherwise), the dword count, then the
 * shader dwords themselves.
 *
 * When `patches` is non-NULL, the ring address of shader dword
 * `info->mem_export_ptr` is appended to it before the body is written.
 */
static void
emit(struct fd_ringbuffer *ring, gl_shader_stage type,
     struct ir2_shader_info *info, struct util_dynarray *patches)
{
   assert(info->sizedwords);

   OUT_PKT3(ring, CP_IM_LOAD_IMMEDIATE, 2 + info->sizedwords);
   OUT_RING(ring, type == MESA_SHADER_FRAGMENT);
   OUT_RING(ring, info->sizedwords);

   if (patches) {
      /* must be taken here, after the header words and before the body */
      uint32_t *export_dword = &ring->cur[info->mem_export_ptr];

      util_dynarray_append(patches, uint32_t *, export_dword);
   }

   for (unsigned dw = 0; dw < info->sizedwords; dw++)
      OUT_RING(ring, info->dwords[dw]);
}
/*
 * Type-size callback handed to nir_lower_io: the size of a type is its
 * attribute slot count.  `bindless` is ignored.
 */
static int
ir2_glsl_type_size(const struct glsl_type *type, bool bindless)
{
   (void)bindless;

   return glsl_count_attribute_slots(type, false);
}
static void * static void *
fd2_fp_state_create(struct pipe_context *pctx, fd2_fp_state_create(struct pipe_context *pctx,
const struct pipe_shader_state *cso) const struct pipe_shader_state *cso)
{ {
struct fd2_shader_stateobj *so = create_shader(pctx, MESA_SHADER_FRAGMENT); struct fd2_shader_stateobj *so = create_shader(pctx, MESA_SHADER_FRAGMENT);
if (!so) if (!so)
return NULL; return NULL;
so->nir = (cso->type == PIPE_SHADER_IR_NIR) ? cso->ir.nir : so->nir = (cso->type == PIPE_SHADER_IR_NIR)
tgsi_to_nir(cso->tokens, pctx->screen, false); ? cso->ir.nir
: tgsi_to_nir(cso->tokens, pctx->screen, false);
NIR_PASS_V(so->nir, nir_lower_io, NIR_PASS_V(so->nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
nir_var_shader_in | nir_var_shader_out, ir2_glsl_type_size, (nir_lower_io_options)0);
ir2_glsl_type_size, (nir_lower_io_options)0);
if (ir2_optimize_nir(so->nir, true)) if (ir2_optimize_nir(so->nir, true))
goto fail; goto fail;
so->first_immediate = so->nir->num_uniforms; so->first_immediate = so->nir->num_uniforms;
ir2_compile(so, 0, NULL); ir2_compile(so, 0, NULL);
ralloc_free(so->nir); ralloc_free(so->nir);
so->nir = NULL; so->nir = NULL;
return so; return so;
fail: fail:
delete_shader(so); delete_shader(so);
return NULL; return NULL;
} }
/* pipe_context::delete_fs_state hook: frees the object made by
 * fd2_fp_state_create. */
static void
fd2_fp_state_delete(struct pipe_context *pctx, void *hwcso)
{
   delete_shader((struct fd2_shader_stateobj *)hwcso);
}
static void * static void *
fd2_vp_state_create(struct pipe_context *pctx, fd2_vp_state_create(struct pipe_context *pctx,
const struct pipe_shader_state *cso) const struct pipe_shader_state *cso)
{ {
struct fd2_shader_stateobj *so = create_shader(pctx, MESA_SHADER_VERTEX); struct fd2_shader_stateobj *so = create_shader(pctx, MESA_SHADER_VERTEX);
if (!so) if (!so)
return NULL; return NULL;
so->nir = (cso->type == PIPE_SHADER_IR_NIR) ? cso->ir.nir : so->nir = (cso->type == PIPE_SHADER_IR_NIR)
tgsi_to_nir(cso->tokens, pctx->screen, false); ? cso->ir.nir
: tgsi_to_nir(cso->tokens, pctx->screen, false);
NIR_PASS_V(so->nir, nir_lower_io, NIR_PASS_V(so->nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
nir_var_shader_in | nir_var_shader_out, ir2_glsl_type_size, (nir_lower_io_options)0);
ir2_glsl_type_size, (nir_lower_io_options)0);
if (ir2_optimize_nir(so->nir, true)) if (ir2_optimize_nir(so->nir, true))
goto fail; goto fail;
so->first_immediate = so->nir->num_uniforms; so->first_immediate = so->nir->num_uniforms;
/* compile binning variant now */ /* compile binning variant now */
ir2_compile(so, 0, NULL); ir2_compile(so, 0, NULL);
return so; return so;
fail: fail:
delete_shader(so); delete_shader(so);
return NULL; return NULL;
} }
/* pipe_context::delete_vs_state hook: frees the object made by
 * fd2_vp_state_create. */
static void
fd2_vp_state_delete(struct pipe_context *pctx, void *hwcso)
{
   delete_shader((struct fd2_shader_stateobj *)hwcso);
}
static void static void
patch_vtx_fetch(struct fd_context *ctx, struct pipe_vertex_element *elem, patch_vtx_fetch(struct fd_context *ctx, struct pipe_vertex_element *elem,
instr_fetch_vtx_t *instr, uint16_t dst_swiz) instr_fetch_vtx_t *instr, uint16_t dst_swiz) assert_dt
assert_dt
{ {
struct surface_format fmt = fd2_pipe2surface(elem->src_format); struct surface_format fmt = fd2_pipe2surface(elem->src_format);
instr->dst_swiz = fd2_vtx_swiz(elem->src_format, dst_swiz); instr->dst_swiz = fd2_vtx_swiz(elem->src_format, dst_swiz);
instr->format_comp_all = fmt.sign == SQ_TEX_SIGN_SIGNED; instr->format_comp_all = fmt.sign == SQ_TEX_SIGN_SIGNED;
instr->num_format_all = fmt.num_format; instr->num_format_all = fmt.num_format;
instr->format = fmt.format; instr->format = fmt.format;
instr->exp_adjust_all = fmt.exp_adjust; instr->exp_adjust_all = fmt.exp_adjust;
instr->stride = ctx->vtx.vertexbuf.vb[elem->vertex_buffer_index].stride; instr->stride = ctx->vtx.vertexbuf.vb[elem->vertex_buffer_index].stride;
instr->offset = elem->src_offset; instr->offset = elem->src_offset;
} }
static void static void
patch_fetches(struct fd_context *ctx, struct ir2_shader_info *info, patch_fetches(struct fd_context *ctx, struct ir2_shader_info *info,
struct fd_vertex_stateobj *vtx, struct fd_texture_stateobj *tex) struct fd_vertex_stateobj *vtx,
assert_dt struct fd_texture_stateobj *tex) assert_dt
{ {
for (int i = 0; i < info->num_fetch_instrs; i++) { for (int i = 0; i < info->num_fetch_instrs; i++) {
struct ir2_fetch_info *fi = &info->fetch_info[i]; struct ir2_fetch_info *fi = &info->fetch_info[i];
instr_fetch_t *instr = (instr_fetch_t*) &info->dwords[fi->offset]; instr_fetch_t *instr = (instr_fetch_t *)&info->dwords[fi->offset];
if (instr->opc == VTX_FETCH) { if (instr->opc == VTX_FETCH) {
unsigned idx = (instr->vtx.const_index - 20) * 3 + unsigned idx =
instr->vtx.const_index_sel; (instr->vtx.const_index - 20) * 3 + instr->vtx.const_index_sel;
patch_vtx_fetch(ctx, &vtx->pipe[idx], &instr->vtx, fi->vtx.dst_swiz); patch_vtx_fetch(ctx, &vtx->pipe[idx], &instr->vtx, fi->vtx.dst_swiz);
continue; continue;
} }
assert(instr->opc == TEX_FETCH); assert(instr->opc == TEX_FETCH);
instr->tex.const_idx = fd2_get_const_idx(ctx, tex, fi->tex.samp_id); instr->tex.const_idx = fd2_get_const_idx(ctx, tex, fi->tex.samp_id);
instr->tex.src_swiz = fi->tex.src_swiz; instr->tex.src_swiz = fi->tex.src_swiz;
} }
} }
void void
fd2_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, fd2_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct fd_program_stateobj *prog) struct fd_program_stateobj *prog)
{ {
struct fd2_shader_stateobj *fp = NULL, *vp; struct fd2_shader_stateobj *fp = NULL, *vp;
struct ir2_shader_info *fpi, *vpi; struct ir2_shader_info *fpi, *vpi;
struct ir2_frag_linkage *f; struct ir2_frag_linkage *f;
uint8_t vs_gprs, fs_gprs = 0, vs_export = 0; uint8_t vs_gprs, fs_gprs = 0, vs_export = 0;
enum a2xx_sq_ps_vtx_mode mode = POSITION_1_VECTOR; enum a2xx_sq_ps_vtx_mode mode = POSITION_1_VECTOR;
bool binning = (ctx->batch && ring == ctx->batch->binning); bool binning = (ctx->batch && ring == ctx->batch->binning);
unsigned variant = 0; unsigned variant = 0;
vp = prog->vs; vp = prog->vs;
/* find variant matching the linked fragment shader */ /* find variant matching the linked fragment shader */
if (!binning) { if (!binning) {
fp = prog->fs; fp = prog->fs;
for (variant = 1; variant < ARRAY_SIZE(vp->variant); variant++) { for (variant = 1; variant < ARRAY_SIZE(vp->variant); variant++) {
/* if checked all variants, compile a new variant */ /* if checked all variants, compile a new variant */
if (!vp->variant[variant].info.sizedwords) { if (!vp->variant[variant].info.sizedwords) {
ir2_compile(vp, variant, fp); ir2_compile(vp, variant, fp);
break; break;
} }
/* check if fragment shader linkage matches */ /* check if fragment shader linkage matches */
if (!memcmp(&vp->variant[variant].f, &fp->variant[0].f, if (!memcmp(&vp->variant[variant].f, &fp->variant[0].f,
sizeof(struct ir2_frag_linkage))) sizeof(struct ir2_frag_linkage)))
break; break;
} }
assert(variant < ARRAY_SIZE(vp->variant)); assert(variant < ARRAY_SIZE(vp->variant));
} }
vpi = &vp->variant[variant].info; vpi = &vp->variant[variant].info;
fpi = &fp->variant[0].info; fpi = &fp->variant[0].info;
f = &fp->variant[0].f; f = &fp->variant[0].f;
/* clear/gmem2mem/mem2gmem need to be changed to remove this condition */ /* clear/gmem2mem/mem2gmem need to be changed to remove this condition */
if (prog != &ctx->solid_prog && prog != &ctx->blit_prog[0]) { if (prog != &ctx->solid_prog && prog != &ctx->blit_prog[0]) {
patch_fetches(ctx, vpi, ctx->vtx.vtx, &ctx->tex[PIPE_SHADER_VERTEX]); patch_fetches(ctx, vpi, ctx->vtx.vtx, &ctx->tex[PIPE_SHADER_VERTEX]);
if (fp) if (fp)
patch_fetches(ctx, fpi, NULL, &ctx->tex[PIPE_SHADER_FRAGMENT]); patch_fetches(ctx, fpi, NULL, &ctx->tex[PIPE_SHADER_FRAGMENT]);
} }
emit(ring, MESA_SHADER_VERTEX, vpi, emit(ring, MESA_SHADER_VERTEX, vpi,
binning ? &ctx->batch->shader_patches : NULL); binning ? &ctx->batch->shader_patches : NULL);
if (fp) { if (fp) {
emit(ring, MESA_SHADER_FRAGMENT, fpi, NULL); emit(ring, MESA_SHADER_FRAGMENT, fpi, NULL);
fs_gprs = (fpi->max_reg < 0) ? 0x80 : fpi->max_reg; fs_gprs = (fpi->max_reg < 0) ? 0x80 : fpi->max_reg;
vs_export = MAX2(1, f->inputs_count) - 1; vs_export = MAX2(1, f->inputs_count) - 1;
} }
vs_gprs = (vpi->max_reg < 0) ? 0x80 : vpi->max_reg; vs_gprs = (vpi->max_reg < 0) ? 0x80 : vpi->max_reg;
if (vp->writes_psize && !binning) if (vp->writes_psize && !binning)
mode = POSITION_2_VECTORS_SPRITE; mode = POSITION_2_VECTORS_SPRITE;
/* set register to use for param (fragcoord/pointcoord/frontfacing) */ /* set register to use for param (fragcoord/pointcoord/frontfacing) */
OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_SQ_CONTEXT_MISC)); OUT_RING(ring, CP_REG(REG_A2XX_SQ_CONTEXT_MISC));
OUT_RING(ring, A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL(CENTERS_ONLY) | OUT_RING(ring,
COND(fp, A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS(f->inputs_count)) | A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL(CENTERS_ONLY) |
/* we need SCREEN_XY for both fragcoord and frontfacing */ COND(fp, A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS(f->inputs_count)) |
A2XX_SQ_CONTEXT_MISC_SC_OUTPUT_SCREEN_XY); /* we need SCREEN_XY for both fragcoord and frontfacing */
A2XX_SQ_CONTEXT_MISC_SC_OUTPUT_SCREEN_XY);
OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_SQ_PROGRAM_CNTL)); OUT_RING(ring, CP_REG(REG_A2XX_SQ_PROGRAM_CNTL));
OUT_RING(ring, A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE(2) | OUT_RING(ring,
A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE(mode) | A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE(2) |
A2XX_SQ_PROGRAM_CNTL_VS_RESOURCE | A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE(mode) |
A2XX_SQ_PROGRAM_CNTL_PS_RESOURCE | A2XX_SQ_PROGRAM_CNTL_VS_RESOURCE |
A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT(vs_export) | A2XX_SQ_PROGRAM_CNTL_PS_RESOURCE |
A2XX_SQ_PROGRAM_CNTL_PS_REGS(fs_gprs) | A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT(vs_export) |
A2XX_SQ_PROGRAM_CNTL_VS_REGS(vs_gprs) | A2XX_SQ_PROGRAM_CNTL_PS_REGS(fs_gprs) |
COND(fp && fp->need_param, A2XX_SQ_PROGRAM_CNTL_PARAM_GEN) | A2XX_SQ_PROGRAM_CNTL_VS_REGS(vs_gprs) |
COND(!fp, A2XX_SQ_PROGRAM_CNTL_GEN_INDEX_VTX)); COND(fp && fp->need_param, A2XX_SQ_PROGRAM_CNTL_PARAM_GEN) |
COND(!fp, A2XX_SQ_PROGRAM_CNTL_GEN_INDEX_VTX));
} }
void void
fd2_prog_init(struct pipe_context *pctx) fd2_prog_init(struct pipe_context *pctx)
{ {
struct fd_context *ctx = fd_context(pctx); struct fd_context *ctx = fd_context(pctx);
struct fd_program_stateobj *prog; struct fd_program_stateobj *prog;
struct fd2_shader_stateobj *so; struct fd2_shader_stateobj *so;
struct ir2_shader_info *info; struct ir2_shader_info *info;
instr_fetch_vtx_t *instr; instr_fetch_vtx_t *instr;
pctx->create_fs_state = fd2_fp_state_create; pctx->create_fs_state = fd2_fp_state_create;
pctx->delete_fs_state = fd2_fp_state_delete; pctx->delete_fs_state = fd2_fp_state_delete;
pctx->create_vs_state = fd2_vp_state_create; pctx->create_vs_state = fd2_vp_state_create;
pctx->delete_vs_state = fd2_vp_state_delete; pctx->delete_vs_state = fd2_vp_state_delete;
fd_prog_init(pctx); fd_prog_init(pctx);
/* XXX maybe its possible to reuse patch_vtx_fetch somehow? */ /* XXX maybe its possible to reuse patch_vtx_fetch somehow? */
prog = &ctx->solid_prog; prog = &ctx->solid_prog;
so = prog->vs; so = prog->vs;
ir2_compile(prog->vs, 1, prog->fs); ir2_compile(prog->vs, 1, prog->fs);
#define IR2_FETCH_SWIZ_XY01 0xb08 #define IR2_FETCH_SWIZ_XY01 0xb08
#define IR2_FETCH_SWIZ_XYZ1 0xa88 #define IR2_FETCH_SWIZ_XYZ1 0xa88
info = &so->variant[1].info; info = &so->variant[1].info;
instr = (instr_fetch_vtx_t*) &info->dwords[info->fetch_info[0].offset]; instr = (instr_fetch_vtx_t *)&info->dwords[info->fetch_info[0].offset];
instr->const_index = 26; instr->const_index = 26;
instr->const_index_sel = 0; instr->const_index_sel = 0;
instr->format = FMT_32_32_32_FLOAT; instr->format = FMT_32_32_32_FLOAT;
instr->format_comp_all = false; instr->format_comp_all = false;
instr->stride = 12; instr->stride = 12;
instr->num_format_all = true; instr->num_format_all = true;
instr->dst_swiz = IR2_FETCH_SWIZ_XYZ1; instr->dst_swiz = IR2_FETCH_SWIZ_XYZ1;
prog = &ctx->blit_prog[0]; prog = &ctx->blit_prog[0];
so = prog->vs; so = prog->vs;
ir2_compile(prog->vs, 1, prog->fs); ir2_compile(prog->vs, 1, prog->fs);
info = &so->variant[1].info; info = &so->variant[1].info;
instr = (instr_fetch_vtx_t*) &info->dwords[info->fetch_info[0].offset]; instr = (instr_fetch_vtx_t *)&info->dwords[info->fetch_info[0].offset];
instr->const_index = 26; instr->const_index = 26;
instr->const_index_sel = 1; instr->const_index_sel = 1;
instr->format = FMT_32_32_FLOAT; instr->format = FMT_32_32_FLOAT;
instr->format_comp_all = false; instr->format_comp_all = false;
instr->stride = 8; instr->stride = 8;
instr->num_format_all = false; instr->num_format_all = false;
instr->dst_swiz = IR2_FETCH_SWIZ_XY01; instr->dst_swiz = IR2_FETCH_SWIZ_XY01;
instr = (instr_fetch_vtx_t*) &info->dwords[info->fetch_info[1].offset]; instr = (instr_fetch_vtx_t *)&info->dwords[info->fetch_info[1].offset];
instr->const_index = 26; instr->const_index = 26;
instr->const_index_sel = 0; instr->const_index_sel = 0;
instr->format = FMT_32_32_32_FLOAT; instr->format = FMT_32_32_32_FLOAT;
instr->format_comp_all = false; instr->format_comp_all = false;
instr->stride = 12; instr->stride = 12;
instr->num_format_all = false; instr->num_format_all = false;
instr->dst_swiz = IR2_FETCH_SWIZ_XYZ1; instr->dst_swiz = IR2_FETCH_SWIZ_XYZ1;
} }

View file

@ -31,39 +31,39 @@
#include "freedreno_context.h" #include "freedreno_context.h"
#include "ir2.h"
#include "disasm.h" #include "disasm.h"
#include "ir2.h"
struct fd2_shader_stateobj { struct fd2_shader_stateobj {
nir_shader *nir; nir_shader *nir;
gl_shader_stage type; gl_shader_stage type;
bool is_a20x; bool is_a20x;
/* note: using same set of immediates for all variants /* note: using same set of immediates for all variants
* it doesn't matter, other than the slightly larger command stream * it doesn't matter, other than the slightly larger command stream
*/ */
unsigned first_immediate; /* const reg # of first immediate */ unsigned first_immediate; /* const reg # of first immediate */
unsigned num_immediates; unsigned num_immediates;
struct { struct {
uint32_t val[4]; uint32_t val[4];
unsigned ncomp; unsigned ncomp;
} immediates[64]; } immediates[64];
bool writes_psize; bool writes_psize;
bool need_param; bool need_param;
bool has_kill; bool has_kill;
/* note: /* note:
* fragment shader only has one variant * fragment shader only has one variant
* first vertex shader variant is always binning shader * first vertex shader variant is always binning shader
* we should use a dynamic array but in normal case there is * we should use a dynamic array but in normal case there is
* only 2 variants (and 3 sometimes with GALLIUM_HUD) * only 2 variants (and 3 sometimes with GALLIUM_HUD)
*/ */
struct ir2_shader_variant variant[8]; struct ir2_shader_variant variant[8];
}; };
void fd2_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, void fd2_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct fd_program_stateobj *prog) assert_dt; struct fd_program_stateobj *prog) assert_dt;
void fd2_prog_init(struct pipe_context *pctx); void fd2_prog_init(struct pipe_context *pctx);

View file

@ -37,20 +37,19 @@
#include "fd2_query.h" #include "fd2_query.h"
/* One query sample: a start value and a stop value, 32 bits each. */
struct PACKED fd2_query_sample {
   uint32_t start;
   uint32_t stop;
};
/* offset of a single field of an array of fd2_query_sample:
 *
 * Expands to four comma-separated arguments (bo, byte offset, 0, 0), so it
 * is only usable inside the argument list of a call.  `idx` is parenthesized
 * so that an expression argument such as `i + 1` is scaled as a whole.
 */
#define query_sample_idx(aq, idx, field)                                       \
   fd_resource((aq)->prsc)->bo,                                                \
      ((idx) * sizeof(struct fd2_query_sample)) +                              \
         offsetof(struct fd2_query_sample, field),                             \
      0, 0

/* offset of a single field of fd2_query_sample: */
#define query_sample(aq, field) query_sample_idx(aq, 0, field)
/* /*
* Performance Counter (batch) queries: * Performance Counter (batch) queries:
@ -62,186 +61,183 @@ struct PACKED fd2_query_sample {
*/ */
/* Identifies one requested counter of a batch query. */
struct fd_batch_query_entry {
   uint8_t gid; /* group-id */
   uint8_t cid; /* countable-id within the group */
};
/* Per-query private data of a batch query; allocated with room for
 * num_query_entries trailing entries.
 */
struct fd_batch_query_data {
   struct fd_screen *screen;
   unsigned num_query_entries;
   struct fd_batch_query_entry query_entries[];
};
static void static void
perfcntr_resume(struct fd_acc_query *aq, struct fd_batch *batch) perfcntr_resume(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
assert_dt
{ {
struct fd_batch_query_data *data = aq->query_data; struct fd_batch_query_data *data = aq->query_data;
struct fd_screen *screen = data->screen; struct fd_screen *screen = data->screen;
struct fd_ringbuffer *ring = batch->draw; struct fd_ringbuffer *ring = batch->draw;
unsigned counters_per_group[screen->num_perfcntr_groups]; unsigned counters_per_group[screen->num_perfcntr_groups];
memset(counters_per_group, 0, sizeof(counters_per_group)); memset(counters_per_group, 0, sizeof(counters_per_group));
fd_wfi(batch, ring); fd_wfi(batch, ring);
/* configure performance counters for the requested queries: */ /* configure performance counters for the requested queries: */
for (unsigned i = 0; i < data->num_query_entries; i++) { for (unsigned i = 0; i < data->num_query_entries; i++) {
struct fd_batch_query_entry *entry = &data->query_entries[i]; struct fd_batch_query_entry *entry = &data->query_entries[i];
const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid]; const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
unsigned counter_idx = counters_per_group[entry->gid]++; unsigned counter_idx = counters_per_group[entry->gid]++;
debug_assert(counter_idx < g->num_counters); debug_assert(counter_idx < g->num_counters);
OUT_PKT0(ring, g->counters[counter_idx].select_reg, 1); OUT_PKT0(ring, g->counters[counter_idx].select_reg, 1);
OUT_RING(ring, g->countables[entry->cid].selector); OUT_RING(ring, g->countables[entry->cid].selector);
} }
memset(counters_per_group, 0, sizeof(counters_per_group)); memset(counters_per_group, 0, sizeof(counters_per_group));
/* and snapshot the start values */ /* and snapshot the start values */
for (unsigned i = 0; i < data->num_query_entries; i++) { for (unsigned i = 0; i < data->num_query_entries; i++) {
struct fd_batch_query_entry *entry = &data->query_entries[i]; struct fd_batch_query_entry *entry = &data->query_entries[i];
const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid]; const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
unsigned counter_idx = counters_per_group[entry->gid]++; unsigned counter_idx = counters_per_group[entry->gid]++;
const struct fd_perfcntr_counter *counter = &g->counters[counter_idx]; const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];
OUT_PKT3(ring, CP_REG_TO_MEM, 2); OUT_PKT3(ring, CP_REG_TO_MEM, 2);
OUT_RING(ring, counter->counter_reg_lo | CP_REG_TO_MEM_0_ACCUMULATE); OUT_RING(ring, counter->counter_reg_lo | CP_REG_TO_MEM_0_ACCUMULATE);
OUT_RELOC(ring, query_sample_idx(aq, i, start)); OUT_RELOC(ring, query_sample_idx(aq, i, start));
} }
} }
static void static void
perfcntr_pause(struct fd_acc_query *aq, struct fd_batch *batch) perfcntr_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
assert_dt
{ {
struct fd_batch_query_data *data = aq->query_data; struct fd_batch_query_data *data = aq->query_data;
struct fd_screen *screen = data->screen; struct fd_screen *screen = data->screen;
struct fd_ringbuffer *ring = batch->draw; struct fd_ringbuffer *ring = batch->draw;
unsigned counters_per_group[screen->num_perfcntr_groups]; unsigned counters_per_group[screen->num_perfcntr_groups];
memset(counters_per_group, 0, sizeof(counters_per_group)); memset(counters_per_group, 0, sizeof(counters_per_group));
fd_wfi(batch, ring); fd_wfi(batch, ring);
/* TODO do we need to bother to turn anything off? */ /* TODO do we need to bother to turn anything off? */
/* snapshot the end values: */ /* snapshot the end values: */
for (unsigned i = 0; i < data->num_query_entries; i++) { for (unsigned i = 0; i < data->num_query_entries; i++) {
struct fd_batch_query_entry *entry = &data->query_entries[i]; struct fd_batch_query_entry *entry = &data->query_entries[i];
const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid]; const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
unsigned counter_idx = counters_per_group[entry->gid]++; unsigned counter_idx = counters_per_group[entry->gid]++;
const struct fd_perfcntr_counter *counter = &g->counters[counter_idx]; const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];
OUT_PKT3(ring, CP_REG_TO_MEM, 2); OUT_PKT3(ring, CP_REG_TO_MEM, 2);
OUT_RING(ring, counter->counter_reg_lo | CP_REG_TO_MEM_0_ACCUMULATE); OUT_RING(ring, counter->counter_reg_lo | CP_REG_TO_MEM_0_ACCUMULATE);
OUT_RELOC(ring, query_sample_idx(aq, i, stop)); OUT_RELOC(ring, query_sample_idx(aq, i, stop));
} }
} }
static void static void
perfcntr_accumulate_result(struct fd_acc_query *aq, void *buf, perfcntr_accumulate_result(struct fd_acc_query *aq, void *buf,
union pipe_query_result *result) union pipe_query_result *result)
{ {
struct fd_batch_query_data *data = aq->query_data; struct fd_batch_query_data *data = aq->query_data;
struct fd2_query_sample *sp = buf; struct fd2_query_sample *sp = buf;
for (unsigned i = 0; i < data->num_query_entries; i++) for (unsigned i = 0; i < data->num_query_entries; i++)
result->batch[i].u64 = sp[i].stop - sp[i].start; result->batch[i].u64 = sp[i].stop - sp[i].start;
} }
static const struct fd_acc_sample_provider perfcntr = { static const struct fd_acc_sample_provider perfcntr = {
.query_type = FD_QUERY_FIRST_PERFCNTR, .query_type = FD_QUERY_FIRST_PERFCNTR,
.always = true, .always = true,
.resume = perfcntr_resume, .resume = perfcntr_resume,
.pause = perfcntr_pause, .pause = perfcntr_pause,
.result = perfcntr_accumulate_result, .result = perfcntr_accumulate_result,
}; };
static struct pipe_query * static struct pipe_query *
fd2_create_batch_query(struct pipe_context *pctx, fd2_create_batch_query(struct pipe_context *pctx, unsigned num_queries,
unsigned num_queries, unsigned *query_types) unsigned *query_types)
{ {
struct fd_context *ctx = fd_context(pctx); struct fd_context *ctx = fd_context(pctx);
struct fd_screen *screen = ctx->screen; struct fd_screen *screen = ctx->screen;
struct fd_query *q; struct fd_query *q;
struct fd_acc_query *aq; struct fd_acc_query *aq;
struct fd_batch_query_data *data; struct fd_batch_query_data *data;
data = CALLOC_VARIANT_LENGTH_STRUCT(fd_batch_query_data, data = CALLOC_VARIANT_LENGTH_STRUCT(
num_queries * sizeof(data->query_entries[0])); fd_batch_query_data, num_queries * sizeof(data->query_entries[0]));
data->screen = screen; data->screen = screen;
data->num_query_entries = num_queries; data->num_query_entries = num_queries;
/* validate the requested query_types and ensure we don't try /* validate the requested query_types and ensure we don't try
* to request more query_types of a given group than we have * to request more query_types of a given group than we have
* counters: * counters:
*/ */
unsigned counters_per_group[screen->num_perfcntr_groups]; unsigned counters_per_group[screen->num_perfcntr_groups];
memset(counters_per_group, 0, sizeof(counters_per_group)); memset(counters_per_group, 0, sizeof(counters_per_group));
for (unsigned i = 0; i < num_queries; i++) { for (unsigned i = 0; i < num_queries; i++) {
unsigned idx = query_types[i] - FD_QUERY_FIRST_PERFCNTR; unsigned idx = query_types[i] - FD_QUERY_FIRST_PERFCNTR;
/* verify valid query_type, ie. is it actually a perfcntr? */ /* verify valid query_type, ie. is it actually a perfcntr? */
if ((query_types[i] < FD_QUERY_FIRST_PERFCNTR) || if ((query_types[i] < FD_QUERY_FIRST_PERFCNTR) ||
(idx >= screen->num_perfcntr_queries)) { (idx >= screen->num_perfcntr_queries)) {
mesa_loge("invalid batch query query_type: %u", query_types[i]); mesa_loge("invalid batch query query_type: %u", query_types[i]);
goto error; goto error;
} }
struct fd_batch_query_entry *entry = &data->query_entries[i]; struct fd_batch_query_entry *entry = &data->query_entries[i];
struct pipe_driver_query_info *pq = &screen->perfcntr_queries[idx]; struct pipe_driver_query_info *pq = &screen->perfcntr_queries[idx];
entry->gid = pq->group_id; entry->gid = pq->group_id;
/* the perfcntr_queries[] table flattens all the countables /* the perfcntr_queries[] table flattens all the countables
* for each group in series, ie: * for each group in series, ie:
* *
* (G0,C0), .., (G0,Cn), (G1,C0), .., (G1,Cm), ... * (G0,C0), .., (G0,Cn), (G1,C0), .., (G1,Cm), ...
* *
* So to find the countable index just step back through the * So to find the countable index just step back through the
* table to find the first entry with the same group-id. * table to find the first entry with the same group-id.
*/ */
while (pq > screen->perfcntr_queries) { while (pq > screen->perfcntr_queries) {
pq--; pq--;
if (pq->group_id == entry->gid) if (pq->group_id == entry->gid)
entry->cid++; entry->cid++;
} }
if (counters_per_group[entry->gid] >= if (counters_per_group[entry->gid] >=
screen->perfcntr_groups[entry->gid].num_counters) { screen->perfcntr_groups[entry->gid].num_counters) {
mesa_loge("too many counters for group %u", entry->gid); mesa_loge("too many counters for group %u", entry->gid);
goto error; goto error;
} }
counters_per_group[entry->gid]++; counters_per_group[entry->gid]++;
} }
q = fd_acc_create_query2(ctx, 0, 0, &perfcntr); q = fd_acc_create_query2(ctx, 0, 0, &perfcntr);
aq = fd_acc_query(q); aq = fd_acc_query(q);
/* sample buffer size is based on # of queries: */ /* sample buffer size is based on # of queries: */
aq->size = num_queries * sizeof(struct fd2_query_sample); aq->size = num_queries * sizeof(struct fd2_query_sample);
aq->query_data = data; aq->query_data = data;
return (struct pipe_query *)q; return (struct pipe_query *)q;
error: error:
free(data); free(data);
return NULL; return NULL;
} }
void void
fd2_query_context_init(struct pipe_context *pctx) fd2_query_context_init(struct pipe_context *pctx) disable_thread_safety_analysis
disable_thread_safety_analysis
{ {
struct fd_context *ctx = fd_context(pctx); struct fd_context *ctx = fd_context(pctx);
ctx->create_query = fd_acc_create_query; ctx->create_query = fd_acc_create_query;
ctx->query_update_batch = fd_acc_query_update_batch; ctx->query_update_batch = fd_acc_query_update_batch;
pctx->create_batch_query = fd2_create_batch_query; pctx->create_batch_query = fd2_create_batch_query;
} }

View file

@ -24,88 +24,86 @@
* Rob Clark <robclark@freedesktop.org> * Rob Clark <robclark@freedesktop.org>
*/ */
#include "pipe/p_state.h" #include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h" #include "util/u_memory.h"
#include "util/u_string.h"
#include "fd2_rasterizer.h"
#include "fd2_context.h" #include "fd2_context.h"
#include "fd2_rasterizer.h"
#include "fd2_util.h" #include "fd2_util.h"
void * void *
fd2_rasterizer_state_create(struct pipe_context *pctx, fd2_rasterizer_state_create(struct pipe_context *pctx,
const struct pipe_rasterizer_state *cso) const struct pipe_rasterizer_state *cso)
{ {
struct fd2_rasterizer_stateobj *so; struct fd2_rasterizer_stateobj *so;
float psize_min, psize_max; float psize_min, psize_max;
so = CALLOC_STRUCT(fd2_rasterizer_stateobj); so = CALLOC_STRUCT(fd2_rasterizer_stateobj);
if (!so) if (!so)
return NULL; return NULL;
if (cso->point_size_per_vertex) { if (cso->point_size_per_vertex) {
psize_min = util_get_min_point_size(cso); psize_min = util_get_min_point_size(cso);
psize_max = 8192.0 - 0.0625; psize_max = 8192.0 - 0.0625;
} else { } else {
/* Force the point size to be as if the vertex output was disabled. */ /* Force the point size to be as if the vertex output was disabled. */
psize_min = cso->point_size; psize_min = cso->point_size;
psize_max = cso->point_size; psize_max = cso->point_size;
} }
so->base = *cso; so->base = *cso;
so->pa_sc_line_stipple = cso->line_stipple_enable ? so->pa_sc_line_stipple =
A2XX_PA_SC_LINE_STIPPLE_LINE_PATTERN(cso->line_stipple_pattern) | cso->line_stipple_enable
A2XX_PA_SC_LINE_STIPPLE_REPEAT_COUNT(cso->line_stipple_factor) : 0; ? A2XX_PA_SC_LINE_STIPPLE_LINE_PATTERN(cso->line_stipple_pattern) |
A2XX_PA_SC_LINE_STIPPLE_REPEAT_COUNT(cso->line_stipple_factor)
: 0;
so->pa_cl_clip_cntl = 0; // TODO so->pa_cl_clip_cntl = 0; // TODO
so->pa_su_vtx_cntl = so->pa_su_vtx_cntl =
A2XX_PA_SU_VTX_CNTL_PIX_CENTER(cso->half_pixel_center ? PIXCENTER_OGL : PIXCENTER_D3D) | A2XX_PA_SU_VTX_CNTL_PIX_CENTER(cso->half_pixel_center ? PIXCENTER_OGL
A2XX_PA_SU_VTX_CNTL_QUANT_MODE(ONE_SIXTEENTH); : PIXCENTER_D3D) |
A2XX_PA_SU_VTX_CNTL_QUANT_MODE(ONE_SIXTEENTH);
so->pa_su_point_size = so->pa_su_point_size = A2XX_PA_SU_POINT_SIZE_HEIGHT(cso->point_size / 2) |
A2XX_PA_SU_POINT_SIZE_HEIGHT(cso->point_size/2) | A2XX_PA_SU_POINT_SIZE_WIDTH(cso->point_size / 2);
A2XX_PA_SU_POINT_SIZE_WIDTH(cso->point_size/2);
so->pa_su_point_minmax = so->pa_su_point_minmax = A2XX_PA_SU_POINT_MINMAX_MIN(psize_min / 2) |
A2XX_PA_SU_POINT_MINMAX_MIN(psize_min/2) | A2XX_PA_SU_POINT_MINMAX_MAX(psize_max / 2);
A2XX_PA_SU_POINT_MINMAX_MAX(psize_max/2);
so->pa_su_line_cntl = so->pa_su_line_cntl = A2XX_PA_SU_LINE_CNTL_WIDTH(cso->line_width / 2);
A2XX_PA_SU_LINE_CNTL_WIDTH(cso->line_width/2);
so->pa_su_sc_mode_cntl = so->pa_su_sc_mode_cntl =
A2XX_PA_SU_SC_MODE_CNTL_VTX_WINDOW_OFFSET_ENABLE | A2XX_PA_SU_SC_MODE_CNTL_VTX_WINDOW_OFFSET_ENABLE |
A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(fd_polygon_mode(cso->fill_front)) | A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(fd_polygon_mode(cso->fill_front)) |
A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(fd_polygon_mode(cso->fill_back)); A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(fd_polygon_mode(cso->fill_back));
if (cso->cull_face & PIPE_FACE_FRONT) if (cso->cull_face & PIPE_FACE_FRONT)
so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_CULL_FRONT; so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_CULL_FRONT;
if (cso->cull_face & PIPE_FACE_BACK) if (cso->cull_face & PIPE_FACE_BACK)
so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_CULL_BACK; so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_CULL_BACK;
if (!cso->flatshade_first) if (!cso->flatshade_first)
so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST; so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST;
if (!cso->front_ccw) if (!cso->front_ccw)
so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_FACE; so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_FACE;
if (cso->line_stipple_enable) if (cso->line_stipple_enable)
so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_LINE_STIPPLE_ENABLE; so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_LINE_STIPPLE_ENABLE;
if (cso->multisample) if (cso->multisample)
so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_MSAA_ENABLE; so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_MSAA_ENABLE;
if (cso->fill_front != PIPE_POLYGON_MODE_FILL || if (cso->fill_front != PIPE_POLYGON_MODE_FILL ||
cso->fill_back != PIPE_POLYGON_MODE_FILL) cso->fill_back != PIPE_POLYGON_MODE_FILL)
so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_POLYMODE(POLY_DUALMODE); so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_POLYMODE(POLY_DUALMODE);
else else
so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_POLYMODE(POLY_DISABLED); so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_POLYMODE(POLY_DISABLED);
if (cso->offset_tri) if (cso->offset_tri)
so->pa_su_sc_mode_cntl |= so->pa_su_sc_mode_cntl |=
A2XX_PA_SU_SC_MODE_CNTL_POLY_OFFSET_FRONT_ENABLE | A2XX_PA_SU_SC_MODE_CNTL_POLY_OFFSET_FRONT_ENABLE |
A2XX_PA_SU_SC_MODE_CNTL_POLY_OFFSET_BACK_ENABLE | A2XX_PA_SU_SC_MODE_CNTL_POLY_OFFSET_BACK_ENABLE |
A2XX_PA_SU_SC_MODE_CNTL_POLY_OFFSET_PARA_ENABLE; A2XX_PA_SU_SC_MODE_CNTL_POLY_OFFSET_PARA_ENABLE;
return so; return so;
} }

View file

@ -27,27 +27,27 @@
#ifndef FD2_RASTERIZER_H_ #ifndef FD2_RASTERIZER_H_
#define FD2_RASTERIZER_H_ #define FD2_RASTERIZER_H_
#include "pipe/p_state.h"
#include "pipe/p_context.h" #include "pipe/p_context.h"
#include "pipe/p_state.h"
/* a2xx rasterizer state: the gallium CSO it was created from, followed by
 * the register values derived from it.
 */
struct fd2_rasterizer_stateobj {
   struct pipe_rasterizer_state base;

   /* precomputed register values, named after the register they hold */
   uint32_t pa_sc_line_stipple;
   uint32_t pa_cl_clip_cntl;
   uint32_t pa_su_vtx_cntl;
   uint32_t pa_su_point_size;
   uint32_t pa_su_point_minmax;
   uint32_t pa_su_line_cntl;
   uint32_t pa_su_sc_mode_cntl;
};
/* Cast a gallium rasterizer state pointer to the a2xx state object pointer
 * type.
 */
static inline struct fd2_rasterizer_stateobj *
fd2_rasterizer_stateobj(struct pipe_rasterizer_state *rast)
{
   struct fd2_rasterizer_stateobj *so = (struct fd2_rasterizer_stateobj *)rast;

   return so;
}
void * fd2_rasterizer_state_create(struct pipe_context *pctx, void *fd2_rasterizer_state_create(struct pipe_context *pctx,
const struct pipe_rasterizer_state *cso); const struct pipe_rasterizer_state *cso);
#endif /* FD2_RASTERIZER_H_ */ #endif /* FD2_RASTERIZER_H_ */

View file

@ -29,40 +29,40 @@
uint32_t uint32_t
fd2_setup_slices(struct fd_resource *rsc) fd2_setup_slices(struct fd_resource *rsc)
{ {
struct pipe_resource *prsc = &rsc->b.b; struct pipe_resource *prsc = &rsc->b.b;
enum pipe_format format = prsc->format; enum pipe_format format = prsc->format;
uint32_t height0 = util_format_get_nblocksy(format, prsc->height0); uint32_t height0 = util_format_get_nblocksy(format, prsc->height0);
uint32_t level, size = 0; uint32_t level, size = 0;
/* 32 pixel alignment */ /* 32 pixel alignment */
fdl_set_pitchalign(&rsc->layout, fdl_cpp_shift(&rsc->layout) + 5); fdl_set_pitchalign(&rsc->layout, fdl_cpp_shift(&rsc->layout) + 5);
for (level = 0; level <= prsc->last_level; level++) { for (level = 0; level <= prsc->last_level; level++) {
struct fdl_slice *slice = fd_resource_slice(rsc, level); struct fdl_slice *slice = fd_resource_slice(rsc, level);
uint32_t pitch = fdl2_pitch(&rsc->layout, level); uint32_t pitch = fdl2_pitch(&rsc->layout, level);
uint32_t nblocksy = align(u_minify(height0, level), 32); uint32_t nblocksy = align(u_minify(height0, level), 32);
/* mipmaps have power of two sizes in memory */ /* mipmaps have power of two sizes in memory */
if (level) if (level)
nblocksy = util_next_power_of_two(nblocksy); nblocksy = util_next_power_of_two(nblocksy);
slice->offset = size; slice->offset = size;
slice->size0 = align(pitch * nblocksy, 4096); slice->size0 = align(pitch * nblocksy, 4096);
size += slice->size0 * u_minify(prsc->depth0, level) * prsc->array_size; size += slice->size0 * u_minify(prsc->depth0, level) * prsc->array_size;
} }
return size; return size;
} }
unsigned unsigned
fd2_tile_mode(const struct pipe_resource *tmpl) fd2_tile_mode(const struct pipe_resource *tmpl)
{ {
/* disable tiling for cube maps, freedreno uses a 2D array for the staging texture, /* disable tiling for cube maps, freedreno uses a 2D array for the staging
* (a2xx supports 2D arrays but it is not implemented) * texture, (a2xx supports 2D arrays but it is not implemented)
*/ */
if (tmpl->target == PIPE_TEXTURE_CUBE) if (tmpl->target == PIPE_TEXTURE_CUBE)
return 0; return 0;
/* we can enable tiling for any resource we can render to */ /* we can enable tiling for any resource we can render to */
return (tmpl->bind & PIPE_BIND_RENDER_TARGET) ? 1 : 0; return (tmpl->bind & PIPE_BIND_RENDER_TARGET) ? 1 : 0;
} }

View file

@ -27,90 +27,84 @@
#include "pipe/p_screen.h" #include "pipe/p_screen.h"
#include "util/format/u_format.h" #include "util/format/u_format.h"
#include "fd2_screen.h"
#include "fd2_context.h" #include "fd2_context.h"
#include "fd2_emit.h" #include "fd2_emit.h"
#include "fd2_util.h"
#include "fd2_resource.h" #include "fd2_resource.h"
#include "fd2_screen.h"
#include "fd2_util.h"
static bool static bool
fd2_screen_is_format_supported(struct pipe_screen *pscreen, fd2_screen_is_format_supported(struct pipe_screen *pscreen,
enum pipe_format format, enum pipe_format format,
enum pipe_texture_target target, enum pipe_texture_target target,
unsigned sample_count, unsigned sample_count,
unsigned storage_sample_count, unsigned storage_sample_count, unsigned usage)
unsigned usage)
{ {
unsigned retval = 0; unsigned retval = 0;
if ((target >= PIPE_MAX_TEXTURE_TYPES) || if ((target >= PIPE_MAX_TEXTURE_TYPES) ||
(sample_count > 1)) { /* TODO add MSAA */ (sample_count > 1)) { /* TODO add MSAA */
DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x", DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x",
util_format_name(format), target, sample_count, usage); util_format_name(format), target, sample_count, usage);
return false; return false;
} }
if (MAX2(1, sample_count) != MAX2(1, storage_sample_count)) if (MAX2(1, sample_count) != MAX2(1, storage_sample_count))
return false; return false;
if ((usage & PIPE_BIND_RENDER_TARGET) && if ((usage & PIPE_BIND_RENDER_TARGET) &&
fd2_pipe2color(format) != (enum a2xx_colorformatx)~0) { fd2_pipe2color(format) != (enum a2xx_colorformatx) ~0) {
retval |= PIPE_BIND_RENDER_TARGET; retval |= PIPE_BIND_RENDER_TARGET;
} }
if ((usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_VERTEX_BUFFER)) && if ((usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_VERTEX_BUFFER)) &&
!util_format_is_srgb(format) && !util_format_is_srgb(format) && !util_format_is_pure_integer(format) &&
!util_format_is_pure_integer(format) && fd2_pipe2surface(format).format != FMT_INVALID) {
fd2_pipe2surface(format).format != FMT_INVALID) { retval |= usage & PIPE_BIND_VERTEX_BUFFER;
retval |= usage & PIPE_BIND_VERTEX_BUFFER; /* the only npot blocksize supported texture format is R32G32B32_FLOAT */
/* the only npot blocksize supported texture format is R32G32B32_FLOAT */ if (util_is_power_of_two_or_zero(util_format_get_blocksize(format)) ||
if (util_is_power_of_two_or_zero(util_format_get_blocksize(format)) || format == PIPE_FORMAT_R32G32B32_FLOAT)
format == PIPE_FORMAT_R32G32B32_FLOAT) retval |= usage & PIPE_BIND_SAMPLER_VIEW;
retval |= usage & PIPE_BIND_SAMPLER_VIEW; }
}
if ((usage & (PIPE_BIND_RENDER_TARGET | if ((usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET |
PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | PIPE_BIND_SHARED)) &&
PIPE_BIND_SCANOUT | (fd2_pipe2color(format) != (enum a2xx_colorformatx) ~0)) {
PIPE_BIND_SHARED)) && retval |= usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET |
(fd2_pipe2color(format) != (enum a2xx_colorformatx)~0)) { PIPE_BIND_SCANOUT | PIPE_BIND_SHARED);
retval |= usage & (PIPE_BIND_RENDER_TARGET | }
PIPE_BIND_DISPLAY_TARGET |
PIPE_BIND_SCANOUT |
PIPE_BIND_SHARED);
}
if ((usage & PIPE_BIND_DEPTH_STENCIL) && if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
(fd_pipe2depth(format) != (enum adreno_rb_depth_format)~0)) { (fd_pipe2depth(format) != (enum adreno_rb_depth_format) ~0)) {
retval |= PIPE_BIND_DEPTH_STENCIL; retval |= PIPE_BIND_DEPTH_STENCIL;
} }
if ((usage & PIPE_BIND_INDEX_BUFFER) && if ((usage & PIPE_BIND_INDEX_BUFFER) &&
(fd_pipe2index(format) != (enum pc_di_index_size)~0)) { (fd_pipe2index(format) != (enum pc_di_index_size) ~0)) {
retval |= PIPE_BIND_INDEX_BUFFER; retval |= PIPE_BIND_INDEX_BUFFER;
} }
if (retval != usage) { if (retval != usage) {
DBG("not supported: format=%s, target=%d, sample_count=%d, " DBG("not supported: format=%s, target=%d, sample_count=%d, "
"usage=%x, retval=%x", util_format_name(format), "usage=%x, retval=%x",
target, sample_count, usage, retval); util_format_name(format), target, sample_count, usage, retval);
} }
return retval == usage; return retval == usage;
} }
void void
fd2_screen_init(struct pipe_screen *pscreen) fd2_screen_init(struct pipe_screen *pscreen)
{ {
struct fd_screen *screen = fd_screen(pscreen); struct fd_screen *screen = fd_screen(pscreen);
screen->max_rts = 1; screen->max_rts = 1;
pscreen->context_create = fd2_context_create; pscreen->context_create = fd2_context_create;
pscreen->is_format_supported = fd2_screen_is_format_supported; pscreen->is_format_supported = fd2_screen_is_format_supported;
screen->setup_slices = fd2_setup_slices; screen->setup_slices = fd2_setup_slices;
if (FD_DBG(TTILE)) if (FD_DBG(TTILE))
screen->tile_mode = fd2_tile_mode; screen->tile_mode = fd2_tile_mode;
fd2_emit_init_screen(pscreen); fd2_emit_init_screen(pscreen);
} }

View file

@ -25,9 +25,9 @@
*/ */
#include "pipe/p_state.h" #include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_inlines.h" #include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_string.h"
#include "fd2_texture.h" #include "fd2_texture.h"
#include "fd2_util.h" #include "fd2_util.h"
@ -35,200 +35,190 @@
static enum sq_tex_clamp static enum sq_tex_clamp
tex_clamp(unsigned wrap) tex_clamp(unsigned wrap)
{ {
switch (wrap) { switch (wrap) {
case PIPE_TEX_WRAP_REPEAT: case PIPE_TEX_WRAP_REPEAT:
return SQ_TEX_WRAP; return SQ_TEX_WRAP;
case PIPE_TEX_WRAP_CLAMP: case PIPE_TEX_WRAP_CLAMP:
return SQ_TEX_CLAMP_HALF_BORDER; return SQ_TEX_CLAMP_HALF_BORDER;
case PIPE_TEX_WRAP_CLAMP_TO_EDGE: case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
return SQ_TEX_CLAMP_LAST_TEXEL; return SQ_TEX_CLAMP_LAST_TEXEL;
case PIPE_TEX_WRAP_CLAMP_TO_BORDER: case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
return SQ_TEX_CLAMP_BORDER; return SQ_TEX_CLAMP_BORDER;
case PIPE_TEX_WRAP_MIRROR_REPEAT: case PIPE_TEX_WRAP_MIRROR_REPEAT:
return SQ_TEX_MIRROR; return SQ_TEX_MIRROR;
case PIPE_TEX_WRAP_MIRROR_CLAMP: case PIPE_TEX_WRAP_MIRROR_CLAMP:
return SQ_TEX_MIRROR_ONCE_HALF_BORDER; return SQ_TEX_MIRROR_ONCE_HALF_BORDER;
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
return SQ_TEX_MIRROR_ONCE_LAST_TEXEL; return SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
return SQ_TEX_MIRROR_ONCE_BORDER; return SQ_TEX_MIRROR_ONCE_BORDER;
default: default:
DBG("invalid wrap: %u", wrap); DBG("invalid wrap: %u", wrap);
return 0; return 0;
} }
} }
static enum sq_tex_filter static enum sq_tex_filter
tex_filter(unsigned filter) tex_filter(unsigned filter)
{ {
switch (filter) { switch (filter) {
case PIPE_TEX_FILTER_NEAREST: case PIPE_TEX_FILTER_NEAREST:
return SQ_TEX_FILTER_POINT; return SQ_TEX_FILTER_POINT;
case PIPE_TEX_FILTER_LINEAR: case PIPE_TEX_FILTER_LINEAR:
return SQ_TEX_FILTER_BILINEAR; return SQ_TEX_FILTER_BILINEAR;
default: default:
DBG("invalid filter: %u", filter); DBG("invalid filter: %u", filter);
return 0; return 0;
} }
} }
static enum sq_tex_filter static enum sq_tex_filter
mip_filter(unsigned filter) mip_filter(unsigned filter)
{ {
switch (filter) { switch (filter) {
case PIPE_TEX_MIPFILTER_NONE: case PIPE_TEX_MIPFILTER_NONE:
return SQ_TEX_FILTER_BASEMAP; return SQ_TEX_FILTER_BASEMAP;
case PIPE_TEX_MIPFILTER_NEAREST: case PIPE_TEX_MIPFILTER_NEAREST:
return SQ_TEX_FILTER_POINT; return SQ_TEX_FILTER_POINT;
case PIPE_TEX_MIPFILTER_LINEAR: case PIPE_TEX_MIPFILTER_LINEAR:
return SQ_TEX_FILTER_BILINEAR; return SQ_TEX_FILTER_BILINEAR;
default: default:
DBG("invalid filter: %u", filter); DBG("invalid filter: %u", filter);
return 0; return 0;
} }
} }
static void * static void *
fd2_sampler_state_create(struct pipe_context *pctx, fd2_sampler_state_create(struct pipe_context *pctx,
const struct pipe_sampler_state *cso) const struct pipe_sampler_state *cso)
{ {
struct fd2_sampler_stateobj *so = CALLOC_STRUCT(fd2_sampler_stateobj); struct fd2_sampler_stateobj *so = CALLOC_STRUCT(fd2_sampler_stateobj);
if (!so) if (!so)
return NULL; return NULL;
so->base = *cso; so->base = *cso;
/* TODO /* TODO
* cso->max_anisotropy * cso->max_anisotropy
* cso->normalized_coords (dealt with by shader for rect textures?) * cso->normalized_coords (dealt with by shader for rect textures?)
*/ */
/* SQ_TEX0_PITCH() must be OR'd in later when we know the bound texture: */ /* SQ_TEX0_PITCH() must be OR'd in later when we know the bound texture: */
so->tex0 = so->tex0 = A2XX_SQ_TEX_0_CLAMP_X(tex_clamp(cso->wrap_s)) |
A2XX_SQ_TEX_0_CLAMP_X(tex_clamp(cso->wrap_s)) | A2XX_SQ_TEX_0_CLAMP_Y(tex_clamp(cso->wrap_t)) |
A2XX_SQ_TEX_0_CLAMP_Y(tex_clamp(cso->wrap_t)) | A2XX_SQ_TEX_0_CLAMP_Z(tex_clamp(cso->wrap_r));
A2XX_SQ_TEX_0_CLAMP_Z(tex_clamp(cso->wrap_r));
so->tex3 = so->tex3 = A2XX_SQ_TEX_3_XY_MAG_FILTER(tex_filter(cso->mag_img_filter)) |
A2XX_SQ_TEX_3_XY_MAG_FILTER(tex_filter(cso->mag_img_filter)) | A2XX_SQ_TEX_3_XY_MIN_FILTER(tex_filter(cso->min_img_filter)) |
A2XX_SQ_TEX_3_XY_MIN_FILTER(tex_filter(cso->min_img_filter)) | A2XX_SQ_TEX_3_MIP_FILTER(mip_filter(cso->min_mip_filter));
A2XX_SQ_TEX_3_MIP_FILTER(mip_filter(cso->min_mip_filter));
so->tex4 = 0; so->tex4 = 0;
if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE)
so->tex4 = A2XX_SQ_TEX_4_LOD_BIAS(cso->lod_bias); so->tex4 = A2XX_SQ_TEX_4_LOD_BIAS(cso->lod_bias);
return so; return so;
} }
static void static void
fd2_sampler_states_bind(struct pipe_context *pctx, fd2_sampler_states_bind(struct pipe_context *pctx, enum pipe_shader_type shader,
enum pipe_shader_type shader, unsigned start, unsigned start, unsigned nr, void **hwcso) in_dt
unsigned nr, void **hwcso)
in_dt
{ {
if (!hwcso) if (!hwcso)
nr = 0; nr = 0;
if (shader == PIPE_SHADER_FRAGMENT) { if (shader == PIPE_SHADER_FRAGMENT) {
struct fd_context *ctx = fd_context(pctx); struct fd_context *ctx = fd_context(pctx);
/* on a2xx, since there is a flat address space for textures/samplers, /* on a2xx, since there is a flat address space for textures/samplers,
* a change in # of fragment textures/samplers will trigger patching and * a change in # of fragment textures/samplers will trigger patching and
* re-emitting the vertex shader: * re-emitting the vertex shader:
*/ */
if (nr != ctx->tex[PIPE_SHADER_FRAGMENT].num_samplers) if (nr != ctx->tex[PIPE_SHADER_FRAGMENT].num_samplers)
ctx->dirty |= FD_DIRTY_TEXSTATE; ctx->dirty |= FD_DIRTY_TEXSTATE;
} }
fd_sampler_states_bind(pctx, shader, start, nr, hwcso); fd_sampler_states_bind(pctx, shader, start, nr, hwcso);
} }
static enum sq_tex_dimension static enum sq_tex_dimension
tex_dimension(unsigned target) tex_dimension(unsigned target)
{ {
switch (target) { switch (target) {
default: default:
assert(0); assert(0);
case PIPE_TEXTURE_1D: case PIPE_TEXTURE_1D:
assert(0); /* TODO */ assert(0); /* TODO */
return SQ_TEX_DIMENSION_1D; return SQ_TEX_DIMENSION_1D;
case PIPE_TEXTURE_RECT: case PIPE_TEXTURE_RECT:
case PIPE_TEXTURE_2D: case PIPE_TEXTURE_2D:
return SQ_TEX_DIMENSION_2D; return SQ_TEX_DIMENSION_2D;
case PIPE_TEXTURE_3D: case PIPE_TEXTURE_3D:
assert(0); /* TODO */ assert(0); /* TODO */
return SQ_TEX_DIMENSION_3D; return SQ_TEX_DIMENSION_3D;
case PIPE_TEXTURE_CUBE: case PIPE_TEXTURE_CUBE:
return SQ_TEX_DIMENSION_CUBE; return SQ_TEX_DIMENSION_CUBE;
} }
} }
static struct pipe_sampler_view * static struct pipe_sampler_view *
fd2_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, fd2_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
const struct pipe_sampler_view *cso) const struct pipe_sampler_view *cso)
{ {
struct fd2_pipe_sampler_view *so = CALLOC_STRUCT(fd2_pipe_sampler_view); struct fd2_pipe_sampler_view *so = CALLOC_STRUCT(fd2_pipe_sampler_view);
struct fd_resource *rsc = fd_resource(prsc); struct fd_resource *rsc = fd_resource(prsc);
struct surface_format fmt = fd2_pipe2surface(cso->format); struct surface_format fmt = fd2_pipe2surface(cso->format);
if (!so) if (!so)
return NULL; return NULL;
so->base = *cso; so->base = *cso;
pipe_reference(NULL, &prsc->reference); pipe_reference(NULL, &prsc->reference);
so->base.texture = prsc; so->base.texture = prsc;
so->base.reference.count = 1; so->base.reference.count = 1;
so->base.context = pctx; so->base.context = pctx;
so->tex0 = so->tex0 = A2XX_SQ_TEX_0_SIGN_X(fmt.sign) | A2XX_SQ_TEX_0_SIGN_Y(fmt.sign) |
A2XX_SQ_TEX_0_SIGN_X(fmt.sign) | A2XX_SQ_TEX_0_SIGN_Z(fmt.sign) | A2XX_SQ_TEX_0_SIGN_W(fmt.sign) |
A2XX_SQ_TEX_0_SIGN_Y(fmt.sign) | A2XX_SQ_TEX_0_PITCH(fdl2_pitch_pixels(&rsc->layout, 0) *
A2XX_SQ_TEX_0_SIGN_Z(fmt.sign) | util_format_get_blockwidth(prsc->format)) |
A2XX_SQ_TEX_0_SIGN_W(fmt.sign) | COND(rsc->layout.tile_mode, A2XX_SQ_TEX_0_TILED);
A2XX_SQ_TEX_0_PITCH(fdl2_pitch_pixels(&rsc->layout, 0) * so->tex1 = A2XX_SQ_TEX_1_FORMAT(fmt.format) |
util_format_get_blockwidth(prsc->format)) | A2XX_SQ_TEX_1_CLAMP_POLICY(SQ_TEX_CLAMP_POLICY_OGL);
COND(rsc->layout.tile_mode, A2XX_SQ_TEX_0_TILED); so->tex2 = A2XX_SQ_TEX_2_HEIGHT(prsc->height0 - 1) |
so->tex1 = A2XX_SQ_TEX_2_WIDTH(prsc->width0 - 1);
A2XX_SQ_TEX_1_FORMAT(fmt.format) | so->tex3 = A2XX_SQ_TEX_3_NUM_FORMAT(fmt.num_format) |
A2XX_SQ_TEX_1_CLAMP_POLICY(SQ_TEX_CLAMP_POLICY_OGL); fd2_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g,
so->tex2 = cso->swizzle_b, cso->swizzle_a) |
A2XX_SQ_TEX_2_HEIGHT(prsc->height0 - 1) | A2XX_SQ_TEX_3_EXP_ADJUST(fmt.exp_adjust);
A2XX_SQ_TEX_2_WIDTH(prsc->width0 - 1);
so->tex3 =
A2XX_SQ_TEX_3_NUM_FORMAT(fmt.num_format) |
fd2_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g,
cso->swizzle_b, cso->swizzle_a) |
A2XX_SQ_TEX_3_EXP_ADJUST(fmt.exp_adjust);
so->tex4 = so->tex4 = A2XX_SQ_TEX_4_MIP_MIN_LEVEL(fd_sampler_first_level(cso)) |
A2XX_SQ_TEX_4_MIP_MIN_LEVEL(fd_sampler_first_level(cso)) | A2XX_SQ_TEX_4_MIP_MAX_LEVEL(fd_sampler_last_level(cso));
A2XX_SQ_TEX_4_MIP_MAX_LEVEL(fd_sampler_last_level(cso));
so->tex5 = A2XX_SQ_TEX_5_DIMENSION(tex_dimension(prsc->target)); so->tex5 = A2XX_SQ_TEX_5_DIMENSION(tex_dimension(prsc->target));
return &so->base; return &so->base;
} }
static void static void
fd2_set_sampler_views(struct pipe_context *pctx, enum pipe_shader_type shader, fd2_set_sampler_views(struct pipe_context *pctx, enum pipe_shader_type shader,
unsigned start, unsigned nr, unsigned unbind_num_trailing_slots, unsigned start, unsigned nr,
struct pipe_sampler_view **views) unsigned unbind_num_trailing_slots,
in_dt struct pipe_sampler_view **views) in_dt
{ {
if (shader == PIPE_SHADER_FRAGMENT) { if (shader == PIPE_SHADER_FRAGMENT) {
struct fd_context *ctx = fd_context(pctx); struct fd_context *ctx = fd_context(pctx);
/* on a2xx, since there is a flat address space for textures/samplers, /* on a2xx, since there is a flat address space for textures/samplers,
* a change in # of fragment textures/samplers will trigger patching and * a change in # of fragment textures/samplers will trigger patching and
* re-emitting the vertex shader: * re-emitting the vertex shader:
*/ */
if (nr != ctx->tex[PIPE_SHADER_FRAGMENT].num_textures) if (nr != ctx->tex[PIPE_SHADER_FRAGMENT].num_textures)
ctx->dirty |= FD_DIRTY_TEXSTATE; ctx->dirty |= FD_DIRTY_TEXSTATE;
} }
fd_set_sampler_views(pctx, shader, start, nr, unbind_num_trailing_slots, views); fd_set_sampler_views(pctx, shader, start, nr, unbind_num_trailing_slots,
views);
} }
/* map gallium sampler-id to hw const-idx.. adreno uses a flat address /* map gallium sampler-id to hw const-idx.. adreno uses a flat address
@ -244,19 +234,18 @@ fd2_set_sampler_views(struct pipe_context *pctx, enum pipe_shader_type shader,
*/ */
unsigned unsigned
fd2_get_const_idx(struct fd_context *ctx, struct fd_texture_stateobj *tex, fd2_get_const_idx(struct fd_context *ctx, struct fd_texture_stateobj *tex,
unsigned samp_id) unsigned samp_id) assert_dt
assert_dt
{ {
if (tex == &ctx->tex[PIPE_SHADER_FRAGMENT]) if (tex == &ctx->tex[PIPE_SHADER_FRAGMENT])
return samp_id; return samp_id;
return samp_id + ctx->tex[PIPE_SHADER_FRAGMENT].num_samplers; return samp_id + ctx->tex[PIPE_SHADER_FRAGMENT].num_samplers;
} }
void void
fd2_texture_init(struct pipe_context *pctx) fd2_texture_init(struct pipe_context *pctx)
{ {
pctx->create_sampler_state = fd2_sampler_state_create; pctx->create_sampler_state = fd2_sampler_state_create;
pctx->bind_sampler_states = fd2_sampler_states_bind; pctx->bind_sampler_states = fd2_sampler_states_bind;
pctx->create_sampler_view = fd2_sampler_view_create; pctx->create_sampler_view = fd2_sampler_view_create;
pctx->set_sampler_views = fd2_set_sampler_views; pctx->set_sampler_views = fd2_set_sampler_views;
} }

View file

@ -29,36 +29,36 @@
#include "pipe/p_context.h" #include "pipe/p_context.h"
#include "freedreno_texture.h"
#include "freedreno_resource.h" #include "freedreno_resource.h"
#include "freedreno_texture.h"
#include "fd2_context.h" #include "fd2_context.h"
#include "fd2_util.h" #include "fd2_util.h"
struct fd2_sampler_stateobj { struct fd2_sampler_stateobj {
struct pipe_sampler_state base; struct pipe_sampler_state base;
uint32_t tex0, tex3, tex4; uint32_t tex0, tex3, tex4;
}; };
static inline struct fd2_sampler_stateobj * static inline struct fd2_sampler_stateobj *
fd2_sampler_stateobj(struct pipe_sampler_state *samp) fd2_sampler_stateobj(struct pipe_sampler_state *samp)
{ {
return (struct fd2_sampler_stateobj *)samp; return (struct fd2_sampler_stateobj *)samp;
} }
struct fd2_pipe_sampler_view { struct fd2_pipe_sampler_view {
struct pipe_sampler_view base; struct pipe_sampler_view base;
uint32_t tex0, tex1, tex2, tex3, tex4, tex5; uint32_t tex0, tex1, tex2, tex3, tex4, tex5;
}; };
static inline struct fd2_pipe_sampler_view * static inline struct fd2_pipe_sampler_view *
fd2_pipe_sampler_view(struct pipe_sampler_view *pview) fd2_pipe_sampler_view(struct pipe_sampler_view *pview)
{ {
return (struct fd2_pipe_sampler_view *)pview; return (struct fd2_pipe_sampler_view *)pview;
} }
unsigned fd2_get_const_idx(struct fd_context *ctx, unsigned fd2_get_const_idx(struct fd_context *ctx,
struct fd_texture_stateobj *tex, unsigned samp_id); struct fd_texture_stateobj *tex, unsigned samp_id);
void fd2_texture_init(struct pipe_context *pctx); void fd2_texture_init(struct pipe_context *pctx);

View file

@ -32,53 +32,54 @@
static enum a2xx_sq_surfaceformat static enum a2xx_sq_surfaceformat
pipe2surface(enum pipe_format format, struct surface_format *fmt) pipe2surface(enum pipe_format format, struct surface_format *fmt)
{ {
const struct util_format_description *desc = util_format_description(format); const struct util_format_description *desc = util_format_description(format);
if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) { if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
switch (format) { switch (format) {
/* Compressed textures. */ /* Compressed textures. */
case PIPE_FORMAT_ETC1_RGB8: case PIPE_FORMAT_ETC1_RGB8:
return FMT_ETC1_RGB; return FMT_ETC1_RGB;
case PIPE_FORMAT_DXT1_RGB: case PIPE_FORMAT_DXT1_RGB:
case PIPE_FORMAT_DXT1_RGBA: case PIPE_FORMAT_DXT1_RGBA:
return FMT_DXT1; return FMT_DXT1;
case PIPE_FORMAT_DXT3_RGBA: case PIPE_FORMAT_DXT3_RGBA:
return FMT_DXT2_3; return FMT_DXT2_3;
case PIPE_FORMAT_DXT5_RGBA: case PIPE_FORMAT_DXT5_RGBA:
return FMT_DXT4_5; return FMT_DXT4_5;
case PIPE_FORMAT_ATC_RGB: case PIPE_FORMAT_ATC_RGB:
return FMT_ATI_TC_555_565_RGB; return FMT_ATI_TC_555_565_RGB;
case PIPE_FORMAT_ATC_RGBA_EXPLICIT: case PIPE_FORMAT_ATC_RGBA_EXPLICIT:
return FMT_ATI_TC_555_565_RGBA; return FMT_ATI_TC_555_565_RGBA;
case PIPE_FORMAT_ATC_RGBA_INTERPOLATED: case PIPE_FORMAT_ATC_RGBA_INTERPOLATED:
return FMT_ATI_TC_555_565_RGBA_INTERP; return FMT_ATI_TC_555_565_RGBA_INTERP;
/* YUV buffers. */ /* YUV buffers. */
case PIPE_FORMAT_UYVY: case PIPE_FORMAT_UYVY:
return FMT_Y1_Cr_Y0_Cb; return FMT_Y1_Cr_Y0_Cb;
case PIPE_FORMAT_YUYV: case PIPE_FORMAT_YUYV:
return FMT_Cr_Y1_Cb_Y0; return FMT_Cr_Y1_Cb_Y0;
default: default:
return ~0; return ~0;
} }
} }
uint32_t channel_size = 0; uint32_t channel_size = 0;
for (unsigned i = 0; i < 4; i++) for (unsigned i = 0; i < 4; i++)
channel_size |= desc->channel[i].size << i*8; channel_size |= desc->channel[i].size << i * 8;
unsigned i = util_format_get_first_non_void_channel(format); unsigned i = util_format_get_first_non_void_channel(format);
if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED || if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED ||
desc->channel[i].type == UTIL_FORMAT_TYPE_FIXED) desc->channel[i].type == UTIL_FORMAT_TYPE_FIXED)
fmt->sign = SQ_TEX_SIGN_SIGNED; fmt->sign = SQ_TEX_SIGN_SIGNED;
if (!desc->channel[i].normalized) if (!desc->channel[i].normalized)
fmt->num_format = SQ_TEX_NUM_FORMAT_INT; fmt->num_format = SQ_TEX_NUM_FORMAT_INT;
if (desc->channel[i].type == UTIL_FORMAT_TYPE_FIXED) if (desc->channel[i].type == UTIL_FORMAT_TYPE_FIXED)
fmt->exp_adjust = -16; fmt->exp_adjust = -16;
/* Note: the 3 channel 24bpp/48bpp/96bpp formats are only for vertex fetch; /* Note: the 3 channel 24bpp/48bpp/96bpp formats are only for vertex fetch;
* we can use the 4 channel format, since the 4th component just isn't used. * we can use the 4 channel format, since the 4th component just isn't
* XXX: is it possible for the extra loaded component to cause an MMU fault? * used.
*/ * XXX: is it possible for the extra loaded component to cause an MMU fault?
*/
#define CASE(r, g, b, a) case (r | g << 8 | b << 16 | a << 24) #define CASE(r, g, b, a) case (r | g << 8 | b << 16 | a << 24)
@ -119,116 +120,125 @@ pipe2surface(enum pipe_format format, struct surface_format *fmt)
/* clang-format on */ /* clang-format on */
#undef CASE #undef CASE
return ~0; return ~0;
} }
struct surface_format struct surface_format
fd2_pipe2surface(enum pipe_format format) fd2_pipe2surface(enum pipe_format format)
{ {
struct surface_format fmt = { struct surface_format fmt = {
.sign = SQ_TEX_SIGN_UNSIGNED, .sign = SQ_TEX_SIGN_UNSIGNED,
.num_format = SQ_TEX_NUM_FORMAT_FRAC, .num_format = SQ_TEX_NUM_FORMAT_FRAC,
.exp_adjust = 0, .exp_adjust = 0,
}; };
fmt.format = pipe2surface(format, &fmt); fmt.format = pipe2surface(format, &fmt);
return fmt; return fmt;
} }
enum a2xx_colorformatx enum a2xx_colorformatx
fd2_pipe2color(enum pipe_format format) fd2_pipe2color(enum pipe_format format)
{ {
switch (format) { switch (format) {
/* 8-bit buffers. */ /* 8-bit buffers. */
case PIPE_FORMAT_R8_UNORM: case PIPE_FORMAT_R8_UNORM:
return COLORX_8; return COLORX_8;
case PIPE_FORMAT_B2G3R3_UNORM: case PIPE_FORMAT_B2G3R3_UNORM:
return COLORX_2_3_3; /* note: untested */ return COLORX_2_3_3; /* note: untested */
/* 16-bit buffers. */ /* 16-bit buffers. */
case PIPE_FORMAT_B5G6R5_UNORM: case PIPE_FORMAT_B5G6R5_UNORM:
return COLORX_5_6_5; return COLORX_5_6_5;
case PIPE_FORMAT_B5G5R5A1_UNORM: case PIPE_FORMAT_B5G5R5A1_UNORM:
case PIPE_FORMAT_B5G5R5X1_UNORM: case PIPE_FORMAT_B5G5R5X1_UNORM:
return COLORX_1_5_5_5; return COLORX_1_5_5_5;
case PIPE_FORMAT_B4G4R4A4_UNORM: case PIPE_FORMAT_B4G4R4A4_UNORM:
case PIPE_FORMAT_B4G4R4X4_UNORM: case PIPE_FORMAT_B4G4R4X4_UNORM:
return COLORX_4_4_4_4; return COLORX_4_4_4_4;
case PIPE_FORMAT_R8G8_UNORM: case PIPE_FORMAT_R8G8_UNORM:
return COLORX_8_8; return COLORX_8_8;
/* 32-bit buffers. */ /* 32-bit buffers. */
case PIPE_FORMAT_B8G8R8A8_UNORM: case PIPE_FORMAT_B8G8R8A8_UNORM:
case PIPE_FORMAT_B8G8R8X8_UNORM: case PIPE_FORMAT_B8G8R8X8_UNORM:
case PIPE_FORMAT_R8G8B8A8_UNORM: case PIPE_FORMAT_R8G8B8A8_UNORM:
case PIPE_FORMAT_R8G8B8X8_UNORM: case PIPE_FORMAT_R8G8B8X8_UNORM:
return COLORX_8_8_8_8; return COLORX_8_8_8_8;
/* Note: snorm untested */ /* Note: snorm untested */
case PIPE_FORMAT_R8G8B8A8_SNORM: case PIPE_FORMAT_R8G8B8A8_SNORM:
case PIPE_FORMAT_R8G8B8X8_SNORM: case PIPE_FORMAT_R8G8B8X8_SNORM:
return COLORX_S8_8_8_8; return COLORX_S8_8_8_8;
/* float buffers */ /* float buffers */
case PIPE_FORMAT_R16_FLOAT: case PIPE_FORMAT_R16_FLOAT:
return COLORX_16_FLOAT; return COLORX_16_FLOAT;
case PIPE_FORMAT_R16G16_FLOAT: case PIPE_FORMAT_R16G16_FLOAT:
return COLORX_16_16_FLOAT; return COLORX_16_16_FLOAT;
case PIPE_FORMAT_R16G16B16A16_FLOAT: case PIPE_FORMAT_R16G16B16A16_FLOAT:
return COLORX_16_16_16_16_FLOAT; return COLORX_16_16_16_16_FLOAT;
case PIPE_FORMAT_R32_FLOAT: case PIPE_FORMAT_R32_FLOAT:
return COLORX_32_FLOAT; return COLORX_32_FLOAT;
case PIPE_FORMAT_R32G32_FLOAT: case PIPE_FORMAT_R32G32_FLOAT:
return COLORX_32_32_FLOAT; return COLORX_32_32_FLOAT;
case PIPE_FORMAT_R32G32B32A32_FLOAT: case PIPE_FORMAT_R32G32B32A32_FLOAT:
return COLORX_32_32_32_32_FLOAT; return COLORX_32_32_32_32_FLOAT;
default: default:
return ~0; return ~0;
} }
} }
static inline enum sq_tex_swiz static inline enum sq_tex_swiz
tex_swiz(unsigned swiz) tex_swiz(unsigned swiz)
{ {
switch (swiz) { switch (swiz) {
default: default:
case PIPE_SWIZZLE_X: return SQ_TEX_X; case PIPE_SWIZZLE_X:
case PIPE_SWIZZLE_Y: return SQ_TEX_Y; return SQ_TEX_X;
case PIPE_SWIZZLE_Z: return SQ_TEX_Z; case PIPE_SWIZZLE_Y:
case PIPE_SWIZZLE_W: return SQ_TEX_W; return SQ_TEX_Y;
case PIPE_SWIZZLE_0: return SQ_TEX_ZERO; case PIPE_SWIZZLE_Z:
case PIPE_SWIZZLE_1: return SQ_TEX_ONE; return SQ_TEX_Z;
} case PIPE_SWIZZLE_W:
return SQ_TEX_W;
case PIPE_SWIZZLE_0:
return SQ_TEX_ZERO;
case PIPE_SWIZZLE_1:
return SQ_TEX_ONE;
}
} }
uint32_t uint32_t
fd2_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g, fd2_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g,
unsigned swizzle_b, unsigned swizzle_a) unsigned swizzle_b, unsigned swizzle_a)
{ {
const struct util_format_description *desc = const struct util_format_description *desc = util_format_description(format);
util_format_description(format); unsigned char swiz[4] =
unsigned char swiz[4] = { {
swizzle_r, swizzle_g, swizzle_b, swizzle_a, swizzle_r,
}, rswiz[4]; swizzle_g,
swizzle_b,
swizzle_a,
},
rswiz[4];
util_format_compose_swizzles(desc->swizzle, swiz, rswiz); util_format_compose_swizzles(desc->swizzle, swiz, rswiz);
return A2XX_SQ_TEX_3_SWIZ_X(tex_swiz(rswiz[0])) | return A2XX_SQ_TEX_3_SWIZ_X(tex_swiz(rswiz[0])) |
A2XX_SQ_TEX_3_SWIZ_Y(tex_swiz(rswiz[1])) | A2XX_SQ_TEX_3_SWIZ_Y(tex_swiz(rswiz[1])) |
A2XX_SQ_TEX_3_SWIZ_Z(tex_swiz(rswiz[2])) | A2XX_SQ_TEX_3_SWIZ_Z(tex_swiz(rswiz[2])) |
A2XX_SQ_TEX_3_SWIZ_W(tex_swiz(rswiz[3])); A2XX_SQ_TEX_3_SWIZ_W(tex_swiz(rswiz[3]));
} }
uint32_t uint32_t
fd2_vtx_swiz(enum pipe_format format, unsigned swizzle) fd2_vtx_swiz(enum pipe_format format, unsigned swizzle)
{ {
const struct util_format_description *desc = const struct util_format_description *desc = util_format_description(format);
util_format_description(format); unsigned char swiz[4], rswiz[4];
unsigned char swiz[4], rswiz[4];
for (unsigned i = 0; i < 4; i++) for (unsigned i = 0; i < 4; i++)
swiz[i] = (swizzle >> i * 3) & 7; swiz[i] = (swizzle >> i * 3) & 7;
util_format_compose_swizzles(desc->swizzle, swiz, rswiz); util_format_compose_swizzles(desc->swizzle, swiz, rswiz);
return rswiz[0] | rswiz[1] << 3 | rswiz[2] << 6 | rswiz[3] << 9; return rswiz[0] | rswiz[1] << 3 | rswiz[2] << 6 | rswiz[3] << 9;
} }

View file

@ -33,23 +33,25 @@
struct surface_format { struct surface_format {
/* If enum is a signed type, 0x7f is out of range. Cast it to avoid warnings. */ /* If enum is a signed type, 0x7f is out of range. Cast it to avoid warnings. */
#define FMT_INVALID ((enum a2xx_sq_surfaceformat) 0x7f) #define FMT_INVALID ((enum a2xx_sq_surfaceformat)0x7f)
enum a2xx_sq_surfaceformat format : 7; enum a2xx_sq_surfaceformat format : 7;
enum sq_tex_sign sign : 2; enum sq_tex_sign sign : 2;
enum sq_tex_num_format num_format : 1; enum sq_tex_num_format num_format : 1;
int exp_adjust : 6; int exp_adjust : 6;
}; };
struct surface_format fd2_pipe2surface(enum pipe_format format); struct surface_format fd2_pipe2surface(enum pipe_format format);
enum a2xx_colorformatx fd2_pipe2color(enum pipe_format format); enum a2xx_colorformatx fd2_pipe2color(enum pipe_format format);
uint32_t fd2_tex_swiz(enum pipe_format format, unsigned swizzle_r, uint32_t fd2_tex_swiz(enum pipe_format format, unsigned swizzle_r,
unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a); unsigned swizzle_g, unsigned swizzle_b,
unsigned swizzle_a);
uint32_t fd2_vtx_swiz(enum pipe_format format, unsigned swizzle); uint32_t fd2_vtx_swiz(enum pipe_format format, unsigned swizzle);
/* convert x,y to dword */ /* convert x,y to dword */
static inline uint32_t xy2d(uint16_t x, uint16_t y) static inline uint32_t
xy2d(uint16_t x, uint16_t y)
{ {
return ((y & 0x3fff) << 16) | (x & 0x3fff); return ((y & 0x3fff) << 16) | (x & 0x3fff);
} }
#endif /* FD2_UTIL_H_ */ #endif /* FD2_UTIL_H_ */

View file

@ -24,72 +24,71 @@
* Rob Clark <robclark@freedesktop.org> * Rob Clark <robclark@freedesktop.org>
*/ */
#include "pipe/p_state.h" #include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h" #include "util/u_memory.h"
#include "util/u_string.h"
#include "fd2_zsa.h"
#include "fd2_context.h" #include "fd2_context.h"
#include "fd2_util.h" #include "fd2_util.h"
#include "fd2_zsa.h"
void * void *
fd2_zsa_state_create(struct pipe_context *pctx, fd2_zsa_state_create(struct pipe_context *pctx,
const struct pipe_depth_stencil_alpha_state *cso) const struct pipe_depth_stencil_alpha_state *cso)
{ {
struct fd2_zsa_stateobj *so; struct fd2_zsa_stateobj *so;
so = CALLOC_STRUCT(fd2_zsa_stateobj); so = CALLOC_STRUCT(fd2_zsa_stateobj);
if (!so) if (!so)
return NULL; return NULL;
so->base = *cso; so->base = *cso;
so->rb_depthcontrol |= so->rb_depthcontrol |=
A2XX_RB_DEPTHCONTROL_ZFUNC(cso->depth_func); /* maps 1:1 */ A2XX_RB_DEPTHCONTROL_ZFUNC(cso->depth_func); /* maps 1:1 */
if (cso->depth_enabled) if (cso->depth_enabled)
so->rb_depthcontrol |= A2XX_RB_DEPTHCONTROL_Z_ENABLE | so->rb_depthcontrol |=
COND(!cso->alpha_enabled, A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE); A2XX_RB_DEPTHCONTROL_Z_ENABLE |
if (cso->depth_writemask) COND(!cso->alpha_enabled, A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE);
so->rb_depthcontrol |= A2XX_RB_DEPTHCONTROL_Z_WRITE_ENABLE; if (cso->depth_writemask)
so->rb_depthcontrol |= A2XX_RB_DEPTHCONTROL_Z_WRITE_ENABLE;
if (cso->stencil[0].enabled) { if (cso->stencil[0].enabled) {
const struct pipe_stencil_state *s = &cso->stencil[0]; const struct pipe_stencil_state *s = &cso->stencil[0];
so->rb_depthcontrol |= so->rb_depthcontrol |=
A2XX_RB_DEPTHCONTROL_STENCIL_ENABLE | A2XX_RB_DEPTHCONTROL_STENCIL_ENABLE |
A2XX_RB_DEPTHCONTROL_STENCILFUNC(s->func) | /* maps 1:1 */ A2XX_RB_DEPTHCONTROL_STENCILFUNC(s->func) | /* maps 1:1 */
A2XX_RB_DEPTHCONTROL_STENCILFAIL(fd_stencil_op(s->fail_op)) | A2XX_RB_DEPTHCONTROL_STENCILFAIL(fd_stencil_op(s->fail_op)) |
A2XX_RB_DEPTHCONTROL_STENCILZPASS(fd_stencil_op(s->zpass_op)) | A2XX_RB_DEPTHCONTROL_STENCILZPASS(fd_stencil_op(s->zpass_op)) |
A2XX_RB_DEPTHCONTROL_STENCILZFAIL(fd_stencil_op(s->zfail_op)); A2XX_RB_DEPTHCONTROL_STENCILZFAIL(fd_stencil_op(s->zfail_op));
so->rb_stencilrefmask |= so->rb_stencilrefmask |=
0xff000000 | /* ??? */ 0xff000000 | /* ??? */
A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(s->writemask) | A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(s->writemask) |
A2XX_RB_STENCILREFMASK_STENCILMASK(s->valuemask); A2XX_RB_STENCILREFMASK_STENCILMASK(s->valuemask);
if (cso->stencil[1].enabled) { if (cso->stencil[1].enabled) {
const struct pipe_stencil_state *bs = &cso->stencil[1]; const struct pipe_stencil_state *bs = &cso->stencil[1];
so->rb_depthcontrol |= so->rb_depthcontrol |=
A2XX_RB_DEPTHCONTROL_BACKFACE_ENABLE | A2XX_RB_DEPTHCONTROL_BACKFACE_ENABLE |
A2XX_RB_DEPTHCONTROL_STENCILFUNC_BF(bs->func) | /* maps 1:1 */ A2XX_RB_DEPTHCONTROL_STENCILFUNC_BF(bs->func) | /* maps 1:1 */
A2XX_RB_DEPTHCONTROL_STENCILFAIL_BF(fd_stencil_op(bs->fail_op)) | A2XX_RB_DEPTHCONTROL_STENCILFAIL_BF(fd_stencil_op(bs->fail_op)) |
A2XX_RB_DEPTHCONTROL_STENCILZPASS_BF(fd_stencil_op(bs->zpass_op)) | A2XX_RB_DEPTHCONTROL_STENCILZPASS_BF(fd_stencil_op(bs->zpass_op)) |
A2XX_RB_DEPTHCONTROL_STENCILZFAIL_BF(fd_stencil_op(bs->zfail_op)); A2XX_RB_DEPTHCONTROL_STENCILZFAIL_BF(fd_stencil_op(bs->zfail_op));
so->rb_stencilrefmask_bf |= so->rb_stencilrefmask_bf |=
0xff000000 | /* ??? */ 0xff000000 | /* ??? */
A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(bs->writemask) | A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(bs->writemask) |
A2XX_RB_STENCILREFMASK_STENCILMASK(bs->valuemask); A2XX_RB_STENCILREFMASK_STENCILMASK(bs->valuemask);
} }
} }
if (cso->alpha_enabled) { if (cso->alpha_enabled) {
so->rb_colorcontrol = so->rb_colorcontrol = A2XX_RB_COLORCONTROL_ALPHA_FUNC(cso->alpha_func) |
A2XX_RB_COLORCONTROL_ALPHA_FUNC(cso->alpha_func) | A2XX_RB_COLORCONTROL_ALPHA_TEST_ENABLE;
A2XX_RB_COLORCONTROL_ALPHA_TEST_ENABLE; so->rb_alpha_ref = fui(cso->alpha_ref_value);
so->rb_alpha_ref = fui(cso->alpha_ref_value); }
}
return so; return so;
} }

View file

@ -27,28 +27,27 @@
#ifndef FD2_ZSA_H_ #ifndef FD2_ZSA_H_
#define FD2_ZSA_H_ #define FD2_ZSA_H_
#include "pipe/p_state.h"
#include "pipe/p_context.h" #include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "freedreno_util.h" #include "freedreno_util.h"
struct fd2_zsa_stateobj { struct fd2_zsa_stateobj {
struct pipe_depth_stencil_alpha_state base; struct pipe_depth_stencil_alpha_state base;
uint32_t rb_depthcontrol; uint32_t rb_depthcontrol;
uint32_t rb_colorcontrol; /* must be OR'd w/ blend->rb_colorcontrol */ uint32_t rb_colorcontrol; /* must be OR'd w/ blend->rb_colorcontrol */
uint32_t rb_alpha_ref; uint32_t rb_alpha_ref;
uint32_t rb_stencilrefmask; uint32_t rb_stencilrefmask;
uint32_t rb_stencilrefmask_bf; uint32_t rb_stencilrefmask_bf;
}; };
static inline struct fd2_zsa_stateobj * static inline struct fd2_zsa_stateobj *
fd2_zsa_stateobj(struct pipe_depth_stencil_alpha_state *zsa) fd2_zsa_stateobj(struct pipe_depth_stencil_alpha_state *zsa)
{ {
return (struct fd2_zsa_stateobj *)zsa; return (struct fd2_zsa_stateobj *)zsa;
} }
void * fd2_zsa_state_create(struct pipe_context *pctx, void *fd2_zsa_state_create(struct pipe_context *pctx,
const struct pipe_depth_stencil_alpha_state *cso); const struct pipe_depth_stencil_alpha_state *cso);
#endif /* FD2_ZSA_H_ */ #endif /* FD2_ZSA_H_ */

View file

@ -26,120 +26,124 @@
#include "ir2_private.h" #include "ir2_private.h"
static bool scalar_possible(struct ir2_instr *instr) static bool
scalar_possible(struct ir2_instr *instr)
{ {
if (instr->alu.scalar_opc == SCALAR_NONE) if (instr->alu.scalar_opc == SCALAR_NONE)
return false; return false;
return src_ncomp(instr) == 1; return src_ncomp(instr) == 1;
} }
static bool is_alu_compatible(struct ir2_instr *a, struct ir2_instr *b) static bool
is_alu_compatible(struct ir2_instr *a, struct ir2_instr *b)
{ {
if (!a) if (!a)
return true; return true;
/* don't use the same instruction twice */ /* don't use the same instruction twice */
if (a == b) if (a == b)
return false; return false;
/* PRED_SET must be alone */ /* PRED_SET must be alone */
if (b->alu.scalar_opc >= PRED_SETEs && if (b->alu.scalar_opc >= PRED_SETEs &&
b->alu.scalar_opc <= PRED_SET_RESTOREs) b->alu.scalar_opc <= PRED_SET_RESTOREs)
return false; return false;
/* must write to same export (issues otherwise?) */ /* must write to same export (issues otherwise?) */
return a->alu.export == b->alu.export; return a->alu.export == b->alu.export;
} }
/* priority of vector instruction for scheduling (lower=higher prio) */ /* priority of vector instruction for scheduling (lower=higher prio) */
static unsigned alu_vector_prio(struct ir2_instr *instr) static unsigned
alu_vector_prio(struct ir2_instr *instr)
{ {
if (instr->alu.vector_opc == VECTOR_NONE) if (instr->alu.vector_opc == VECTOR_NONE)
return ~0u; return ~0u;
if (is_export(instr)) if (is_export(instr))
return 4; return 4;
/* TODO check src type and ncomps */ /* TODO check src type and ncomps */
if (instr->src_count == 3) if (instr->src_count == 3)
return 0; return 0;
if (!scalar_possible(instr)) if (!scalar_possible(instr))
return 1; return 1;
return instr->src_count == 2 ? 2 : 3; return instr->src_count == 2 ? 2 : 3;
} }
/* priority of scalar instruction for scheduling (lower=higher prio) */ /* priority of scalar instruction for scheduling (lower=higher prio) */
static unsigned alu_scalar_prio(struct ir2_instr *instr) static unsigned
alu_scalar_prio(struct ir2_instr *instr)
{ {
if (!scalar_possible(instr)) if (!scalar_possible(instr))
return ~0u; return ~0u;
/* this case is dealt with later */ /* this case is dealt with later */
if (instr->src_count > 1) if (instr->src_count > 1)
return ~0u; return ~0u;
if (is_export(instr)) if (is_export(instr))
return 4; return 4;
/* PRED to end of block */ /* PRED to end of block */
if (instr->alu.scalar_opc >= PRED_SETEs && if (instr->alu.scalar_opc >= PRED_SETEs &&
instr->alu.scalar_opc <= PRED_SET_RESTOREs) instr->alu.scalar_opc <= PRED_SET_RESTOREs)
return 5; return 5;
/* scalar-only instructions have the highest priority */ /* scalar-only instructions have the highest priority */
return instr->alu.vector_opc == VECTOR_NONE ? 0 : 3; return instr->alu.vector_opc == VECTOR_NONE ? 0 : 3;
} }
/* this is a bit messy: /* this is a bit messy:
* we want to find a slot where we can insert a scalar MOV with * we want to find a slot where we can insert a scalar MOV with
* a vector instruction that was already scheduled * a vector instruction that was already scheduled
*/ */
static struct ir2_sched_instr* static struct ir2_sched_instr *
insert(struct ir2_context *ctx, unsigned block_idx, unsigned reg_idx, insert(struct ir2_context *ctx, unsigned block_idx, unsigned reg_idx,
struct ir2_src src1, unsigned *comp) struct ir2_src src1, unsigned *comp)
{ {
struct ir2_sched_instr *sched = NULL, *s; struct ir2_sched_instr *sched = NULL, *s;
unsigned i, mask = 0xf; unsigned i, mask = 0xf;
/* first go back to the earliest point where the mov can be inserted */ /* first go back to the earliest point where the mov can be inserted */
for (i = ctx->instr_sched_count-1; i > 0; i--) { for (i = ctx->instr_sched_count - 1; i > 0; i--) {
s = &ctx->instr_sched[i - 1]; s = &ctx->instr_sched[i - 1];
if (s->instr && s->instr->block_idx != block_idx) if (s->instr && s->instr->block_idx != block_idx)
break; break;
if (s->instr_s && s->instr_s->block_idx != block_idx) if (s->instr_s && s->instr_s->block_idx != block_idx)
break; break;
if (src1.type == IR2_SRC_SSA) { if (src1.type == IR2_SRC_SSA) {
if ((s->instr && s->instr->idx == src1.num) || if ((s->instr && s->instr->idx == src1.num) ||
(s->instr_s && s->instr_s->idx == src1.num)) (s->instr_s && s->instr_s->idx == src1.num))
break; break;
} }
unsigned mr = ~(s->reg_state[reg_idx/8] >> reg_idx%8*4 & 0xf); unsigned mr = ~(s->reg_state[reg_idx / 8] >> reg_idx % 8 * 4 & 0xf);
if ((mask & mr) == 0) if ((mask & mr) == 0)
break; break;
mask &= mr; mask &= mr;
if (s->instr_s || s->instr->src_count == 3) if (s->instr_s || s->instr->src_count == 3)
continue; continue;
if (s->instr->type != IR2_ALU || s->instr->alu.export >= 0) if (s->instr->type != IR2_ALU || s->instr->alu.export >= 0)
continue; continue;
sched = s; sched = s;
} }
*comp = ffs(mask) - 1; *comp = ffs(mask) - 1;
if (sched) { if (sched) {
for (s = sched; s != &ctx->instr_sched[ctx->instr_sched_count]; s++) for (s = sched; s != &ctx->instr_sched[ctx->instr_sched_count]; s++)
s->reg_state[reg_idx/8] |= 1 << (*comp+reg_idx%8*4); s->reg_state[reg_idx / 8] |= 1 << (*comp + reg_idx % 8 * 4);
} }
return sched; return sched;
} }
/* case1: /* case1:
@ -152,313 +156,326 @@ insert(struct ir2_context *ctx, unsigned block_idx, unsigned reg_idx,
static bool static bool
scalarize_case1(struct ir2_context *ctx, struct ir2_instr *instr, bool order) scalarize_case1(struct ir2_context *ctx, struct ir2_instr *instr, bool order)
{ {
struct ir2_src src0 = instr->src[ order]; struct ir2_src src0 = instr->src[order];
struct ir2_src src1 = instr->src[!order]; struct ir2_src src1 = instr->src[!order];
struct ir2_sched_instr *sched; struct ir2_sched_instr *sched;
struct ir2_instr *ins; struct ir2_instr *ins;
struct ir2_reg *reg; struct ir2_reg *reg;
unsigned idx, comp; unsigned idx, comp;
switch (src0.type) { switch (src0.type) {
case IR2_SRC_CONST: case IR2_SRC_CONST:
case IR2_SRC_INPUT: case IR2_SRC_INPUT:
return false; return false;
default: default:
break; break;
} }
/* TODO, insert needs logic for this */ /* TODO, insert needs logic for this */
if (src1.type == IR2_SRC_REG) if (src1.type == IR2_SRC_REG)
return false; return false;
/* we could do something if they match src1.. */ /* we could do something if they match src1.. */
if (src0.negate || src0.abs) if (src0.negate || src0.abs)
return false; return false;
reg = get_reg_src(ctx, &src0); reg = get_reg_src(ctx, &src0);
/* the result must not be used any more, since we will overwrite it */ /* the result must not be used any more, since we will overwrite it */
for (int i = 0; i < 4; i++) for (int i = 0; i < 4; i++)
if (reg->comp[i].ref_count != !!(instr->alu.write_mask & 1 << i)) if (reg->comp[i].ref_count != !!(instr->alu.write_mask & 1 << i))
return false; return false;
/* find a place to insert the mov */ /* find a place to insert the mov */
sched = insert(ctx, instr->block_idx, reg->idx, src1, &comp); sched = insert(ctx, instr->block_idx, reg->idx, src1, &comp);
if (!sched) if (!sched)
return false; return false;
ins = &ctx->instr[idx = ctx->instr_count++]; ins = &ctx->instr[idx = ctx->instr_count++];
ins->idx = idx; ins->idx = idx;
ins->type = IR2_ALU; ins->type = IR2_ALU;
ins->src[0] = src1; ins->src[0] = src1;
ins->src_count = 1; ins->src_count = 1;
ins->is_ssa = true; ins->is_ssa = true;
ins->ssa.idx = reg->idx; ins->ssa.idx = reg->idx;
ins->ssa.ncomp = 1; ins->ssa.ncomp = 1;
ins->ssa.comp[0].c = comp; ins->ssa.comp[0].c = comp;
ins->alu.scalar_opc = MAXs; ins->alu.scalar_opc = MAXs;
ins->alu.export = -1; ins->alu.export = -1;
ins->alu.write_mask = 1; ins->alu.write_mask = 1;
ins->pred = instr->pred; ins->pred = instr->pred;
ins->block_idx = instr->block_idx; ins->block_idx = instr->block_idx;
instr->src[0] = src0; instr->src[0] = src0;
instr->alu.src1_swizzle = comp; instr->alu.src1_swizzle = comp;
sched->instr_s = ins; sched->instr_s = ins;
return true; return true;
} }
/* fill sched with next fetch or (vector and/or scalar) alu instruction */ /* fill sched with next fetch or (vector and/or scalar) alu instruction */
static int sched_next(struct ir2_context *ctx, struct ir2_sched_instr *sched) static int
sched_next(struct ir2_context *ctx, struct ir2_sched_instr *sched)
{ {
struct ir2_instr *avail[0x100], *instr_v = NULL, *instr_s = NULL; struct ir2_instr *avail[0x100], *instr_v = NULL, *instr_s = NULL;
unsigned avail_count = 0; unsigned avail_count = 0;
instr_alloc_type_t export = ~0u; instr_alloc_type_t export = ~0u;
int block_idx = -1; int block_idx = -1;
/* XXX merge this loop with the other one somehow? */ /* XXX merge this loop with the other one somehow? */
ir2_foreach_instr(instr, ctx) { ir2_foreach_instr(instr, ctx)
if (!instr->need_emit) {
continue; if (!instr->need_emit)
if (is_export(instr)) continue;
export = MIN2(export, export_buf(instr->alu.export)); if (is_export(instr))
} export = MIN2(export, export_buf(instr->alu.export));
}
ir2_foreach_instr(instr, ctx) { ir2_foreach_instr(instr, ctx)
if (!instr->need_emit) {
continue; if (!instr->need_emit)
continue;
/* don't mix exports */ /* don't mix exports */
if (is_export(instr) && export_buf(instr->alu.export) != export) if (is_export(instr) && export_buf(instr->alu.export) != export)
continue; continue;
if (block_idx < 0) if (block_idx < 0)
block_idx = instr->block_idx; block_idx = instr->block_idx;
else if (block_idx != instr->block_idx || /* must be same block */ else if (block_idx != instr->block_idx || /* must be same block */
instr->type == IR2_CF || /* CF/MEM must be alone */ instr->type == IR2_CF || /* CF/MEM must be alone */
(is_export(instr) && export == SQ_MEMORY)) (is_export(instr) && export == SQ_MEMORY))
break; break;
/* this works because IR2_CF is always at the end of a block; /* this works because IR2_CF is always at the end of a block;
* the same idea roughly applies to MEM exports, which might not be alone * the same idea roughly applies to MEM exports, which might not be alone
* but will at least end up in order * but will at least end up in order
*/ */
/* check if dependencies are satisfied */ /* check if dependencies are satisfied */
bool is_ok = true; bool is_ok = true;
ir2_foreach_src(src, instr) { ir2_foreach_src(src, instr)
if (src->type == IR2_SRC_REG) { {
/* need to check if all previous instructions in the block if (src->type == IR2_SRC_REG) {
* which write the reg have been emitted /* need to check if all previous instructions in the block
* slow.. * which write the reg have been emitted
* XXX: check components instead of whole register * slow..
*/ * XXX: check components instead of whole register
struct ir2_reg *reg = get_reg_src(ctx, src); */
ir2_foreach_instr(p, ctx) { struct ir2_reg *reg = get_reg_src(ctx, src);
if (!p->is_ssa && p->reg == reg && p->idx < instr->idx) ir2_foreach_instr(p, ctx)
is_ok &= !p->need_emit; {
} if (!p->is_ssa && p->reg == reg && p->idx < instr->idx)
} else if (src->type == IR2_SRC_SSA) { is_ok &= !p->need_emit;
/* in this case its easy, just check need_emit */ }
is_ok &= !ctx->instr[src->num].need_emit; } else if (src->type == IR2_SRC_SSA) {
} /* in this case its easy, just check need_emit */
} is_ok &= !ctx->instr[src->num].need_emit;
/* don't reorder non-ssa write before read */ }
if (!instr->is_ssa) { }
ir2_foreach_instr(p, ctx) { /* don't reorder non-ssa write before read */
if (!p->need_emit || p->idx >= instr->idx) if (!instr->is_ssa) {
continue; ir2_foreach_instr(p, ctx)
{
if (!p->need_emit || p->idx >= instr->idx)
continue;
ir2_foreach_src(src, p) { ir2_foreach_src(src, p)
if (get_reg_src(ctx, src) == instr->reg) {
is_ok = false; if (get_reg_src(ctx, src) == instr->reg)
} is_ok = false;
} }
} }
/* don't reorder across predicates */ }
if (avail_count && instr->pred != avail[0]->pred) /* don't reorder across predicates */
is_ok = false; if (avail_count && instr->pred != avail[0]->pred)
is_ok = false;
if (!is_ok) if (!is_ok)
continue; continue;
avail[avail_count++] = instr; avail[avail_count++] = instr;
} }
if (!avail_count) { if (!avail_count) {
assert(block_idx == -1); assert(block_idx == -1);
return -1; return -1;
} }
/* priority to FETCH instructions */ /* priority to FETCH instructions */
ir2_foreach_avail(instr) { ir2_foreach_avail(instr)
if (instr->type == IR2_ALU) {
continue; if (instr->type == IR2_ALU)
continue;
ra_src_free(ctx, instr); ra_src_free(ctx, instr);
ra_reg(ctx, get_reg(instr), -1, false, 0); ra_reg(ctx, get_reg(instr), -1, false, 0);
instr->need_emit = false; instr->need_emit = false;
sched->instr = instr; sched->instr = instr;
sched->instr_s = NULL; sched->instr_s = NULL;
return block_idx; return block_idx;
} }
/* TODO precompute priorities */ /* TODO precompute priorities */
unsigned prio_v = ~0u, prio_s = ~0u, prio; unsigned prio_v = ~0u, prio_s = ~0u, prio;
ir2_foreach_avail(instr) { ir2_foreach_avail(instr)
prio = alu_vector_prio(instr); {
if (prio < prio_v) { prio = alu_vector_prio(instr);
instr_v = instr; if (prio < prio_v) {
prio_v = prio; instr_v = instr;
} prio_v = prio;
} }
}
/* TODO can still insert scalar if src_count=3, if smart about it */ /* TODO can still insert scalar if src_count=3, if smart about it */
if (!instr_v || instr_v->src_count < 3) { if (!instr_v || instr_v->src_count < 3) {
ir2_foreach_avail(instr) { ir2_foreach_avail(instr)
bool compat = is_alu_compatible(instr_v, instr); {
bool compat = is_alu_compatible(instr_v, instr);
prio = alu_scalar_prio(instr); prio = alu_scalar_prio(instr);
if (prio >= prio_v && !compat) if (prio >= prio_v && !compat)
continue; continue;
if (prio < prio_s) { if (prio < prio_s) {
instr_s = instr; instr_s = instr;
prio_s = prio; prio_s = prio;
if (!compat) if (!compat)
instr_v = NULL; instr_v = NULL;
} }
} }
} }
assert(instr_v || instr_s); assert(instr_v || instr_s);
/* now, we try more complex insertion of vector instruction as scalar /* now, we try more complex insertion of vector instruction as scalar
* TODO: if we are smart we can still insert if instr_v->src_count==3 * TODO: if we are smart we can still insert if instr_v->src_count==3
*/ */
if (!instr_s && instr_v->src_count < 3) { if (!instr_s && instr_v->src_count < 3) {
ir2_foreach_avail(instr) { ir2_foreach_avail(instr)
if (!is_alu_compatible(instr_v, instr) || !scalar_possible(instr)) {
continue; if (!is_alu_compatible(instr_v, instr) || !scalar_possible(instr))
continue;
/* at this point, src_count should always be 2 */ /* at this point, src_count should always be 2 */
assert(instr->src_count == 2); assert(instr->src_count == 2);
if (scalarize_case1(ctx, instr, 0)) { if (scalarize_case1(ctx, instr, 0)) {
instr_s = instr; instr_s = instr;
break; break;
} }
if (scalarize_case1(ctx, instr, 1)) { if (scalarize_case1(ctx, instr, 1)) {
instr_s = instr; instr_s = instr;
break; break;
} }
} }
} }
/* free src registers */ /* free src registers */
if (instr_v) { if (instr_v) {
instr_v->need_emit = false; instr_v->need_emit = false;
ra_src_free(ctx, instr_v); ra_src_free(ctx, instr_v);
} }
if (instr_s) { if (instr_s) {
instr_s->need_emit = false; instr_s->need_emit = false;
ra_src_free(ctx, instr_s); ra_src_free(ctx, instr_s);
} }
/* allocate dst registers */ /* allocate dst registers */
if (instr_v) if (instr_v)
ra_reg(ctx, get_reg(instr_v), -1, is_export(instr_v), instr_v->alu.write_mask); ra_reg(ctx, get_reg(instr_v), -1, is_export(instr_v),
instr_v->alu.write_mask);
if (instr_s) if (instr_s)
ra_reg(ctx, get_reg(instr_s), -1, is_export(instr_s), instr_s->alu.write_mask); ra_reg(ctx, get_reg(instr_s), -1, is_export(instr_s),
instr_s->alu.write_mask);
sched->instr = instr_v; sched->instr = instr_v;
sched->instr_s = instr_s; sched->instr_s = instr_s;
return block_idx; return block_idx;
} }
/* scheduling: determine order of instructions */ /* scheduling: determine order of instructions */
static void schedule_instrs(struct ir2_context *ctx) static void
schedule_instrs(struct ir2_context *ctx)
{ {
struct ir2_sched_instr *sched; struct ir2_sched_instr *sched;
int block_idx; int block_idx;
/* allocate input registers */ /* allocate input registers */
for (unsigned idx = 0; idx < ARRAY_SIZE(ctx->input); idx++) for (unsigned idx = 0; idx < ARRAY_SIZE(ctx->input); idx++)
if (ctx->input[idx].initialized) if (ctx->input[idx].initialized)
ra_reg(ctx, &ctx->input[idx], idx, false, 0); ra_reg(ctx, &ctx->input[idx], idx, false, 0);
for (;;) { for (;;) {
sched = &ctx->instr_sched[ctx->instr_sched_count++]; sched = &ctx->instr_sched[ctx->instr_sched_count++];
block_idx = sched_next(ctx, sched); block_idx = sched_next(ctx, sched);
if (block_idx < 0) if (block_idx < 0)
break; break;
memcpy(sched->reg_state, ctx->reg_state, sizeof(ctx->reg_state)); memcpy(sched->reg_state, ctx->reg_state, sizeof(ctx->reg_state));
/* catch texture fetch after scheduling and insert the /* catch texture fetch after scheduling and insert the
* SET_TEX_LOD right before it if necessary * SET_TEX_LOD right before it if necessary
* TODO clean this up * TODO clean this up
*/ */
struct ir2_instr *instr = sched->instr, *tex_lod; struct ir2_instr *instr = sched->instr, *tex_lod;
if (instr && instr->type == IR2_FETCH && if (instr && instr->type == IR2_FETCH && instr->fetch.opc == TEX_FETCH &&
instr->fetch.opc == TEX_FETCH && instr->src_count == 2) { instr->src_count == 2) {
/* generate the SET_LOD instruction */ /* generate the SET_LOD instruction */
tex_lod = &ctx->instr[ctx->instr_count++]; tex_lod = &ctx->instr[ctx->instr_count++];
tex_lod->type = IR2_FETCH; tex_lod->type = IR2_FETCH;
tex_lod->block_idx = instr->block_idx; tex_lod->block_idx = instr->block_idx;
tex_lod->pred = instr->pred; tex_lod->pred = instr->pred;
tex_lod->fetch.opc = TEX_SET_TEX_LOD; tex_lod->fetch.opc = TEX_SET_TEX_LOD;
tex_lod->src[0] = instr->src[1]; tex_lod->src[0] = instr->src[1];
tex_lod->src_count = 1; tex_lod->src_count = 1;
sched[1] = sched[0]; sched[1] = sched[0];
sched->instr = tex_lod; sched->instr = tex_lod;
ctx->instr_sched_count++; ctx->instr_sched_count++;
} }
bool free_block = true; bool free_block = true;
ir2_foreach_instr(instr, ctx) ir2_foreach_instr(instr, ctx) free_block &= instr->block_idx != block_idx;
free_block &= instr->block_idx != block_idx; if (free_block)
if (free_block) ra_block_free(ctx, block_idx);
ra_block_free(ctx, block_idx); };
}; ctx->instr_sched_count--;
ctx->instr_sched_count--;
} }
void void
ir2_compile(struct fd2_shader_stateobj *so, unsigned variant, ir2_compile(struct fd2_shader_stateobj *so, unsigned variant,
struct fd2_shader_stateobj *fp) struct fd2_shader_stateobj *fp)
{ {
struct ir2_context ctx = { }; struct ir2_context ctx = {};
bool binning = !fp && so->type == MESA_SHADER_VERTEX; bool binning = !fp && so->type == MESA_SHADER_VERTEX;
if (fp) if (fp)
so->variant[variant].f = fp->variant[0].f; so->variant[variant].f = fp->variant[0].f;
ctx.so = so; ctx.so = so;
ctx.info = &so->variant[variant].info; ctx.info = &so->variant[variant].info;
ctx.f = &so->variant[variant].f; ctx.f = &so->variant[variant].f;
ctx.info->max_reg = -1; ctx.info->max_reg = -1;
/* convert nir to internal representation */ /* convert nir to internal representation */
ir2_nir_compile(&ctx, binning); ir2_nir_compile(&ctx, binning);
/* copy propagate srcs */ /* copy propagate srcs */
cp_src(&ctx); cp_src(&ctx);
/* get ref_counts and kill non-needed instructions */ /* get ref_counts and kill non-needed instructions */
ra_count_refs(&ctx); ra_count_refs(&ctx);
/* remove movs used to write outputs */ /* remove movs used to write outputs */
cp_export(&ctx); cp_export(&ctx);
/* instruction order.. and vector->scalar conversions */ /* instruction order.. and vector->scalar conversions */
schedule_instrs(&ctx); schedule_instrs(&ctx);
/* finally, assemble to bitcode */ /* finally, assemble to bitcode */
assemble(&ctx, binning); assemble(&ctx, binning);
} }

View file

@ -31,66 +31,66 @@
#include "pipe/p_context.h" #include "pipe/p_context.h"
/* Per fetch-instruction record kept for later patching. */
struct ir2_fetch_info {
   /* position of the fetch instruction, as a dword offset */
   uint16_t offset;
   union {
      /* vertex fetch: swizzle to merge with tgsi swizzle */
      struct {
         uint16_t dst_swiz;
      } vtx;
      /* texture fetch: sampler id used to patch const_idx */
      struct {
         uint16_t samp_id;
         uint8_t src_swiz;
      } tex;
   };
};
/* Result of compiling one shader variant. */
struct ir2_shader_info {
   /* compiled shader */
   uint32_t *dwords;

   /* length of the compiled shader, counted in dwords */
   uint16_t sizedwords;

   /* highest GPR # used by shader */
   int8_t max_reg;

   /* dword offset of the first MEMORY export CF (for a20x hw binning) */
   int16_t mem_export_ptr;

   /* fetch instructions recorded for patching, and how many are valid */
   uint16_t num_fetch_instrs;
   struct ir2_fetch_info fetch_info[64];
};
/* Fragment shader input linkage. */
struct ir2_frag_linkage {
   /* number of valid entries in inputs[] */
   unsigned inputs_count;
   struct {
      uint8_t slot;
      uint8_t ncomp;
   } inputs[16];

   /* driver_location of fragcoord.zw, or -1 when it is not used */
   int fragcoord;
};
/* One compiled variant: the shader info plus its fragment linkage. */
struct ir2_shader_variant {
   struct ir2_shader_info info;
   struct ir2_frag_linkage f;
};
/* only used through pointers in this header */
struct fd2_shader_stateobj;
struct tgsi_token;

/* Compile variant `variant` of `so`; `fp` may be NULL. */
void ir2_compile(struct fd2_shader_stateobj *so, unsigned variant,
                 struct fd2_shader_stateobj *fp);

/* TGSI -> NIR conversion entry point */
struct nir_shader *ir2_tgsi_to_nir(const struct tgsi_token *tokens,
                                   struct pipe_screen *screen);

/* NIR compiler options for this backend */
const nir_shader_compiler_options *ir2_get_compiler_options(void);

/* NIR optimization entry point */
int ir2_optimize_nir(nir_shader *s, bool lower);
#endif /* IR2_H_ */ #endif /* IR2_H_ */

View file

@ -29,22 +29,22 @@
static unsigned static unsigned
src_swizzle(struct ir2_context *ctx, struct ir2_src *src, unsigned ncomp) src_swizzle(struct ir2_context *ctx, struct ir2_src *src, unsigned ncomp)
{ {
struct ir2_reg_component *comps; struct ir2_reg_component *comps;
unsigned swiz = 0; unsigned swiz = 0;
switch (src->type) { switch (src->type) {
case IR2_SRC_SSA: case IR2_SRC_SSA:
case IR2_SRC_REG: case IR2_SRC_REG:
break; break;
default: default:
return src->swizzle; return src->swizzle;
} }
/* we need to take into account where the components were allocated */ /* we need to take into account where the components were allocated */
comps = get_reg_src(ctx, src)->comp; comps = get_reg_src(ctx, src)->comp;
for (int i = 0; i < ncomp; i++) { for (int i = 0; i < ncomp; i++) {
swiz |= swiz_set(comps[swiz_get(src->swizzle, i)].c, i); swiz |= swiz_set(comps[swiz_get(src->swizzle, i)].c, i);
} }
return swiz; return swiz;
} }
/* ALU instructions need to take into account how the output components are allocated */
@ -54,46 +54,47 @@ src_swizzle(struct ir2_context *ctx, struct ir2_src *src, unsigned ncomp)
static unsigned static unsigned
alu_swizzle_scalar(struct ir2_context *ctx, struct ir2_src *reg) alu_swizzle_scalar(struct ir2_context *ctx, struct ir2_src *reg)
{ {
/* hardware seems to take from W, but swizzle everywhere just in case */ /* hardware seems to take from W, but swizzle everywhere just in case */
return swiz_merge(src_swizzle(ctx, reg, 1), IR2_SWIZZLE_XXXX); return swiz_merge(src_swizzle(ctx, reg, 1), IR2_SWIZZLE_XXXX);
} }
static unsigned static unsigned
alu_swizzle(struct ir2_context *ctx, struct ir2_instr *instr, struct ir2_src *src) alu_swizzle(struct ir2_context *ctx, struct ir2_instr *instr,
struct ir2_src *src)
{ {
struct ir2_reg_component *comp = get_reg(instr)->comp; struct ir2_reg_component *comp = get_reg(instr)->comp;
unsigned swiz0 = src_swizzle(ctx, src, src_ncomp(instr)); unsigned swiz0 = src_swizzle(ctx, src, src_ncomp(instr));
unsigned swiz = 0; unsigned swiz = 0;
/* non per component special cases */ /* non per component special cases */
switch (instr->alu.vector_opc) { switch (instr->alu.vector_opc) {
case PRED_SETE_PUSHv ... PRED_SETGTE_PUSHv: case PRED_SETE_PUSHv ... PRED_SETGTE_PUSHv:
return alu_swizzle_scalar(ctx, src); return alu_swizzle_scalar(ctx, src);
case DOT2ADDv: case DOT2ADDv:
case DOT3v: case DOT3v:
case DOT4v: case DOT4v:
case CUBEv: case CUBEv:
return swiz0; return swiz0;
default: default:
break; break;
} }
for (int i = 0, j = 0; i < dst_ncomp(instr); j++) { for (int i = 0, j = 0; i < dst_ncomp(instr); j++) {
if (instr->alu.write_mask & 1 << j) { if (instr->alu.write_mask & 1 << j) {
if (comp[j].c != 7) if (comp[j].c != 7)
swiz |= swiz_set(i, comp[j].c); swiz |= swiz_set(i, comp[j].c);
i++; i++;
} }
} }
return swiz_merge(swiz0, swiz); return swiz_merge(swiz0, swiz);
} }
static unsigned static unsigned
alu_swizzle_scalar2(struct ir2_context *ctx, struct ir2_src *src, unsigned s1) alu_swizzle_scalar2(struct ir2_context *ctx, struct ir2_src *src, unsigned s1)
{ {
/* hardware seems to take from ZW, but swizzle everywhere (ABAB) */ /* hardware seems to take from ZW, but swizzle everywhere (ABAB) */
unsigned s0 = swiz_get(src_swizzle(ctx, src, 1), 0); unsigned s0 = swiz_get(src_swizzle(ctx, src, 1), 0);
return swiz_merge(swiz_set(s0, 0) | swiz_set(s1, 1), IR2_SWIZZLE_XYXY); return swiz_merge(swiz_set(s0, 0) | swiz_set(s1, 1), IR2_SWIZZLE_XYXY);
} }
/* write_mask needs to be transformed by allocation information */ /* write_mask needs to be transformed by allocation information */
@ -101,15 +102,15 @@ alu_swizzle_scalar2(struct ir2_context *ctx, struct ir2_src *src, unsigned s1)
static unsigned static unsigned
alu_write_mask(struct ir2_context *ctx, struct ir2_instr *instr) alu_write_mask(struct ir2_context *ctx, struct ir2_instr *instr)
{ {
struct ir2_reg_component *comp = get_reg(instr)->comp; struct ir2_reg_component *comp = get_reg(instr)->comp;
unsigned write_mask = 0; unsigned write_mask = 0;
for (int i = 0; i < 4; i++) { for (int i = 0; i < 4; i++) {
if (instr->alu.write_mask & 1 << i) if (instr->alu.write_mask & 1 << i)
write_mask |= 1 << comp[i].c; write_mask |= 1 << comp[i].c;
} }
return write_mask; return write_mask;
} }
/* fetch instructions can swizzle dest, but src swizzle needs conversion */ /* fetch instructions can swizzle dest, but src swizzle needs conversion */
@ -117,432 +118,436 @@ alu_write_mask(struct ir2_context *ctx, struct ir2_instr *instr)
/* Convert a source swizzle into the form used by fetch instructions:
 * two bits per component, for the first `ncomp` components.
 */
static unsigned
fetch_swizzle(struct ir2_context *ctx, struct ir2_src *src, unsigned ncomp)
{
   unsigned alu_swiz = src_swizzle(ctx, src, ncomp);
   unsigned packed = 0;

   for (int i = 0; i < ncomp; i++) {
      packed |= swiz_get(alu_swiz, i) << (i * 2);
   }

   return packed;
}
static unsigned static unsigned
fetch_dst_swiz(struct ir2_context *ctx, struct ir2_instr *instr) fetch_dst_swiz(struct ir2_context *ctx, struct ir2_instr *instr)
{ {
struct ir2_reg_component *comp = get_reg(instr)->comp; struct ir2_reg_component *comp = get_reg(instr)->comp;
unsigned dst_swiz = 0xfff; unsigned dst_swiz = 0xfff;
for (int i = 0; i < dst_ncomp(instr); i++) { for (int i = 0; i < dst_ncomp(instr); i++) {
dst_swiz &= ~(7 << comp[i].c * 3); dst_swiz &= ~(7 << comp[i].c * 3);
dst_swiz |= i << comp[i].c * 3; dst_swiz |= i << comp[i].c * 3;
} }
return dst_swiz; return dst_swiz;
} }
/* register / export # for instr */ /* register / export # for instr */
static unsigned static unsigned
dst_to_reg(struct ir2_context *ctx, struct ir2_instr *instr) dst_to_reg(struct ir2_context *ctx, struct ir2_instr *instr)
{ {
if (is_export(instr)) if (is_export(instr))
return instr->alu.export; return instr->alu.export;
return get_reg(instr)->idx; return get_reg(instr)->idx;
} }
/* register # for src */ /* register # for src */
static unsigned src_to_reg(struct ir2_context *ctx, struct ir2_src *src) static unsigned
src_to_reg(struct ir2_context *ctx, struct ir2_src *src)
{ {
return get_reg_src(ctx, src)->idx; return get_reg_src(ctx, src)->idx;
} }
static unsigned src_reg_byte(struct ir2_context *ctx, struct ir2_src *src) static unsigned
src_reg_byte(struct ir2_context *ctx, struct ir2_src *src)
{ {
if (src->type == IR2_SRC_CONST) { if (src->type == IR2_SRC_CONST) {
assert(!src->abs); /* no abs bit for const */ assert(!src->abs); /* no abs bit for const */
return src->num; return src->num;
} }
return src_to_reg(ctx, src) | (src->abs ? 0x80 : 0); return src_to_reg(ctx, src) | (src->abs ? 0x80 : 0);
} }
/* produce the 12 byte binary instruction for a given sched_instr */ /* produce the 12 byte binary instruction for a given sched_instr */
static void static void
fill_instr(struct ir2_context *ctx, struct ir2_sched_instr *sched, fill_instr(struct ir2_context *ctx, struct ir2_sched_instr *sched, instr_t *bc,
instr_t *bc, bool * is_fetch) bool *is_fetch)
{ {
struct ir2_instr *instr = sched->instr, *instr_s, *instr_v; struct ir2_instr *instr = sched->instr, *instr_s, *instr_v;
*bc = (instr_t) {}; *bc = (instr_t){};
if (instr && instr->type == IR2_FETCH) { if (instr && instr->type == IR2_FETCH) {
*is_fetch = true; *is_fetch = true;
bc->fetch.opc = instr->fetch.opc; bc->fetch.opc = instr->fetch.opc;
bc->fetch.pred_select = !!instr->pred; bc->fetch.pred_select = !!instr->pred;
bc->fetch.pred_condition = instr->pred & 1; bc->fetch.pred_condition = instr->pred & 1;
struct ir2_src *src = instr->src; struct ir2_src *src = instr->src;
if (instr->fetch.opc == VTX_FETCH) { if (instr->fetch.opc == VTX_FETCH) {
instr_fetch_vtx_t *vtx = &bc->fetch.vtx; instr_fetch_vtx_t *vtx = &bc->fetch.vtx;
assert(instr->fetch.vtx.const_idx <= 0x1f); assert(instr->fetch.vtx.const_idx <= 0x1f);
assert(instr->fetch.vtx.const_idx_sel <= 0x3); assert(instr->fetch.vtx.const_idx_sel <= 0x3);
vtx->src_reg = src_to_reg(ctx, src); vtx->src_reg = src_to_reg(ctx, src);
vtx->src_swiz = fetch_swizzle(ctx, src, 1); vtx->src_swiz = fetch_swizzle(ctx, src, 1);
vtx->dst_reg = dst_to_reg(ctx, instr); vtx->dst_reg = dst_to_reg(ctx, instr);
vtx->dst_swiz = fetch_dst_swiz(ctx, instr); vtx->dst_swiz = fetch_dst_swiz(ctx, instr);
vtx->must_be_one = 1; vtx->must_be_one = 1;
vtx->const_index = instr->fetch.vtx.const_idx; vtx->const_index = instr->fetch.vtx.const_idx;
vtx->const_index_sel = instr->fetch.vtx.const_idx_sel; vtx->const_index_sel = instr->fetch.vtx.const_idx_sel;
/* other fields will be patched */ /* other fields will be patched */
/* XXX seems like every FETCH but the first has /* XXX seems like every FETCH but the first has
* this bit set: * this bit set:
*/ */
vtx->reserved3 = instr->idx ? 0x1 : 0x0; vtx->reserved3 = instr->idx ? 0x1 : 0x0;
vtx->reserved0 = instr->idx ? 0x2 : 0x3; vtx->reserved0 = instr->idx ? 0x2 : 0x3;
} else if (instr->fetch.opc == TEX_FETCH) { } else if (instr->fetch.opc == TEX_FETCH) {
instr_fetch_tex_t *tex = &bc->fetch.tex; instr_fetch_tex_t *tex = &bc->fetch.tex;
tex->src_reg = src_to_reg(ctx, src); tex->src_reg = src_to_reg(ctx, src);
tex->src_swiz = fetch_swizzle(ctx, src, 3); tex->src_swiz = fetch_swizzle(ctx, src, 3);
tex->dst_reg = dst_to_reg(ctx, instr); tex->dst_reg = dst_to_reg(ctx, instr);
tex->dst_swiz = fetch_dst_swiz(ctx, instr); tex->dst_swiz = fetch_dst_swiz(ctx, instr);
/* tex->const_idx = patch_fetches */ /* tex->const_idx = patch_fetches */
tex->mag_filter = TEX_FILTER_USE_FETCH_CONST; tex->mag_filter = TEX_FILTER_USE_FETCH_CONST;
tex->min_filter = TEX_FILTER_USE_FETCH_CONST; tex->min_filter = TEX_FILTER_USE_FETCH_CONST;
tex->mip_filter = TEX_FILTER_USE_FETCH_CONST; tex->mip_filter = TEX_FILTER_USE_FETCH_CONST;
tex->aniso_filter = ANISO_FILTER_USE_FETCH_CONST; tex->aniso_filter = ANISO_FILTER_USE_FETCH_CONST;
tex->arbitrary_filter = ARBITRARY_FILTER_USE_FETCH_CONST; tex->arbitrary_filter = ARBITRARY_FILTER_USE_FETCH_CONST;
tex->vol_mag_filter = TEX_FILTER_USE_FETCH_CONST; tex->vol_mag_filter = TEX_FILTER_USE_FETCH_CONST;
tex->vol_min_filter = TEX_FILTER_USE_FETCH_CONST; tex->vol_min_filter = TEX_FILTER_USE_FETCH_CONST;
tex->use_comp_lod = ctx->so->type == MESA_SHADER_FRAGMENT; tex->use_comp_lod = ctx->so->type == MESA_SHADER_FRAGMENT;
tex->use_reg_lod = instr->src_count == 2; tex->use_reg_lod = instr->src_count == 2;
tex->sample_location = SAMPLE_CENTER; tex->sample_location = SAMPLE_CENTER;
tex->tx_coord_denorm = instr->fetch.tex.is_rect; tex->tx_coord_denorm = instr->fetch.tex.is_rect;
} else if (instr->fetch.opc == TEX_SET_TEX_LOD) { } else if (instr->fetch.opc == TEX_SET_TEX_LOD) {
instr_fetch_tex_t *tex = &bc->fetch.tex; instr_fetch_tex_t *tex = &bc->fetch.tex;
tex->src_reg = src_to_reg(ctx, src); tex->src_reg = src_to_reg(ctx, src);
tex->src_swiz = fetch_swizzle(ctx, src, 1); tex->src_swiz = fetch_swizzle(ctx, src, 1);
tex->dst_reg = 0; tex->dst_reg = 0;
tex->dst_swiz = 0xfff; tex->dst_swiz = 0xfff;
tex->mag_filter = TEX_FILTER_USE_FETCH_CONST; tex->mag_filter = TEX_FILTER_USE_FETCH_CONST;
tex->min_filter = TEX_FILTER_USE_FETCH_CONST; tex->min_filter = TEX_FILTER_USE_FETCH_CONST;
tex->mip_filter = TEX_FILTER_USE_FETCH_CONST; tex->mip_filter = TEX_FILTER_USE_FETCH_CONST;
tex->aniso_filter = ANISO_FILTER_USE_FETCH_CONST; tex->aniso_filter = ANISO_FILTER_USE_FETCH_CONST;
tex->arbitrary_filter = ARBITRARY_FILTER_USE_FETCH_CONST; tex->arbitrary_filter = ARBITRARY_FILTER_USE_FETCH_CONST;
tex->vol_mag_filter = TEX_FILTER_USE_FETCH_CONST; tex->vol_mag_filter = TEX_FILTER_USE_FETCH_CONST;
tex->vol_min_filter = TEX_FILTER_USE_FETCH_CONST; tex->vol_min_filter = TEX_FILTER_USE_FETCH_CONST;
tex->use_comp_lod = 1; tex->use_comp_lod = 1;
tex->use_reg_lod = 0; tex->use_reg_lod = 0;
tex->sample_location = SAMPLE_CENTER; tex->sample_location = SAMPLE_CENTER;
} else { } else {
assert(0); assert(0);
} }
return; return;
} }
instr_v = sched->instr; instr_v = sched->instr;
instr_s = sched->instr_s; instr_s = sched->instr_s;
if (instr_v) { if (instr_v) {
struct ir2_src src1, src2, *src3; struct ir2_src src1, src2, *src3;
src1 = instr_v->src[0]; src1 = instr_v->src[0];
src2 = instr_v->src[instr_v->src_count > 1]; src2 = instr_v->src[instr_v->src_count > 1];
src3 = instr_v->src_count == 3 ? &instr_v->src[2] : NULL; src3 = instr_v->src_count == 3 ? &instr_v->src[2] : NULL;
bc->alu.vector_opc = instr_v->alu.vector_opc; bc->alu.vector_opc = instr_v->alu.vector_opc;
bc->alu.vector_write_mask = alu_write_mask(ctx, instr_v); bc->alu.vector_write_mask = alu_write_mask(ctx, instr_v);
bc->alu.vector_dest = dst_to_reg(ctx, instr_v); bc->alu.vector_dest = dst_to_reg(ctx, instr_v);
bc->alu.vector_clamp = instr_v->alu.saturate; bc->alu.vector_clamp = instr_v->alu.saturate;
bc->alu.export_data = instr_v->alu.export >= 0; bc->alu.export_data = instr_v->alu.export >= 0;
/* single operand SETEv, use 0.0f as src2 */ /* single operand SETEv, use 0.0f as src2 */
if (instr_v->src_count == 1 && if (instr_v->src_count == 1 &&
(bc->alu.vector_opc == SETEv || (bc->alu.vector_opc == SETEv || bc->alu.vector_opc == SETNEv ||
bc->alu.vector_opc == SETNEv || bc->alu.vector_opc == SETGTv || bc->alu.vector_opc == SETGTEv))
bc->alu.vector_opc == SETGTv || src2 = ir2_zero(ctx);
bc->alu.vector_opc == SETGTEv))
src2 = ir2_zero(ctx);
/* export32 instr for a20x hw binning has this bit set.. /* export32 instr for a20x hw binning has this bit set..
* it seems to do more than change the base address of constants * it seems to do more than change the base address of constants
* XXX this is a hack * XXX this is a hack
*/ */
bc->alu.relative_addr = bc->alu.relative_addr =
(bc->alu.export_data && bc->alu.vector_dest == 32); (bc->alu.export_data && bc->alu.vector_dest == 32);
bc->alu.src1_reg_byte = src_reg_byte(ctx, &src1); bc->alu.src1_reg_byte = src_reg_byte(ctx, &src1);
bc->alu.src1_swiz = alu_swizzle(ctx, instr_v, &src1); bc->alu.src1_swiz = alu_swizzle(ctx, instr_v, &src1);
bc->alu.src1_reg_negate = src1.negate; bc->alu.src1_reg_negate = src1.negate;
bc->alu.src1_sel = src1.type != IR2_SRC_CONST; bc->alu.src1_sel = src1.type != IR2_SRC_CONST;
bc->alu.src2_reg_byte = src_reg_byte(ctx, &src2); bc->alu.src2_reg_byte = src_reg_byte(ctx, &src2);
bc->alu.src2_swiz = alu_swizzle(ctx, instr_v, &src2); bc->alu.src2_swiz = alu_swizzle(ctx, instr_v, &src2);
bc->alu.src2_reg_negate = src2.negate; bc->alu.src2_reg_negate = src2.negate;
bc->alu.src2_sel = src2.type != IR2_SRC_CONST; bc->alu.src2_sel = src2.type != IR2_SRC_CONST;
if (src3) { if (src3) {
bc->alu.src3_reg_byte = src_reg_byte(ctx, src3); bc->alu.src3_reg_byte = src_reg_byte(ctx, src3);
bc->alu.src3_swiz = alu_swizzle(ctx, instr_v, src3); bc->alu.src3_swiz = alu_swizzle(ctx, instr_v, src3);
bc->alu.src3_reg_negate = src3->negate; bc->alu.src3_reg_negate = src3->negate;
bc->alu.src3_sel = src3->type != IR2_SRC_CONST; bc->alu.src3_sel = src3->type != IR2_SRC_CONST;
} }
bc->alu.pred_select = instr_v->pred; bc->alu.pred_select = instr_v->pred;
} }
if (instr_s) { if (instr_s) {
struct ir2_src *src = instr_s->src; struct ir2_src *src = instr_s->src;
bc->alu.scalar_opc = instr_s->alu.scalar_opc; bc->alu.scalar_opc = instr_s->alu.scalar_opc;
bc->alu.scalar_write_mask = alu_write_mask(ctx, instr_s); bc->alu.scalar_write_mask = alu_write_mask(ctx, instr_s);
bc->alu.scalar_dest = dst_to_reg(ctx, instr_s); bc->alu.scalar_dest = dst_to_reg(ctx, instr_s);
bc->alu.scalar_clamp = instr_s->alu.saturate; bc->alu.scalar_clamp = instr_s->alu.saturate;
bc->alu.export_data = instr_s->alu.export >= 0; bc->alu.export_data = instr_s->alu.export >= 0;
if (instr_s->src_count == 1) { if (instr_s->src_count == 1) {
bc->alu.src3_reg_byte = src_reg_byte(ctx, src); bc->alu.src3_reg_byte = src_reg_byte(ctx, src);
bc->alu.src3_swiz = alu_swizzle_scalar(ctx, src); bc->alu.src3_swiz = alu_swizzle_scalar(ctx, src);
bc->alu.src3_reg_negate = src->negate; bc->alu.src3_reg_negate = src->negate;
bc->alu.src3_sel = src->type != IR2_SRC_CONST; bc->alu.src3_sel = src->type != IR2_SRC_CONST;
} else { } else {
assert(instr_s->src_count == 2); assert(instr_s->src_count == 2);
bc->alu.src3_reg_byte = src_reg_byte(ctx, src); bc->alu.src3_reg_byte = src_reg_byte(ctx, src);
bc->alu.src3_swiz = alu_swizzle_scalar2(ctx, src, instr_s->alu.src1_swizzle); bc->alu.src3_swiz =
bc->alu.src3_reg_negate = src->negate; alu_swizzle_scalar2(ctx, src, instr_s->alu.src1_swizzle);
bc->alu.src3_sel = src->type != IR2_SRC_CONST;; bc->alu.src3_reg_negate = src->negate;
} bc->alu.src3_sel = src->type != IR2_SRC_CONST;
;
}
if (instr_v) if (instr_v)
assert(instr_s->pred == instr_v->pred); assert(instr_s->pred == instr_v->pred);
bc->alu.pred_select = instr_s->pred; bc->alu.pred_select = instr_s->pred;
} }
*is_fetch = false; *is_fetch = false;
return; return;
} }
static unsigned static unsigned
write_cfs(struct ir2_context *ctx, instr_cf_t * cfs, unsigned cf_idx, write_cfs(struct ir2_context *ctx, instr_cf_t *cfs, unsigned cf_idx,
instr_cf_alloc_t *alloc, instr_cf_exec_t *exec) instr_cf_alloc_t *alloc, instr_cf_exec_t *exec)
{ {
assert(exec->count); assert(exec->count);
if (alloc) if (alloc)
cfs[cf_idx++].alloc = *alloc; cfs[cf_idx++].alloc = *alloc;
/* for memory alloc offset for patching */ /* for memory alloc offset for patching */
if (alloc && alloc->buffer_select == SQ_MEMORY && if (alloc && alloc->buffer_select == SQ_MEMORY &&
ctx->info->mem_export_ptr == -1) ctx->info->mem_export_ptr == -1)
ctx->info->mem_export_ptr = cf_idx / 2 * 3; ctx->info->mem_export_ptr = cf_idx / 2 * 3;
cfs[cf_idx++].exec = *exec; cfs[cf_idx++].exec = *exec;
exec->address += exec->count; exec->address += exec->count;
exec->serialize = 0; exec->serialize = 0;
exec->count = 0; exec->count = 0;
return cf_idx; return cf_idx;
} }
/* assemble the final shader */ /* assemble the final shader */
void assemble(struct ir2_context *ctx, bool binning) void
assemble(struct ir2_context *ctx, bool binning)
{ {
/* hw seems to have a limit of 384 (num_cf/2+num_instr <= 384) /* hw seems to have a limit of 384 (num_cf/2+num_instr <= 384)
* address is 9 bits so could it be 512 ? * address is 9 bits so could it be 512 ?
*/ */
instr_cf_t cfs[384]; instr_cf_t cfs[384];
instr_t bytecode[384], bc; instr_t bytecode[384], bc;
unsigned block_addr[128]; unsigned block_addr[128];
unsigned num_cf = 0; unsigned num_cf = 0;
/* CF instr state */ /* CF instr state */
instr_cf_exec_t exec = {.opc = EXEC}; instr_cf_exec_t exec = {.opc = EXEC};
instr_cf_alloc_t alloc = {.opc = ALLOC}; instr_cf_alloc_t alloc = {.opc = ALLOC};
int sync_id, sync_id_prev = -1; int sync_id, sync_id_prev = -1;
bool is_fetch = false; bool is_fetch = false;
bool need_sync = true; bool need_sync = true;
bool need_alloc = false; bool need_alloc = false;
unsigned block_idx = 0; unsigned block_idx = 0;
ctx->info->mem_export_ptr = -1; ctx->info->mem_export_ptr = -1;
ctx->info->num_fetch_instrs = 0; ctx->info->num_fetch_instrs = 0;
/* vertex shader always needs to allocate at least one parameter /* vertex shader always needs to allocate at least one parameter
* if it will never happen, * if it will never happen,
*/ */
if (ctx->so->type == MESA_SHADER_VERTEX && ctx->f->inputs_count == 0) { if (ctx->so->type == MESA_SHADER_VERTEX && ctx->f->inputs_count == 0) {
alloc.buffer_select = SQ_PARAMETER_PIXEL; alloc.buffer_select = SQ_PARAMETER_PIXEL;
cfs[num_cf++].alloc = alloc; cfs[num_cf++].alloc = alloc;
} }
block_addr[0] = 0; block_addr[0] = 0;
for (int i = 0, j = 0; j < ctx->instr_sched_count; j++) { for (int i = 0, j = 0; j < ctx->instr_sched_count; j++) {
struct ir2_instr *instr = ctx->instr_sched[j].instr; struct ir2_instr *instr = ctx->instr_sched[j].instr;
/* catch IR2_CF since it isn't a regular instruction */ /* catch IR2_CF since it isn't a regular instruction */
if (instr && instr->type == IR2_CF) { if (instr && instr->type == IR2_CF) {
assert(!need_alloc); /* XXX */ assert(!need_alloc); /* XXX */
/* flush any exec cf before inserting jmp */ /* flush any exec cf before inserting jmp */
if (exec.count) if (exec.count)
num_cf = write_cfs(ctx, cfs, num_cf, NULL, &exec); num_cf = write_cfs(ctx, cfs, num_cf, NULL, &exec);
cfs[num_cf++].jmp_call = (instr_cf_jmp_call_t) { cfs[num_cf++].jmp_call = (instr_cf_jmp_call_t){
.opc = COND_JMP, .opc = COND_JMP,
.address = instr->cf.block_idx, /* will be fixed later */ .address = instr->cf.block_idx, /* will be fixed later */
.force_call = !instr->pred, .force_call = !instr->pred,
.predicated_jmp = 1, .predicated_jmp = 1,
.direction = instr->cf.block_idx > instr->block_idx, .direction = instr->cf.block_idx > instr->block_idx,
.condition = instr->pred & 1, .condition = instr->pred & 1,
}; };
continue; continue;
} }
/* fill the 3 dwords for the instruction */ /* fill the 3 dwords for the instruction */
fill_instr(ctx, &ctx->instr_sched[j], &bc, &is_fetch); fill_instr(ctx, &ctx->instr_sched[j], &bc, &is_fetch);
/* we need to sync between ALU/VTX_FETCH/TEX_FETCH types */ /* we need to sync between ALU/VTX_FETCH/TEX_FETCH types */
sync_id = 0; sync_id = 0;
if (is_fetch) if (is_fetch)
sync_id = bc.fetch.opc == VTX_FETCH ? 1 : 2; sync_id = bc.fetch.opc == VTX_FETCH ? 1 : 2;
need_sync = sync_id != sync_id_prev; need_sync = sync_id != sync_id_prev;
sync_id_prev = sync_id; sync_id_prev = sync_id;
unsigned block; unsigned block;
{ {
if (ctx->instr_sched[j].instr) if (ctx->instr_sched[j].instr)
block = ctx->instr_sched[j].instr->block_idx; block = ctx->instr_sched[j].instr->block_idx;
else else
block = ctx->instr_sched[j].instr_s->block_idx; block = ctx->instr_sched[j].instr_s->block_idx;
assert(block_idx <= block); assert(block_idx <= block);
} }
/* info for patching */ /* info for patching */
if (is_fetch) { if (is_fetch) {
struct ir2_fetch_info *info = struct ir2_fetch_info *info =
&ctx->info->fetch_info[ctx->info->num_fetch_instrs++]; &ctx->info->fetch_info[ctx->info->num_fetch_instrs++];
info->offset = i * 3; /* add cf offset later */ info->offset = i * 3; /* add cf offset later */
if (bc.fetch.opc == VTX_FETCH) { if (bc.fetch.opc == VTX_FETCH) {
info->vtx.dst_swiz = bc.fetch.vtx.dst_swiz; info->vtx.dst_swiz = bc.fetch.vtx.dst_swiz;
} else if (bc.fetch.opc == TEX_FETCH) { } else if (bc.fetch.opc == TEX_FETCH) {
info->tex.samp_id = instr->fetch.tex.samp_id; info->tex.samp_id = instr->fetch.tex.samp_id;
info->tex.src_swiz = bc.fetch.tex.src_swiz; info->tex.src_swiz = bc.fetch.tex.src_swiz;
} else { } else {
ctx->info->num_fetch_instrs--; ctx->info->num_fetch_instrs--;
} }
} }
/* exec cf after 6 instr or when switching between fetch / alu */ /* exec cf after 6 instr or when switching between fetch / alu */
if (exec.count == 6 || (exec.count && (need_sync || block != block_idx))) { if (exec.count == 6 ||
num_cf = write_cfs(ctx, cfs, num_cf, need_alloc ? &alloc : NULL, &exec); (exec.count && (need_sync || block != block_idx))) {
need_alloc = false; num_cf =
} write_cfs(ctx, cfs, num_cf, need_alloc ? &alloc : NULL, &exec);
need_alloc = false;
}
/* update block_addrs for jmp patching */ /* update block_addrs for jmp patching */
while (block_idx < block) while (block_idx < block)
block_addr[++block_idx] = num_cf; block_addr[++block_idx] = num_cf;
/* export - fill alloc cf */ /* export - fill alloc cf */
if (!is_fetch && bc.alu.export_data) { if (!is_fetch && bc.alu.export_data) {
/* get the export buffer from either vector/scalar dest */ /* get the export buffer from either vector/scalar dest */
instr_alloc_type_t buffer = instr_alloc_type_t buffer = export_buf(bc.alu.vector_dest);
export_buf(bc.alu.vector_dest); if (bc.alu.scalar_write_mask) {
if (bc.alu.scalar_write_mask) { if (bc.alu.vector_write_mask)
if (bc.alu.vector_write_mask) assert(buffer == export_buf(bc.alu.scalar_dest));
assert(buffer == export_buf(bc.alu.scalar_dest)); buffer = export_buf(bc.alu.scalar_dest);
buffer = export_buf(bc.alu.scalar_dest); }
}
/* flush previous alloc if the buffer changes */ /* flush previous alloc if the buffer changes */
bool need_new_alloc = buffer != alloc.buffer_select; bool need_new_alloc = buffer != alloc.buffer_select;
/* memory export always in 32/33 pair, new alloc on 32 */ /* memory export always in 32/33 pair, new alloc on 32 */
if (bc.alu.vector_dest == 32) if (bc.alu.vector_dest == 32)
need_new_alloc = true; need_new_alloc = true;
if (need_new_alloc && exec.count) { if (need_new_alloc && exec.count) {
num_cf = write_cfs(ctx, cfs, num_cf, need_alloc ? &alloc : NULL, &exec); num_cf =
need_alloc = false; write_cfs(ctx, cfs, num_cf, need_alloc ? &alloc : NULL, &exec);
} need_alloc = false;
}
need_alloc |= need_new_alloc; need_alloc |= need_new_alloc;
alloc.size = 0; alloc.size = 0;
alloc.buffer_select = buffer; alloc.buffer_select = buffer;
if (buffer == SQ_PARAMETER_PIXEL && ctx->so->type == MESA_SHADER_VERTEX) if (buffer == SQ_PARAMETER_PIXEL &&
alloc.size = ctx->f->inputs_count - 1; ctx->so->type == MESA_SHADER_VERTEX)
alloc.size = ctx->f->inputs_count - 1;
if (buffer == SQ_POSITION) if (buffer == SQ_POSITION)
alloc.size = ctx->so->writes_psize; alloc.size = ctx->so->writes_psize;
} }
if (is_fetch) if (is_fetch)
exec.serialize |= 0x1 << exec.count * 2; exec.serialize |= 0x1 << exec.count * 2;
if (need_sync) if (need_sync)
exec.serialize |= 0x2 << exec.count * 2; exec.serialize |= 0x2 << exec.count * 2;
need_sync = false; need_sync = false;
exec.count += 1; exec.count += 1;
bytecode[i++] = bc; bytecode[i++] = bc;
} }
/* final exec cf */ /* final exec cf */
exec.opc = EXEC_END; exec.opc = EXEC_END;
num_cf = num_cf = write_cfs(ctx, cfs, num_cf, need_alloc ? &alloc : NULL, &exec);
write_cfs(ctx, cfs, num_cf, need_alloc ? &alloc : NULL, &exec);
/* insert nop to get an even # of CFs */ /* insert nop to get an even # of CFs */
if (num_cf % 2) if (num_cf % 2)
cfs[num_cf++] = (instr_cf_t) { cfs[num_cf++] = (instr_cf_t){.opc = NOP};
.opc = NOP};
/* patch cf addrs */ /* patch cf addrs */
for (int idx = 0; idx < num_cf; idx++) { for (int idx = 0; idx < num_cf; idx++) {
switch (cfs[idx].opc) { switch (cfs[idx].opc) {
case NOP: case NOP:
case ALLOC: case ALLOC:
break; break;
case EXEC: case EXEC:
case EXEC_END: case EXEC_END:
cfs[idx].exec.address += num_cf / 2; cfs[idx].exec.address += num_cf / 2;
break; break;
case COND_JMP: case COND_JMP:
cfs[idx].jmp_call.address = block_addr[cfs[idx].jmp_call.address]; cfs[idx].jmp_call.address = block_addr[cfs[idx].jmp_call.address];
break; break;
default: default:
assert(0); assert(0);
} }
} }
/* concatenate cfs and alu/fetch */ /* concatenate cfs and alu/fetch */
uint32_t cfdwords = num_cf / 2 * 3; uint32_t cfdwords = num_cf / 2 * 3;
uint32_t alufetchdwords = exec.address * 3; uint32_t alufetchdwords = exec.address * 3;
uint32_t sizedwords = cfdwords + alufetchdwords; uint32_t sizedwords = cfdwords + alufetchdwords;
uint32_t *dwords = malloc(sizedwords * 4); uint32_t *dwords = malloc(sizedwords * 4);
assert(dwords); assert(dwords);
memcpy(dwords, cfs, cfdwords * 4); memcpy(dwords, cfs, cfdwords * 4);
memcpy(&dwords[cfdwords], bytecode, alufetchdwords * 4); memcpy(&dwords[cfdwords], bytecode, alufetchdwords * 4);
/* finalize ir2_shader_info */ /* finalize ir2_shader_info */
ctx->info->dwords = dwords; ctx->info->dwords = dwords;
ctx->info->sizedwords = sizedwords; ctx->info->sizedwords = sizedwords;
for (int i = 0; i < ctx->info->num_fetch_instrs; i++) for (int i = 0; i < ctx->info->num_fetch_instrs; i++)
ctx->info->fetch_info[i].offset += cfdwords; ctx->info->fetch_info[i].offset += cfdwords;
if (FD_DBG(DISASM)) { if (FD_DBG(DISASM)) {
DBG("disassemble: type=%d", ctx->so->type); DBG("disassemble: type=%d", ctx->so->type);
disasm_a2xx(dwords, sizedwords, 0, ctx->so->type); disasm_a2xx(dwords, sizedwords, 0, ctx->so->type);
} }
} }

View file

@ -26,20 +26,22 @@
#include "ir2_private.h" #include "ir2_private.h"
static bool is_mov(struct ir2_instr *instr) static bool
is_mov(struct ir2_instr *instr)
{ {
return instr->type == IR2_ALU && instr->alu.vector_opc == MAXv && return instr->type == IR2_ALU && instr->alu.vector_opc == MAXv &&
instr->src_count == 1; instr->src_count == 1;
} }
static void src_combine(struct ir2_src *src, struct ir2_src b) static void
src_combine(struct ir2_src *src, struct ir2_src b)
{ {
src->num = b.num; src->num = b.num;
src->type = b.type; src->type = b.type;
src->swizzle = swiz_merge(b.swizzle, src->swizzle); src->swizzle = swiz_merge(b.swizzle, src->swizzle);
if (!src->abs) /* if we have abs we don't care about previous negate */ if (!src->abs) /* if we have abs we don't care about previous negate */
src->negate ^= b.negate; src->negate ^= b.negate;
src->abs |= b.abs; src->abs |= b.abs;
} }
/* cp_src: replace src regs when they refer to a mov instruction /* cp_src: replace src regs when they refer to a mov instruction
@ -49,37 +51,40 @@ static void src_combine(struct ir2_src *src, struct ir2_src b)
* becomes: * becomes:
* ALU: MULADDv R7 = C7, R10, R0.xxxx * ALU: MULADDv R7 = C7, R10, R0.xxxx
*/ */
void cp_src(struct ir2_context *ctx) void
cp_src(struct ir2_context *ctx)
{ {
struct ir2_instr *p; struct ir2_instr *p;
ir2_foreach_instr(instr, ctx) { ir2_foreach_instr(instr, ctx)
ir2_foreach_src(src, instr) { {
/* loop to replace recursively */ ir2_foreach_src(src, instr)
do { {
if (src->type != IR2_SRC_SSA) /* loop to replace recursively */
break; do {
if (src->type != IR2_SRC_SSA)
break;
p = &ctx->instr[src->num]; p = &ctx->instr[src->num];
/* don't work across blocks to avoid possible issues */ /* don't work across blocks to avoid possible issues */
if (p->block_idx != instr->block_idx) if (p->block_idx != instr->block_idx)
break; break;
if (!is_mov(p)) if (!is_mov(p))
break; break;
if (p->alu.saturate) if (p->alu.saturate)
break; break;
/* cant apply abs to const src, const src only for alu */ /* cant apply abs to const src, const src only for alu */
if (p->src[0].type == IR2_SRC_CONST && if (p->src[0].type == IR2_SRC_CONST &&
(src->abs || instr->type != IR2_ALU)) (src->abs || instr->type != IR2_ALU))
break; break;
src_combine(src, p->src[0]); src_combine(src, p->src[0]);
} while (1); } while (1);
} }
} }
} }
/* cp_export: replace mov to export when possible /* cp_export: replace mov to export when possible
@ -94,136 +99,138 @@ void cp_src(struct ir2_context *ctx)
* ALU: MAXv export0.xyz_ = R0.xxx?, C8.xxx? * ALU: MAXv export0.xyz_ = R0.xxx?, C8.xxx?
* *
*/ */
void cp_export(struct ir2_context *ctx) void
cp_export(struct ir2_context *ctx)
{ {
struct ir2_instr *c[4], *ins[4]; struct ir2_instr *c[4], *ins[4];
struct ir2_src *src; struct ir2_src *src;
struct ir2_reg *reg; struct ir2_reg *reg;
unsigned ncomp; unsigned ncomp;
ir2_foreach_instr(instr, ctx) { ir2_foreach_instr(instr, ctx)
if (!is_export(instr)) /* TODO */ {
continue; if (!is_export(instr)) /* TODO */
continue;
if (!is_mov(instr)) if (!is_mov(instr))
continue; continue;
src = &instr->src[0]; src = &instr->src[0];
if (src->negate || src->abs) /* TODO handle these cases */ if (src->negate || src->abs) /* TODO handle these cases */
continue; continue;
if (src->type == IR2_SRC_INPUT || src->type == IR2_SRC_CONST) if (src->type == IR2_SRC_INPUT || src->type == IR2_SRC_CONST)
continue; continue;
reg = get_reg_src(ctx, src); reg = get_reg_src(ctx, src);
ncomp = dst_ncomp(instr); ncomp = dst_ncomp(instr);
unsigned reswiz[4] = {}; unsigned reswiz[4] = {};
unsigned num_instr = 0; unsigned num_instr = 0;
/* fill array c with pointers to instrs that write each component */ /* fill array c with pointers to instrs that write each component */
if (src->type == IR2_SRC_SSA) { if (src->type == IR2_SRC_SSA) {
struct ir2_instr *instr = &ctx->instr[src->num]; struct ir2_instr *instr = &ctx->instr[src->num];
if (instr->type != IR2_ALU) if (instr->type != IR2_ALU)
continue; continue;
for (int i = 0; i < ncomp; i++) for (int i = 0; i < ncomp; i++)
c[i] = instr; c[i] = instr;
ins[num_instr++] = instr; ins[num_instr++] = instr;
reswiz[0] = src->swizzle; reswiz[0] = src->swizzle;
} else { } else {
bool ok = true; bool ok = true;
unsigned write_mask = 0; unsigned write_mask = 0;
ir2_foreach_instr(instr, ctx) { ir2_foreach_instr(instr, ctx)
if (instr->is_ssa || instr->reg != reg) {
continue; if (instr->is_ssa || instr->reg != reg)
continue;
/* set by non-ALU */ /* set by non-ALU */
if (instr->type != IR2_ALU) { if (instr->type != IR2_ALU) {
ok = false; ok = false;
break; break;
} }
/* component written more than once */ /* component written more than once */
if (write_mask & instr->alu.write_mask) { if (write_mask & instr->alu.write_mask) {
ok = false; ok = false;
break; break;
} }
write_mask |= instr->alu.write_mask; write_mask |= instr->alu.write_mask;
/* src pointers for components */ /* src pointers for components */
for (int i = 0, j = 0; i < 4; i++) { for (int i = 0, j = 0; i < 4; i++) {
unsigned k = swiz_get(src->swizzle, i); unsigned k = swiz_get(src->swizzle, i);
if (instr->alu.write_mask & 1 << k) { if (instr->alu.write_mask & 1 << k) {
c[i] = instr; c[i] = instr;
/* reswiz = compressed src->swizzle */ /* reswiz = compressed src->swizzle */
unsigned x = 0; unsigned x = 0;
for (int i = 0; i < k; i++) for (int i = 0; i < k; i++)
x += !!(instr->alu.write_mask & 1 << i); x += !!(instr->alu.write_mask & 1 << i);
assert(src->swizzle || x == j); assert(src->swizzle || x == j);
reswiz[num_instr] |= swiz_set(x, j++); reswiz[num_instr] |= swiz_set(x, j++);
} }
} }
ins[num_instr++] = instr; ins[num_instr++] = instr;
} }
if (!ok) if (!ok)
continue; continue;
} }
bool redirect = true; bool redirect = true;
/* must all be in same block */ /* must all be in same block */
for (int i = 0; i < ncomp; i++) for (int i = 0; i < ncomp; i++)
redirect &= (c[i]->block_idx == instr->block_idx); redirect &= (c[i]->block_idx == instr->block_idx);
/* no other instr using the value */ /* no other instr using the value */
ir2_foreach_instr(p, ctx) { ir2_foreach_instr(p, ctx)
if (p == instr) {
continue; if (p == instr)
ir2_foreach_src(src, p) continue;
redirect &= reg != get_reg_src(ctx, src); ir2_foreach_src(src, p) redirect &= reg != get_reg_src(ctx, src);
} }
if (!redirect) if (!redirect)
continue; continue;
/* redirect the instructions writing to the register */ /* redirect the instructions writing to the register */
for (int i = 0; i < num_instr; i++) { for (int i = 0; i < num_instr; i++) {
struct ir2_instr *p = ins[i]; struct ir2_instr *p = ins[i];
p->alu.export = instr->alu.export; p->alu.export = instr->alu.export;
p->alu.write_mask = 0; p->alu.write_mask = 0;
p->is_ssa = true; p->is_ssa = true;
p->ssa.ncomp = 0; p->ssa.ncomp = 0;
memset(p->ssa.comp, 0, sizeof(p->ssa.comp)); memset(p->ssa.comp, 0, sizeof(p->ssa.comp));
p->alu.saturate |= instr->alu.saturate; p->alu.saturate |= instr->alu.saturate;
switch (p->alu.vector_opc) { switch (p->alu.vector_opc) {
case PRED_SETE_PUSHv ... PRED_SETGTE_PUSHv: case PRED_SETE_PUSHv ... PRED_SETGTE_PUSHv:
case DOT2ADDv: case DOT2ADDv:
case DOT3v: case DOT3v:
case DOT4v: case DOT4v:
case CUBEv: case CUBEv:
continue; continue;
default: default:
break; break;
} }
ir2_foreach_src(s, p) ir2_foreach_src(s, p) swiz_merge_p(&s->swizzle, reswiz[i]);
swiz_merge_p(&s->swizzle, reswiz[i]); }
}
for (int i = 0; i < ncomp; i++) { for (int i = 0; i < ncomp; i++) {
c[i]->alu.write_mask |= (1 << i); c[i]->alu.write_mask |= (1 << i);
c[i]->ssa.ncomp++; c[i]->ssa.ncomp++;
} }
instr->type = IR2_NONE; instr->type = IR2_NONE;
instr->need_emit = false; instr->need_emit = false;
} }
} }

File diff suppressed because it is too large Load diff

View file

@ -24,175 +24,175 @@
* Jonathan Marek <jonathan@marek.ca> * Jonathan Marek <jonathan@marek.ca>
*/ */
#include <stdlib.h> #include <assert.h>
#include <stdint.h> #include <stdint.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h>
#include <string.h> #include <string.h>
#include <assert.h>
#include "ir2.h"
#include "fd2_program.h"
#include "ir2/instr-a2xx.h" #include "ir2/instr-a2xx.h"
#include "fd2_program.h"
#include "ir2.h"
/* What the `num` field of struct ir2_src refers to. */
enum ir2_src_type {
   IR2_SRC_SSA,   /* index of an instruction */
   IR2_SRC_REG,   /* index in ctx->reg */
   IR2_SRC_INPUT, /* index in ctx->input */
   IR2_SRC_CONST, /* constant index (C0, C1, etc) */
};
struct ir2_src { struct ir2_src {
/* num can mean different things /* num can mean different things
* ssa: index of instruction * ssa: index of instruction
* reg: index in ctx->reg array * reg: index in ctx->reg array
* input: index in ctx->input array * input: index in ctx->input array
* const: constant index (C0, C1, etc) * const: constant index (C0, C1, etc)
*/ */
uint16_t num; uint16_t num;
uint8_t swizzle; uint8_t swizzle;
enum ir2_src_type type : 2; enum ir2_src_type type : 2;
uint8_t abs : 1; uint8_t abs : 1;
uint8_t negate : 1; uint8_t negate : 1;
uint8_t : 4; uint8_t : 4;
}; };
/* Register-allocation state of one component of an ir2_reg. */
struct ir2_reg_component {
   uint8_t c : 3;     /* assigned x/y/z/w (7=dont write, for fetch instr) */
   bool alloc : 1;    /* is it currently allocated */
   uint8_t ref_count; /* for ra */
};
/* A value (SSA result, non-SSA register or input) as seen by RA. */
struct ir2_reg {
   uint8_t idx;   /* assigned hardware register */
   uint8_t ncomp; /* number of components */
   uint8_t loop_depth;
   bool initialized;
   /* block_idx to free on (-1 = free on ref_count==0) */
   int block_idx_free;
   struct ir2_reg_component comp[4];
};
/* One IR instruction: fetch, ALU or control flow (IR2_NONE = removed). */
struct ir2_instr {
   unsigned idx;
   unsigned block_idx;

   enum {
      IR2_NONE,
      IR2_FETCH,
      IR2_ALU,
      IR2_CF,
   } type : 2;

   /* instruction needs to be emitted (for scheduling) */
   bool need_emit : 1;

   /* predicate value - (usually) same for entire block */
   uint8_t pred : 2;

   /* sources: src[0 .. src_count) are valid */
   uint8_t src_count;
   struct ir2_src src[4];

   /* destination: inline `ssa` reg when is_ssa, else pointer `reg` */
   bool is_ssa;
   union {
      struct ir2_reg ssa;
      struct ir2_reg *reg;
   };

   /* payload, selected by `type` */
   union {
      struct {
         instr_fetch_opc_t opc : 5;
         union {
            struct {
               uint8_t const_idx;
               uint8_t const_idx_sel;
            } vtx;
            struct {
               bool is_cube : 1;
               bool is_rect : 1;
               uint8_t samp_id;
            } tex;
         };
      } fetch;
      struct {
         /* store possible opcs, then we can choose vector/scalar instr */
         instr_scalar_opc_t scalar_opc : 6;
         instr_vector_opc_t vector_opc : 5;
         /* same as nir */
         uint8_t write_mask : 4;
         bool saturate : 1;

         /* export idx (-1 no export) */
         int8_t export;

         /* for scalarized 2 src instruction */
         uint8_t src1_swizzle;
      } alu;
      struct {
         /* jmp dst block_idx */
         uint8_t block_idx;
      } cf;
   };
};
/* One scheduled slot: a snapshot of the register mask plus up to two
 * instructions (`instr` and `instr_s`), either of which may be NULL.
 */
struct ir2_sched_instr {
   uint32_t reg_state[8];
   struct ir2_instr *instr, *instr_s;
};
/* Compiler state for one shader variant. */
struct ir2_context {
   struct fd2_shader_stateobj *so;

   /* current block / predicate tracking */
   unsigned block_idx, pred_idx;
   uint8_t pred;
   bool block_has_jump[64];

   unsigned loop_last_block[64];
   unsigned loop_depth;

   nir_shader *nir;

   /* ssa index of position output */
   struct ir2_src position;

   /* to translate SSA ids to instruction ids */
   int16_t ssa_map[1024];

   struct ir2_shader_info *info;
   struct ir2_frag_linkage *f;

   int prev_export;

   /* RA state */
   struct ir2_reg *live_regs[64];
   uint32_t reg_state[256 / 32]; /* 64*4 bits */

   /* inputs */
   struct ir2_reg input[16 + 1]; /* 16 + param */

   /* non-ssa regs */
   struct ir2_reg reg[64];
   unsigned reg_count;

   /* all instructions, then the schedule built from them */
   struct ir2_instr instr[0x300];
   unsigned instr_count;

   struct ir2_sched_instr instr_sched[0x180];
   unsigned instr_sched_count;
};
void assemble(struct ir2_context *ctx, bool binning); void assemble(struct ir2_context *ctx, bool binning);
void ir2_nir_compile(struct ir2_context *ctx, bool binning); void ir2_nir_compile(struct ir2_context *ctx, bool binning);
bool ir2_nir_lower_scalar(nir_shader * shader); bool ir2_nir_lower_scalar(nir_shader *shader);
void ra_count_refs(struct ir2_context *ctx); void ra_count_refs(struct ir2_context *ctx);
void ra_reg(struct ir2_context *ctx, struct ir2_reg *reg, int force_idx, void ra_reg(struct ir2_context *ctx, struct ir2_reg *reg, int force_idx,
bool export, uint8_t export_writemask); bool export, uint8_t export_writemask);
void ra_src_free(struct ir2_context *ctx, struct ir2_instr *instr); void ra_src_free(struct ir2_context *ctx, struct ir2_instr *instr);
void ra_block_free(struct ir2_context *ctx, unsigned block); void ra_block_free(struct ir2_context *ctx, unsigned block);
@ -201,196 +201,212 @@ void cp_export(struct ir2_context *ctx);
/* utils */ /* utils */
/* Pre-computed swizzles. Each channel i takes 2 bits at position 2*i and
 * stores (component - i) & 3, matching swiz_set()/swiz_get().
 */
enum {
   IR2_SWIZZLE_Y = (1 << 0),
   IR2_SWIZZLE_Z = (2 << 0),
   IR2_SWIZZLE_W = (3 << 0),

   IR2_SWIZZLE_ZW = (2 << 0) | (2 << 2),

   IR2_SWIZZLE_YXW = (1 << 0) | (3 << 2) | (1 << 4),

   IR2_SWIZZLE_XXXX = (0 << 0) | (3 << 2) | (2 << 4) | (1 << 6),
   IR2_SWIZZLE_YYYY = (1 << 0) | (0 << 2) | (3 << 4) | (2 << 6),
   IR2_SWIZZLE_ZZZZ = (2 << 0) | (1 << 2) | (0 << 4) | (3 << 6),
   IR2_SWIZZLE_WWWW = (3 << 0) | (2 << 2) | (1 << 4) | (0 << 6),
   IR2_SWIZZLE_WYWW = (3 << 0) | (0 << 2) | (1 << 4) | (0 << 6),
   IR2_SWIZZLE_XYXY = (0 << 0) | (0 << 2) | (2 << 4) | (2 << 6),
   IR2_SWIZZLE_ZZXY = (2 << 0) | (1 << 2) | (2 << 4) | (2 << 6),
   IR2_SWIZZLE_YXZZ = (1 << 0) | (3 << 2) | (0 << 4) | (3 << 6),
};
/* Report a compile failure: printf() the message, then assert(0).
 * `ctx` is accepted but not used by the expansion.
 */
#define compile_error(ctx, args...)                                            \
   ({                                                                          \
      printf(args);                                                            \
      assert(0);                                                               \
   })
static inline struct ir2_src static inline struct ir2_src
ir2_src(uint16_t num, uint8_t swizzle, enum ir2_src_type type) ir2_src(uint16_t num, uint8_t swizzle, enum ir2_src_type type)
{ {
return (struct ir2_src) { return (struct ir2_src){.num = num, .swizzle = swizzle, .type = type};
.num = num,
.swizzle = swizzle,
.type = type
};
} }
/* ir2_assemble uses it .. */ /* ir2_assemble uses it .. */
struct ir2_src ir2_zero(struct ir2_context *ctx); struct ir2_src ir2_zero(struct ir2_context *ctx);
/* Iterate `it` over (ctx)->instr[0 .. instr_count), skipping entries whose
 * type is IR2_NONE. The skip happens in the loop condition (GNU statement
 * expression), so `continue` in the body is safe.
 */
#define ir2_foreach_instr(it, ctx)                                             \
   for (struct ir2_instr *it = (ctx)->instr; ({                                \
           while (it != &(ctx)->instr[(ctx)->instr_count] &&                   \
                  it->type == IR2_NONE)                                        \
              it++;                                                            \
           it != &(ctx)->instr[(ctx)->instr_count];                            \
        });                                                                    \
        it++)
/* Iterate `it` over every non-NULL entry of (ctx)->live_regs[0..64).
 *
 * __ptr is the cursor into the array; `it` is only the current value. The
 * step must advance __ptr: stepping `it` would revisit the same slot forever
 * unless the loop body cleared it. The body may still set the current slot
 * to NULL.
 */
#define ir2_foreach_live_reg(it, ctx)                                          \
   for (struct ir2_reg **__ptr = (ctx)->live_regs, *it; ({                     \
           while (__ptr != &(ctx)->live_regs[64] && *__ptr == NULL)            \
              __ptr++;                                                         \
           __ptr != &(ctx)->live_regs[64] ? (it = *__ptr) : NULL;              \
        });                                                                    \
        __ptr++)
/* Iterate `it` over avail[0 .. avail_count); `avail` and `avail_count` must
 * be in scope at the use site. The bounds check comes first so that
 * avail[avail_count] is never read.
 */
#define ir2_foreach_avail(it)                                                  \
   for (struct ir2_instr **__instrp = avail, *it;                              \
        __instrp != &avail[avail_count] && (it = *__instrp, 1); __instrp++)
/* Iterate `it` over (instr)->src[0 .. src_count). The argument is
 * parenthesized so that expressions such as `base + 1` expand correctly.
 */
#define ir2_foreach_src(it, instr)                                             \
   for (struct ir2_src *it = (instr)->src;                                     \
        it != &(instr)->src[(instr)->src_count]; it++)
/* mask for register allocation /* mask for register allocation
* 64 registers with 4 components each = 256 bits * 64 registers with 4 components each = 256 bits
*/ */
/* typedef struct { /* typedef struct {
uint64_t data[4]; uint64_t data[4];
} regmask_t; */ } regmask_t; */
/* Test bit `num` of a bitmask stored as an array of 32-bit words.
 * Shifts on the unsigned word, so bit 31 does not hit a signed `1 << 31`.
 */
static inline bool
mask_isset(uint32_t *mask, unsigned num)
{
   return (mask[num / 32] >> (num % 32)) & 1u;
}
/* Set bit `num` of a bitmask stored as an array of 32-bit words.
 * Uses an unsigned constant: `1 << 31` on a signed int is undefined.
 */
static inline void
mask_set(uint32_t *mask, unsigned num)
{
   mask[num / 32] |= UINT32_C(1) << (num % 32);
}
/* Clear bit `num` of a bitmask stored as an array of 32-bit words.
 * Uses an unsigned constant: `1 << 31` on a signed int is undefined.
 */
static inline void
mask_unset(uint32_t *mask, unsigned num)
{
   mask[num / 32] &= ~(UINT32_C(1) << (num % 32));
}
/* Return the 4-bit component mask of register `num`; each 32-bit word
 * holds the masks of 8 consecutive registers.
 */
static inline unsigned
mask_reg(uint32_t *mask, unsigned num)
{
   const unsigned word = num / 8;
   const unsigned shift = (num % 8) * 4;

   return (mask[word] >> shift) & 0xf;
}
static inline bool is_export(struct ir2_instr *instr) static inline bool
is_export(struct ir2_instr *instr)
{ {
return instr->type == IR2_ALU && instr->alu.export >= 0; return instr->type == IR2_ALU && instr->alu.export >= 0;
} }
static inline instr_alloc_type_t export_buf(unsigned num) static inline instr_alloc_type_t
export_buf(unsigned num)
{ {
return num < 32 ? SQ_PARAMETER_PIXEL : return num < 32 ? SQ_PARAMETER_PIXEL : num >= 62 ? SQ_POSITION : SQ_MEMORY;
num >= 62 ? SQ_POSITION : SQ_MEMORY;
} }
/* component c for channel i: swizzles are stored relative to the channel,
 * two bits per channel
 */
static inline unsigned
swiz_set(unsigned c, unsigned i)
{
   const unsigned rel = (c - i) & 3;

   return rel << (i * 2);
}
/* get swizzle in channel i: returns the absolute component (0..3) */
static inline unsigned
swiz_get(unsigned swiz, unsigned i)
{
   const unsigned rel = swiz >> (i * 2);

   return (rel + i) & 3;
}
/* Compose two swizzles: channel i of the result selects the component
 * that swiz0 yields for the component swiz1 selects for i.
 */
static inline unsigned
swiz_merge(unsigned swiz0, unsigned swiz1)
{
   unsigned merged = 0;

   for (unsigned chan = 0; chan < 4; chan++) {
      const unsigned inner = swiz_get(swiz1, chan);

      merged |= swiz_set(swiz_get(swiz0, inner), chan);
   }
   return merged;
}
/* In-place variant of swiz_merge(): *swiz0 = merge(*swiz0, swiz1). */
static inline void
swiz_merge_p(uint8_t *swiz0, unsigned swiz1)
{
   unsigned merged = 0;

   for (unsigned chan = 0; chan < 4; chan++) {
      const unsigned inner = swiz_get(swiz1, chan);

      merged |= swiz_set(swiz_get(*swiz0, inner), chan);
   }
   *swiz0 = merged;
}
static inline struct ir2_reg * get_reg(struct ir2_instr *instr) static inline struct ir2_reg *
get_reg(struct ir2_instr *instr)
{ {
return instr->is_ssa ? &instr->ssa : instr->reg; return instr->is_ssa ? &instr->ssa : instr->reg;
} }
static inline struct ir2_reg * static inline struct ir2_reg *
get_reg_src(struct ir2_context *ctx, struct ir2_src *src) get_reg_src(struct ir2_context *ctx, struct ir2_src *src)
{ {
switch (src->type) { switch (src->type) {
case IR2_SRC_INPUT: case IR2_SRC_INPUT:
return &ctx->input[src->num]; return &ctx->input[src->num];
case IR2_SRC_SSA: case IR2_SRC_SSA:
return &ctx->instr[src->num].ssa; return &ctx->instr[src->num].ssa;
case IR2_SRC_REG: case IR2_SRC_REG:
return &ctx->reg[src->num]; return &ctx->reg[src->num];
default: default:
return NULL; return NULL;
} }
} }
/* gets a ncomp value for the dst */ /* gets a ncomp value for the dst */
static inline unsigned dst_ncomp(struct ir2_instr *instr) static inline unsigned
dst_ncomp(struct ir2_instr *instr)
{ {
if (instr->is_ssa) if (instr->is_ssa)
return instr->ssa.ncomp; return instr->ssa.ncomp;
if (instr->type == IR2_FETCH) if (instr->type == IR2_FETCH)
return instr->reg->ncomp; return instr->reg->ncomp;
assert(instr->type == IR2_ALU); assert(instr->type == IR2_ALU);
unsigned ncomp = 0; unsigned ncomp = 0;
for (int i = 0; i < instr->reg->ncomp; i++) for (int i = 0; i < instr->reg->ncomp; i++)
ncomp += !!(instr->alu.write_mask & 1 << i); ncomp += !!(instr->alu.write_mask & 1 << i);
return ncomp; return ncomp;
} }
/* gets a ncomp value for the src registers */ /* gets a ncomp value for the src registers */
static inline unsigned src_ncomp(struct ir2_instr *instr) static inline unsigned
src_ncomp(struct ir2_instr *instr)
{ {
if (instr->type == IR2_FETCH) { if (instr->type == IR2_FETCH) {
switch (instr->fetch.opc) { switch (instr->fetch.opc) {
case VTX_FETCH: case VTX_FETCH:
return 1; return 1;
case TEX_FETCH: case TEX_FETCH:
return instr->fetch.tex.is_cube ? 3 : 2; return instr->fetch.tex.is_cube ? 3 : 2;
case TEX_SET_TEX_LOD: case TEX_SET_TEX_LOD:
return 1; return 1;
default: default:
assert(0); assert(0);
} }
} }
switch (instr->alu.scalar_opc) { switch (instr->alu.scalar_opc) {
case PRED_SETEs ... KILLONEs: case PRED_SETEs ... KILLONEs:
return 1; return 1;
default: default:
break; break;
} }
switch (instr->alu.vector_opc) { switch (instr->alu.vector_opc) {
case DOT2ADDv: case DOT2ADDv:
return 2; return 2;
case DOT3v: case DOT3v:
return 3; return 3;
case DOT4v: case DOT4v:
case CUBEv: case CUBEv:
case PRED_SETE_PUSHv: case PRED_SETE_PUSHv:
return 4; return 4;
default: default:
return dst_ncomp(instr); return dst_ncomp(instr);
} }
} }

View file

@ -27,201 +27,217 @@
#include "ir2_private.h" #include "ir2_private.h"
/* if an instruction has side effects, we should never kill it */ /* if an instruction has side effects, we should never kill it */
static bool has_side_effects(struct ir2_instr *instr) static bool
has_side_effects(struct ir2_instr *instr)
{ {
if (instr->type == IR2_CF) if (instr->type == IR2_CF)
return true; return true;
else if (instr->type == IR2_FETCH) else if (instr->type == IR2_FETCH)
return false; return false;
switch (instr->alu.scalar_opc) { switch (instr->alu.scalar_opc) {
case PRED_SETEs ... KILLONEs: case PRED_SETEs ... KILLONEs:
return true; return true;
default: default:
break; break;
} }
switch (instr->alu.vector_opc) { switch (instr->alu.vector_opc) {
case PRED_SETE_PUSHv ... KILLNEv: case PRED_SETE_PUSHv ... KILLNEv:
return true; return true;
default: default:
break; break;
} }
return instr->alu.export >= 0; return instr->alu.export >= 0;
} }
/* mark an instruction as required, and all its sources recursively */ /* mark an instruction as required, and all its sources recursively */
static void set_need_emit(struct ir2_context *ctx, struct ir2_instr *instr) static void
set_need_emit(struct ir2_context *ctx, struct ir2_instr *instr)
{ {
struct ir2_reg *reg; struct ir2_reg *reg;
/* don't repeat work already done */ /* don't repeat work already done */
if (instr->need_emit) if (instr->need_emit)
return; return;
instr->need_emit = true; instr->need_emit = true;
ir2_foreach_src(src, instr) { ir2_foreach_src(src, instr)
switch (src->type) { {
case IR2_SRC_SSA: switch (src->type) {
set_need_emit(ctx, &ctx->instr[src->num]); case IR2_SRC_SSA:
break; set_need_emit(ctx, &ctx->instr[src->num]);
case IR2_SRC_REG: break;
/* slow .. */ case IR2_SRC_REG:
reg = get_reg_src(ctx, src); /* slow .. */
ir2_foreach_instr(instr, ctx) { reg = get_reg_src(ctx, src);
if (!instr->is_ssa && instr->reg == reg) ir2_foreach_instr(instr, ctx)
set_need_emit(ctx, instr); {
} if (!instr->is_ssa && instr->reg == reg)
break; set_need_emit(ctx, instr);
default: }
break; break;
} default:
} break;
}
}
} }
/* get current bit mask of allocated components for a register */ /* get current bit mask of allocated components for a register */
static unsigned reg_mask(struct ir2_context *ctx, unsigned idx) static unsigned
reg_mask(struct ir2_context *ctx, unsigned idx)
{ {
return ctx->reg_state[idx/8] >> idx%8*4 & 0xf; return ctx->reg_state[idx / 8] >> idx % 8 * 4 & 0xf;
} }
static void reg_setmask(struct ir2_context *ctx, unsigned idx, unsigned c) static void
reg_setmask(struct ir2_context *ctx, unsigned idx, unsigned c)
{ {
idx = idx * 4 + c; idx = idx * 4 + c;
ctx->reg_state[idx/32] |= 1 << idx%32; ctx->reg_state[idx / 32] |= 1 << idx % 32;
} }
static void reg_freemask(struct ir2_context *ctx, unsigned idx, unsigned c) static void
reg_freemask(struct ir2_context *ctx, unsigned idx, unsigned c)
{ {
idx = idx * 4 + c; idx = idx * 4 + c;
ctx->reg_state[idx/32] &= ~(1 << idx%32); ctx->reg_state[idx / 32] &= ~(1 << idx % 32);
} }
void ra_count_refs(struct ir2_context *ctx) void
ra_count_refs(struct ir2_context *ctx)
{ {
struct ir2_reg *reg; struct ir2_reg *reg;
/* mark instructions as needed /* mark instructions as needed
* need to do this because "substitutions" pass makes many movs not needed * need to do this because "substitutions" pass makes many movs not needed
*/ */
ir2_foreach_instr(instr, ctx) { ir2_foreach_instr(instr, ctx)
if (has_side_effects(instr)) {
set_need_emit(ctx, instr); if (has_side_effects(instr))
} set_need_emit(ctx, instr);
}
/* compute ref_counts */ /* compute ref_counts */
ir2_foreach_instr(instr, ctx) { ir2_foreach_instr(instr, ctx)
/* kill non-needed so they can be skipped */ {
if (!instr->need_emit) { /* kill non-needed so they can be skipped */
instr->type = IR2_NONE; if (!instr->need_emit) {
continue; instr->type = IR2_NONE;
} continue;
}
ir2_foreach_src(src, instr) { ir2_foreach_src(src, instr)
if (src->type == IR2_SRC_CONST) {
continue; if (src->type == IR2_SRC_CONST)
continue;
reg = get_reg_src(ctx, src); reg = get_reg_src(ctx, src);
for (int i = 0; i < src_ncomp(instr); i++) for (int i = 0; i < src_ncomp(instr); i++)
reg->comp[swiz_get(src->swizzle, i)].ref_count++; reg->comp[swiz_get(src->swizzle, i)].ref_count++;
} }
} }
} }
void ra_reg(struct ir2_context *ctx, struct ir2_reg *reg, int force_idx, void
bool export, uint8_t export_writemask) ra_reg(struct ir2_context *ctx, struct ir2_reg *reg, int force_idx, bool export,
uint8_t export_writemask)
{ {
/* for export, don't allocate anything but set component layout */ /* for export, don't allocate anything but set component layout */
if (export) { if (export) {
for (int i = 0; i < 4; i++) for (int i = 0; i < 4; i++)
reg->comp[i].c = i; reg->comp[i].c = i;
return; return;
} }
unsigned idx = force_idx; unsigned idx = force_idx;
/* TODO: allocate into the same register if theres room /* TODO: allocate into the same register if theres room
* note: the blob doesn't do it, so verify that it is indeed better * note: the blob doesn't do it, so verify that it is indeed better
* also, doing it would conflict with scalar mov insertion * also, doing it would conflict with scalar mov insertion
*/ */
/* check if already allocated */ /* check if already allocated */
for (int i = 0; i < reg->ncomp; i++) { for (int i = 0; i < reg->ncomp; i++) {
if (reg->comp[i].alloc) if (reg->comp[i].alloc)
return; return;
} }
if (force_idx < 0) { if (force_idx < 0) {
for (idx = 0; idx < 64; idx++) { for (idx = 0; idx < 64; idx++) {
if (reg_mask(ctx, idx) == 0) if (reg_mask(ctx, idx) == 0)
break; break;
} }
} }
assert(idx != 64); /* TODO ran out of register space.. */ assert(idx != 64); /* TODO ran out of register space.. */
/* update max_reg value */ /* update max_reg value */
ctx->info->max_reg = MAX2(ctx->info->max_reg, (int) idx); ctx->info->max_reg = MAX2(ctx->info->max_reg, (int)idx);
unsigned mask = reg_mask(ctx, idx); unsigned mask = reg_mask(ctx, idx);
for (int i = 0; i < reg->ncomp; i++) { for (int i = 0; i < reg->ncomp; i++) {
/* don't allocate never used values */ /* don't allocate never used values */
if (reg->comp[i].ref_count == 0) { if (reg->comp[i].ref_count == 0) {
reg->comp[i].c = 7; reg->comp[i].c = 7;
continue; continue;
} }
/* TODO */ /* TODO */
unsigned c = 1 ? i : (ffs(~mask) - 1); unsigned c = 1 ? i : (ffs(~mask) - 1);
mask |= 1 << c; mask |= 1 << c;
reg->comp[i].c = c; reg->comp[i].c = c;
reg_setmask(ctx, idx, c); reg_setmask(ctx, idx, c);
reg->comp[i].alloc = true; reg->comp[i].alloc = true;
} }
reg->idx = idx; reg->idx = idx;
ctx->live_regs[reg->idx] = reg; ctx->live_regs[reg->idx] = reg;
} }
/* reduce srcs ref_count and free if needed */ /* reduce srcs ref_count and free if needed */
void ra_src_free(struct ir2_context *ctx, struct ir2_instr *instr) void
ra_src_free(struct ir2_context *ctx, struct ir2_instr *instr)
{ {
struct ir2_reg *reg; struct ir2_reg *reg;
struct ir2_reg_component *comp; struct ir2_reg_component *comp;
ir2_foreach_src(src, instr) { ir2_foreach_src(src, instr)
if (src->type == IR2_SRC_CONST) {
continue; if (src->type == IR2_SRC_CONST)
continue;
reg = get_reg_src(ctx, src); reg = get_reg_src(ctx, src);
/* XXX use before write case */ /* XXX use before write case */
for (int i = 0; i < src_ncomp(instr); i++) { for (int i = 0; i < src_ncomp(instr); i++) {
comp = &reg->comp[swiz_get(src->swizzle, i)]; comp = &reg->comp[swiz_get(src->swizzle, i)];
if (!--comp->ref_count && reg->block_idx_free < 0) { if (!--comp->ref_count && reg->block_idx_free < 0) {
reg_freemask(ctx, reg->idx, comp->c); reg_freemask(ctx, reg->idx, comp->c);
comp->alloc = false; comp->alloc = false;
} }
} }
} }
} }
/* free any regs left for a block */ /* free any regs left for a block */
void ra_block_free(struct ir2_context *ctx, unsigned block) void
ra_block_free(struct ir2_context *ctx, unsigned block)
{ {
ir2_foreach_live_reg(reg, ctx) { ir2_foreach_live_reg(reg, ctx)
if (reg->block_idx_free != block) {
continue; if (reg->block_idx_free != block)
continue;
for (int i = 0; i < reg->ncomp; i++) { for (int i = 0; i < reg->ncomp; i++) {
if (!reg->comp[i].alloc) /* XXX should never be true? */ if (!reg->comp[i].alloc) /* XXX should never be true? */
continue; continue;
reg_freemask(ctx, reg->idx, reg->comp[i].c); reg_freemask(ctx, reg->idx, reg->comp[i].c);
reg->comp[i].alloc = false; reg->comp[i].alloc = false;
} }
ctx->live_regs[reg->idx] = NULL; ctx->live_regs[reg->idx] = NULL;
} }
} }

View file

@ -27,88 +27,92 @@
#include "pipe/p_state.h" #include "pipe/p_state.h"
#include "util/u_blend.h" #include "util/u_blend.h"
#include "util/u_dual_blend.h" #include "util/u_dual_blend.h"
#include "util/u_string.h"
#include "util/u_memory.h" #include "util/u_memory.h"
#include "util/u_string.h"
#include "fd3_blend.h" #include "fd3_blend.h"
#include "fd3_context.h" #include "fd3_context.h"
#include "fd3_format.h" #include "fd3_format.h"
static enum a3xx_rb_blend_opcode static enum a3xx_rb_blend_opcode
blend_func(unsigned func) blend_func(unsigned func)
{ {
switch (func) { switch (func) {
case PIPE_BLEND_ADD: case PIPE_BLEND_ADD:
return BLEND_DST_PLUS_SRC; return BLEND_DST_PLUS_SRC;
case PIPE_BLEND_MIN: case PIPE_BLEND_MIN:
return BLEND_MIN_DST_SRC; return BLEND_MIN_DST_SRC;
case PIPE_BLEND_MAX: case PIPE_BLEND_MAX:
return BLEND_MAX_DST_SRC; return BLEND_MAX_DST_SRC;
case PIPE_BLEND_SUBTRACT: case PIPE_BLEND_SUBTRACT:
return BLEND_SRC_MINUS_DST; return BLEND_SRC_MINUS_DST;
case PIPE_BLEND_REVERSE_SUBTRACT: case PIPE_BLEND_REVERSE_SUBTRACT:
return BLEND_DST_MINUS_SRC; return BLEND_DST_MINUS_SRC;
default: default:
DBG("invalid blend func: %x", func); DBG("invalid blend func: %x", func);
return 0; return 0;
} }
} }
void * void *
fd3_blend_state_create(struct pipe_context *pctx, fd3_blend_state_create(struct pipe_context *pctx,
const struct pipe_blend_state *cso) const struct pipe_blend_state *cso)
{ {
struct fd3_blend_stateobj *so; struct fd3_blend_stateobj *so;
enum a3xx_rop_code rop = ROP_COPY; enum a3xx_rop_code rop = ROP_COPY;
bool reads_dest = false; bool reads_dest = false;
int i; int i;
if (cso->logicop_enable) { if (cso->logicop_enable) {
rop = cso->logicop_func; /* maps 1:1 */ rop = cso->logicop_func; /* maps 1:1 */
reads_dest = util_logicop_reads_dest(cso->logicop_func); reads_dest = util_logicop_reads_dest(cso->logicop_func);
} }
so = CALLOC_STRUCT(fd3_blend_stateobj); so = CALLOC_STRUCT(fd3_blend_stateobj);
if (!so) if (!so)
return NULL; return NULL;
so->base = *cso; so->base = *cso;
for (i = 0; i < ARRAY_SIZE(so->rb_mrt); i++) { for (i = 0; i < ARRAY_SIZE(so->rb_mrt); i++) {
const struct pipe_rt_blend_state *rt; const struct pipe_rt_blend_state *rt;
if (cso->independent_blend_enable) if (cso->independent_blend_enable)
rt = &cso->rt[i]; rt = &cso->rt[i];
else else
rt = &cso->rt[0]; rt = &cso->rt[0];
so->rb_mrt[i].blend_control = so->rb_mrt[i].blend_control =
A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(rt->rgb_src_factor)) | A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(
A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) | fd_blend_factor(rt->rgb_src_factor)) |
A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(rt->rgb_dst_factor)) | A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) |
A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(fd_blend_factor(rt->alpha_src_factor)) | A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(
A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(blend_func(rt->alpha_func)) | fd_blend_factor(rt->rgb_dst_factor)) |
A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(fd_blend_factor(rt->alpha_dst_factor)); A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(
fd_blend_factor(rt->alpha_src_factor)) |
A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(
blend_func(rt->alpha_func)) |
A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(
fd_blend_factor(rt->alpha_dst_factor));
so->rb_mrt[i].control = so->rb_mrt[i].control =
A3XX_RB_MRT_CONTROL_ROP_CODE(rop) | A3XX_RB_MRT_CONTROL_ROP_CODE(rop) |
A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(rt->colormask); A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(rt->colormask);
if (rt->blend_enable) if (rt->blend_enable)
so->rb_mrt[i].control |= so->rb_mrt[i].control |= A3XX_RB_MRT_CONTROL_READ_DEST_ENABLE |
A3XX_RB_MRT_CONTROL_READ_DEST_ENABLE | A3XX_RB_MRT_CONTROL_BLEND |
A3XX_RB_MRT_CONTROL_BLEND | A3XX_RB_MRT_CONTROL_BLEND2;
A3XX_RB_MRT_CONTROL_BLEND2;
if (reads_dest) if (reads_dest)
so->rb_mrt[i].control |= A3XX_RB_MRT_CONTROL_READ_DEST_ENABLE; so->rb_mrt[i].control |= A3XX_RB_MRT_CONTROL_READ_DEST_ENABLE;
if (cso->dither) if (cso->dither)
so->rb_mrt[i].control |= A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_ALWAYS); so->rb_mrt[i].control |=
} A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_ALWAYS);
}
if (cso->rt[0].blend_enable && util_blend_state_is_dual(cso, 0)) if (cso->rt[0].blend_enable && util_blend_state_is_dual(cso, 0))
so->rb_render_control = A3XX_RB_RENDER_CONTROL_DUAL_COLOR_IN_ENABLE; so->rb_render_control = A3XX_RB_RENDER_CONTROL_DUAL_COLOR_IN_ENABLE;
return so; return so;
} }

View file

@ -27,27 +27,27 @@
#ifndef FD3_BLEND_H_ #ifndef FD3_BLEND_H_
#define FD3_BLEND_H_ #define FD3_BLEND_H_
#include "pipe/p_state.h"
#include "pipe/p_context.h" #include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "freedreno_util.h" #include "freedreno_util.h"
/* a3xx blend state: the gallium CSO plus precomputed register values */
struct fd3_blend_stateobj {
   /* kept as the first member: fd3_blend_stateobj() casts a
    * pipe_blend_state pointer straight to this struct
    */
   struct pipe_blend_state base;

   uint32_t rb_render_control;

   /* per render target register values */
   struct {
      uint32_t blend_control;
      uint32_t control;
   } rb_mrt[A3XX_MAX_RENDER_TARGETS];
};
/* Downcast a gallium blend state pointer to the a3xx state object. */
static inline struct fd3_blend_stateobj *
fd3_blend_stateobj(struct pipe_blend_state *blend)
{
   struct fd3_blend_stateobj *so = (struct fd3_blend_stateobj *)blend;

   return so;
}
/* Create the a3xx blend state object for the given gallium blend CSO. */
void *fd3_blend_state_create(struct pipe_context *pctx,
                             const struct pipe_blend_state *cso);
#endif /* FD3_BLEND_H_ */ #endif /* FD3_BLEND_H_ */

View file

@ -26,8 +26,8 @@
#include "freedreno_query_hw.h" #include "freedreno_query_hw.h"
#include "fd3_context.h"
#include "fd3_blend.h" #include "fd3_blend.h"
#include "fd3_context.h"
#include "fd3_draw.h" #include "fd3_draw.h"
#include "fd3_emit.h" #include "fd3_emit.h"
#include "fd3_gmem.h" #include "fd3_gmem.h"
@ -38,25 +38,24 @@
#include "fd3_zsa.h" #include "fd3_zsa.h"
static void static void
fd3_context_destroy(struct pipe_context *pctx) fd3_context_destroy(struct pipe_context *pctx) in_dt
in_dt
{ {
struct fd3_context *fd3_ctx = fd3_context(fd_context(pctx)); struct fd3_context *fd3_ctx = fd3_context(fd_context(pctx));
u_upload_destroy(fd3_ctx->border_color_uploader); u_upload_destroy(fd3_ctx->border_color_uploader);
pipe_resource_reference(&fd3_ctx->border_color_buf, NULL); pipe_resource_reference(&fd3_ctx->border_color_buf, NULL);
fd_context_destroy(pctx); fd_context_destroy(pctx);
fd_bo_del(fd3_ctx->vs_pvt_mem); fd_bo_del(fd3_ctx->vs_pvt_mem);
fd_bo_del(fd3_ctx->fs_pvt_mem); fd_bo_del(fd3_ctx->fs_pvt_mem);
fd_bo_del(fd3_ctx->vsc_size_mem); fd_bo_del(fd3_ctx->vsc_size_mem);
fd_context_cleanup_common_vbos(&fd3_ctx->base); fd_context_cleanup_common_vbos(&fd3_ctx->base);
fd_hw_query_fini(pctx); fd_hw_query_fini(pctx);
free(fd3_ctx); free(fd3_ctx);
} }
/* clang-format off */ /* clang-format off */
@ -73,55 +72,55 @@ static const uint8_t primtypes[] = {
/* clang-format on */ /* clang-format on */
struct pipe_context * struct pipe_context *
fd3_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) fd3_context_create(struct pipe_screen *pscreen, void *priv,
in_dt unsigned flags) in_dt
{ {
struct fd_screen *screen = fd_screen(pscreen); struct fd_screen *screen = fd_screen(pscreen);
struct fd3_context *fd3_ctx = CALLOC_STRUCT(fd3_context); struct fd3_context *fd3_ctx = CALLOC_STRUCT(fd3_context);
struct pipe_context *pctx; struct pipe_context *pctx;
if (!fd3_ctx) if (!fd3_ctx)
return NULL; return NULL;
pctx = &fd3_ctx->base.base; pctx = &fd3_ctx->base.base;
pctx->screen = pscreen; pctx->screen = pscreen;
fd3_ctx->base.dev = fd_device_ref(screen->dev); fd3_ctx->base.dev = fd_device_ref(screen->dev);
fd3_ctx->base.screen = fd_screen(pscreen); fd3_ctx->base.screen = fd_screen(pscreen);
fd3_ctx->base.last.key = &fd3_ctx->last_key; fd3_ctx->base.last.key = &fd3_ctx->last_key;
pctx->destroy = fd3_context_destroy; pctx->destroy = fd3_context_destroy;
pctx->create_blend_state = fd3_blend_state_create; pctx->create_blend_state = fd3_blend_state_create;
pctx->create_rasterizer_state = fd3_rasterizer_state_create; pctx->create_rasterizer_state = fd3_rasterizer_state_create;
pctx->create_depth_stencil_alpha_state = fd3_zsa_state_create; pctx->create_depth_stencil_alpha_state = fd3_zsa_state_create;
fd3_draw_init(pctx); fd3_draw_init(pctx);
fd3_gmem_init(pctx); fd3_gmem_init(pctx);
fd3_texture_init(pctx); fd3_texture_init(pctx);
fd3_prog_init(pctx); fd3_prog_init(pctx);
fd3_emit_init(pctx); fd3_emit_init(pctx);
pctx = fd_context_init(&fd3_ctx->base, pscreen, primtypes, priv, flags); pctx = fd_context_init(&fd3_ctx->base, pscreen, primtypes, priv, flags);
if (!pctx) if (!pctx)
return NULL; return NULL;
fd_hw_query_init(pctx); fd_hw_query_init(pctx);
fd3_ctx->vs_pvt_mem = fd_bo_new(screen->dev, 0x2000, fd3_ctx->vs_pvt_mem =
DRM_FREEDRENO_GEM_TYPE_KMEM, "vs_pvt"); fd_bo_new(screen->dev, 0x2000, DRM_FREEDRENO_GEM_TYPE_KMEM, "vs_pvt");
fd3_ctx->fs_pvt_mem = fd_bo_new(screen->dev, 0x2000, fd3_ctx->fs_pvt_mem =
DRM_FREEDRENO_GEM_TYPE_KMEM, "fs_pvt"); fd_bo_new(screen->dev, 0x2000, DRM_FREEDRENO_GEM_TYPE_KMEM, "fs_pvt");
fd3_ctx->vsc_size_mem = fd_bo_new(screen->dev, 0x1000, fd3_ctx->vsc_size_mem =
DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_size"); fd_bo_new(screen->dev, 0x1000, DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_size");
fd_context_setup_common_vbos(&fd3_ctx->base); fd_context_setup_common_vbos(&fd3_ctx->base);
fd3_query_context_init(pctx); fd3_query_context_init(pctx);
fd3_ctx->border_color_uploader = u_upload_create(pctx, 4096, 0, fd3_ctx->border_color_uploader =
PIPE_USAGE_STREAM, 0); u_upload_create(pctx, 4096, 0, PIPE_USAGE_STREAM, 0);
return pctx; return pctx;
} }

View file

@ -33,31 +33,30 @@
#include "ir3/ir3_shader.h" #include "ir3/ir3_shader.h"
struct fd3_context { struct fd3_context {
struct fd_context base; struct fd_context base;
struct fd_bo *vs_pvt_mem, *fs_pvt_mem; struct fd_bo *vs_pvt_mem, *fs_pvt_mem;
/* This only needs to be 4 * num_of_pipes bytes (ie. 32 bytes). We /* This only needs to be 4 * num_of_pipes bytes (ie. 32 bytes). We
* could combine it with another allocation. * could combine it with another allocation.
*/ */
struct fd_bo *vsc_size_mem; struct fd_bo *vsc_size_mem;
struct u_upload_mgr *border_color_uploader; struct u_upload_mgr *border_color_uploader;
struct pipe_resource *border_color_buf; struct pipe_resource *border_color_buf;
/* storage for ctx->last.key: */ /* storage for ctx->last.key: */
struct ir3_shader_key last_key; struct ir3_shader_key last_key;
}; };
/* Downcast a generic freedreno context to the a3xx context. */
static inline struct fd3_context *
fd3_context(struct fd_context *ctx)
{
   struct fd3_context *fd3_ctx = (struct fd3_context *)ctx;

   return fd3_ctx;
}
/* Create an a3xx pipe_context for the given screen. */
struct pipe_context *fd3_context_create(struct pipe_screen *pscreen, void *priv,
                                        unsigned flags);
#endif /* FD3_CONTEXT_H_ */ #endif /* FD3_CONTEXT_H_ */

View file

@ -25,142 +25,146 @@
*/ */
#include "pipe/p_state.h" #include "pipe/p_state.h"
#include "util/u_string.h" #include "util/format/u_format.h"
#include "util/u_memory.h" #include "util/u_memory.h"
#include "util/u_prim.h" #include "util/u_prim.h"
#include "util/format/u_format.h" #include "util/u_string.h"
#include "freedreno_state.h"
#include "freedreno_resource.h" #include "freedreno_resource.h"
#include "freedreno_state.h"
#include "fd3_draw.h"
#include "fd3_context.h" #include "fd3_context.h"
#include "fd3_draw.h"
#include "fd3_emit.h" #include "fd3_emit.h"
#include "fd3_program.h"
#include "fd3_format.h" #include "fd3_format.h"
#include "fd3_program.h"
#include "fd3_zsa.h" #include "fd3_zsa.h"
/* Saturating add of a signed offset to an unsigned 32-bit value.
 *
 * Returns a + b clamped to the range [0, UINT32_MAX].
 */
static inline uint32_t
add_sat(uint32_t a, int32_t b)
{
   /* Do the sum in signed 64-bit arithmetic: both operands fit, so it
    * cannot overflow, and a negative result stays negative instead of
    * depending on an unsigned-to-signed conversion.
    */
   const int64_t sum = (int64_t)a + (int64_t)b;

   if (sum > (int64_t)UINT32_MAX)
      return UINT32_MAX;
   if (sum < 0)
      return 0;
   return (uint32_t)sum;
}
static void static void
draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring, draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct fd3_emit *emit, unsigned index_offset) struct fd3_emit *emit, unsigned index_offset) assert_dt
assert_dt
{ {
const struct pipe_draw_info *info = emit->info; const struct pipe_draw_info *info = emit->info;
enum pc_di_primtype primtype = ctx->primtypes[info->mode]; enum pc_di_primtype primtype = ctx->primtypes[info->mode];
fd3_emit_state(ctx, ring, emit); fd3_emit_state(ctx, ring, emit);
if (emit->dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE)) if (emit->dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE))
fd3_emit_vertex_bufs(ring, emit); fd3_emit_vertex_bufs(ring, emit);
OUT_PKT0(ring, REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, 1); OUT_PKT0(ring, REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, 1);
OUT_RING(ring, 0x0000000b); /* PC_VERTEX_REUSE_BLOCK_CNTL */ OUT_RING(ring, 0x0000000b); /* PC_VERTEX_REUSE_BLOCK_CNTL */
OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4); OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
OUT_RING(ring, info->index_bounds_valid ? add_sat(info->min_index, info->index_size ? info->index_bias : 0) : 0); /* VFD_INDEX_MIN */ OUT_RING(ring, info->index_bounds_valid
OUT_RING(ring, info->index_bounds_valid ? add_sat(info->max_index, info->index_size ? info->index_bias : 0) : ~0); /* VFD_INDEX_MAX */ ? add_sat(info->min_index,
OUT_RING(ring, info->start_instance); /* VFD_INSTANCEID_OFFSET */ info->index_size ? info->index_bias : 0)
OUT_RING(ring, info->index_size ? info->index_bias : emit->draw->start); /* VFD_INDEX_OFFSET */ : 0); /* VFD_INDEX_MIN */
OUT_RING(ring, info->index_bounds_valid
? add_sat(info->max_index,
info->index_size ? info->index_bias : 0)
: ~0); /* VFD_INDEX_MAX */
OUT_RING(ring, info->start_instance); /* VFD_INSTANCEID_OFFSET */
OUT_RING(ring, info->index_size ? info->index_bias
: emit->draw->start); /* VFD_INDEX_OFFSET */
OUT_PKT0(ring, REG_A3XX_PC_RESTART_INDEX, 1); OUT_PKT0(ring, REG_A3XX_PC_RESTART_INDEX, 1);
OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */ OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
info->restart_index : 0xffffffff); info->restart_index
: 0xffffffff);
/* points + psize -> spritelist: */ /* points + psize -> spritelist: */
if (ctx->rasterizer->point_size_per_vertex && if (ctx->rasterizer->point_size_per_vertex &&
fd3_emit_get_vp(emit)->writes_psize && fd3_emit_get_vp(emit)->writes_psize && (info->mode == PIPE_PRIM_POINTS))
(info->mode == PIPE_PRIM_POINTS)) primtype = DI_PT_POINTLIST_PSIZE;
primtype = DI_PT_POINTLIST_PSIZE;
fd_draw_emit(ctx->batch, ring, primtype, fd_draw_emit(ctx->batch, ring, primtype,
emit->binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY, emit->binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY, info,
info, emit->draw, index_offset); emit->draw, index_offset);
} }
static bool static bool
fd3_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info, fd3_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
const struct pipe_draw_indirect_info *indirect, const struct pipe_draw_indirect_info *indirect,
const struct pipe_draw_start_count *draw, const struct pipe_draw_start_count *draw,
unsigned index_offset) unsigned index_offset) in_dt
in_dt
{ {
struct fd3_emit emit = { struct fd3_emit emit = {
.debug = &ctx->debug, .debug = &ctx->debug,
.vtx = &ctx->vtx, .vtx = &ctx->vtx,
.info = info, .info = info,
.indirect = indirect, .indirect = indirect,
.draw = draw, .draw = draw,
.key = { .key =
.vs = ctx->prog.vs, {
.fs = ctx->prog.fs, .vs = ctx->prog.vs,
}, .fs = ctx->prog.fs,
.rasterflat = ctx->rasterizer->flatshade, },
.sprite_coord_enable = ctx->rasterizer->sprite_coord_enable, .rasterflat = ctx->rasterizer->flatshade,
.sprite_coord_mode = ctx->rasterizer->sprite_coord_mode, .sprite_coord_enable = ctx->rasterizer->sprite_coord_enable,
}; .sprite_coord_mode = ctx->rasterizer->sprite_coord_mode,
};
if (info->mode != PIPE_PRIM_MAX && if (info->mode != PIPE_PRIM_MAX && !indirect && !info->primitive_restart &&
!indirect && !u_trim_pipe_prim(info->mode, (unsigned *)&draw->count))
!info->primitive_restart && return false;
!u_trim_pipe_prim(info->mode, (unsigned*)&draw->count))
return false;
if (fd3_needs_manual_clipping(ir3_get_shader(ctx->prog.vs), ctx->rasterizer)) if (fd3_needs_manual_clipping(ir3_get_shader(ctx->prog.vs), ctx->rasterizer))
emit.key.key.ucp_enables = ctx->rasterizer->clip_plane_enable; emit.key.key.ucp_enables = ctx->rasterizer->clip_plane_enable;
ir3_fixup_shader_state(&ctx->base, &emit.key.key); ir3_fixup_shader_state(&ctx->base, &emit.key.key);
unsigned dirty = ctx->dirty; unsigned dirty = ctx->dirty;
emit.prog = fd3_program_state(ir3_cache_lookup(ctx->shader_cache, &emit.key, &ctx->debug)); emit.prog = fd3_program_state(
ir3_cache_lookup(ctx->shader_cache, &emit.key, &ctx->debug));
/* bail if compile failed: */ /* bail if compile failed: */
if (!emit.prog) if (!emit.prog)
return false; return false;
const struct ir3_shader_variant *vp = fd3_emit_get_vp(&emit); const struct ir3_shader_variant *vp = fd3_emit_get_vp(&emit);
const struct ir3_shader_variant *fp = fd3_emit_get_fp(&emit); const struct ir3_shader_variant *fp = fd3_emit_get_fp(&emit);
ir3_update_max_tf_vtx(ctx, vp); ir3_update_max_tf_vtx(ctx, vp);
/* do regular pass first: */ /* do regular pass first: */
if (unlikely(ctx->stats_users > 0)) { if (unlikely(ctx->stats_users > 0)) {
ctx->stats.vs_regs += ir3_shader_halfregs(vp); ctx->stats.vs_regs += ir3_shader_halfregs(vp);
ctx->stats.fs_regs += ir3_shader_halfregs(fp); ctx->stats.fs_regs += ir3_shader_halfregs(fp);
} }
emit.binning_pass = false; emit.binning_pass = false;
emit.dirty = dirty; emit.dirty = dirty;
draw_impl(ctx, ctx->batch->draw, &emit, index_offset); draw_impl(ctx, ctx->batch->draw, &emit, index_offset);
/* and now binning pass: */ /* and now binning pass: */
emit.binning_pass = true; emit.binning_pass = true;
emit.dirty = dirty & ~(FD_DIRTY_BLEND); emit.dirty = dirty & ~(FD_DIRTY_BLEND);
emit.vs = NULL; /* we changed key so need to refetch vs */ emit.vs = NULL; /* we changed key so need to refetch vs */
emit.fs = NULL; emit.fs = NULL;
draw_impl(ctx, ctx->batch->binning, &emit, index_offset); draw_impl(ctx, ctx->batch->binning, &emit, index_offset);
fd_context_all_clean(ctx); fd_context_all_clean(ctx);
return true; return true;
} }
void void
fd3_draw_init(struct pipe_context *pctx) fd3_draw_init(struct pipe_context *pctx) disable_thread_safety_analysis
disable_thread_safety_analysis
{ {
struct fd_context *ctx = fd_context(pctx); struct fd_context *ctx = fd_context(pctx);
ctx->draw_vbo = fd3_draw_vbo; ctx->draw_vbo = fd3_draw_vbo;
} }

File diff suppressed because it is too large Load diff

View file

@ -29,69 +29,71 @@
#include "pipe/p_context.h" #include "pipe/p_context.h"
#include "freedreno_batch.h"
#include "freedreno_context.h"
#include "fd3_format.h" #include "fd3_format.h"
#include "fd3_program.h" #include "fd3_program.h"
#include "freedreno_batch.h"
#include "freedreno_context.h"
#include "ir3_cache.h" #include "ir3_cache.h"
#include "ir3_gallium.h" #include "ir3_gallium.h"
struct fd_ringbuffer; struct fd_ringbuffer;
/* NOTE(review): presumably emits texture state for restoring the `bufs`
 * surfaces in psurf from memory into gmem -- confirm against the
 * definition in fd3_emit.c.
 */
void fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring,
                               struct pipe_surface **psurf, int bufs);
/* grouped together emit-state for prog/vertex/state emit: */
struct fd3_emit {
   struct pipe_debug_callback *debug;
   const struct fd_vertex_state *vtx;
   const struct fd3_program_state *prog;
   const struct pipe_draw_info *info;
   const struct pipe_draw_indirect_info *indirect;
   const struct pipe_draw_start_count *draw;

   /* selects which variants fd3_emit_get_vp()/fd3_emit_get_fp() return */
   bool binning_pass;

   struct ir3_cache_key key;
   enum fd_dirty_3d_state dirty;

   uint32_t sprite_coord_enable;
   bool sprite_coord_mode;
   bool rasterflat;
   bool skip_consts;

   /* cached to avoid repeated lookups of same variants: */
   const struct ir3_shader_variant *vs;
   const struct ir3_shader_variant *fs;
};
static inline const struct ir3_shader_variant * static inline const struct ir3_shader_variant *
fd3_emit_get_vp(struct fd3_emit *emit) fd3_emit_get_vp(struct fd3_emit *emit)
{ {
if (!emit->vs) { if (!emit->vs) {
emit->vs = emit->binning_pass ? emit->prog->bs : emit->prog->vs; emit->vs = emit->binning_pass ? emit->prog->bs : emit->prog->vs;
} }
return emit->vs; return emit->vs;
} }
static inline const struct ir3_shader_variant * static inline const struct ir3_shader_variant *
fd3_emit_get_fp(struct fd3_emit *emit) fd3_emit_get_fp(struct fd3_emit *emit)
{ {
if (!emit->fs) { if (!emit->fs) {
if (emit->binning_pass) { if (emit->binning_pass) {
/* use dummy stateobj to simplify binning vs non-binning: */ /* use dummy stateobj to simplify binning vs non-binning: */
static const struct ir3_shader_variant binning_fs = {}; static const struct ir3_shader_variant binning_fs = {};
emit->fs = &binning_fs; emit->fs = &binning_fs;
} else { } else {
emit->fs = emit->prog->fs; emit->fs = emit->prog->fs;
} }
} }
return emit->fs; return emit->fs;
} }
/* emit entry points; the assert_dt ones are annotated for the thread
 * safety analysis
 */
void fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
                          struct fd3_emit *emit) assert_dt;

void fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
                    struct fd3_emit *emit) assert_dt;

void fd3_emit_restore(struct fd_batch *batch,
                      struct fd_ringbuffer *ring) assert_dt;

/* one-time setup of the emit hooks on the screen and on each context */
void fd3_emit_init_screen(struct pipe_screen *pscreen);
void fd3_emit_init(struct pipe_context *pctx);
@ -99,19 +101,19 @@ void fd3_emit_init(struct pipe_context *pctx);
/* Emit an indirect-buffer reference to `target` into `ring`. */
static inline void
fd3_emit_ib(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
{
   __OUT_IB(ring, true, target);
}
static inline void static inline void
fd3_emit_cache_flush(struct fd_batch *batch, struct fd_ringbuffer *ring) fd3_emit_cache_flush(struct fd_batch *batch,
assert_dt struct fd_ringbuffer *ring) assert_dt
{ {
fd_wfi(batch, ring); fd_wfi(batch, ring);
OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_INVALIDATE0_REG, 2); OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(0)); OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(0));
OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(0) | OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(0) |
A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(INVALIDATE) | A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(INVALIDATE) |
A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE); A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE);
} }
#endif /* FD3_EMIT_H */ #endif /* FD3_EMIT_H */

View file

@ -32,42 +32,36 @@
*/ */
/* one entry of the pipe_format -> a3xx hardware format table */
struct fd3_format {
   enum a3xx_vtx_fmt vtx;     /* vertex fetch format */
   enum a3xx_tex_fmt tex;     /* texture format */
   enum a3xx_color_fmt rb;    /* render target format */
   enum a3xx_color_swap swap; /* component swap */
   boolean present;           /* set by the VT/_T/V_ table macros */
};
/* vertex + texture: table entry usable for both fetch paths */
#define VT(pipe, fmt, rbfmt, swapfmt)                                          \
   [PIPE_FORMAT_##pipe] = {                                                    \
      .present = 1,                                                            \
      .vtx = VFMT_##fmt,                                                       \
      .tex = TFMT_##fmt,                                                       \
      .rb = RB_##rbfmt,                                                        \
      .swap = swapfmt,                                                         \
   }
/* texture-only: the vertex format is VFMT_NONE */
#define _T(pipe, fmt, rbfmt, swapfmt)                                          \
   [PIPE_FORMAT_##pipe] = {                                                    \
      .present = 1,                                                            \
      .vtx = VFMT_NONE,                                                        \
      .tex = TFMT_##fmt,                                                       \
      .rb = RB_##rbfmt,                                                        \
      .swap = swapfmt,                                                         \
   }
/* vertex-only: the texture format is TFMT_NONE */
#define V_(pipe, fmt, rbfmt, swapfmt)                                          \
   [PIPE_FORMAT_##pipe] = {                                                    \
      .present = 1,                                                            \
      .vtx = VFMT_##fmt,                                                       \
      .tex = TFMT_NONE,                                                        \
      .rb = RB_##rbfmt,                                                        \
      .swap = swapfmt,                                                         \
   }
/* clang-format off */ /* clang-format off */
static struct fd3_format formats[PIPE_FORMAT_COUNT] = { static struct fd3_format formats[PIPE_FORMAT_COUNT] = {
@ -294,80 +288,90 @@ static struct fd3_format formats[PIPE_FORMAT_COUNT] = {
enum a3xx_vtx_fmt enum a3xx_vtx_fmt
fd3_pipe2vtx(enum pipe_format format) fd3_pipe2vtx(enum pipe_format format)
{ {
if (!formats[format].present) if (!formats[format].present)
return VFMT_NONE; return VFMT_NONE;
return formats[format].vtx; return formats[format].vtx;
} }
enum a3xx_tex_fmt enum a3xx_tex_fmt
fd3_pipe2tex(enum pipe_format format) fd3_pipe2tex(enum pipe_format format)
{ {
if (!formats[format].present) if (!formats[format].present)
return TFMT_NONE; return TFMT_NONE;
return formats[format].tex; return formats[format].tex;
} }
enum a3xx_color_fmt enum a3xx_color_fmt
fd3_pipe2color(enum pipe_format format) fd3_pipe2color(enum pipe_format format)
{ {
if (!formats[format].present) if (!formats[format].present)
return RB_NONE; return RB_NONE;
return formats[format].rb; return formats[format].rb;
} }
enum a3xx_color_swap enum a3xx_color_swap
fd3_pipe2swap(enum pipe_format format) fd3_pipe2swap(enum pipe_format format)
{ {
if (!formats[format].present) if (!formats[format].present)
return WZYX; return WZYX;
return formats[format].swap; return formats[format].swap;
} }
enum a3xx_color_fmt enum a3xx_color_fmt
fd3_fs_output_format(enum pipe_format format) fd3_fs_output_format(enum pipe_format format)
{ {
if (util_format_is_srgb(format)) if (util_format_is_srgb(format))
return RB_R16G16B16A16_FLOAT; return RB_R16G16B16A16_FLOAT;
switch (format) { switch (format) {
case PIPE_FORMAT_R16_FLOAT: case PIPE_FORMAT_R16_FLOAT:
case PIPE_FORMAT_R16G16_FLOAT: case PIPE_FORMAT_R16G16_FLOAT:
case PIPE_FORMAT_R11G11B10_FLOAT: case PIPE_FORMAT_R11G11B10_FLOAT:
return RB_R16G16B16A16_FLOAT; return RB_R16G16B16A16_FLOAT;
case PIPE_FORMAT_L8_UNORM: case PIPE_FORMAT_L8_UNORM:
return RB_R8G8B8A8_UNORM; return RB_R8G8B8A8_UNORM;
default: default:
return fd3_pipe2color(format); return fd3_pipe2color(format);
} }
} }
static inline enum a3xx_tex_swiz static inline enum a3xx_tex_swiz
tex_swiz(unsigned swiz) tex_swiz(unsigned swiz)
{ {
switch (swiz) { switch (swiz) {
default: default:
case PIPE_SWIZZLE_X: return A3XX_TEX_X; case PIPE_SWIZZLE_X:
case PIPE_SWIZZLE_Y: return A3XX_TEX_Y; return A3XX_TEX_X;
case PIPE_SWIZZLE_Z: return A3XX_TEX_Z; case PIPE_SWIZZLE_Y:
case PIPE_SWIZZLE_W: return A3XX_TEX_W; return A3XX_TEX_Y;
case PIPE_SWIZZLE_0: return A3XX_TEX_ZERO; case PIPE_SWIZZLE_Z:
case PIPE_SWIZZLE_1: return A3XX_TEX_ONE; return A3XX_TEX_Z;
} case PIPE_SWIZZLE_W:
return A3XX_TEX_W;
case PIPE_SWIZZLE_0:
return A3XX_TEX_ZERO;
case PIPE_SWIZZLE_1:
return A3XX_TEX_ONE;
}
} }
uint32_t uint32_t
fd3_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g, fd3_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g,
unsigned swizzle_b, unsigned swizzle_a) unsigned swizzle_b, unsigned swizzle_a)
{ {
const struct util_format_description *desc = const struct util_format_description *desc = util_format_description(format);
util_format_description(format); unsigned char swiz[4] =
unsigned char swiz[4] = { {
swizzle_r, swizzle_g, swizzle_b, swizzle_a, swizzle_r,
}, rswiz[4]; swizzle_g,
swizzle_b,
swizzle_a,
},
rswiz[4];
util_format_compose_swizzles(desc->swizzle, swiz, rswiz); util_format_compose_swizzles(desc->swizzle, swiz, rswiz);
return A3XX_TEX_CONST_0_SWIZ_X(tex_swiz(rswiz[0])) | return A3XX_TEX_CONST_0_SWIZ_X(tex_swiz(rswiz[0])) |
A3XX_TEX_CONST_0_SWIZ_Y(tex_swiz(rswiz[1])) | A3XX_TEX_CONST_0_SWIZ_Y(tex_swiz(rswiz[1])) |
A3XX_TEX_CONST_0_SWIZ_Z(tex_swiz(rswiz[2])) | A3XX_TEX_CONST_0_SWIZ_Z(tex_swiz(rswiz[2])) |
A3XX_TEX_CONST_0_SWIZ_W(tex_swiz(rswiz[3])); A3XX_TEX_CONST_0_SWIZ_W(tex_swiz(rswiz[3]));
} }

View file

@ -37,6 +37,7 @@ enum a3xx_color_fmt fd3_fs_output_format(enum pipe_format format);
enum a3xx_color_swap fd3_pipe2swap(enum pipe_format format); enum a3xx_color_swap fd3_pipe2swap(enum pipe_format format);
uint32_t fd3_tex_swiz(enum pipe_format format, unsigned swizzle_r, uint32_t fd3_tex_swiz(enum pipe_format format, unsigned swizzle_r,
unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a); unsigned swizzle_g, unsigned swizzle_b,
unsigned swizzle_a);
#endif /* FD3_FORMAT_H_ */ #endif /* FD3_FORMAT_H_ */

File diff suppressed because it is too large Load diff

View file

@ -25,465 +25,467 @@
*/ */
#include "pipe/p_state.h" #include "pipe/p_state.h"
#include "util/u_string.h" #include "util/format/u_format.h"
#include "util/u_inlines.h"
#include "util/u_math.h" #include "util/u_math.h"
#include "util/u_memory.h" #include "util/u_memory.h"
#include "util/u_inlines.h" #include "util/u_string.h"
#include "util/format/u_format.h"
#include "freedreno_program.h" #include "freedreno_program.h"
#include "fd3_program.h"
#include "fd3_emit.h" #include "fd3_emit.h"
#include "fd3_texture.h"
#include "fd3_format.h" #include "fd3_format.h"
#include "fd3_program.h"
#include "fd3_texture.h"
bool bool
fd3_needs_manual_clipping(const struct ir3_shader *shader, fd3_needs_manual_clipping(const struct ir3_shader *shader,
const struct pipe_rasterizer_state *rast) const struct pipe_rasterizer_state *rast)
{ {
uint64_t outputs = ir3_shader_outputs(shader); uint64_t outputs = ir3_shader_outputs(shader);
return (!rast->depth_clip_near || return (!rast->depth_clip_near ||
util_bitcount(rast->clip_plane_enable) > 6 || util_bitcount(rast->clip_plane_enable) > 6 ||
outputs & ((1ULL << VARYING_SLOT_CLIP_VERTEX) | outputs & ((1ULL << VARYING_SLOT_CLIP_VERTEX) |
(1ULL << VARYING_SLOT_CLIP_DIST0) | (1ULL << VARYING_SLOT_CLIP_DIST0) |
(1ULL << VARYING_SLOT_CLIP_DIST1))); (1ULL << VARYING_SLOT_CLIP_DIST1)));
} }
static void static void
emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so) emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so)
{ {
const struct ir3_info *si = &so->info; const struct ir3_info *si = &so->info;
enum adreno_state_block sb; enum adreno_state_block sb;
enum adreno_state_src src; enum adreno_state_src src;
uint32_t i, sz, *bin; uint32_t i, sz, *bin;
if (so->type == MESA_SHADER_VERTEX) { if (so->type == MESA_SHADER_VERTEX) {
sb = SB_VERT_SHADER; sb = SB_VERT_SHADER;
} else { } else {
sb = SB_FRAG_SHADER; sb = SB_FRAG_SHADER;
} }
if (FD_DBG(DIRECT)) { if (FD_DBG(DIRECT)) {
sz = si->sizedwords; sz = si->sizedwords;
src = SS_DIRECT; src = SS_DIRECT;
bin = fd_bo_map(so->bo); bin = fd_bo_map(so->bo);
} else { } else {
sz = 0; sz = 0;
src = SS_INDIRECT; src = SS_INDIRECT;
bin = NULL; bin = NULL;
} }
OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz); OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) | OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) | CP_LOAD_STATE_0_STATE_SRC(src) |
CP_LOAD_STATE_0_STATE_SRC(src) | CP_LOAD_STATE_0_STATE_BLOCK(sb) |
CP_LOAD_STATE_0_STATE_BLOCK(sb) | CP_LOAD_STATE_0_NUM_UNIT(so->instrlen));
CP_LOAD_STATE_0_NUM_UNIT(so->instrlen)); if (bin) {
if (bin) { OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER));
CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER)); } else {
} else { OUT_RELOC(ring, so->bo, 0, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER), 0);
OUT_RELOC(ring, so->bo, 0, }
CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER), 0); for (i = 0; i < sz; i++) {
} OUT_RING(ring, bin[i]);
for (i = 0; i < sz; i++) { }
OUT_RING(ring, bin[i]);
}
} }
void void
fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, int nr,
int nr, struct pipe_surface **bufs) struct pipe_surface **bufs)
{ {
const struct ir3_shader_variant *vp, *fp; const struct ir3_shader_variant *vp, *fp;
const struct ir3_info *vsi, *fsi; const struct ir3_info *vsi, *fsi;
enum a3xx_instrbuffermode fpbuffer, vpbuffer; enum a3xx_instrbuffermode fpbuffer, vpbuffer;
uint32_t fpbuffersz, vpbuffersz, fsoff; uint32_t fpbuffersz, vpbuffersz, fsoff;
uint32_t pos_regid, posz_regid, psize_regid; uint32_t pos_regid, posz_regid, psize_regid;
uint32_t ij_regid[4], face_regid, coord_regid, zwcoord_regid; uint32_t ij_regid[4], face_regid, coord_regid, zwcoord_regid;
uint32_t color_regid[4] = {0}; uint32_t color_regid[4] = {0};
int constmode; int constmode;
int i, j; int i, j;
debug_assert(nr <= ARRAY_SIZE(color_regid)); debug_assert(nr <= ARRAY_SIZE(color_regid));
vp = fd3_emit_get_vp(emit); vp = fd3_emit_get_vp(emit);
fp = fd3_emit_get_fp(emit); fp = fd3_emit_get_fp(emit);
vsi = &vp->info; vsi = &vp->info;
fsi = &fp->info; fsi = &fp->info;
fpbuffer = BUFFER; fpbuffer = BUFFER;
vpbuffer = BUFFER; vpbuffer = BUFFER;
fpbuffersz = fp->instrlen; fpbuffersz = fp->instrlen;
vpbuffersz = vp->instrlen; vpbuffersz = vp->instrlen;
/* /*
* Decide whether to use BUFFER or CACHE mode for VS and FS. It * Decide whether to use BUFFER or CACHE mode for VS and FS. It
* appears like 256 is the hard limit, but when the combined size * appears like 256 is the hard limit, but when the combined size
* exceeds 128 then blob will try to keep FS in BUFFER mode and * exceeds 128 then blob will try to keep FS in BUFFER mode and
* switch to CACHE for VS until VS is too large. The blob seems * switch to CACHE for VS until VS is too large. The blob seems
* to switch FS out of BUFFER mode at slightly under 128. But * to switch FS out of BUFFER mode at slightly under 128. But
* a bit fuzzy on the decision tree, so use slightly conservative * a bit fuzzy on the decision tree, so use slightly conservative
* limits. * limits.
* *
* TODO check if these thresholds for BUFFER vs CACHE mode are the * TODO check if these thresholds for BUFFER vs CACHE mode are the
* same for all a3xx or whether we need to consider the gpuid * same for all a3xx or whether we need to consider the gpuid
*/ */
if ((fpbuffersz + vpbuffersz) > 128) { if ((fpbuffersz + vpbuffersz) > 128) {
if (fpbuffersz < 112) { if (fpbuffersz < 112) {
/* FP:BUFFER VP:CACHE */ /* FP:BUFFER VP:CACHE */
vpbuffer = CACHE; vpbuffer = CACHE;
vpbuffersz = 256 - fpbuffersz; vpbuffersz = 256 - fpbuffersz;
} else if (vpbuffersz < 112) { } else if (vpbuffersz < 112) {
/* FP:CACHE VP:BUFFER */ /* FP:CACHE VP:BUFFER */
fpbuffer = CACHE; fpbuffer = CACHE;
fpbuffersz = 256 - vpbuffersz; fpbuffersz = 256 - vpbuffersz;
} else { } else {
/* FP:CACHE VP:CACHE */ /* FP:CACHE VP:CACHE */
vpbuffer = fpbuffer = CACHE; vpbuffer = fpbuffer = CACHE;
vpbuffersz = fpbuffersz = 192; vpbuffersz = fpbuffersz = 192;
} }
} }
if (fpbuffer == BUFFER) { if (fpbuffer == BUFFER) {
fsoff = 128 - fpbuffersz; fsoff = 128 - fpbuffersz;
} else { } else {
fsoff = 256 - fpbuffersz; fsoff = 256 - fpbuffersz;
} }
/* seems like vs->constlen + fs->constlen > 256, then CONSTMODE=1 */ /* seems like vs->constlen + fs->constlen > 256, then CONSTMODE=1 */
constmode = ((vp->constlen + fp->constlen) > 256) ? 1 : 0; constmode = ((vp->constlen + fp->constlen) > 256) ? 1 : 0;
pos_regid = ir3_find_output_regid(vp, VARYING_SLOT_POS); pos_regid = ir3_find_output_regid(vp, VARYING_SLOT_POS);
posz_regid = ir3_find_output_regid(fp, FRAG_RESULT_DEPTH); posz_regid = ir3_find_output_regid(fp, FRAG_RESULT_DEPTH);
psize_regid = ir3_find_output_regid(vp, VARYING_SLOT_PSIZ); psize_regid = ir3_find_output_regid(vp, VARYING_SLOT_PSIZ);
if (fp->color0_mrt) { if (fp->color0_mrt) {
color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] = color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
ir3_find_output_regid(fp, FRAG_RESULT_COLOR); ir3_find_output_regid(fp, FRAG_RESULT_COLOR);
} else { } else {
color_regid[0] = ir3_find_output_regid(fp, FRAG_RESULT_DATA0); color_regid[0] = ir3_find_output_regid(fp, FRAG_RESULT_DATA0);
color_regid[1] = ir3_find_output_regid(fp, FRAG_RESULT_DATA1); color_regid[1] = ir3_find_output_regid(fp, FRAG_RESULT_DATA1);
color_regid[2] = ir3_find_output_regid(fp, FRAG_RESULT_DATA2); color_regid[2] = ir3_find_output_regid(fp, FRAG_RESULT_DATA2);
color_regid[3] = ir3_find_output_regid(fp, FRAG_RESULT_DATA3); color_regid[3] = ir3_find_output_regid(fp, FRAG_RESULT_DATA3);
} }
face_regid = ir3_find_sysval_regid(fp, SYSTEM_VALUE_FRONT_FACE); face_regid = ir3_find_sysval_regid(fp, SYSTEM_VALUE_FRONT_FACE);
coord_regid = ir3_find_sysval_regid(fp, SYSTEM_VALUE_FRAG_COORD); coord_regid = ir3_find_sysval_regid(fp, SYSTEM_VALUE_FRAG_COORD);
zwcoord_regid = (coord_regid == regid(63,0)) ? regid(63,0) : (coord_regid + 2); zwcoord_regid =
ij_regid[0] = ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL); (coord_regid == regid(63, 0)) ? regid(63, 0) : (coord_regid + 2);
ij_regid[1] = ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL); ij_regid[0] =
ij_regid[2] = ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID); ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL);
ij_regid[3] = ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID); ij_regid[1] =
ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL);
ij_regid[2] =
ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID);
ij_regid[3] =
ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID);
/* adjust regids for alpha output formats. there is no alpha render /* adjust regids for alpha output formats. there is no alpha render
* format, so it's just treated like red * format, so it's just treated like red
*/ */
for (i = 0; i < nr; i++) for (i = 0; i < nr; i++)
if (util_format_is_alpha(pipe_surface_format(bufs[i]))) if (util_format_is_alpha(pipe_surface_format(bufs[i])))
color_regid[i] += 3; color_regid[i] += 3;
/* we could probably divide this up into things that need to be /* we could probably divide this up into things that need to be
* emitted if frag-prog is dirty vs if vert-prog is dirty.. * emitted if frag-prog is dirty vs if vert-prog is dirty..
*/ */
OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 6); OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 6);
OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) | OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
A3XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE | A3XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE |
A3XX_HLSQ_CONTROL_0_REG_CONSTMODE(constmode) | A3XX_HLSQ_CONTROL_0_REG_CONSTMODE(constmode) |
/* NOTE: I guess SHADERRESTART and CONSTFULLUPDATE maybe /* NOTE: I guess SHADERRESTART and CONSTFULLUPDATE maybe
* flush some caches? I think we only need to set those * flush some caches? I think we only need to set those
* bits if we have updated const or shader.. * bits if we have updated const or shader..
*/ */
A3XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART | A3XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART |
A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE); A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) | OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |
A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE | A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE |
A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID(coord_regid) | A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID(coord_regid) |
A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID(zwcoord_regid)); A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID(zwcoord_regid));
OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31) | OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31) |
A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID(face_regid)); A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID(face_regid));
OUT_RING(ring, OUT_RING(ring,
A3XX_HLSQ_CONTROL_3_REG_IJPERSPCENTERREGID(ij_regid[0]) | A3XX_HLSQ_CONTROL_3_REG_IJPERSPCENTERREGID(ij_regid[0]) |
A3XX_HLSQ_CONTROL_3_REG_IJNONPERSPCENTERREGID(ij_regid[1]) | A3XX_HLSQ_CONTROL_3_REG_IJNONPERSPCENTERREGID(ij_regid[1]) |
A3XX_HLSQ_CONTROL_3_REG_IJPERSPCENTROIDREGID(ij_regid[2]) | A3XX_HLSQ_CONTROL_3_REG_IJPERSPCENTROIDREGID(ij_regid[2]) |
A3XX_HLSQ_CONTROL_3_REG_IJNONPERSPCENTROIDREGID(ij_regid[3])); A3XX_HLSQ_CONTROL_3_REG_IJNONPERSPCENTROIDREGID(ij_regid[3]));
OUT_RING(ring, A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(vp->constlen) | OUT_RING(ring, A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(vp->constlen) |
A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET(0) | A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET(0) |
A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(vpbuffersz)); A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(vpbuffersz));
OUT_RING(ring, A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(fp->constlen) | OUT_RING(ring, A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(fp->constlen) |
A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET(128) | A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET(128) |
A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(fpbuffersz)); A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(fpbuffersz));
OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1); OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1);
OUT_RING(ring, A3XX_SP_SP_CTRL_REG_CONSTMODE(constmode) | OUT_RING(ring, A3XX_SP_SP_CTRL_REG_CONSTMODE(constmode) |
COND(emit->binning_pass, A3XX_SP_SP_CTRL_REG_BINNING) | COND(emit->binning_pass, A3XX_SP_SP_CTRL_REG_BINNING) |
A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) | A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) |
A3XX_SP_SP_CTRL_REG_L0MODE(0)); A3XX_SP_SP_CTRL_REG_L0MODE(0));
OUT_PKT0(ring, REG_A3XX_SP_VS_LENGTH_REG, 1); OUT_PKT0(ring, REG_A3XX_SP_VS_LENGTH_REG, 1);
OUT_RING(ring, A3XX_SP_VS_LENGTH_REG_SHADERLENGTH(vp->instrlen)); OUT_RING(ring, A3XX_SP_VS_LENGTH_REG_SHADERLENGTH(vp->instrlen));
OUT_PKT0(ring, REG_A3XX_SP_VS_CTRL_REG0, 3); OUT_PKT0(ring, REG_A3XX_SP_VS_CTRL_REG0, 3);
OUT_RING(ring, A3XX_SP_VS_CTRL_REG0_THREADMODE(MULTI) | OUT_RING(ring,
A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE(vpbuffer) | A3XX_SP_VS_CTRL_REG0_THREADMODE(MULTI) |
COND(vpbuffer == CACHE, A3XX_SP_VS_CTRL_REG0_CACHEINVALID) | A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE(vpbuffer) |
A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(vsi->max_half_reg + 1) | COND(vpbuffer == CACHE, A3XX_SP_VS_CTRL_REG0_CACHEINVALID) |
A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vsi->max_reg + 1) | A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(vsi->max_half_reg + 1) |
A3XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) | A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vsi->max_reg + 1) |
A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE | A3XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) |
A3XX_SP_VS_CTRL_REG0_LENGTH(vpbuffersz)); A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE |
OUT_RING(ring, A3XX_SP_VS_CTRL_REG1_CONSTLENGTH(vp->constlen) | A3XX_SP_VS_CTRL_REG0_LENGTH(vpbuffersz));
A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(vp->total_in) | OUT_RING(ring,
A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT(MAX2(vp->constlen - 1, 0))); A3XX_SP_VS_CTRL_REG1_CONSTLENGTH(vp->constlen) |
OUT_RING(ring, A3XX_SP_VS_PARAM_REG_POSREGID(pos_regid) | A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(vp->total_in) |
A3XX_SP_VS_PARAM_REG_PSIZEREGID(psize_regid) | A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT(MAX2(vp->constlen - 1, 0)));
A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(fp->varying_in)); OUT_RING(ring, A3XX_SP_VS_PARAM_REG_POSREGID(pos_regid) |
A3XX_SP_VS_PARAM_REG_PSIZEREGID(psize_regid) |
A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(fp->varying_in));
struct ir3_shader_linkage l = {0}; struct ir3_shader_linkage l = {0};
ir3_link_shaders(&l, vp, fp, false); ir3_link_shaders(&l, vp, fp, false);
for (i = 0, j = 0; (i < 16) && (j < l.cnt); i++) { for (i = 0, j = 0; (i < 16) && (j < l.cnt); i++) {
uint32_t reg = 0; uint32_t reg = 0;
OUT_PKT0(ring, REG_A3XX_SP_VS_OUT_REG(i), 1); OUT_PKT0(ring, REG_A3XX_SP_VS_OUT_REG(i), 1);
reg |= A3XX_SP_VS_OUT_REG_A_REGID(l.var[j].regid); reg |= A3XX_SP_VS_OUT_REG_A_REGID(l.var[j].regid);
reg |= A3XX_SP_VS_OUT_REG_A_COMPMASK(l.var[j].compmask); reg |= A3XX_SP_VS_OUT_REG_A_COMPMASK(l.var[j].compmask);
j++; j++;
reg |= A3XX_SP_VS_OUT_REG_B_REGID(l.var[j].regid); reg |= A3XX_SP_VS_OUT_REG_B_REGID(l.var[j].regid);
reg |= A3XX_SP_VS_OUT_REG_B_COMPMASK(l.var[j].compmask); reg |= A3XX_SP_VS_OUT_REG_B_COMPMASK(l.var[j].compmask);
j++; j++;
OUT_RING(ring, reg); OUT_RING(ring, reg);
} }
for (i = 0, j = 0; (i < 8) && (j < l.cnt); i++) { for (i = 0, j = 0; (i < 8) && (j < l.cnt); i++) {
uint32_t reg = 0; uint32_t reg = 0;
OUT_PKT0(ring, REG_A3XX_SP_VS_VPC_DST_REG(i), 1); OUT_PKT0(ring, REG_A3XX_SP_VS_VPC_DST_REG(i), 1);
reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC0(l.var[j++].loc + 8); reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC0(l.var[j++].loc + 8);
reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC1(l.var[j++].loc + 8); reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC1(l.var[j++].loc + 8);
reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC2(l.var[j++].loc + 8); reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC2(l.var[j++].loc + 8);
reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC3(l.var[j++].loc + 8); reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC3(l.var[j++].loc + 8);
OUT_RING(ring, reg); OUT_RING(ring, reg);
} }
OUT_PKT0(ring, REG_A3XX_SP_VS_OBJ_OFFSET_REG, 2); OUT_PKT0(ring, REG_A3XX_SP_VS_OBJ_OFFSET_REG, 2);
OUT_RING(ring, A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(0) | OUT_RING(ring, A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(0) |
A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0)); A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
OUT_RELOC(ring, vp->bo, 0, 0, 0); /* SP_VS_OBJ_START_REG */ OUT_RELOC(ring, vp->bo, 0, 0, 0); /* SP_VS_OBJ_START_REG */
if (emit->binning_pass) { if (emit->binning_pass) {
OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1); OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2); OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2);
OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) | OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(BUFFER)); A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(BUFFER));
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 1); OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 1);
OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(128) | OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(128) |
A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0)); A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
} else { } else {
OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1); OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
OUT_RING(ring, A3XX_SP_FS_LENGTH_REG_SHADERLENGTH(fp->instrlen)); OUT_RING(ring, A3XX_SP_FS_LENGTH_REG_SHADERLENGTH(fp->instrlen));
OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2); OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2);
OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) | OUT_RING(ring,
A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(fpbuffer) | A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
COND(fpbuffer == CACHE, A3XX_SP_FS_CTRL_REG0_CACHEINVALID) | A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(fpbuffer) |
A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(fsi->max_half_reg + 1) | COND(fpbuffer == CACHE, A3XX_SP_FS_CTRL_REG0_CACHEINVALID) |
A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fsi->max_reg + 1) | A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(fsi->max_half_reg + 1) |
A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP | A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fsi->max_reg + 1) |
A3XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) | A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP |
A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE | A3XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) |
COND(fp->need_pixlod, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) | A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE |
A3XX_SP_FS_CTRL_REG0_LENGTH(fpbuffersz)); COND(fp->need_pixlod, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) |
OUT_RING(ring, A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(fp->constlen) | A3XX_SP_FS_CTRL_REG0_LENGTH(fpbuffersz));
A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(fp->sysval_in) | OUT_RING(ring, A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(fp->constlen) |
A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(MAX2(fp->constlen - 1, 0)) | A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(fp->sysval_in) |
A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(63)); A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(
MAX2(fp->constlen - 1, 0)) |
A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(63));
OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 2); OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 2);
OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET( OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(
MAX2(128, vp->constlen)) | MAX2(128, vp->constlen)) |
A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(fsoff)); A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(fsoff));
OUT_RELOC(ring, fp->bo, 0, 0, 0); /* SP_FS_OBJ_START_REG */ OUT_RELOC(ring, fp->bo, 0, 0, 0); /* SP_FS_OBJ_START_REG */
} }
OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1); OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1);
OUT_RING(ring, OUT_RING(ring, COND(fp->writes_pos, A3XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE) |
COND(fp->writes_pos, A3XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE) | A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid) |
A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid) | A3XX_SP_FS_OUTPUT_REG_MRT(MAX2(1, nr) - 1));
A3XX_SP_FS_OUTPUT_REG_MRT(MAX2(1, nr) - 1));
OUT_PKT0(ring, REG_A3XX_SP_FS_MRT_REG(0), 4); OUT_PKT0(ring, REG_A3XX_SP_FS_MRT_REG(0), 4);
for (i = 0; i < 4; i++) { for (i = 0; i < 4; i++) {
uint32_t mrt_reg = A3XX_SP_FS_MRT_REG_REGID(color_regid[i]) | uint32_t mrt_reg =
COND(color_regid[i] & HALF_REG_ID, A3XX_SP_FS_MRT_REG_HALF_PRECISION); A3XX_SP_FS_MRT_REG_REGID(color_regid[i]) |
COND(color_regid[i] & HALF_REG_ID, A3XX_SP_FS_MRT_REG_HALF_PRECISION);
if (i < nr) { if (i < nr) {
enum pipe_format fmt = pipe_surface_format(bufs[i]); enum pipe_format fmt = pipe_surface_format(bufs[i]);
mrt_reg |= COND(util_format_is_pure_uint(fmt), A3XX_SP_FS_MRT_REG_UINT) | mrt_reg |=
COND(util_format_is_pure_sint(fmt), A3XX_SP_FS_MRT_REG_SINT); COND(util_format_is_pure_uint(fmt), A3XX_SP_FS_MRT_REG_UINT) |
} COND(util_format_is_pure_sint(fmt), A3XX_SP_FS_MRT_REG_SINT);
OUT_RING(ring, mrt_reg); }
} OUT_RING(ring, mrt_reg);
}
if (emit->binning_pass) { if (emit->binning_pass) {
OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2); OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2);
OUT_RING(ring, A3XX_VPC_ATTR_THRDASSIGN(1) | OUT_RING(ring, A3XX_VPC_ATTR_THRDASSIGN(1) | A3XX_VPC_ATTR_LMSIZE(1) |
A3XX_VPC_ATTR_LMSIZE(1) | COND(vp->writes_psize, A3XX_VPC_ATTR_PSIZE));
COND(vp->writes_psize, A3XX_VPC_ATTR_PSIZE)); OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000); } else {
} else { uint32_t vinterp[4], flatshade[2], vpsrepl[4];
uint32_t vinterp[4], flatshade[2], vpsrepl[4];
memset(vinterp, 0, sizeof(vinterp)); memset(vinterp, 0, sizeof(vinterp));
memset(flatshade, 0, sizeof(flatshade)); memset(flatshade, 0, sizeof(flatshade));
memset(vpsrepl, 0, sizeof(vpsrepl)); memset(vpsrepl, 0, sizeof(vpsrepl));
/* figure out VARYING_INTERP / FLAT_SHAD register values: */ /* figure out VARYING_INTERP / FLAT_SHAD register values: */
for (j = -1; (j = ir3_next_varying(fp, j)) < (int)fp->inputs_count; ) { for (j = -1; (j = ir3_next_varying(fp, j)) < (int)fp->inputs_count;) {
/* NOTE: varyings are packed, so if compmask is 0xb /* NOTE: varyings are packed, so if compmask is 0xb
* then first, third, and fourth component occupy * then first, third, and fourth component occupy
* three consecutive varying slots: * three consecutive varying slots:
*/ */
unsigned compmask = fp->inputs[j].compmask; unsigned compmask = fp->inputs[j].compmask;
uint32_t inloc = fp->inputs[j].inloc; uint32_t inloc = fp->inputs[j].inloc;
if (fp->inputs[j].flat || if (fp->inputs[j].flat ||
(fp->inputs[j].rasterflat && emit->rasterflat)) { (fp->inputs[j].rasterflat && emit->rasterflat)) {
uint32_t loc = inloc; uint32_t loc = inloc;
for (i = 0; i < 4; i++) { for (i = 0; i < 4; i++) {
if (compmask & (1 << i)) { if (compmask & (1 << i)) {
vinterp[loc / 16] |= FLAT << ((loc % 16) * 2); vinterp[loc / 16] |= FLAT << ((loc % 16) * 2);
flatshade[loc / 32] |= 1 << (loc % 32); flatshade[loc / 32] |= 1 << (loc % 32);
loc++; loc++;
} }
} }
} }
bool coord_mode = emit->sprite_coord_mode; bool coord_mode = emit->sprite_coord_mode;
if (ir3_point_sprite(fp, j, emit->sprite_coord_enable, &coord_mode)) { if (ir3_point_sprite(fp, j, emit->sprite_coord_enable, &coord_mode)) {
/* mask is two 2-bit fields, where: /* mask is two 2-bit fields, where:
* '01' -> S * '01' -> S
* '10' -> T * '10' -> T
* '11' -> 1 - T (flip mode) * '11' -> 1 - T (flip mode)
*/ */
unsigned mask = coord_mode ? 0b1101 : 0b1001; unsigned mask = coord_mode ? 0b1101 : 0b1001;
uint32_t loc = inloc; uint32_t loc = inloc;
if (compmask & 0x1) { if (compmask & 0x1) {
vpsrepl[loc / 16] |= ((mask >> 0) & 0x3) << ((loc % 16) * 2); vpsrepl[loc / 16] |= ((mask >> 0) & 0x3) << ((loc % 16) * 2);
loc++; loc++;
} }
if (compmask & 0x2) { if (compmask & 0x2) {
vpsrepl[loc / 16] |= ((mask >> 2) & 0x3) << ((loc % 16) * 2); vpsrepl[loc / 16] |= ((mask >> 2) & 0x3) << ((loc % 16) * 2);
loc++; loc++;
} }
if (compmask & 0x4) { if (compmask & 0x4) {
/* .z <- 0.0f */ /* .z <- 0.0f */
vinterp[loc / 16] |= 0b10 << ((loc % 16) * 2); vinterp[loc / 16] |= 0b10 << ((loc % 16) * 2);
loc++; loc++;
} }
if (compmask & 0x8) { if (compmask & 0x8) {
/* .w <- 1.0f */ /* .w <- 1.0f */
vinterp[loc / 16] |= 0b11 << ((loc % 16) * 2); vinterp[loc / 16] |= 0b11 << ((loc % 16) * 2);
loc++; loc++;
} }
} }
} }
OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2); OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2);
OUT_RING(ring, A3XX_VPC_ATTR_TOTALATTR(fp->total_in) | OUT_RING(ring, A3XX_VPC_ATTR_TOTALATTR(fp->total_in) |
A3XX_VPC_ATTR_THRDASSIGN(1) | A3XX_VPC_ATTR_THRDASSIGN(1) | A3XX_VPC_ATTR_LMSIZE(1) |
A3XX_VPC_ATTR_LMSIZE(1) | COND(vp->writes_psize, A3XX_VPC_ATTR_PSIZE));
COND(vp->writes_psize, A3XX_VPC_ATTR_PSIZE)); OUT_RING(ring, A3XX_VPC_PACK_NUMFPNONPOSVAR(fp->total_in) |
OUT_RING(ring, A3XX_VPC_PACK_NUMFPNONPOSVAR(fp->total_in) | A3XX_VPC_PACK_NUMNONPOSVSVAR(fp->total_in));
A3XX_VPC_PACK_NUMNONPOSVSVAR(fp->total_in));
OUT_PKT0(ring, REG_A3XX_VPC_VARYING_INTERP_MODE(0), 4); OUT_PKT0(ring, REG_A3XX_VPC_VARYING_INTERP_MODE(0), 4);
OUT_RING(ring, vinterp[0]); /* VPC_VARYING_INTERP[0].MODE */ OUT_RING(ring, vinterp[0]); /* VPC_VARYING_INTERP[0].MODE */
OUT_RING(ring, vinterp[1]); /* VPC_VARYING_INTERP[1].MODE */ OUT_RING(ring, vinterp[1]); /* VPC_VARYING_INTERP[1].MODE */
OUT_RING(ring, vinterp[2]); /* VPC_VARYING_INTERP[2].MODE */ OUT_RING(ring, vinterp[2]); /* VPC_VARYING_INTERP[2].MODE */
OUT_RING(ring, vinterp[3]); /* VPC_VARYING_INTERP[3].MODE */ OUT_RING(ring, vinterp[3]); /* VPC_VARYING_INTERP[3].MODE */
OUT_PKT0(ring, REG_A3XX_VPC_VARYING_PS_REPL_MODE(0), 4); OUT_PKT0(ring, REG_A3XX_VPC_VARYING_PS_REPL_MODE(0), 4);
OUT_RING(ring, vpsrepl[0]); /* VPC_VARYING_PS_REPL[0].MODE */ OUT_RING(ring, vpsrepl[0]); /* VPC_VARYING_PS_REPL[0].MODE */
OUT_RING(ring, vpsrepl[1]); /* VPC_VARYING_PS_REPL[1].MODE */ OUT_RING(ring, vpsrepl[1]); /* VPC_VARYING_PS_REPL[1].MODE */
OUT_RING(ring, vpsrepl[2]); /* VPC_VARYING_PS_REPL[2].MODE */ OUT_RING(ring, vpsrepl[2]); /* VPC_VARYING_PS_REPL[2].MODE */
OUT_RING(ring, vpsrepl[3]); /* VPC_VARYING_PS_REPL[3].MODE */ OUT_RING(ring, vpsrepl[3]); /* VPC_VARYING_PS_REPL[3].MODE */
OUT_PKT0(ring, REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, 2); OUT_PKT0(ring, REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, 2);
OUT_RING(ring, flatshade[0]); /* SP_FS_FLAT_SHAD_MODE_REG_0 */ OUT_RING(ring, flatshade[0]); /* SP_FS_FLAT_SHAD_MODE_REG_0 */
OUT_RING(ring, flatshade[1]); /* SP_FS_FLAT_SHAD_MODE_REG_1 */ OUT_RING(ring, flatshade[1]); /* SP_FS_FLAT_SHAD_MODE_REG_1 */
} }
if (vpbuffer == BUFFER) if (vpbuffer == BUFFER)
emit_shader(ring, vp); emit_shader(ring, vp);
OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1); OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */ OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */
if (!emit->binning_pass) { if (!emit->binning_pass) {
if (fpbuffer == BUFFER) if (fpbuffer == BUFFER)
emit_shader(ring, fp); emit_shader(ring, fp);
OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1); OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */ OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */
} }
} }
static struct ir3_program_state * static struct ir3_program_state *
fd3_program_create(void *data, struct ir3_shader_variant *bs, fd3_program_create(void *data, struct ir3_shader_variant *bs,
struct ir3_shader_variant *vs, struct ir3_shader_variant *vs, struct ir3_shader_variant *hs,
struct ir3_shader_variant *hs, struct ir3_shader_variant *ds, struct ir3_shader_variant *gs,
struct ir3_shader_variant *ds, struct ir3_shader_variant *fs,
struct ir3_shader_variant *gs, const struct ir3_shader_key *key) in_dt
struct ir3_shader_variant *fs,
const struct ir3_shader_key *key)
in_dt
{ {
struct fd_context *ctx = fd_context(data); struct fd_context *ctx = fd_context(data);
struct fd3_program_state *state = CALLOC_STRUCT(fd3_program_state); struct fd3_program_state *state = CALLOC_STRUCT(fd3_program_state);
tc_assert_driver_thread(ctx->tc); tc_assert_driver_thread(ctx->tc);
state->bs = bs; state->bs = bs;
state->vs = vs; state->vs = vs;
state->fs = fs; state->fs = fs;
return &state->base; return &state->base;
} }
static void static void
fd3_program_destroy(void *data, struct ir3_program_state *state) fd3_program_destroy(void *data, struct ir3_program_state *state)
{ {
struct fd3_program_state *so = fd3_program_state(state); struct fd3_program_state *so = fd3_program_state(state);
free(so); free(so);
} }
static const struct ir3_cache_funcs cache_funcs = { static const struct ir3_cache_funcs cache_funcs = {
.create_state = fd3_program_create, .create_state = fd3_program_create,
.destroy_state = fd3_program_destroy, .destroy_state = fd3_program_destroy,
}; };
void void
fd3_prog_init(struct pipe_context *pctx) fd3_prog_init(struct pipe_context *pctx)
{ {
struct fd_context *ctx = fd_context(pctx); struct fd_context *ctx = fd_context(pctx);
ctx->shader_cache = ir3_cache_create(&cache_funcs, ctx); ctx->shader_cache = ir3_cache_create(&cache_funcs, ctx);
ir3_prog_init(pctx); ir3_prog_init(pctx);
fd_prog_init(pctx); fd_prog_init(pctx);
} }

View file

@ -36,24 +36,24 @@
struct fd3_emit; struct fd3_emit;
struct fd3_program_state { struct fd3_program_state {
struct ir3_program_state base; struct ir3_program_state base;
struct ir3_shader_variant *bs; /* VS for when emit->binning */ struct ir3_shader_variant *bs; /* VS for when emit->binning */
struct ir3_shader_variant *vs; struct ir3_shader_variant *vs;
struct ir3_shader_variant *fs; /* FS for when !emit->binning */ struct ir3_shader_variant *fs; /* FS for when !emit->binning */
}; };
static inline struct fd3_program_state * static inline struct fd3_program_state *
fd3_program_state(struct ir3_program_state *state) fd3_program_state(struct ir3_program_state *state)
{ {
return (struct fd3_program_state *)state; return (struct fd3_program_state *)state;
} }
void fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, void fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, int nr,
int nr, struct pipe_surface **bufs); struct pipe_surface **bufs);
void fd3_prog_init(struct pipe_context *pctx); void fd3_prog_init(struct pipe_context *pctx);
bool fd3_needs_manual_clipping(const struct ir3_shader *, bool fd3_needs_manual_clipping(const struct ir3_shader *,
const struct pipe_rasterizer_state *); const struct pipe_rasterizer_state *);
#endif /* FD3_PROGRAM_H_ */ #endif /* FD3_PROGRAM_H_ */

View file

@ -24,17 +24,16 @@
* Rob Clark <robclark@freedesktop.org> * Rob Clark <robclark@freedesktop.org>
*/ */
#include "freedreno_query_hw.h"
#include "freedreno_batch.h" #include "freedreno_batch.h"
#include "freedreno_context.h" #include "freedreno_context.h"
#include "freedreno_query_hw.h"
#include "freedreno_util.h" #include "freedreno_util.h"
#include "fd3_query.h"
#include "fd3_format.h" #include "fd3_format.h"
#include "fd3_query.h"
struct fd_rb_samp_ctrs { struct fd_rb_samp_ctrs {
uint64_t ctr[16]; uint64_t ctr[16];
}; };
/* /*
@ -47,104 +46,103 @@ struct fd_rb_samp_ctrs {
static struct fd_hw_sample * static struct fd_hw_sample *
occlusion_get_sample(struct fd_batch *batch, struct fd_ringbuffer *ring) occlusion_get_sample(struct fd_batch *batch, struct fd_ringbuffer *ring)
{ {
struct fd_hw_sample *samp = struct fd_hw_sample *samp =
fd_hw_sample_init(batch, sizeof(struct fd_rb_samp_ctrs)); fd_hw_sample_init(batch, sizeof(struct fd_rb_samp_ctrs));
/* Set RB_SAMPLE_COUNT_ADDR to samp->offset plus value of /* Set RB_SAMPLE_COUNT_ADDR to samp->offset plus value of
* HW_QUERY_BASE_REG register: * HW_QUERY_BASE_REG register:
*/ */
OUT_PKT3(ring, CP_SET_CONSTANT, 3); OUT_PKT3(ring, CP_SET_CONSTANT, 3);
OUT_RING(ring, CP_REG(REG_A3XX_RB_SAMPLE_COUNT_ADDR) | 0x80000000); OUT_RING(ring, CP_REG(REG_A3XX_RB_SAMPLE_COUNT_ADDR) | 0x80000000);
OUT_RING(ring, HW_QUERY_BASE_REG); OUT_RING(ring, HW_QUERY_BASE_REG);
OUT_RING(ring, samp->offset); OUT_RING(ring, samp->offset);
OUT_PKT0(ring, REG_A3XX_RB_SAMPLE_COUNT_CONTROL, 1); OUT_PKT0(ring, REG_A3XX_RB_SAMPLE_COUNT_CONTROL, 1);
OUT_RING(ring, A3XX_RB_SAMPLE_COUNT_CONTROL_COPY); OUT_RING(ring, A3XX_RB_SAMPLE_COUNT_CONTROL_COPY);
OUT_PKT3(ring, CP_DRAW_INDX, 3); OUT_PKT3(ring, CP_DRAW_INDX, 3);
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_RING(ring, DRAW(DI_PT_POINTLIST_PSIZE, DI_SRC_SEL_AUTO_INDEX, OUT_RING(ring, DRAW(DI_PT_POINTLIST_PSIZE, DI_SRC_SEL_AUTO_INDEX,
INDEX_SIZE_IGN, USE_VISIBILITY, 0)); INDEX_SIZE_IGN, USE_VISIBILITY, 0));
OUT_RING(ring, 0); /* NumIndices */ OUT_RING(ring, 0); /* NumIndices */
fd_event_write(batch, ring, ZPASS_DONE); fd_event_write(batch, ring, ZPASS_DONE);
OUT_PKT0(ring, REG_A3XX_RBBM_PERFCTR_CTL, 1); OUT_PKT0(ring, REG_A3XX_RBBM_PERFCTR_CTL, 1);
OUT_RING(ring, A3XX_RBBM_PERFCTR_CTL_ENABLE); OUT_RING(ring, A3XX_RBBM_PERFCTR_CTL_ENABLE);
OUT_PKT0(ring, REG_A3XX_VBIF_PERF_CNT_EN, 1); OUT_PKT0(ring, REG_A3XX_VBIF_PERF_CNT_EN, 1);
OUT_RING(ring, A3XX_VBIF_PERF_CNT_EN_CNT0 | OUT_RING(ring, A3XX_VBIF_PERF_CNT_EN_CNT0 | A3XX_VBIF_PERF_CNT_EN_CNT1 |
A3XX_VBIF_PERF_CNT_EN_CNT1 | A3XX_VBIF_PERF_CNT_EN_PWRCNT0 |
A3XX_VBIF_PERF_CNT_EN_PWRCNT0 | A3XX_VBIF_PERF_CNT_EN_PWRCNT1 |
A3XX_VBIF_PERF_CNT_EN_PWRCNT1 | A3XX_VBIF_PERF_CNT_EN_PWRCNT2);
A3XX_VBIF_PERF_CNT_EN_PWRCNT2);
return samp; return samp;
} }
static uint64_t static uint64_t
count_samples(const struct fd_rb_samp_ctrs *start, count_samples(const struct fd_rb_samp_ctrs *start,
const struct fd_rb_samp_ctrs *end) const struct fd_rb_samp_ctrs *end)
{ {
uint64_t n = 0; uint64_t n = 0;
unsigned i; unsigned i;
/* not quite sure what all of these are, possibly different /* not quite sure what all of these are, possibly different
* counters for each MRT render target: * counters for each MRT render target:
*/ */
for (i = 0; i < 16; i += 4) for (i = 0; i < 16; i += 4)
n += end->ctr[i] - start->ctr[i]; n += end->ctr[i] - start->ctr[i];
return n; return n;
} }
static void static void
occlusion_counter_accumulate_result(struct fd_context *ctx, occlusion_counter_accumulate_result(struct fd_context *ctx, const void *start,
const void *start, const void *end, const void *end,
union pipe_query_result *result) union pipe_query_result *result)
{ {
uint64_t n = count_samples(start, end); uint64_t n = count_samples(start, end);
result->u64 += n; result->u64 += n;
} }
static void static void
occlusion_predicate_accumulate_result(struct fd_context *ctx, occlusion_predicate_accumulate_result(struct fd_context *ctx, const void *start,
const void *start, const void *end, const void *end,
union pipe_query_result *result) union pipe_query_result *result)
{ {
uint64_t n = count_samples(start, end); uint64_t n = count_samples(start, end);
result->b |= (n > 0); result->b |= (n > 0);
} }
static const struct fd_hw_sample_provider occlusion_counter = { static const struct fd_hw_sample_provider occlusion_counter = {
.query_type = PIPE_QUERY_OCCLUSION_COUNTER, .query_type = PIPE_QUERY_OCCLUSION_COUNTER,
.get_sample = occlusion_get_sample, .get_sample = occlusion_get_sample,
.accumulate_result = occlusion_counter_accumulate_result, .accumulate_result = occlusion_counter_accumulate_result,
}; };
static const struct fd_hw_sample_provider occlusion_predicate = { static const struct fd_hw_sample_provider occlusion_predicate = {
.query_type = PIPE_QUERY_OCCLUSION_PREDICATE, .query_type = PIPE_QUERY_OCCLUSION_PREDICATE,
.get_sample = occlusion_get_sample, .get_sample = occlusion_get_sample,
.accumulate_result = occlusion_predicate_accumulate_result, .accumulate_result = occlusion_predicate_accumulate_result,
}; };
static const struct fd_hw_sample_provider occlusion_predicate_conservative = { static const struct fd_hw_sample_provider occlusion_predicate_conservative = {
.query_type = PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE, .query_type = PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE,
.get_sample = occlusion_get_sample, .get_sample = occlusion_get_sample,
.accumulate_result = occlusion_predicate_accumulate_result, .accumulate_result = occlusion_predicate_accumulate_result,
}; };
void fd3_query_context_init(struct pipe_context *pctx) void
disable_thread_safety_analysis fd3_query_context_init(struct pipe_context *pctx) disable_thread_safety_analysis
{ {
struct fd_context *ctx = fd_context(pctx); struct fd_context *ctx = fd_context(pctx);
ctx->create_query = fd_hw_create_query; ctx->create_query = fd_hw_create_query;
ctx->query_prepare = fd_hw_query_prepare; ctx->query_prepare = fd_hw_query_prepare;
ctx->query_prepare_tile = fd_hw_query_prepare_tile; ctx->query_prepare_tile = fd_hw_query_prepare_tile;
ctx->query_update_batch = fd_hw_query_update_batch; ctx->query_update_batch = fd_hw_query_update_batch;
fd_hw_query_register_provider(pctx, &occlusion_counter); fd_hw_query_register_provider(pctx, &occlusion_counter);
fd_hw_query_register_provider(pctx, &occlusion_predicate); fd_hw_query_register_provider(pctx, &occlusion_predicate);
fd_hw_query_register_provider(pctx, &occlusion_predicate_conservative); fd_hw_query_register_provider(pctx, &occlusion_predicate_conservative);
} }

View file

@ -24,80 +24,79 @@
* Rob Clark <robclark@freedesktop.org> * Rob Clark <robclark@freedesktop.org>
*/ */
#include "pipe/p_state.h" #include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h" #include "util/u_memory.h"
#include "util/u_string.h"
#include "fd3_rasterizer.h"
#include "fd3_context.h" #include "fd3_context.h"
#include "fd3_format.h" #include "fd3_format.h"
#include "fd3_rasterizer.h"
void * void *
fd3_rasterizer_state_create(struct pipe_context *pctx, fd3_rasterizer_state_create(struct pipe_context *pctx,
const struct pipe_rasterizer_state *cso) const struct pipe_rasterizer_state *cso)
{ {
struct fd3_rasterizer_stateobj *so; struct fd3_rasterizer_stateobj *so;
float psize_min, psize_max; float psize_min, psize_max;
so = CALLOC_STRUCT(fd3_rasterizer_stateobj); so = CALLOC_STRUCT(fd3_rasterizer_stateobj);
if (!so) if (!so)
return NULL; return NULL;
so->base = *cso; so->base = *cso;
if (cso->point_size_per_vertex) { if (cso->point_size_per_vertex) {
psize_min = util_get_min_point_size(cso); psize_min = util_get_min_point_size(cso);
psize_max = 4092; psize_max = 4092;
} else { } else {
/* Force the point size to be as if the vertex output was disabled. */ /* Force the point size to be as if the vertex output was disabled. */
psize_min = cso->point_size; psize_min = cso->point_size;
psize_max = cso->point_size; psize_max = cso->point_size;
} }
/* /*
if (cso->line_stipple_enable) { if (cso->line_stipple_enable) {
??? TODO line stipple ??? TODO line stipple
} }
TODO cso->half_pixel_center TODO cso->half_pixel_center
if (cso->multisample) if (cso->multisample)
TODO TODO
*/ */
so->gras_cl_clip_cntl = so->gras_cl_clip_cntl =
COND(cso->clip_halfz, A3XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z); COND(cso->clip_halfz, A3XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z);
so->gras_su_point_minmax = so->gras_su_point_minmax = A3XX_GRAS_SU_POINT_MINMAX_MIN(psize_min) |
A3XX_GRAS_SU_POINT_MINMAX_MIN(psize_min) | A3XX_GRAS_SU_POINT_MINMAX_MAX(psize_max);
A3XX_GRAS_SU_POINT_MINMAX_MAX(psize_max); so->gras_su_point_size = A3XX_GRAS_SU_POINT_SIZE(cso->point_size);
so->gras_su_point_size = A3XX_GRAS_SU_POINT_SIZE(cso->point_size); so->gras_su_poly_offset_scale =
so->gras_su_poly_offset_scale = A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL(cso->offset_scale);
A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL(cso->offset_scale); so->gras_su_poly_offset_offset =
so->gras_su_poly_offset_offset = A3XX_GRAS_SU_POLY_OFFSET_OFFSET(cso->offset_units * 2.0f);
A3XX_GRAS_SU_POLY_OFFSET_OFFSET(cso->offset_units * 2.0f);
so->gras_su_mode_control = so->gras_su_mode_control =
A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(cso->line_width/2.0); A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(cso->line_width / 2.0);
so->pc_prim_vtx_cntl = so->pc_prim_vtx_cntl = A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(
A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(fd_polygon_mode(cso->fill_front)) | fd_polygon_mode(cso->fill_front)) |
A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(fd_polygon_mode(cso->fill_back)); A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(
fd_polygon_mode(cso->fill_back));
if (cso->fill_front != PIPE_POLYGON_MODE_FILL || if (cso->fill_front != PIPE_POLYGON_MODE_FILL ||
cso->fill_back != PIPE_POLYGON_MODE_FILL) cso->fill_back != PIPE_POLYGON_MODE_FILL)
so->pc_prim_vtx_cntl |= A3XX_PC_PRIM_VTX_CNTL_POLYMODE_ENABLE; so->pc_prim_vtx_cntl |= A3XX_PC_PRIM_VTX_CNTL_POLYMODE_ENABLE;
if (cso->cull_face & PIPE_FACE_FRONT) if (cso->cull_face & PIPE_FACE_FRONT)
so->gras_su_mode_control |= A3XX_GRAS_SU_MODE_CONTROL_CULL_FRONT; so->gras_su_mode_control |= A3XX_GRAS_SU_MODE_CONTROL_CULL_FRONT;
if (cso->cull_face & PIPE_FACE_BACK) if (cso->cull_face & PIPE_FACE_BACK)
so->gras_su_mode_control |= A3XX_GRAS_SU_MODE_CONTROL_CULL_BACK; so->gras_su_mode_control |= A3XX_GRAS_SU_MODE_CONTROL_CULL_BACK;
if (!cso->front_ccw) if (!cso->front_ccw)
so->gras_su_mode_control |= A3XX_GRAS_SU_MODE_CONTROL_FRONT_CW; so->gras_su_mode_control |= A3XX_GRAS_SU_MODE_CONTROL_FRONT_CW;
if (!cso->flatshade_first) if (!cso->flatshade_first)
so->pc_prim_vtx_cntl |= A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST; so->pc_prim_vtx_cntl |= A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST;
if (cso->offset_tri) if (cso->offset_tri)
so->gras_su_mode_control |= A3XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET; so->gras_su_mode_control |= A3XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET;
if (!cso->depth_clip_near) if (!cso->depth_clip_near)
so->gras_cl_clip_cntl |= A3XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE; so->gras_cl_clip_cntl |= A3XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE;
return so; return so;
} }

View file

@ -27,28 +27,28 @@
#ifndef FD3_RASTERIZER_H_ #ifndef FD3_RASTERIZER_H_
#define FD3_RASTERIZER_H_ #define FD3_RASTERIZER_H_
#include "pipe/p_state.h"
#include "pipe/p_context.h" #include "pipe/p_context.h"
#include "pipe/p_state.h"
struct fd3_rasterizer_stateobj { struct fd3_rasterizer_stateobj {
struct pipe_rasterizer_state base; struct pipe_rasterizer_state base;
uint32_t gras_su_point_minmax; uint32_t gras_su_point_minmax;
uint32_t gras_su_point_size; uint32_t gras_su_point_size;
uint32_t gras_su_poly_offset_scale; uint32_t gras_su_poly_offset_scale;
uint32_t gras_su_poly_offset_offset; uint32_t gras_su_poly_offset_offset;
uint32_t gras_su_mode_control; uint32_t gras_su_mode_control;
uint32_t gras_cl_clip_cntl; uint32_t gras_cl_clip_cntl;
uint32_t pc_prim_vtx_cntl; uint32_t pc_prim_vtx_cntl;
}; };
static inline struct fd3_rasterizer_stateobj * static inline struct fd3_rasterizer_stateobj *
fd3_rasterizer_stateobj(struct pipe_rasterizer_state *rast) fd3_rasterizer_stateobj(struct pipe_rasterizer_state *rast)
{ {
return (struct fd3_rasterizer_stateobj *)rast; return (struct fd3_rasterizer_stateobj *)rast;
} }
void * fd3_rasterizer_state_create(struct pipe_context *pctx, void *fd3_rasterizer_state_create(struct pipe_context *pctx,
const struct pipe_rasterizer_state *cso); const struct pipe_rasterizer_state *cso);
#endif /* FD3_RASTERIZER_H_ */ #endif /* FD3_RASTERIZER_H_ */

View file

@ -26,95 +26,96 @@
#include "fd3_format.h" #include "fd3_format.h"
static uint32_t static uint32_t
setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format format) setup_slices(struct fd_resource *rsc, uint32_t alignment,
enum pipe_format format)
{ {
struct pipe_resource *prsc = &rsc->b.b; struct pipe_resource *prsc = &rsc->b.b;
uint32_t level, size = 0; uint32_t level, size = 0;
uint32_t width0 = prsc->width0; uint32_t width0 = prsc->width0;
if (rsc->layout.tile_mode && prsc->target != PIPE_TEXTURE_CUBE) if (rsc->layout.tile_mode && prsc->target != PIPE_TEXTURE_CUBE)
width0 = util_next_power_of_two(width0); width0 = util_next_power_of_two(width0);
/* 32 pixel alignment */ /* 32 pixel alignment */
fdl_set_pitchalign(&rsc->layout, fdl_cpp_shift(&rsc->layout) + 5); fdl_set_pitchalign(&rsc->layout, fdl_cpp_shift(&rsc->layout) + 5);
for (level = 0; level <= prsc->last_level; level++) { for (level = 0; level <= prsc->last_level; level++) {
struct fdl_slice *slice = fd_resource_slice(rsc, level); struct fdl_slice *slice = fd_resource_slice(rsc, level);
uint32_t pitch = fdl_pitch(&rsc->layout, level); uint32_t pitch = fdl_pitch(&rsc->layout, level);
uint32_t height = u_minify(prsc->height0, level); uint32_t height = u_minify(prsc->height0, level);
if (rsc->layout.tile_mode) { if (rsc->layout.tile_mode) {
height = align(height, 4); height = align(height, 4);
if (prsc->target != PIPE_TEXTURE_CUBE) if (prsc->target != PIPE_TEXTURE_CUBE)
height = util_next_power_of_two(height); height = util_next_power_of_two(height);
} }
uint32_t nblocksy = util_format_get_nblocksy(format, height); uint32_t nblocksy = util_format_get_nblocksy(format, height);
slice->offset = size; slice->offset = size;
/* 1d array and 2d array textures must all have the same layer size /* 1d array and 2d array textures must all have the same layer size
* for each miplevel on a3xx. 3d textures can have different layer * for each miplevel on a3xx. 3d textures can have different layer
* sizes for high levels, but the hw auto-sizer is buggy (or at least * sizes for high levels, but the hw auto-sizer is buggy (or at least
* different than what this code does), so as soon as the layer size * different than what this code does), so as soon as the layer size
* range gets into range, we stop reducing it. * range gets into range, we stop reducing it.
*/ */
if (prsc->target == PIPE_TEXTURE_3D && ( if (prsc->target == PIPE_TEXTURE_3D &&
level == 1 || (level == 1 ||
(level > 1 && fd_resource_slice(rsc, level - 1)->size0 > 0xf000))) (level > 1 && fd_resource_slice(rsc, level - 1)->size0 > 0xf000)))
slice->size0 = align(nblocksy * pitch, alignment); slice->size0 = align(nblocksy * pitch, alignment);
else if (level == 0 || alignment == 1) else if (level == 0 || alignment == 1)
slice->size0 = align(nblocksy * pitch, alignment); slice->size0 = align(nblocksy * pitch, alignment);
else else
slice->size0 = fd_resource_slice(rsc, level - 1)->size0; slice->size0 = fd_resource_slice(rsc, level - 1)->size0;
size += slice->size0 * u_minify(prsc->depth0, level) * prsc->array_size; size += slice->size0 * u_minify(prsc->depth0, level) * prsc->array_size;
} }
return size; return size;
} }
uint32_t uint32_t
fd3_setup_slices(struct fd_resource *rsc) fd3_setup_slices(struct fd_resource *rsc)
{ {
uint32_t alignment; uint32_t alignment;
switch (rsc->b.b.target) { switch (rsc->b.b.target) {
case PIPE_TEXTURE_3D: case PIPE_TEXTURE_3D:
case PIPE_TEXTURE_1D_ARRAY: case PIPE_TEXTURE_1D_ARRAY:
case PIPE_TEXTURE_2D_ARRAY: case PIPE_TEXTURE_2D_ARRAY:
alignment = 4096; alignment = 4096;
break; break;
default: default:
alignment = 1; alignment = 1;
break; break;
} }
return setup_slices(rsc, alignment, rsc->b.b.format); return setup_slices(rsc, alignment, rsc->b.b.format);
} }
static bool static bool
ok_format(enum pipe_format pfmt) ok_format(enum pipe_format pfmt)
{ {
enum a3xx_color_fmt fmt = fd3_pipe2color(pfmt); enum a3xx_color_fmt fmt = fd3_pipe2color(pfmt);
if (fmt == RB_NONE) if (fmt == RB_NONE)
return false; return false;
switch (pfmt) { switch (pfmt) {
case PIPE_FORMAT_R8_UINT: case PIPE_FORMAT_R8_UINT:
case PIPE_FORMAT_R8_SINT: case PIPE_FORMAT_R8_SINT:
case PIPE_FORMAT_Z32_FLOAT: case PIPE_FORMAT_Z32_FLOAT:
return false; return false;
default: default:
break; break;
} }
return true; return true;
} }
unsigned unsigned
fd3_tile_mode(const struct pipe_resource *tmpl) fd3_tile_mode(const struct pipe_resource *tmpl)
{ {
if (ok_format(tmpl->format)) if (ok_format(tmpl->format))
return TILE_4X4; return TILE_4X4;
return LINEAR; return LINEAR;
} }

View file

@ -27,90 +27,84 @@
#include "pipe/p_screen.h" #include "pipe/p_screen.h"
#include "util/format/u_format.h" #include "util/format/u_format.h"
#include "fd3_screen.h"
#include "fd3_context.h" #include "fd3_context.h"
#include "fd3_format.h"
#include "fd3_emit.h" #include "fd3_emit.h"
#include "fd3_format.h"
#include "fd3_resource.h" #include "fd3_resource.h"
#include "fd3_screen.h"
#include "ir3/ir3_compiler.h" #include "ir3/ir3_compiler.h"
static bool static bool
fd3_screen_is_format_supported(struct pipe_screen *pscreen, fd3_screen_is_format_supported(struct pipe_screen *pscreen,
enum pipe_format format, enum pipe_format format,
enum pipe_texture_target target, enum pipe_texture_target target,
unsigned sample_count, unsigned sample_count,
unsigned storage_sample_count, unsigned storage_sample_count, unsigned usage)
unsigned usage)
{ {
unsigned retval = 0; unsigned retval = 0;
if ((target >= PIPE_MAX_TEXTURE_TYPES) || if ((target >= PIPE_MAX_TEXTURE_TYPES) ||
(sample_count > 1)) { /* TODO add MSAA */ (sample_count > 1)) { /* TODO add MSAA */
DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x", DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x",
util_format_name(format), target, sample_count, usage); util_format_name(format), target, sample_count, usage);
return false; return false;
} }
if (MAX2(1, sample_count) != MAX2(1, storage_sample_count)) if (MAX2(1, sample_count) != MAX2(1, storage_sample_count))
return false; return false;
if ((usage & PIPE_BIND_VERTEX_BUFFER) && if ((usage & PIPE_BIND_VERTEX_BUFFER) &&
(fd3_pipe2vtx(format) != VFMT_NONE)) { (fd3_pipe2vtx(format) != VFMT_NONE)) {
retval |= PIPE_BIND_VERTEX_BUFFER; retval |= PIPE_BIND_VERTEX_BUFFER;
} }
if ((usage & PIPE_BIND_SAMPLER_VIEW) && if ((usage & PIPE_BIND_SAMPLER_VIEW) &&
(fd3_pipe2tex(format) != TFMT_NONE)) { (fd3_pipe2tex(format) != TFMT_NONE)) {
retval |= PIPE_BIND_SAMPLER_VIEW; retval |= PIPE_BIND_SAMPLER_VIEW;
} }
if ((usage & (PIPE_BIND_RENDER_TARGET | if ((usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET |
PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | PIPE_BIND_SHARED | PIPE_BIND_BLENDABLE)) &&
PIPE_BIND_SCANOUT | (fd3_pipe2color(format) != RB_NONE) &&
PIPE_BIND_SHARED | (fd3_pipe2tex(format) != TFMT_NONE)) {
PIPE_BIND_BLENDABLE)) && retval |= usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET |
(fd3_pipe2color(format) != RB_NONE) && PIPE_BIND_SCANOUT | PIPE_BIND_SHARED);
(fd3_pipe2tex(format) != TFMT_NONE)) { if (!util_format_is_pure_integer(format))
retval |= usage & (PIPE_BIND_RENDER_TARGET | retval |= usage & PIPE_BIND_BLENDABLE;
PIPE_BIND_DISPLAY_TARGET | }
PIPE_BIND_SCANOUT |
PIPE_BIND_SHARED);
if (!util_format_is_pure_integer(format))
retval |= usage & PIPE_BIND_BLENDABLE;
}
if ((usage & PIPE_BIND_DEPTH_STENCIL) && if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
(fd_pipe2depth(format) != (enum adreno_rb_depth_format)~0) && (fd_pipe2depth(format) != (enum adreno_rb_depth_format) ~0) &&
(fd3_pipe2tex(format) != TFMT_NONE)) { (fd3_pipe2tex(format) != TFMT_NONE)) {
retval |= PIPE_BIND_DEPTH_STENCIL; retval |= PIPE_BIND_DEPTH_STENCIL;
} }
if ((usage & PIPE_BIND_INDEX_BUFFER) && if ((usage & PIPE_BIND_INDEX_BUFFER) &&
(fd_pipe2index(format) != (enum pc_di_index_size)~0)) { (fd_pipe2index(format) != (enum pc_di_index_size) ~0)) {
retval |= PIPE_BIND_INDEX_BUFFER; retval |= PIPE_BIND_INDEX_BUFFER;
} }
if (retval != usage) { if (retval != usage) {
DBG("not supported: format=%s, target=%d, sample_count=%d, " DBG("not supported: format=%s, target=%d, sample_count=%d, "
"usage=%x, retval=%x", util_format_name(format), "usage=%x, retval=%x",
target, sample_count, usage, retval); util_format_name(format), target, sample_count, usage, retval);
} }
return retval == usage; return retval == usage;
} }
void void
fd3_screen_init(struct pipe_screen *pscreen) fd3_screen_init(struct pipe_screen *pscreen)
{ {
struct fd_screen *screen = fd_screen(pscreen); struct fd_screen *screen = fd_screen(pscreen);
screen->max_rts = A3XX_MAX_RENDER_TARGETS; screen->max_rts = A3XX_MAX_RENDER_TARGETS;
pscreen->context_create = fd3_context_create; pscreen->context_create = fd3_context_create;
pscreen->is_format_supported = fd3_screen_is_format_supported; pscreen->is_format_supported = fd3_screen_is_format_supported;
fd3_emit_init_screen(pscreen); fd3_emit_init_screen(pscreen);
ir3_screen_init(pscreen); ir3_screen_init(pscreen);
screen->setup_slices = fd3_setup_slices; screen->setup_slices = fd3_setup_slices;
if (FD_DBG(TTILE)) if (FD_DBG(TTILE))
screen->tile_mode = fd3_tile_mode; screen->tile_mode = fd3_tile_mode;
} }

View file

@ -25,204 +25,199 @@
*/ */
#include "pipe/p_state.h" #include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/format/u_format.h" #include "util/format/u_format.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_string.h"
#include "fd3_texture.h"
#include "fd3_format.h" #include "fd3_format.h"
#include "fd3_texture.h"
static enum a3xx_tex_clamp static enum a3xx_tex_clamp
tex_clamp(unsigned wrap, bool *needs_border) tex_clamp(unsigned wrap, bool *needs_border)
{ {
switch (wrap) { switch (wrap) {
case PIPE_TEX_WRAP_REPEAT: case PIPE_TEX_WRAP_REPEAT:
return A3XX_TEX_REPEAT; return A3XX_TEX_REPEAT;
case PIPE_TEX_WRAP_CLAMP_TO_EDGE: case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
return A3XX_TEX_CLAMP_TO_EDGE; return A3XX_TEX_CLAMP_TO_EDGE;
case PIPE_TEX_WRAP_CLAMP_TO_BORDER: case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
*needs_border = true; *needs_border = true;
return A3XX_TEX_CLAMP_TO_BORDER; return A3XX_TEX_CLAMP_TO_BORDER;
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
/* only works for PoT.. need to emulate otherwise! */ /* only works for PoT.. need to emulate otherwise! */
return A3XX_TEX_MIRROR_CLAMP; return A3XX_TEX_MIRROR_CLAMP;
case PIPE_TEX_WRAP_MIRROR_REPEAT: case PIPE_TEX_WRAP_MIRROR_REPEAT:
return A3XX_TEX_MIRROR_REPEAT; return A3XX_TEX_MIRROR_REPEAT;
case PIPE_TEX_WRAP_MIRROR_CLAMP: case PIPE_TEX_WRAP_MIRROR_CLAMP:
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
/* these two we could perhaps emulate, but we currently /* these two we could perhaps emulate, but we currently
* just don't advertise PIPE_CAP_TEXTURE_MIRROR_CLAMP * just don't advertise PIPE_CAP_TEXTURE_MIRROR_CLAMP
*/ */
default: default:
DBG("invalid wrap: %u", wrap); DBG("invalid wrap: %u", wrap);
return 0; return 0;
} }
} }
static enum a3xx_tex_filter static enum a3xx_tex_filter
tex_filter(unsigned filter, bool aniso) tex_filter(unsigned filter, bool aniso)
{ {
switch (filter) { switch (filter) {
case PIPE_TEX_FILTER_NEAREST: case PIPE_TEX_FILTER_NEAREST:
return A3XX_TEX_NEAREST; return A3XX_TEX_NEAREST;
case PIPE_TEX_FILTER_LINEAR: case PIPE_TEX_FILTER_LINEAR:
return aniso ? A3XX_TEX_ANISO : A3XX_TEX_LINEAR; return aniso ? A3XX_TEX_ANISO : A3XX_TEX_LINEAR;
default: default:
DBG("invalid filter: %u", filter); DBG("invalid filter: %u", filter);
return 0; return 0;
} }
} }
static void * static void *
fd3_sampler_state_create(struct pipe_context *pctx, fd3_sampler_state_create(struct pipe_context *pctx,
const struct pipe_sampler_state *cso) const struct pipe_sampler_state *cso)
{ {
struct fd3_sampler_stateobj *so = CALLOC_STRUCT(fd3_sampler_stateobj); struct fd3_sampler_stateobj *so = CALLOC_STRUCT(fd3_sampler_stateobj);
unsigned aniso = util_last_bit(MIN2(cso->max_anisotropy >> 1, 8)); unsigned aniso = util_last_bit(MIN2(cso->max_anisotropy >> 1, 8));
bool miplinear = false; bool miplinear = false;
if (!so) if (!so)
return NULL; return NULL;
if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR) if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
miplinear = true; miplinear = true;
so->base = *cso; so->base = *cso;
so->needs_border = false; so->needs_border = false;
so->texsamp0 = so->texsamp0 =
COND(!cso->normalized_coords, A3XX_TEX_SAMP_0_UNNORM_COORDS) | COND(!cso->normalized_coords, A3XX_TEX_SAMP_0_UNNORM_COORDS) |
COND(!cso->seamless_cube_map, A3XX_TEX_SAMP_0_CUBEMAPSEAMLESSFILTOFF) | COND(!cso->seamless_cube_map, A3XX_TEX_SAMP_0_CUBEMAPSEAMLESSFILTOFF) |
COND(miplinear, A3XX_TEX_SAMP_0_MIPFILTER_LINEAR) | COND(miplinear, A3XX_TEX_SAMP_0_MIPFILTER_LINEAR) |
A3XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter, aniso)) | A3XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter, aniso)) |
A3XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter, aniso)) | A3XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter, aniso)) |
A3XX_TEX_SAMP_0_ANISO(aniso) | A3XX_TEX_SAMP_0_ANISO(aniso) |
A3XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s, &so->needs_border)) | A3XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s, &so->needs_border)) |
A3XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t, &so->needs_border)) | A3XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t, &so->needs_border)) |
A3XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r, &so->needs_border)); A3XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r, &so->needs_border));
if (cso->compare_mode) if (cso->compare_mode)
so->texsamp0 |= A3XX_TEX_SAMP_0_COMPARE_FUNC(cso->compare_func); /* maps 1:1 */ so->texsamp0 |=
A3XX_TEX_SAMP_0_COMPARE_FUNC(cso->compare_func); /* maps 1:1 */
so->texsamp1 = A3XX_TEX_SAMP_1_LOD_BIAS(cso->lod_bias); so->texsamp1 = A3XX_TEX_SAMP_1_LOD_BIAS(cso->lod_bias);
if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) { if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
so->texsamp1 |= so->texsamp1 |= A3XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) |
A3XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) | A3XX_TEX_SAMP_1_MAX_LOD(cso->max_lod);
A3XX_TEX_SAMP_1_MAX_LOD(cso->max_lod); } else {
} else { /* If we're not doing mipmap filtering, we still need a slightly > 0
/* If we're not doing mipmap filtering, we still need a slightly > 0 * LOD clamp so the HW can decide between min and mag filtering of
* LOD clamp so the HW can decide between min and mag filtering of * level 0.
* level 0. */
*/ so->texsamp1 |= A3XX_TEX_SAMP_1_MIN_LOD(MIN2(cso->min_lod, 0.125)) |
so->texsamp1 |= A3XX_TEX_SAMP_1_MAX_LOD(MIN2(cso->max_lod, 0.125));
A3XX_TEX_SAMP_1_MIN_LOD(MIN2(cso->min_lod, 0.125)) | }
A3XX_TEX_SAMP_1_MAX_LOD(MIN2(cso->max_lod, 0.125));
}
return so; return so;
} }
static enum a3xx_tex_type static enum a3xx_tex_type
tex_type(unsigned target) tex_type(unsigned target)
{ {
switch (target) { switch (target) {
default: default:
assert(0); assert(0);
case PIPE_BUFFER: case PIPE_BUFFER:
case PIPE_TEXTURE_1D: case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_1D_ARRAY: case PIPE_TEXTURE_1D_ARRAY:
return A3XX_TEX_1D; return A3XX_TEX_1D;
case PIPE_TEXTURE_RECT: case PIPE_TEXTURE_RECT:
case PIPE_TEXTURE_2D: case PIPE_TEXTURE_2D:
case PIPE_TEXTURE_2D_ARRAY: case PIPE_TEXTURE_2D_ARRAY:
return A3XX_TEX_2D; return A3XX_TEX_2D;
case PIPE_TEXTURE_3D: case PIPE_TEXTURE_3D:
return A3XX_TEX_3D; return A3XX_TEX_3D;
case PIPE_TEXTURE_CUBE: case PIPE_TEXTURE_CUBE:
case PIPE_TEXTURE_CUBE_ARRAY: case PIPE_TEXTURE_CUBE_ARRAY:
return A3XX_TEX_CUBE; return A3XX_TEX_CUBE;
} }
} }
static struct pipe_sampler_view * static struct pipe_sampler_view *
fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
const struct pipe_sampler_view *cso) const struct pipe_sampler_view *cso)
{ {
struct fd3_pipe_sampler_view *so = CALLOC_STRUCT(fd3_pipe_sampler_view); struct fd3_pipe_sampler_view *so = CALLOC_STRUCT(fd3_pipe_sampler_view);
struct fd_resource *rsc = fd_resource(prsc); struct fd_resource *rsc = fd_resource(prsc);
unsigned lvl; unsigned lvl;
if (!so) if (!so)
return NULL; return NULL;
so->base = *cso; so->base = *cso;
pipe_reference(NULL, &prsc->reference); pipe_reference(NULL, &prsc->reference);
so->base.texture = prsc; so->base.texture = prsc;
so->base.reference.count = 1; so->base.reference.count = 1;
so->base.context = pctx; so->base.context = pctx;
so->texconst0 = so->texconst0 = A3XX_TEX_CONST_0_TILE_MODE(rsc->layout.tile_mode) |
A3XX_TEX_CONST_0_TILE_MODE(rsc->layout.tile_mode) | A3XX_TEX_CONST_0_TYPE(tex_type(prsc->target)) |
A3XX_TEX_CONST_0_TYPE(tex_type(prsc->target)) | A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(cso->format)) |
A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(cso->format)) | fd3_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g,
fd3_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g, cso->swizzle_b, cso->swizzle_a);
cso->swizzle_b, cso->swizzle_a);
if (prsc->target == PIPE_BUFFER || util_format_is_pure_integer(cso->format)) if (prsc->target == PIPE_BUFFER || util_format_is_pure_integer(cso->format))
so->texconst0 |= A3XX_TEX_CONST_0_NOCONVERT; so->texconst0 |= A3XX_TEX_CONST_0_NOCONVERT;
if (util_format_is_srgb(cso->format)) if (util_format_is_srgb(cso->format))
so->texconst0 |= A3XX_TEX_CONST_0_SRGB; so->texconst0 |= A3XX_TEX_CONST_0_SRGB;
if (prsc->target == PIPE_BUFFER) { if (prsc->target == PIPE_BUFFER) {
lvl = 0; lvl = 0;
so->texconst1 = so->texconst1 =
A3XX_TEX_CONST_1_WIDTH(cso->u.buf.size / util_format_get_blocksize(cso->format)) | A3XX_TEX_CONST_1_WIDTH(cso->u.buf.size /
A3XX_TEX_CONST_1_HEIGHT(1); util_format_get_blocksize(cso->format)) |
} else { A3XX_TEX_CONST_1_HEIGHT(1);
unsigned miplevels; } else {
unsigned miplevels;
lvl = fd_sampler_first_level(cso); lvl = fd_sampler_first_level(cso);
miplevels = fd_sampler_last_level(cso) - lvl; miplevels = fd_sampler_last_level(cso) - lvl;
so->texconst0 |= A3XX_TEX_CONST_0_MIPLVLS(miplevels); so->texconst0 |= A3XX_TEX_CONST_0_MIPLVLS(miplevels);
so->texconst1 = so->texconst1 = A3XX_TEX_CONST_1_PITCHALIGN(rsc->layout.pitchalign - 4) |
A3XX_TEX_CONST_1_PITCHALIGN(rsc->layout.pitchalign - 4) | A3XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) |
A3XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) | A3XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
A3XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl)); }
} /* when emitted, A3XX_TEX_CONST_2_INDX() must be OR'd in: */
/* when emitted, A3XX_TEX_CONST_2_INDX() must be OR'd in: */ struct fdl_slice *slice = fd_resource_slice(rsc, lvl);
struct fdl_slice *slice = fd_resource_slice(rsc, lvl); so->texconst2 = A3XX_TEX_CONST_2_PITCH(fd_resource_pitch(rsc, lvl));
so->texconst2 = switch (prsc->target) {
A3XX_TEX_CONST_2_PITCH(fd_resource_pitch(rsc, lvl)); case PIPE_TEXTURE_1D_ARRAY:
switch (prsc->target) { case PIPE_TEXTURE_2D_ARRAY:
case PIPE_TEXTURE_1D_ARRAY: so->texconst3 = A3XX_TEX_CONST_3_DEPTH(prsc->array_size - 1) |
case PIPE_TEXTURE_2D_ARRAY: A3XX_TEX_CONST_3_LAYERSZ1(slice->size0);
so->texconst3 = break;
A3XX_TEX_CONST_3_DEPTH(prsc->array_size - 1) | case PIPE_TEXTURE_3D:
A3XX_TEX_CONST_3_LAYERSZ1(slice->size0); so->texconst3 = A3XX_TEX_CONST_3_DEPTH(u_minify(prsc->depth0, lvl)) |
break; A3XX_TEX_CONST_3_LAYERSZ1(slice->size0);
case PIPE_TEXTURE_3D: so->texconst3 |= A3XX_TEX_CONST_3_LAYERSZ2(
so->texconst3 = fd_resource_slice(rsc, prsc->last_level)->size0);
A3XX_TEX_CONST_3_DEPTH(u_minify(prsc->depth0, lvl)) | break;
A3XX_TEX_CONST_3_LAYERSZ1(slice->size0); default:
so->texconst3 |= A3XX_TEX_CONST_3_LAYERSZ2( so->texconst3 = 0x00000000;
fd_resource_slice(rsc, prsc->last_level)->size0); break;
break; }
default:
so->texconst3 = 0x00000000;
break;
}
return &so->base; return &so->base;
} }
void void
fd3_texture_init(struct pipe_context *pctx) fd3_texture_init(struct pipe_context *pctx)
{ {
pctx->create_sampler_state = fd3_sampler_state_create; pctx->create_sampler_state = fd3_sampler_state_create;
pctx->bind_sampler_states = fd_sampler_states_bind; pctx->bind_sampler_states = fd_sampler_states_bind;
pctx->create_sampler_view = fd3_sampler_view_create; pctx->create_sampler_view = fd3_sampler_view_create;
pctx->set_sampler_views = fd_set_sampler_views; pctx->set_sampler_views = fd_set_sampler_views;
} }

View file

@ -29,37 +29,37 @@
#include "pipe/p_context.h" #include "pipe/p_context.h"
#include "freedreno_texture.h"
#include "freedreno_resource.h" #include "freedreno_resource.h"
#include "freedreno_texture.h"
#include "fd3_context.h" #include "fd3_context.h"
#include "fd3_format.h" #include "fd3_format.h"
struct fd3_sampler_stateobj { struct fd3_sampler_stateobj {
struct pipe_sampler_state base; struct pipe_sampler_state base;
uint32_t texsamp0, texsamp1; uint32_t texsamp0, texsamp1;
bool needs_border; bool needs_border;
}; };
static inline struct fd3_sampler_stateobj * static inline struct fd3_sampler_stateobj *
fd3_sampler_stateobj(struct pipe_sampler_state *samp) fd3_sampler_stateobj(struct pipe_sampler_state *samp)
{ {
return (struct fd3_sampler_stateobj *)samp; return (struct fd3_sampler_stateobj *)samp;
} }
struct fd3_pipe_sampler_view { struct fd3_pipe_sampler_view {
struct pipe_sampler_view base; struct pipe_sampler_view base;
uint32_t texconst0, texconst1, texconst2, texconst3; uint32_t texconst0, texconst1, texconst2, texconst3;
}; };
static inline struct fd3_pipe_sampler_view * static inline struct fd3_pipe_sampler_view *
fd3_pipe_sampler_view(struct pipe_sampler_view *pview) fd3_pipe_sampler_view(struct pipe_sampler_view *pview)
{ {
return (struct fd3_pipe_sampler_view *)pview; return (struct fd3_pipe_sampler_view *)pview;
} }
unsigned fd3_get_const_idx(struct fd_context *ctx, unsigned fd3_get_const_idx(struct fd_context *ctx,
struct fd_texture_stateobj *tex, unsigned samp_id); struct fd_texture_stateobj *tex, unsigned samp_id);
void fd3_texture_init(struct pipe_context *pctx); void fd3_texture_init(struct pipe_context *pctx);

View file

@ -24,79 +24,75 @@
* Rob Clark <robclark@freedesktop.org> * Rob Clark <robclark@freedesktop.org>
*/ */
#include "pipe/p_state.h" #include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h" #include "util/u_memory.h"
#include "util/u_string.h"
#include "fd3_zsa.h"
#include "fd3_context.h" #include "fd3_context.h"
#include "fd3_format.h" #include "fd3_format.h"
#include "fd3_zsa.h"
void * void *
fd3_zsa_state_create(struct pipe_context *pctx, fd3_zsa_state_create(struct pipe_context *pctx,
const struct pipe_depth_stencil_alpha_state *cso) const struct pipe_depth_stencil_alpha_state *cso)
{ {
struct fd3_zsa_stateobj *so; struct fd3_zsa_stateobj *so;
so = CALLOC_STRUCT(fd3_zsa_stateobj); so = CALLOC_STRUCT(fd3_zsa_stateobj);
if (!so) if (!so)
return NULL; return NULL;
so->base = *cso; so->base = *cso;
so->rb_depth_control |= so->rb_depth_control |=
A3XX_RB_DEPTH_CONTROL_ZFUNC(cso->depth_func); /* maps 1:1 */ A3XX_RB_DEPTH_CONTROL_ZFUNC(cso->depth_func); /* maps 1:1 */
if (cso->depth_enabled) if (cso->depth_enabled)
so->rb_depth_control |= so->rb_depth_control |=
A3XX_RB_DEPTH_CONTROL_Z_ENABLE | A3XX_RB_DEPTH_CONTROL_Z_ENABLE | A3XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE;
A3XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE;
if (cso->depth_writemask) if (cso->depth_writemask)
so->rb_depth_control |= A3XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE; so->rb_depth_control |= A3XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE;
if (cso->stencil[0].enabled) { if (cso->stencil[0].enabled) {
const struct pipe_stencil_state *s = &cso->stencil[0]; const struct pipe_stencil_state *s = &cso->stencil[0];
so->rb_stencil_control |= so->rb_stencil_control |=
A3XX_RB_STENCIL_CONTROL_STENCIL_READ | A3XX_RB_STENCIL_CONTROL_STENCIL_READ |
A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE | A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
A3XX_RB_STENCIL_CONTROL_FUNC(s->func) | /* maps 1:1 */ A3XX_RB_STENCIL_CONTROL_FUNC(s->func) | /* maps 1:1 */
A3XX_RB_STENCIL_CONTROL_FAIL(fd_stencil_op(s->fail_op)) | A3XX_RB_STENCIL_CONTROL_FAIL(fd_stencil_op(s->fail_op)) |
A3XX_RB_STENCIL_CONTROL_ZPASS(fd_stencil_op(s->zpass_op)) | A3XX_RB_STENCIL_CONTROL_ZPASS(fd_stencil_op(s->zpass_op)) |
A3XX_RB_STENCIL_CONTROL_ZFAIL(fd_stencil_op(s->zfail_op)); A3XX_RB_STENCIL_CONTROL_ZFAIL(fd_stencil_op(s->zfail_op));
so->rb_stencilrefmask |= so->rb_stencilrefmask |=
0xff000000 | /* ??? */ 0xff000000 | /* ??? */
A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(s->writemask) | A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(s->writemask) |
A3XX_RB_STENCILREFMASK_STENCILMASK(s->valuemask); A3XX_RB_STENCILREFMASK_STENCILMASK(s->valuemask);
if (cso->stencil[1].enabled) { if (cso->stencil[1].enabled) {
const struct pipe_stencil_state *bs = &cso->stencil[1]; const struct pipe_stencil_state *bs = &cso->stencil[1];
so->rb_stencil_control |= so->rb_stencil_control |=
A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF | A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
A3XX_RB_STENCIL_CONTROL_FUNC_BF(bs->func) | /* maps 1:1 */ A3XX_RB_STENCIL_CONTROL_FUNC_BF(bs->func) | /* maps 1:1 */
A3XX_RB_STENCIL_CONTROL_FAIL_BF(fd_stencil_op(bs->fail_op)) | A3XX_RB_STENCIL_CONTROL_FAIL_BF(fd_stencil_op(bs->fail_op)) |
A3XX_RB_STENCIL_CONTROL_ZPASS_BF(fd_stencil_op(bs->zpass_op)) | A3XX_RB_STENCIL_CONTROL_ZPASS_BF(fd_stencil_op(bs->zpass_op)) |
A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(fd_stencil_op(bs->zfail_op)); A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(fd_stencil_op(bs->zfail_op));
so->rb_stencilrefmask_bf |= so->rb_stencilrefmask_bf |=
0xff000000 | /* ??? */ 0xff000000 | /* ??? */
A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(bs->writemask) | A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(bs->writemask) |
A3XX_RB_STENCILREFMASK_STENCILMASK(bs->valuemask); A3XX_RB_STENCILREFMASK_STENCILMASK(bs->valuemask);
} }
} }
if (cso->alpha_enabled) { if (cso->alpha_enabled) {
so->rb_render_control = so->rb_render_control =
A3XX_RB_RENDER_CONTROL_ALPHA_TEST | A3XX_RB_RENDER_CONTROL_ALPHA_TEST |
A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(cso->alpha_func); A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(cso->alpha_func);
so->rb_alpha_ref = so->rb_alpha_ref = A3XX_RB_ALPHA_REF_UINT(cso->alpha_ref_value * 255.0) |
A3XX_RB_ALPHA_REF_UINT(cso->alpha_ref_value * 255.0) | A3XX_RB_ALPHA_REF_FLOAT(cso->alpha_ref_value);
A3XX_RB_ALPHA_REF_FLOAT(cso->alpha_ref_value); so->rb_depth_control |= A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
so->rb_depth_control |= }
A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
}
return so; return so;
} }

View file

@ -27,29 +27,28 @@
#ifndef FD3_ZSA_H_ #ifndef FD3_ZSA_H_
#define FD3_ZSA_H_ #define FD3_ZSA_H_
#include "pipe/p_state.h"
#include "pipe/p_context.h" #include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "freedreno_util.h" #include "freedreno_util.h"
struct fd3_zsa_stateobj { struct fd3_zsa_stateobj {
struct pipe_depth_stencil_alpha_state base; struct pipe_depth_stencil_alpha_state base;
uint32_t rb_render_control; uint32_t rb_render_control;
uint32_t rb_alpha_ref; uint32_t rb_alpha_ref;
uint32_t rb_depth_control; uint32_t rb_depth_control;
uint32_t rb_stencil_control; uint32_t rb_stencil_control;
uint32_t rb_stencilrefmask; uint32_t rb_stencilrefmask;
uint32_t rb_stencilrefmask_bf; uint32_t rb_stencilrefmask_bf;
}; };
static inline struct fd3_zsa_stateobj * static inline struct fd3_zsa_stateobj *
fd3_zsa_stateobj(struct pipe_depth_stencil_alpha_state *zsa) fd3_zsa_stateobj(struct pipe_depth_stencil_alpha_state *zsa)
{ {
return (struct fd3_zsa_stateobj *)zsa; return (struct fd3_zsa_stateobj *)zsa;
} }
void * fd3_zsa_state_create(struct pipe_context *pctx, void *fd3_zsa_state_create(struct pipe_context *pctx,
const struct pipe_depth_stencil_alpha_state *cso); const struct pipe_depth_stencil_alpha_state *cso);
#endif /* FD3_ZSA_H_ */ #endif /* FD3_ZSA_H_ */

View file

@ -26,8 +26,8 @@
#include "pipe/p_state.h" #include "pipe/p_state.h"
#include "util/u_blend.h" #include "util/u_blend.h"
#include "util/u_string.h"
#include "util/u_memory.h" #include "util/u_memory.h"
#include "util/u_string.h"
#include "fd4_blend.h" #include "fd4_blend.h"
#include "fd4_context.h" #include "fd4_context.h"
@ -36,83 +36,89 @@
static enum a3xx_rb_blend_opcode static enum a3xx_rb_blend_opcode
blend_func(unsigned func) blend_func(unsigned func)
{ {
switch (func) { switch (func) {
case PIPE_BLEND_ADD: case PIPE_BLEND_ADD:
return BLEND_DST_PLUS_SRC; return BLEND_DST_PLUS_SRC;
case PIPE_BLEND_MIN: case PIPE_BLEND_MIN:
return BLEND_MIN_DST_SRC; return BLEND_MIN_DST_SRC;
case PIPE_BLEND_MAX: case PIPE_BLEND_MAX:
return BLEND_MAX_DST_SRC; return BLEND_MAX_DST_SRC;
case PIPE_BLEND_SUBTRACT: case PIPE_BLEND_SUBTRACT:
return BLEND_SRC_MINUS_DST; return BLEND_SRC_MINUS_DST;
case PIPE_BLEND_REVERSE_SUBTRACT: case PIPE_BLEND_REVERSE_SUBTRACT:
return BLEND_DST_MINUS_SRC; return BLEND_DST_MINUS_SRC;
default: default:
DBG("invalid blend func: %x", func); DBG("invalid blend func: %x", func);
return 0; return 0;
} }
} }
void * void *
fd4_blend_state_create(struct pipe_context *pctx, fd4_blend_state_create(struct pipe_context *pctx,
const struct pipe_blend_state *cso) const struct pipe_blend_state *cso)
{ {
struct fd4_blend_stateobj *so; struct fd4_blend_stateobj *so;
enum a3xx_rop_code rop = ROP_COPY; enum a3xx_rop_code rop = ROP_COPY;
bool reads_dest = false; bool reads_dest = false;
unsigned i, mrt_blend = 0; unsigned i, mrt_blend = 0;
if (cso->logicop_enable) { if (cso->logicop_enable) {
rop = cso->logicop_func; /* maps 1:1 */ rop = cso->logicop_func; /* maps 1:1 */
reads_dest = util_logicop_reads_dest(cso->logicop_func); reads_dest = util_logicop_reads_dest(cso->logicop_func);
} }
so = CALLOC_STRUCT(fd4_blend_stateobj); so = CALLOC_STRUCT(fd4_blend_stateobj);
if (!so) if (!so)
return NULL; return NULL;
so->base = *cso; so->base = *cso;
for (i = 0; i < ARRAY_SIZE(so->rb_mrt); i++) { for (i = 0; i < ARRAY_SIZE(so->rb_mrt); i++) {
const struct pipe_rt_blend_state *rt; const struct pipe_rt_blend_state *rt;
if (cso->independent_blend_enable) if (cso->independent_blend_enable)
rt = &cso->rt[i]; rt = &cso->rt[i];
else else
rt = &cso->rt[0]; rt = &cso->rt[0];
so->rb_mrt[i].blend_control = so->rb_mrt[i].blend_control =
A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(rt->rgb_src_factor)) | A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(
A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) | fd_blend_factor(rt->rgb_src_factor)) |
A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(rt->rgb_dst_factor)) | A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) |
A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(fd_blend_factor(rt->alpha_src_factor)) | A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(
A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(blend_func(rt->alpha_func)) | fd_blend_factor(rt->rgb_dst_factor)) |
A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(fd_blend_factor(rt->alpha_dst_factor)); A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(
fd_blend_factor(rt->alpha_src_factor)) |
A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(
blend_func(rt->alpha_func)) |
A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(
fd_blend_factor(rt->alpha_dst_factor));
so->rb_mrt[i].control = so->rb_mrt[i].control =
A4XX_RB_MRT_CONTROL_ROP_CODE(rop) | A4XX_RB_MRT_CONTROL_ROP_CODE(rop) |
COND(cso->logicop_enable, A4XX_RB_MRT_CONTROL_ROP_ENABLE) | COND(cso->logicop_enable, A4XX_RB_MRT_CONTROL_ROP_ENABLE) |
A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(rt->colormask); A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(rt->colormask);
if (rt->blend_enable) { if (rt->blend_enable) {
so->rb_mrt[i].control |= so->rb_mrt[i].control |= A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE |
A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE | A4XX_RB_MRT_CONTROL_BLEND |
A4XX_RB_MRT_CONTROL_BLEND | A4XX_RB_MRT_CONTROL_BLEND2;
A4XX_RB_MRT_CONTROL_BLEND2; mrt_blend |= (1 << i);
mrt_blend |= (1 << i); }
}
if (reads_dest) { if (reads_dest) {
so->rb_mrt[i].control |= A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE; so->rb_mrt[i].control |= A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE;
mrt_blend |= (1 << i); mrt_blend |= (1 << i);
} }
if (cso->dither) if (cso->dither)
so->rb_mrt[i].buf_info |= A4XX_RB_MRT_BUF_INFO_DITHER_MODE(DITHER_ALWAYS); so->rb_mrt[i].buf_info |=
} A4XX_RB_MRT_BUF_INFO_DITHER_MODE(DITHER_ALWAYS);
}
so->rb_fs_output = A4XX_RB_FS_OUTPUT_ENABLE_BLEND(mrt_blend) | so->rb_fs_output =
COND(cso->independent_blend_enable, A4XX_RB_FS_OUTPUT_INDEPENDENT_BLEND); A4XX_RB_FS_OUTPUT_ENABLE_BLEND(mrt_blend) |
COND(cso->independent_blend_enable, A4XX_RB_FS_OUTPUT_INDEPENDENT_BLEND);
return so; return so;
} }

View file

@ -27,28 +27,28 @@
#ifndef FD4_BLEND_H_ #ifndef FD4_BLEND_H_
#define FD4_BLEND_H_ #define FD4_BLEND_H_
#include "pipe/p_state.h"
#include "pipe/p_context.h" #include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "freedreno_util.h" #include "freedreno_util.h"
struct fd4_blend_stateobj { struct fd4_blend_stateobj {
struct pipe_blend_state base; struct pipe_blend_state base;
struct { struct {
uint32_t control; uint32_t control;
uint32_t buf_info; uint32_t buf_info;
uint32_t blend_control; uint32_t blend_control;
} rb_mrt[A4XX_MAX_RENDER_TARGETS]; } rb_mrt[A4XX_MAX_RENDER_TARGETS];
uint32_t rb_fs_output; uint32_t rb_fs_output;
}; };
static inline struct fd4_blend_stateobj * static inline struct fd4_blend_stateobj *
fd4_blend_stateobj(struct pipe_blend_state *blend) fd4_blend_stateobj(struct pipe_blend_state *blend)
{ {
return (struct fd4_blend_stateobj *)blend; return (struct fd4_blend_stateobj *)blend;
} }
void * fd4_blend_state_create(struct pipe_context *pctx, void *fd4_blend_state_create(struct pipe_context *pctx,
const struct pipe_blend_state *cso); const struct pipe_blend_state *cso);
#endif /* FD4_BLEND_H_ */ #endif /* FD4_BLEND_H_ */

View file

@ -26,8 +26,8 @@
#include "freedreno_query_hw.h" #include "freedreno_query_hw.h"
#include "fd4_context.h"
#include "fd4_blend.h" #include "fd4_blend.h"
#include "fd4_context.h"
#include "fd4_draw.h" #include "fd4_draw.h"
#include "fd4_emit.h" #include "fd4_emit.h"
#include "fd4_gmem.h" #include "fd4_gmem.h"
@ -38,25 +38,24 @@
#include "fd4_zsa.h" #include "fd4_zsa.h"
static void static void
fd4_context_destroy(struct pipe_context *pctx) fd4_context_destroy(struct pipe_context *pctx) in_dt
in_dt
{ {
struct fd4_context *fd4_ctx = fd4_context(fd_context(pctx)); struct fd4_context *fd4_ctx = fd4_context(fd_context(pctx));
u_upload_destroy(fd4_ctx->border_color_uploader); u_upload_destroy(fd4_ctx->border_color_uploader);
pipe_resource_reference(&fd4_ctx->border_color_buf, NULL); pipe_resource_reference(&fd4_ctx->border_color_buf, NULL);
fd_context_destroy(pctx); fd_context_destroy(pctx);
fd_bo_del(fd4_ctx->vs_pvt_mem); fd_bo_del(fd4_ctx->vs_pvt_mem);
fd_bo_del(fd4_ctx->fs_pvt_mem); fd_bo_del(fd4_ctx->fs_pvt_mem);
fd_bo_del(fd4_ctx->vsc_size_mem); fd_bo_del(fd4_ctx->vsc_size_mem);
fd_context_cleanup_common_vbos(&fd4_ctx->base); fd_context_cleanup_common_vbos(&fd4_ctx->base);
fd_hw_query_fini(pctx); fd_hw_query_fini(pctx);
free(fd4_ctx); free(fd4_ctx);
} }
/* clang-format off */ /* clang-format off */
@ -73,55 +72,55 @@ static const uint8_t primtypes[] = {
/* clang-format on */ /* clang-format on */
struct pipe_context * struct pipe_context *
fd4_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) fd4_context_create(struct pipe_screen *pscreen, void *priv,
in_dt unsigned flags) in_dt
{ {
struct fd_screen *screen = fd_screen(pscreen); struct fd_screen *screen = fd_screen(pscreen);
struct fd4_context *fd4_ctx = CALLOC_STRUCT(fd4_context); struct fd4_context *fd4_ctx = CALLOC_STRUCT(fd4_context);
struct pipe_context *pctx; struct pipe_context *pctx;
if (!fd4_ctx) if (!fd4_ctx)
return NULL; return NULL;
pctx = &fd4_ctx->base.base; pctx = &fd4_ctx->base.base;
pctx->screen = pscreen; pctx->screen = pscreen;
fd4_ctx->base.dev = fd_device_ref(screen->dev); fd4_ctx->base.dev = fd_device_ref(screen->dev);
fd4_ctx->base.screen = fd_screen(pscreen); fd4_ctx->base.screen = fd_screen(pscreen);
fd4_ctx->base.last.key = &fd4_ctx->last_key; fd4_ctx->base.last.key = &fd4_ctx->last_key;
pctx->destroy = fd4_context_destroy; pctx->destroy = fd4_context_destroy;
pctx->create_blend_state = fd4_blend_state_create; pctx->create_blend_state = fd4_blend_state_create;
pctx->create_rasterizer_state = fd4_rasterizer_state_create; pctx->create_rasterizer_state = fd4_rasterizer_state_create;
pctx->create_depth_stencil_alpha_state = fd4_zsa_state_create; pctx->create_depth_stencil_alpha_state = fd4_zsa_state_create;
fd4_draw_init(pctx); fd4_draw_init(pctx);
fd4_gmem_init(pctx); fd4_gmem_init(pctx);
fd4_texture_init(pctx); fd4_texture_init(pctx);
fd4_prog_init(pctx); fd4_prog_init(pctx);
fd4_emit_init(pctx); fd4_emit_init(pctx);
pctx = fd_context_init(&fd4_ctx->base, pscreen, primtypes, priv, flags); pctx = fd_context_init(&fd4_ctx->base, pscreen, primtypes, priv, flags);
if (!pctx) if (!pctx)
return NULL; return NULL;
fd_hw_query_init(pctx); fd_hw_query_init(pctx);
fd4_ctx->vs_pvt_mem = fd_bo_new(screen->dev, 0x2000, fd4_ctx->vs_pvt_mem =
DRM_FREEDRENO_GEM_TYPE_KMEM, "vs_pvt"); fd_bo_new(screen->dev, 0x2000, DRM_FREEDRENO_GEM_TYPE_KMEM, "vs_pvt");
fd4_ctx->fs_pvt_mem = fd_bo_new(screen->dev, 0x2000, fd4_ctx->fs_pvt_mem =
DRM_FREEDRENO_GEM_TYPE_KMEM, "fs_pvt"); fd_bo_new(screen->dev, 0x2000, DRM_FREEDRENO_GEM_TYPE_KMEM, "fs_pvt");
fd4_ctx->vsc_size_mem = fd_bo_new(screen->dev, 0x1000, fd4_ctx->vsc_size_mem =
DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_size"); fd_bo_new(screen->dev, 0x1000, DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_size");
fd_context_setup_common_vbos(&fd4_ctx->base); fd_context_setup_common_vbos(&fd4_ctx->base);
fd4_query_context_init(pctx); fd4_query_context_init(pctx);
fd4_ctx->border_color_uploader = u_upload_create(pctx, 4096, 0, fd4_ctx->border_color_uploader =
PIPE_USAGE_STREAM, 0); u_upload_create(pctx, 4096, 0, PIPE_USAGE_STREAM, 0);
return pctx; return pctx;
} }

View file

@ -34,34 +34,34 @@
#include "ir3/ir3_shader.h" #include "ir3/ir3_shader.h"
struct fd4_context { struct fd4_context {
struct fd_context base; struct fd_context base;
struct fd_bo *vs_pvt_mem, *fs_pvt_mem; struct fd_bo *vs_pvt_mem, *fs_pvt_mem;
/* This only needs to be 4 * num_of_pipes bytes (ie. 32 bytes). We /* This only needs to be 4 * num_of_pipes bytes (ie. 32 bytes). We
* could combine it with another allocation. * could combine it with another allocation.
* *
* (upper area used as scratch bo.. see fd4_query) * (upper area used as scratch bo.. see fd4_query)
*/ */
struct fd_bo *vsc_size_mem; struct fd_bo *vsc_size_mem;
struct u_upload_mgr *border_color_uploader; struct u_upload_mgr *border_color_uploader;
struct pipe_resource *border_color_buf; struct pipe_resource *border_color_buf;
/* bitmask of samplers which need astc srgb workaround: */ /* bitmask of samplers which need astc srgb workaround: */
uint16_t vastc_srgb, fastc_srgb; uint16_t vastc_srgb, fastc_srgb;
/* storage for ctx->last.key: */ /* storage for ctx->last.key: */
struct ir3_shader_key last_key; struct ir3_shader_key last_key;
}; };
static inline struct fd4_context * static inline struct fd4_context *
fd4_context(struct fd_context *ctx) fd4_context(struct fd_context *ctx)
{ {
return (struct fd4_context *)ctx; return (struct fd4_context *)ctx;
} }
struct pipe_context * struct pipe_context *fd4_context_create(struct pipe_screen *pscreen, void *priv,
fd4_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags); unsigned flags);
#endif /* FD4_CONTEXT_H_ */ #endif /* FD4_CONTEXT_H_ */

View file

@ -25,150 +25,148 @@
*/ */
#include "pipe/p_state.h" #include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h" #include "util/u_memory.h"
#include "util/u_prim.h" #include "util/u_prim.h"
#include "util/u_string.h"
#include "freedreno_state.h"
#include "freedreno_resource.h" #include "freedreno_resource.h"
#include "freedreno_state.h"
#include "fd4_draw.h"
#include "fd4_context.h" #include "fd4_context.h"
#include "fd4_draw.h"
#include "fd4_emit.h" #include "fd4_emit.h"
#include "fd4_program.h"
#include "fd4_format.h" #include "fd4_format.h"
#include "fd4_program.h"
#include "fd4_zsa.h" #include "fd4_zsa.h"
static void static void
draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring, draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct fd4_emit *emit, unsigned index_offset) struct fd4_emit *emit, unsigned index_offset) assert_dt
assert_dt
{ {
const struct pipe_draw_info *info = emit->info; const struct pipe_draw_info *info = emit->info;
enum pc_di_primtype primtype = ctx->primtypes[info->mode]; enum pc_di_primtype primtype = ctx->primtypes[info->mode];
fd4_emit_state(ctx, ring, emit); fd4_emit_state(ctx, ring, emit);
if (emit->dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE)) if (emit->dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE))
fd4_emit_vertex_bufs(ring, emit); fd4_emit_vertex_bufs(ring, emit);
OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2); OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
OUT_RING(ring, info->index_size ? info->index_bias : emit->draw->start); /* VFD_INDEX_OFFSET */ OUT_RING(ring, info->index_size ? info->index_bias
OUT_RING(ring, info->start_instance); /* ??? UNKNOWN_2209 */ : emit->draw->start); /* VFD_INDEX_OFFSET */
OUT_RING(ring, info->start_instance); /* ??? UNKNOWN_2209 */
OUT_PKT0(ring, REG_A4XX_PC_RESTART_INDEX, 1); OUT_PKT0(ring, REG_A4XX_PC_RESTART_INDEX, 1);
OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */ OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
info->restart_index : 0xffffffff); info->restart_index
: 0xffffffff);
/* points + psize -> spritelist: */ /* points + psize -> spritelist: */
if (ctx->rasterizer->point_size_per_vertex && if (ctx->rasterizer->point_size_per_vertex &&
fd4_emit_get_vp(emit)->writes_psize && fd4_emit_get_vp(emit)->writes_psize && (info->mode == PIPE_PRIM_POINTS))
(info->mode == PIPE_PRIM_POINTS)) primtype = DI_PT_POINTLIST_PSIZE;
primtype = DI_PT_POINTLIST_PSIZE;
fd4_draw_emit(ctx->batch, ring, primtype, fd4_draw_emit(ctx->batch, ring, primtype,
emit->binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY, emit->binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY, info,
info, emit->indirect, emit->draw, index_offset); emit->indirect, emit->draw, index_offset);
} }
static bool static bool
fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info, fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
const struct pipe_draw_indirect_info *indirect, const struct pipe_draw_indirect_info *indirect,
const struct pipe_draw_start_count *draw, const struct pipe_draw_start_count *draw,
unsigned index_offset) unsigned index_offset) in_dt
in_dt
{ {
struct fd4_context *fd4_ctx = fd4_context(ctx); struct fd4_context *fd4_ctx = fd4_context(ctx);
struct fd4_emit emit = { struct fd4_emit emit = {
.debug = &ctx->debug, .debug = &ctx->debug,
.vtx = &ctx->vtx, .vtx = &ctx->vtx,
.info = info, .info = info,
.indirect = indirect, .indirect = indirect,
.draw = draw, .draw = draw,
.key = { .key =
.vs = ctx->prog.vs, {
.fs = ctx->prog.fs, .vs = ctx->prog.vs,
.key = { .fs = ctx->prog.fs,
.rasterflat = ctx->rasterizer->flatshade, .key =
.ucp_enables = ctx->rasterizer->clip_plane_enable, {
.has_per_samp = fd4_ctx->fastc_srgb || fd4_ctx->vastc_srgb, .rasterflat = ctx->rasterizer->flatshade,
.vastc_srgb = fd4_ctx->vastc_srgb, .ucp_enables = ctx->rasterizer->clip_plane_enable,
.fastc_srgb = fd4_ctx->fastc_srgb, .has_per_samp = fd4_ctx->fastc_srgb || fd4_ctx->vastc_srgb,
}, .vastc_srgb = fd4_ctx->vastc_srgb,
}, .fastc_srgb = fd4_ctx->fastc_srgb,
.rasterflat = ctx->rasterizer->flatshade, },
.sprite_coord_enable = ctx->rasterizer->sprite_coord_enable, },
.sprite_coord_mode = ctx->rasterizer->sprite_coord_mode, .rasterflat = ctx->rasterizer->flatshade,
}; .sprite_coord_enable = ctx->rasterizer->sprite_coord_enable,
.sprite_coord_mode = ctx->rasterizer->sprite_coord_mode,
};
if (info->mode != PIPE_PRIM_MAX && if (info->mode != PIPE_PRIM_MAX && !indirect && !info->primitive_restart &&
!indirect && !u_trim_pipe_prim(info->mode, (unsigned *)&draw->count))
!info->primitive_restart && return false;
!u_trim_pipe_prim(info->mode, (unsigned*)&draw->count))
return false;
ir3_fixup_shader_state(&ctx->base, &emit.key.key); ir3_fixup_shader_state(&ctx->base, &emit.key.key);
enum fd_dirty_3d_state dirty = ctx->dirty; enum fd_dirty_3d_state dirty = ctx->dirty;
emit.prog = fd4_program_state(ir3_cache_lookup(ctx->shader_cache, &emit.key, &ctx->debug)); emit.prog = fd4_program_state(
ir3_cache_lookup(ctx->shader_cache, &emit.key, &ctx->debug));
/* bail if compile failed: */ /* bail if compile failed: */
if (!emit.prog) if (!emit.prog)
return false; return false;
const struct ir3_shader_variant *vp = fd4_emit_get_vp(&emit); const struct ir3_shader_variant *vp = fd4_emit_get_vp(&emit);
const struct ir3_shader_variant *fp = fd4_emit_get_fp(&emit); const struct ir3_shader_variant *fp = fd4_emit_get_fp(&emit);
ir3_update_max_tf_vtx(ctx, vp); ir3_update_max_tf_vtx(ctx, vp);
/* do regular pass first: */ /* do regular pass first: */
if (unlikely(ctx->stats_users > 0)) { if (unlikely(ctx->stats_users > 0)) {
ctx->stats.vs_regs += ir3_shader_halfregs(vp); ctx->stats.vs_regs += ir3_shader_halfregs(vp);
ctx->stats.fs_regs += ir3_shader_halfregs(fp); ctx->stats.fs_regs += ir3_shader_halfregs(fp);
} }
emit.binning_pass = false; emit.binning_pass = false;
emit.dirty = dirty; emit.dirty = dirty;
struct fd_ringbuffer *ring = ctx->batch->draw; struct fd_ringbuffer *ring = ctx->batch->draw;
if (ctx->rasterizer->rasterizer_discard) { if (ctx->rasterizer->rasterizer_discard) {
fd_wfi(ctx->batch, ring); fd_wfi(ctx->batch, ring);
OUT_PKT3(ring, CP_REG_RMW, 3); OUT_PKT3(ring, CP_REG_RMW, 3);
OUT_RING(ring, REG_A4XX_RB_RENDER_CONTROL); OUT_RING(ring, REG_A4XX_RB_RENDER_CONTROL);
OUT_RING(ring, ~A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE); OUT_RING(ring, ~A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE);
OUT_RING(ring, A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE); OUT_RING(ring, A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE);
} }
draw_impl(ctx, ctx->batch->draw, &emit, index_offset); draw_impl(ctx, ctx->batch->draw, &emit, index_offset);
if (ctx->rasterizer->rasterizer_discard) { if (ctx->rasterizer->rasterizer_discard) {
fd_wfi(ctx->batch, ring); fd_wfi(ctx->batch, ring);
OUT_PKT3(ring, CP_REG_RMW, 3); OUT_PKT3(ring, CP_REG_RMW, 3);
OUT_RING(ring, REG_A4XX_RB_RENDER_CONTROL); OUT_RING(ring, REG_A4XX_RB_RENDER_CONTROL);
OUT_RING(ring, ~A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE); OUT_RING(ring, ~A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE);
OUT_RING(ring, 0); OUT_RING(ring, 0);
} }
/* and now binning pass: */ /* and now binning pass: */
emit.binning_pass = true; emit.binning_pass = true;
emit.dirty = dirty & ~(FD_DIRTY_BLEND); emit.dirty = dirty & ~(FD_DIRTY_BLEND);
emit.vs = NULL; /* we changed key so need to refetch vs */ emit.vs = NULL; /* we changed key so need to refetch vs */
emit.fs = NULL; emit.fs = NULL;
draw_impl(ctx, ctx->batch->binning, &emit, index_offset); draw_impl(ctx, ctx->batch->binning, &emit, index_offset);
fd_context_all_clean(ctx); fd_context_all_clean(ctx);
return true; return true;
} }
void void
fd4_draw_init(struct pipe_context *pctx) fd4_draw_init(struct pipe_context *pctx) disable_thread_safety_analysis
disable_thread_safety_analysis
{ {
struct fd_context *ctx = fd_context(pctx); struct fd_context *ctx = fd_context(pctx);
ctx->draw_vbo = fd4_draw_vbo; ctx->draw_vbo = fd4_draw_vbo;
} }

View file

@ -35,118 +35,114 @@ void fd4_draw_init(struct pipe_context *pctx);
/* draw packet changed on a4xx, so cannot reuse one from a2xx/a3xx.. */ /* draw packet changed on a4xx, so cannot reuse one from a2xx/a3xx.. */
static inline uint32_t DRAW4(enum pc_di_primtype prim_type, static inline uint32_t
enum pc_di_src_sel source_select, enum a4xx_index_size index_size, DRAW4(enum pc_di_primtype prim_type, enum pc_di_src_sel source_select,
enum pc_di_vis_cull_mode vis_cull_mode) enum a4xx_index_size index_size, enum pc_di_vis_cull_mode vis_cull_mode)
{ {
return CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(prim_type) | return CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(prim_type) |
CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(source_select) | CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(source_select) |
CP_DRAW_INDX_OFFSET_0_INDEX_SIZE(index_size) | CP_DRAW_INDX_OFFSET_0_INDEX_SIZE(index_size) |
CP_DRAW_INDX_OFFSET_0_VIS_CULL(vis_cull_mode); CP_DRAW_INDX_OFFSET_0_VIS_CULL(vis_cull_mode);
} }
static inline void static inline void
fd4_draw(struct fd_batch *batch, struct fd_ringbuffer *ring, fd4_draw(struct fd_batch *batch, struct fd_ringbuffer *ring,
enum pc_di_primtype primtype, enum pc_di_primtype primtype, enum pc_di_vis_cull_mode vismode,
enum pc_di_vis_cull_mode vismode, enum pc_di_src_sel src_sel, uint32_t count, uint32_t instances,
enum pc_di_src_sel src_sel, uint32_t count, enum a4xx_index_size idx_type, uint32_t max_indices,
uint32_t instances, enum a4xx_index_size idx_type, uint32_t idx_offset, struct pipe_resource *idx_buffer)
uint32_t max_indices, uint32_t idx_offset,
struct pipe_resource *idx_buffer)
{ {
/* for debug after a lock up, write a unique counter value /* for debug after a lock up, write a unique counter value
* to scratch7 for each draw, to make it easier to match up * to scratch7 for each draw, to make it easier to match up
* register dumps to cmdstream. The combination of IB * register dumps to cmdstream. The combination of IB
* (scratch6) and DRAW is enough to "triangulate" the * (scratch6) and DRAW is enough to "triangulate" the
* particular draw that caused lockup. * particular draw that caused lockup.
*/ */
emit_marker(ring, 7); emit_marker(ring, 7);
OUT_PKT3(ring, CP_DRAW_INDX_OFFSET, idx_buffer ? 6 : 3); OUT_PKT3(ring, CP_DRAW_INDX_OFFSET, idx_buffer ? 6 : 3);
if (vismode == USE_VISIBILITY) { if (vismode == USE_VISIBILITY) {
/* leave vis mode blank for now, it will be patched up when /* leave vis mode blank for now, it will be patched up when
* we know if we are binning or not * we know if we are binning or not
*/ */
OUT_RINGP(ring, DRAW4(primtype, src_sel, idx_type, 0), OUT_RINGP(ring, DRAW4(primtype, src_sel, idx_type, 0),
&batch->draw_patches); &batch->draw_patches);
} else { } else {
OUT_RING(ring, DRAW4(primtype, src_sel, idx_type, vismode)); OUT_RING(ring, DRAW4(primtype, src_sel, idx_type, vismode));
} }
OUT_RING(ring, instances); /* NumInstances */ OUT_RING(ring, instances); /* NumInstances */
OUT_RING(ring, count); /* NumIndices */ OUT_RING(ring, count); /* NumIndices */
if (idx_buffer) { if (idx_buffer) {
OUT_RING(ring, 0x0); /* XXX */ OUT_RING(ring, 0x0); /* XXX */
OUT_RELOC(ring, fd_resource(idx_buffer)->bo, idx_offset, 0, 0); OUT_RELOC(ring, fd_resource(idx_buffer)->bo, idx_offset, 0, 0);
OUT_RING (ring, max_indices); OUT_RING(ring, max_indices);
} }
emit_marker(ring, 7); emit_marker(ring, 7);
fd_reset_wfi(batch); fd_reset_wfi(batch);
} }
static inline void static inline void
fd4_draw_emit(struct fd_batch *batch, struct fd_ringbuffer *ring, fd4_draw_emit(struct fd_batch *batch, struct fd_ringbuffer *ring,
enum pc_di_primtype primtype, enum pc_di_primtype primtype, enum pc_di_vis_cull_mode vismode,
enum pc_di_vis_cull_mode vismode, const struct pipe_draw_info *info,
const struct pipe_draw_info *info,
const struct pipe_draw_indirect_info *indirect, const struct pipe_draw_indirect_info *indirect,
const struct pipe_draw_start_count *draw, const struct pipe_draw_start_count *draw, unsigned index_offset)
unsigned index_offset)
{ {
struct pipe_resource *idx_buffer = NULL; struct pipe_resource *idx_buffer = NULL;
enum a4xx_index_size idx_type; enum a4xx_index_size idx_type;
enum pc_di_src_sel src_sel; enum pc_di_src_sel src_sel;
uint32_t idx_size, idx_offset; uint32_t idx_size, idx_offset;
if (indirect && indirect->buffer) { if (indirect && indirect->buffer) {
struct fd_resource *ind = fd_resource(indirect->buffer); struct fd_resource *ind = fd_resource(indirect->buffer);
emit_marker(ring, 7); emit_marker(ring, 7);
if (info->index_size) { if (info->index_size) {
struct pipe_resource *idx = info->index.resource; struct pipe_resource *idx = info->index.resource;
OUT_PKT3(ring, CP_DRAW_INDX_INDIRECT, 4); OUT_PKT3(ring, CP_DRAW_INDX_INDIRECT, 4);
OUT_RINGP(ring, DRAW4(primtype, DI_SRC_SEL_DMA, OUT_RINGP(ring,
fd4_size2indextype(info->index_size), 0), DRAW4(primtype, DI_SRC_SEL_DMA,
&batch->draw_patches); fd4_size2indextype(info->index_size), 0),
OUT_RELOC(ring, fd_resource(idx)->bo, index_offset, 0, 0); &batch->draw_patches);
OUT_RING(ring, A4XX_CP_DRAW_INDX_INDIRECT_2_INDX_SIZE( OUT_RELOC(ring, fd_resource(idx)->bo, index_offset, 0, 0);
idx->width0 - index_offset)); OUT_RING(ring, A4XX_CP_DRAW_INDX_INDIRECT_2_INDX_SIZE(idx->width0 -
OUT_RELOC(ring, ind->bo, indirect->offset, 0, 0); index_offset));
} else { OUT_RELOC(ring, ind->bo, indirect->offset, 0, 0);
OUT_PKT3(ring, CP_DRAW_INDIRECT, 2); } else {
OUT_RINGP(ring, DRAW4(primtype, DI_SRC_SEL_AUTO_INDEX, 0, 0), OUT_PKT3(ring, CP_DRAW_INDIRECT, 2);
&batch->draw_patches); OUT_RINGP(ring, DRAW4(primtype, DI_SRC_SEL_AUTO_INDEX, 0, 0),
OUT_RELOC(ring, ind->bo, indirect->offset, 0, 0); &batch->draw_patches);
} OUT_RELOC(ring, ind->bo, indirect->offset, 0, 0);
}
emit_marker(ring, 7); emit_marker(ring, 7);
fd_reset_wfi(batch); fd_reset_wfi(batch);
return; return;
} }
if (info->index_size) { if (info->index_size) {
assert(!info->has_user_indices); assert(!info->has_user_indices);
idx_buffer = info->index.resource; idx_buffer = info->index.resource;
idx_type = fd4_size2indextype(info->index_size); idx_type = fd4_size2indextype(info->index_size);
idx_size = info->index_size * draw->count; idx_size = info->index_size * draw->count;
idx_offset = index_offset + draw->start * info->index_size; idx_offset = index_offset + draw->start * info->index_size;
src_sel = DI_SRC_SEL_DMA; src_sel = DI_SRC_SEL_DMA;
} else { } else {
idx_buffer = NULL; idx_buffer = NULL;
idx_type = INDEX4_SIZE_32_BIT; idx_type = INDEX4_SIZE_32_BIT;
idx_size = 0; idx_size = 0;
idx_offset = 0; idx_offset = 0;
src_sel = DI_SRC_SEL_AUTO_INDEX; src_sel = DI_SRC_SEL_AUTO_INDEX;
} }
fd4_draw(batch, ring, primtype, vismode, src_sel, fd4_draw(batch, ring, primtype, vismode, src_sel, draw->count,
draw->count, info->instance_count, info->instance_count, idx_type, idx_size, idx_offset, idx_buffer);
idx_type, idx_size, idx_offset, idx_buffer);
} }
#endif /* FD4_DRAW_H_ */ #endif /* FD4_DRAW_H_ */

File diff suppressed because it is too large Load diff

View file

@ -29,76 +29,79 @@
#include "pipe/p_context.h" #include "pipe/p_context.h"
#include "freedreno_context.h"
#include "fd4_format.h" #include "fd4_format.h"
#include "fd4_program.h" #include "fd4_program.h"
#include "freedreno_context.h"
#include "ir3_gallium.h" #include "ir3_gallium.h"
struct fd_ringbuffer; struct fd_ringbuffer;
void fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, void fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, unsigned nr_bufs,
unsigned nr_bufs, struct pipe_surface **bufs); struct pipe_surface **bufs);
/* grouped together emit-state for prog/vertex/state emit: */ /* grouped together emit-state for prog/vertex/state emit: */
struct fd4_emit { struct fd4_emit {
struct pipe_debug_callback *debug; struct pipe_debug_callback *debug;
const struct fd_vertex_state *vtx; const struct fd_vertex_state *vtx;
const struct fd4_program_state *prog; const struct fd4_program_state *prog;
const struct pipe_draw_info *info; const struct pipe_draw_info *info;
const struct pipe_draw_indirect_info *indirect; const struct pipe_draw_indirect_info *indirect;
const struct pipe_draw_start_count *draw; const struct pipe_draw_start_count *draw;
bool binning_pass; bool binning_pass;
struct ir3_cache_key key; struct ir3_cache_key key;
enum fd_dirty_3d_state dirty; enum fd_dirty_3d_state dirty;
uint32_t sprite_coord_enable; /* bitmask */ uint32_t sprite_coord_enable; /* bitmask */
bool sprite_coord_mode; bool sprite_coord_mode;
bool rasterflat; bool rasterflat;
bool no_decode_srgb; bool no_decode_srgb;
bool skip_consts; bool skip_consts;
/* cached to avoid repeated lookups of same variants: */ /* cached to avoid repeated lookups of same variants: */
const struct ir3_shader_variant *vs, *fs; const struct ir3_shader_variant *vs, *fs;
/* TODO: other shader stages.. */ /* TODO: other shader stages.. */
}; };
static inline enum a4xx_color_fmt fd4_emit_format(struct pipe_surface *surf) static inline enum a4xx_color_fmt
fd4_emit_format(struct pipe_surface *surf)
{ {
if (!surf) if (!surf)
return 0; return 0;
return fd4_pipe2color(surf->format); return fd4_pipe2color(surf->format);
} }
static inline const struct ir3_shader_variant * static inline const struct ir3_shader_variant *
fd4_emit_get_vp(struct fd4_emit *emit) fd4_emit_get_vp(struct fd4_emit *emit)
{ {
if (!emit->vs) { if (!emit->vs) {
emit->vs = emit->binning_pass ? emit->prog->bs : emit->prog->vs; emit->vs = emit->binning_pass ? emit->prog->bs : emit->prog->vs;
} }
return emit->vs; return emit->vs;
} }
static inline const struct ir3_shader_variant * static inline const struct ir3_shader_variant *
fd4_emit_get_fp(struct fd4_emit *emit) fd4_emit_get_fp(struct fd4_emit *emit)
{ {
if (!emit->fs) { if (!emit->fs) {
if (emit->binning_pass) { if (emit->binning_pass) {
/* use dummy stateobj to simplify binning vs non-binning: */ /* use dummy stateobj to simplify binning vs non-binning: */
static const struct ir3_shader_variant binning_fs = {}; static const struct ir3_shader_variant binning_fs = {};
emit->fs = &binning_fs; emit->fs = &binning_fs;
} else { } else {
emit->fs = emit->prog->fs; emit->fs = emit->prog->fs;
} }
} }
return emit->fs; return emit->fs;
} }
void fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit) assert_dt; void fd4_emit_vertex_bufs(struct fd_ringbuffer *ring,
struct fd4_emit *emit) assert_dt;
void fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, void fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct fd4_emit *emit) assert_dt; struct fd4_emit *emit) assert_dt;
void fd4_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt; void fd4_emit_restore(struct fd_batch *batch,
struct fd_ringbuffer *ring) assert_dt;
void fd4_emit_init_screen(struct pipe_screen *pscreen); void fd4_emit_init_screen(struct pipe_screen *pscreen);
void fd4_emit_init(struct pipe_context *pctx); void fd4_emit_init(struct pipe_context *pctx);
@ -106,7 +109,7 @@ void fd4_emit_init(struct pipe_context *pctx);
static inline void static inline void
fd4_emit_ib(struct fd_ringbuffer *ring, struct fd_ringbuffer *target) fd4_emit_ib(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
{ {
__OUT_IB(ring, true, target); __OUT_IB(ring, true, target);
} }
#endif /* FD4_EMIT_H */ #endif /* FD4_EMIT_H */

View file

@ -29,48 +29,41 @@
#include "fd4_format.h" #include "fd4_format.h"
/* Specifies the table of all the formats and their features. Also supplies /* Specifies the table of all the formats and their features. Also supplies
* the helpers that look up various data in those tables. * the helpers that look up various data in those tables.
*/ */
struct fd4_format { struct fd4_format {
enum a4xx_vtx_fmt vtx; enum a4xx_vtx_fmt vtx;
enum a4xx_tex_fmt tex; enum a4xx_tex_fmt tex;
enum a4xx_color_fmt rb; enum a4xx_color_fmt rb;
enum a3xx_color_swap swap; enum a3xx_color_swap swap;
boolean present; boolean present;
}; };
/* vertex + texture */ /* vertex + texture */
#define VT(pipe, fmt, rbfmt, swapfmt) \ #define VT(pipe, fmt, rbfmt, swapfmt) \
[PIPE_FORMAT_ ## pipe] = { \ [PIPE_FORMAT_##pipe] = {.present = 1, \
.present = 1, \ .vtx = VFMT4_##fmt, \
.vtx = VFMT4_ ## fmt, \ .tex = TFMT4_##fmt, \
.tex = TFMT4_ ## fmt, \ .rb = RB4_##rbfmt, \
.rb = RB4_ ## rbfmt, \ .swap = swapfmt}
.swap = swapfmt \
}
/* texture-only */ /* texture-only */
#define _T(pipe, fmt, rbfmt, swapfmt) \ #define _T(pipe, fmt, rbfmt, swapfmt) \
[PIPE_FORMAT_ ## pipe] = { \ [PIPE_FORMAT_##pipe] = {.present = 1, \
.present = 1, \ .vtx = VFMT4_NONE, \
.vtx = VFMT4_NONE, \ .tex = TFMT4_##fmt, \
.tex = TFMT4_ ## fmt, \ .rb = RB4_##rbfmt, \
.rb = RB4_ ## rbfmt, \ .swap = swapfmt}
.swap = swapfmt \
}
/* vertex-only */ /* vertex-only */
#define V_(pipe, fmt, rbfmt, swapfmt) \ #define V_(pipe, fmt, rbfmt, swapfmt) \
[PIPE_FORMAT_ ## pipe] = { \ [PIPE_FORMAT_##pipe] = {.present = 1, \
.present = 1, \ .vtx = VFMT4_##fmt, \
.vtx = VFMT4_ ## fmt, \ .tex = TFMT4_NONE, \
.tex = TFMT4_NONE, \ .rb = RB4_##rbfmt, \
.rb = RB4_ ## rbfmt, \ .swap = swapfmt}
.swap = swapfmt \
}
/* clang-format off */ /* clang-format off */
static struct fd4_format formats[PIPE_FORMAT_COUNT] = { static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
@ -340,84 +333,94 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
enum a4xx_vtx_fmt enum a4xx_vtx_fmt
fd4_pipe2vtx(enum pipe_format format) fd4_pipe2vtx(enum pipe_format format)
{ {
if (!formats[format].present) if (!formats[format].present)
return VFMT4_NONE; return VFMT4_NONE;
return formats[format].vtx; return formats[format].vtx;
} }
/* convert pipe format to texture sampler format: */ /* convert pipe format to texture sampler format: */
enum a4xx_tex_fmt enum a4xx_tex_fmt
fd4_pipe2tex(enum pipe_format format) fd4_pipe2tex(enum pipe_format format)
{ {
if (!formats[format].present) if (!formats[format].present)
return TFMT4_NONE; return TFMT4_NONE;
return formats[format].tex; return formats[format].tex;
} }
/* convert pipe format to MRT / copydest format used for render-target: */ /* convert pipe format to MRT / copydest format used for render-target: */
enum a4xx_color_fmt enum a4xx_color_fmt
fd4_pipe2color(enum pipe_format format) fd4_pipe2color(enum pipe_format format)
{ {
if (!formats[format].present) if (!formats[format].present)
return RB4_NONE; return RB4_NONE;
return formats[format].rb; return formats[format].rb;
} }
enum a3xx_color_swap enum a3xx_color_swap
fd4_pipe2swap(enum pipe_format format) fd4_pipe2swap(enum pipe_format format)
{ {
if (!formats[format].present) if (!formats[format].present)
return WZYX; return WZYX;
return formats[format].swap; return formats[format].swap;
} }
enum a4xx_depth_format enum a4xx_depth_format
fd4_pipe2depth(enum pipe_format format) fd4_pipe2depth(enum pipe_format format)
{ {
switch (format) { switch (format) {
case PIPE_FORMAT_Z16_UNORM: case PIPE_FORMAT_Z16_UNORM:
return DEPTH4_16; return DEPTH4_16;
case PIPE_FORMAT_Z24X8_UNORM: case PIPE_FORMAT_Z24X8_UNORM:
case PIPE_FORMAT_Z24_UNORM_S8_UINT: case PIPE_FORMAT_Z24_UNORM_S8_UINT:
case PIPE_FORMAT_X8Z24_UNORM: case PIPE_FORMAT_X8Z24_UNORM:
case PIPE_FORMAT_S8_UINT_Z24_UNORM: case PIPE_FORMAT_S8_UINT_Z24_UNORM:
return DEPTH4_24_8; return DEPTH4_24_8;
case PIPE_FORMAT_Z32_FLOAT: case PIPE_FORMAT_Z32_FLOAT:
case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
return DEPTH4_32; return DEPTH4_32;
default: default:
return ~0; return ~0;
} }
} }
static inline enum a4xx_tex_swiz static inline enum a4xx_tex_swiz
tex_swiz(unsigned swiz) tex_swiz(unsigned swiz)
{ {
switch (swiz) { switch (swiz) {
default: default:
case PIPE_SWIZZLE_X: return A4XX_TEX_X; case PIPE_SWIZZLE_X:
case PIPE_SWIZZLE_Y: return A4XX_TEX_Y; return A4XX_TEX_X;
case PIPE_SWIZZLE_Z: return A4XX_TEX_Z; case PIPE_SWIZZLE_Y:
case PIPE_SWIZZLE_W: return A4XX_TEX_W; return A4XX_TEX_Y;
case PIPE_SWIZZLE_0: return A4XX_TEX_ZERO; case PIPE_SWIZZLE_Z:
case PIPE_SWIZZLE_1: return A4XX_TEX_ONE; return A4XX_TEX_Z;
} case PIPE_SWIZZLE_W:
return A4XX_TEX_W;
case PIPE_SWIZZLE_0:
return A4XX_TEX_ZERO;
case PIPE_SWIZZLE_1:
return A4XX_TEX_ONE;
}
} }
uint32_t uint32_t
fd4_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g, fd4_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g,
unsigned swizzle_b, unsigned swizzle_a) unsigned swizzle_b, unsigned swizzle_a)
{ {
const struct util_format_description *desc = const struct util_format_description *desc = util_format_description(format);
util_format_description(format); unsigned char swiz[4] =
unsigned char swiz[4] = { {
swizzle_r, swizzle_g, swizzle_b, swizzle_a, swizzle_r,
}, rswiz[4]; swizzle_g,
swizzle_b,
swizzle_a,
},
rswiz[4];
util_format_compose_swizzles(desc->swizzle, swiz, rswiz); util_format_compose_swizzles(desc->swizzle, swiz, rswiz);
return A4XX_TEX_CONST_0_SWIZ_X(tex_swiz(rswiz[0])) | return A4XX_TEX_CONST_0_SWIZ_X(tex_swiz(rswiz[0])) |
A4XX_TEX_CONST_0_SWIZ_Y(tex_swiz(rswiz[1])) | A4XX_TEX_CONST_0_SWIZ_Y(tex_swiz(rswiz[1])) |
A4XX_TEX_CONST_0_SWIZ_Z(tex_swiz(rswiz[2])) | A4XX_TEX_CONST_0_SWIZ_Z(tex_swiz(rswiz[2])) |
A4XX_TEX_CONST_0_SWIZ_W(tex_swiz(rswiz[3])); A4XX_TEX_CONST_0_SWIZ_W(tex_swiz(rswiz[3]));
} }

View file

@ -38,6 +38,7 @@ enum a3xx_color_swap fd4_pipe2swap(enum pipe_format format);
enum a4xx_depth_format fd4_pipe2depth(enum pipe_format format); enum a4xx_depth_format fd4_pipe2depth(enum pipe_format format);
uint32_t fd4_tex_swiz(enum pipe_format format, unsigned swizzle_r, uint32_t fd4_tex_swiz(enum pipe_format format, unsigned swizzle_r,
unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a); unsigned swizzle_g, unsigned swizzle_b,
unsigned swizzle_a);
#endif /* FD4_UTIL_H_ */ #endif /* FD4_UTIL_H_ */

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -36,20 +36,20 @@
struct fd4_emit; struct fd4_emit;
struct fd4_program_state { struct fd4_program_state {
struct ir3_program_state base; struct ir3_program_state base;
struct ir3_shader_variant *bs; /* VS for when emit->binning_pass */ struct ir3_shader_variant *bs; /* VS for when emit->binning_pass */
struct ir3_shader_variant *vs; struct ir3_shader_variant *vs;
struct ir3_shader_variant *fs; /* FS for when !emit->binning_pass */ struct ir3_shader_variant *fs; /* FS for when !emit->binning_pass */
}; };
static inline struct fd4_program_state * static inline struct fd4_program_state *
fd4_program_state(struct ir3_program_state *state) fd4_program_state(struct ir3_program_state *state)
{ {
return (struct fd4_program_state *)state; return (struct fd4_program_state *)state;
} }
void fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, void fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, int nr,
int nr, struct pipe_surface **bufs); struct pipe_surface **bufs);
void fd4_prog_init(struct pipe_context *pctx); void fd4_prog_init(struct pipe_context *pctx);

View file

@ -24,18 +24,17 @@
* Rob Clark <robclark@freedesktop.org> * Rob Clark <robclark@freedesktop.org>
*/ */
#include "freedreno_query_hw.h"
#include "freedreno_context.h" #include "freedreno_context.h"
#include "freedreno_query_hw.h"
#include "freedreno_util.h" #include "freedreno_util.h"
#include "fd4_query.h"
#include "fd4_context.h" #include "fd4_context.h"
#include "fd4_draw.h" #include "fd4_draw.h"
#include "fd4_format.h" #include "fd4_format.h"
#include "fd4_query.h"
struct fd_rb_samp_ctrs { struct fd_rb_samp_ctrs {
uint64_t ctr[16]; uint64_t ctr[16];
}; };
/* /*
@ -48,57 +47,56 @@ struct fd_rb_samp_ctrs {
static struct fd_hw_sample * static struct fd_hw_sample *
occlusion_get_sample(struct fd_batch *batch, struct fd_ringbuffer *ring) occlusion_get_sample(struct fd_batch *batch, struct fd_ringbuffer *ring)
{ {
struct fd_hw_sample *samp = struct fd_hw_sample *samp =
fd_hw_sample_init(batch, sizeof(struct fd_rb_samp_ctrs)); fd_hw_sample_init(batch, sizeof(struct fd_rb_samp_ctrs));
/* low bits of sample addr should be zero (since they are control /* low bits of sample addr should be zero (since they are control
* flags in RB_SAMPLE_COUNT_CONTROL): * flags in RB_SAMPLE_COUNT_CONTROL):
*/ */
debug_assert((samp->offset & 0x3) == 0); debug_assert((samp->offset & 0x3) == 0);
/* Set RB_SAMPLE_COUNT_ADDR to samp->offset plus value of /* Set RB_SAMPLE_COUNT_ADDR to samp->offset plus value of
* HW_QUERY_BASE_REG register: * HW_QUERY_BASE_REG register:
*/ */
OUT_PKT3(ring, CP_SET_CONSTANT, 3); OUT_PKT3(ring, CP_SET_CONSTANT, 3);
OUT_RING(ring, CP_REG(REG_A4XX_RB_SAMPLE_COUNT_CONTROL) | 0x80000000); OUT_RING(ring, CP_REG(REG_A4XX_RB_SAMPLE_COUNT_CONTROL) | 0x80000000);
OUT_RING(ring, HW_QUERY_BASE_REG); OUT_RING(ring, HW_QUERY_BASE_REG);
OUT_RING(ring, A4XX_RB_SAMPLE_COUNT_CONTROL_COPY | OUT_RING(ring, A4XX_RB_SAMPLE_COUNT_CONTROL_COPY | samp->offset);
samp->offset);
OUT_PKT3(ring, CP_DRAW_INDX_OFFSET, 3); OUT_PKT3(ring, CP_DRAW_INDX_OFFSET, 3);
OUT_RING(ring, DRAW4(DI_PT_POINTLIST_PSIZE, DI_SRC_SEL_AUTO_INDEX, OUT_RING(ring, DRAW4(DI_PT_POINTLIST_PSIZE, DI_SRC_SEL_AUTO_INDEX,
INDEX4_SIZE_32_BIT, USE_VISIBILITY)); INDEX4_SIZE_32_BIT, USE_VISIBILITY));
OUT_RING(ring, 1); /* NumInstances */ OUT_RING(ring, 1); /* NumInstances */
OUT_RING(ring, 0); /* NumIndices */ OUT_RING(ring, 0); /* NumIndices */
fd_event_write(batch, ring, ZPASS_DONE); fd_event_write(batch, ring, ZPASS_DONE);
return samp; return samp;
} }
static uint64_t static uint64_t
count_samples(const struct fd_rb_samp_ctrs *start, count_samples(const struct fd_rb_samp_ctrs *start,
const struct fd_rb_samp_ctrs *end) const struct fd_rb_samp_ctrs *end)
{ {
return end->ctr[0] - start->ctr[0]; return end->ctr[0] - start->ctr[0];
} }
static void static void
occlusion_counter_accumulate_result(struct fd_context *ctx, occlusion_counter_accumulate_result(struct fd_context *ctx, const void *start,
const void *start, const void *end, const void *end,
union pipe_query_result *result) union pipe_query_result *result)
{ {
uint64_t n = count_samples(start, end); uint64_t n = count_samples(start, end);
result->u64 += n; result->u64 += n;
} }
static void static void
occlusion_predicate_accumulate_result(struct fd_context *ctx, occlusion_predicate_accumulate_result(struct fd_context *ctx, const void *start,
const void *start, const void *end, const void *end,
union pipe_query_result *result) union pipe_query_result *result)
{ {
uint64_t n = count_samples(start, end); uint64_t n = count_samples(start, end);
result->b |= (n > 0); result->b |= (n > 0);
} }
/* /*
@ -109,161 +107,159 @@ occlusion_predicate_accumulate_result(struct fd_context *ctx,
*/ */
static void static void
time_elapsed_enable(struct fd_context *ctx, struct fd_ringbuffer *ring) time_elapsed_enable(struct fd_context *ctx,
assert_dt struct fd_ringbuffer *ring) assert_dt
{ {
/* Right now, the assignment of countable to counter register is /* Right now, the assignment of countable to counter register is
* just hard coded. If we start exposing more countables than we * just hard coded. If we start exposing more countables than we
* have counters, we will need to be more clever. * have counters, we will need to be more clever.
*/ */
struct fd_batch *batch = fd_context_batch_locked(ctx); struct fd_batch *batch = fd_context_batch_locked(ctx);
fd_wfi(batch, ring); fd_wfi(batch, ring);
OUT_PKT0(ring, REG_A4XX_CP_PERFCTR_CP_SEL_0, 1); OUT_PKT0(ring, REG_A4XX_CP_PERFCTR_CP_SEL_0, 1);
OUT_RING(ring, CP_ALWAYS_COUNT); OUT_RING(ring, CP_ALWAYS_COUNT);
fd_batch_unlock_submit(batch); fd_batch_unlock_submit(batch);
fd_batch_reference(&batch, NULL); fd_batch_reference(&batch, NULL);
} }
static struct fd_hw_sample * static struct fd_hw_sample *
time_elapsed_get_sample(struct fd_batch *batch, struct fd_ringbuffer *ring) time_elapsed_get_sample(struct fd_batch *batch,
assert_dt struct fd_ringbuffer *ring) assert_dt
{ {
struct fd_hw_sample *samp = fd_hw_sample_init(batch, sizeof(uint64_t)); struct fd_hw_sample *samp = fd_hw_sample_init(batch, sizeof(uint64_t));
/* use unused part of vsc_size_mem as scratch space, to avoid /* use unused part of vsc_size_mem as scratch space, to avoid
* extra allocation: * extra allocation:
*/ */
struct fd_bo *scratch_bo = fd4_context(batch->ctx)->vsc_size_mem; struct fd_bo *scratch_bo = fd4_context(batch->ctx)->vsc_size_mem;
const int sample_off = 128; const int sample_off = 128;
const int addr_off = sample_off + 8; const int addr_off = sample_off + 8;
debug_assert(batch->ctx->screen->max_freq > 0); debug_assert(batch->ctx->screen->max_freq > 0);
/* Basic issue is that we need to read counter value to a relative /* Basic issue is that we need to read counter value to a relative
* destination (with per-tile offset) rather than absolute dest * destination (with per-tile offset) rather than absolute dest
* addr. But there is no pm4 packet that can do that. This is * addr. But there is no pm4 packet that can do that. This is
* where it would be *really* nice if we could write our own fw * where it would be *really* nice if we could write our own fw
* since afaict implementing the sort of packet we need would be * since afaict implementing the sort of packet we need would be
* trivial. * trivial.
* *
* Instead, we: * Instead, we:
* (1) CP_REG_TO_MEM to do a 64b copy of counter to scratch buffer * (1) CP_REG_TO_MEM to do a 64b copy of counter to scratch buffer
* (2) CP_MEM_WRITE to write per-sample offset to scratch buffer * (2) CP_MEM_WRITE to write per-sample offset to scratch buffer
* (3) CP_REG_TO_MEM w/ accumulate flag to add the per-tile base * (3) CP_REG_TO_MEM w/ accumulate flag to add the per-tile base
* address to the per-sample offset in the scratch buffer * address to the per-sample offset in the scratch buffer
* (4) CP_MEM_TO_REG to copy resulting address from steps #2 and #3 * (4) CP_MEM_TO_REG to copy resulting address from steps #2 and #3
* to CP_ME_NRT_ADDR * to CP_ME_NRT_ADDR
* (5) CP_MEM_TO_REG's to copy saved counter value from scratch * (5) CP_MEM_TO_REG's to copy saved counter value from scratch
* buffer to CP_ME_NRT_DATA to trigger the write out to query * buffer to CP_ME_NRT_DATA to trigger the write out to query
* result buffer * result buffer
* *
* Straightforward, right? * Straightforward, right?
* *
* Maybe could swap the order of things in the scratch buffer to * Maybe could swap the order of things in the scratch buffer to
* put address first, and copy back to CP_ME_NRT_ADDR+DATA in one * put address first, and copy back to CP_ME_NRT_ADDR+DATA in one
* shot, but that's really just polishing a turd.. * shot, but that's really just polishing a turd..
*/ */
fd_wfi(batch, ring); fd_wfi(batch, ring);
/* copy sample counter _LO and _HI to scratch: */ /* copy sample counter _LO and _HI to scratch: */
OUT_PKT3(ring, CP_REG_TO_MEM, 2); OUT_PKT3(ring, CP_REG_TO_MEM, 2);
OUT_RING(ring, CP_REG_TO_MEM_0_REG(REG_A4XX_RBBM_PERFCTR_CP_0_LO) | OUT_RING(ring, CP_REG_TO_MEM_0_REG(REG_A4XX_RBBM_PERFCTR_CP_0_LO) |
CP_REG_TO_MEM_0_64B | CP_REG_TO_MEM_0_64B |
CP_REG_TO_MEM_0_CNT(2)); /* write 2 regs to mem */ CP_REG_TO_MEM_0_CNT(2)); /* write 2 regs to mem */
OUT_RELOC(ring, scratch_bo, sample_off, 0, 0); OUT_RELOC(ring, scratch_bo, sample_off, 0, 0);
/* ok... here we really *would* like to use the CP_SET_CONSTANT /* ok... here we really *would* like to use the CP_SET_CONSTANT
* mode which can add a constant to value in reg2 and write to * mode which can add a constant to value in reg2 and write to
* reg1... *but* that only works for banked/context registers, * reg1... *but* that only works for banked/context registers,
* and CP_ME_NRT_DATA isn't one of those.. so we need to do some * and CP_ME_NRT_DATA isn't one of those.. so we need to do some
* CP math to the scratch buffer instead: * CP math to the scratch buffer instead:
* *
* (note first 8 bytes are counter value, use offset 0x8 for * (note first 8 bytes are counter value, use offset 0x8 for
* address calculation) * address calculation)
*/ */
/* per-sample offset to scratch bo: */ /* per-sample offset to scratch bo: */
OUT_PKT3(ring, CP_MEM_WRITE, 2); OUT_PKT3(ring, CP_MEM_WRITE, 2);
OUT_RELOC(ring, scratch_bo, addr_off, 0, 0); OUT_RELOC(ring, scratch_bo, addr_off, 0, 0);
OUT_RING(ring, samp->offset); OUT_RING(ring, samp->offset);
/* now add to that the per-tile base: */ /* now add to that the per-tile base: */
OUT_PKT3(ring, CP_REG_TO_MEM, 2); OUT_PKT3(ring, CP_REG_TO_MEM, 2);
OUT_RING(ring, CP_REG_TO_MEM_0_REG(HW_QUERY_BASE_REG) | OUT_RING(ring, CP_REG_TO_MEM_0_REG(HW_QUERY_BASE_REG) |
CP_REG_TO_MEM_0_ACCUMULATE | CP_REG_TO_MEM_0_ACCUMULATE |
CP_REG_TO_MEM_0_CNT(0)); /* readback 1 regs */ CP_REG_TO_MEM_0_CNT(0)); /* readback 1 regs */
OUT_RELOC(ring, scratch_bo, addr_off, 0, 0); OUT_RELOC(ring, scratch_bo, addr_off, 0, 0);
/* now copy that back to CP_ME_NRT_ADDR: */ /* now copy that back to CP_ME_NRT_ADDR: */
OUT_PKT3(ring, CP_MEM_TO_REG, 2); OUT_PKT3(ring, CP_MEM_TO_REG, 2);
OUT_RING(ring, REG_A4XX_CP_ME_NRT_ADDR); OUT_RING(ring, REG_A4XX_CP_ME_NRT_ADDR);
OUT_RELOC(ring, scratch_bo, addr_off, 0, 0); OUT_RELOC(ring, scratch_bo, addr_off, 0, 0);
/* and finally, copy sample from scratch buffer to CP_ME_NRT_DATA /* and finally, copy sample from scratch buffer to CP_ME_NRT_DATA
* to trigger the write to result buffer * to trigger the write to result buffer
*/ */
OUT_PKT3(ring, CP_MEM_TO_REG, 2); OUT_PKT3(ring, CP_MEM_TO_REG, 2);
OUT_RING(ring, REG_A4XX_CP_ME_NRT_DATA); OUT_RING(ring, REG_A4XX_CP_ME_NRT_DATA);
OUT_RELOC(ring, scratch_bo, sample_off, 0, 0); OUT_RELOC(ring, scratch_bo, sample_off, 0, 0);
/* and again to get the value of the _HI reg from scratch: */ /* and again to get the value of the _HI reg from scratch: */
OUT_PKT3(ring, CP_MEM_TO_REG, 2); OUT_PKT3(ring, CP_MEM_TO_REG, 2);
OUT_RING(ring, REG_A4XX_CP_ME_NRT_DATA); OUT_RING(ring, REG_A4XX_CP_ME_NRT_DATA);
OUT_RELOC(ring, scratch_bo, sample_off + 0x4, 0, 0); OUT_RELOC(ring, scratch_bo, sample_off + 0x4, 0, 0);
/* Sigh.. */ /* Sigh.. */
return samp; return samp;
} }
static void static void
time_elapsed_accumulate_result(struct fd_context *ctx, time_elapsed_accumulate_result(struct fd_context *ctx, const void *start,
const void *start, const void *end, const void *end, union pipe_query_result *result)
union pipe_query_result *result)
{ {
uint64_t n = *(uint64_t *)end - *(uint64_t *)start; uint64_t n = *(uint64_t *)end - *(uint64_t *)start;
/* max_freq is in Hz, convert cycle count to ns: */ /* max_freq is in Hz, convert cycle count to ns: */
result->u64 += n * 1000000000 / ctx->screen->max_freq; result->u64 += n * 1000000000 / ctx->screen->max_freq;
} }
static void static void
timestamp_accumulate_result(struct fd_context *ctx, timestamp_accumulate_result(struct fd_context *ctx, const void *start,
const void *start, const void *end, const void *end, union pipe_query_result *result)
union pipe_query_result *result)
{ {
/* just return the value from first tile: */ /* just return the value from first tile: */
if (result->u64 != 0) if (result->u64 != 0)
return; return;
uint64_t n = *(uint64_t *)start; uint64_t n = *(uint64_t *)start;
/* max_freq is in Hz, convert cycle count to ns: */ /* max_freq is in Hz, convert cycle count to ns: */
result->u64 = n * 1000000000 / ctx->screen->max_freq; result->u64 = n * 1000000000 / ctx->screen->max_freq;
} }
static const struct fd_hw_sample_provider occlusion_counter = { static const struct fd_hw_sample_provider occlusion_counter = {
.query_type = PIPE_QUERY_OCCLUSION_COUNTER, .query_type = PIPE_QUERY_OCCLUSION_COUNTER,
.get_sample = occlusion_get_sample, .get_sample = occlusion_get_sample,
.accumulate_result = occlusion_counter_accumulate_result, .accumulate_result = occlusion_counter_accumulate_result,
}; };
static const struct fd_hw_sample_provider occlusion_predicate = { static const struct fd_hw_sample_provider occlusion_predicate = {
.query_type = PIPE_QUERY_OCCLUSION_PREDICATE, .query_type = PIPE_QUERY_OCCLUSION_PREDICATE,
.get_sample = occlusion_get_sample, .get_sample = occlusion_get_sample,
.accumulate_result = occlusion_predicate_accumulate_result, .accumulate_result = occlusion_predicate_accumulate_result,
}; };
static const struct fd_hw_sample_provider occlusion_predicate_conservative = { static const struct fd_hw_sample_provider occlusion_predicate_conservative = {
.query_type = PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE, .query_type = PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE,
.get_sample = occlusion_get_sample, .get_sample = occlusion_get_sample,
.accumulate_result = occlusion_predicate_accumulate_result, .accumulate_result = occlusion_predicate_accumulate_result,
}; };
static const struct fd_hw_sample_provider time_elapsed = { static const struct fd_hw_sample_provider time_elapsed = {
.query_type = PIPE_QUERY_TIME_ELAPSED, .query_type = PIPE_QUERY_TIME_ELAPSED,
.always = true, .always = true,
.enable = time_elapsed_enable, .enable = time_elapsed_enable,
.get_sample = time_elapsed_get_sample, .get_sample = time_elapsed_get_sample,
.accumulate_result = time_elapsed_accumulate_result, .accumulate_result = time_elapsed_accumulate_result,
}; };
/* NOTE: timestamp query isn't going to give terribly sensible results /* NOTE: timestamp query isn't going to give terribly sensible results
@ -273,26 +269,26 @@ static const struct fd_hw_sample_provider time_elapsed = {
* kind of good enough. * kind of good enough.
*/ */
static const struct fd_hw_sample_provider timestamp = { static const struct fd_hw_sample_provider timestamp = {
.query_type = PIPE_QUERY_TIMESTAMP, .query_type = PIPE_QUERY_TIMESTAMP,
.always = true, .always = true,
.enable = time_elapsed_enable, .enable = time_elapsed_enable,
.get_sample = time_elapsed_get_sample, .get_sample = time_elapsed_get_sample,
.accumulate_result = timestamp_accumulate_result, .accumulate_result = timestamp_accumulate_result,
}; };
void fd4_query_context_init(struct pipe_context *pctx) void
disable_thread_safety_analysis fd4_query_context_init(struct pipe_context *pctx) disable_thread_safety_analysis
{ {
struct fd_context *ctx = fd_context(pctx); struct fd_context *ctx = fd_context(pctx);
ctx->create_query = fd_hw_create_query; ctx->create_query = fd_hw_create_query;
ctx->query_prepare = fd_hw_query_prepare; ctx->query_prepare = fd_hw_query_prepare;
ctx->query_prepare_tile = fd_hw_query_prepare_tile; ctx->query_prepare_tile = fd_hw_query_prepare_tile;
ctx->query_update_batch = fd_hw_query_update_batch; ctx->query_update_batch = fd_hw_query_update_batch;
fd_hw_query_register_provider(pctx, &occlusion_counter); fd_hw_query_register_provider(pctx, &occlusion_counter);
fd_hw_query_register_provider(pctx, &occlusion_predicate); fd_hw_query_register_provider(pctx, &occlusion_predicate);
fd_hw_query_register_provider(pctx, &occlusion_predicate_conservative); fd_hw_query_register_provider(pctx, &occlusion_predicate_conservative);
fd_hw_query_register_provider(pctx, &time_elapsed); fd_hw_query_register_provider(pctx, &time_elapsed);
fd_hw_query_register_provider(pctx, &timestamp); fd_hw_query_register_provider(pctx, &timestamp);
} }

View file

@ -24,84 +24,83 @@
* Rob Clark <robclark@freedesktop.org> * Rob Clark <robclark@freedesktop.org>
*/ */
#include "pipe/p_state.h" #include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h" #include "util/u_memory.h"
#include "util/u_string.h"
#include "fd4_rasterizer.h"
#include "fd4_context.h" #include "fd4_context.h"
#include "fd4_format.h" #include "fd4_format.h"
#include "fd4_rasterizer.h"
void * void *
fd4_rasterizer_state_create(struct pipe_context *pctx, fd4_rasterizer_state_create(struct pipe_context *pctx,
const struct pipe_rasterizer_state *cso) const struct pipe_rasterizer_state *cso)
{ {
struct fd4_rasterizer_stateobj *so; struct fd4_rasterizer_stateobj *so;
float psize_min, psize_max; float psize_min, psize_max;
so = CALLOC_STRUCT(fd4_rasterizer_stateobj); so = CALLOC_STRUCT(fd4_rasterizer_stateobj);
if (!so) if (!so)
return NULL; return NULL;
so->base = *cso; so->base = *cso;
if (cso->point_size_per_vertex) { if (cso->point_size_per_vertex) {
psize_min = util_get_min_point_size(cso); psize_min = util_get_min_point_size(cso);
psize_max = 4092; psize_max = 4092;
} else { } else {
/* Force the point size to be as if the vertex output was disabled. */ /* Force the point size to be as if the vertex output was disabled. */
psize_min = cso->point_size; psize_min = cso->point_size;
psize_max = cso->point_size; psize_max = cso->point_size;
} }
/* /*
if (cso->line_stipple_enable) { if (cso->line_stipple_enable) {
??? TODO line stipple ??? TODO line stipple
} }
TODO cso->half_pixel_center TODO cso->half_pixel_center
if (cso->multisample) if (cso->multisample)
TODO TODO
*/ */
so->gras_cl_clip_cntl = 0x80000; /* ??? */ so->gras_cl_clip_cntl = 0x80000; /* ??? */
so->gras_su_point_minmax = so->gras_su_point_minmax = A4XX_GRAS_SU_POINT_MINMAX_MIN(psize_min) |
A4XX_GRAS_SU_POINT_MINMAX_MIN(psize_min) | A4XX_GRAS_SU_POINT_MINMAX_MAX(psize_max);
A4XX_GRAS_SU_POINT_MINMAX_MAX(psize_max); so->gras_su_point_size = A4XX_GRAS_SU_POINT_SIZE(cso->point_size);
so->gras_su_point_size = A4XX_GRAS_SU_POINT_SIZE(cso->point_size); so->gras_su_poly_offset_scale =
so->gras_su_poly_offset_scale = A4XX_GRAS_SU_POLY_OFFSET_SCALE(cso->offset_scale);
A4XX_GRAS_SU_POLY_OFFSET_SCALE(cso->offset_scale); so->gras_su_poly_offset_offset =
so->gras_su_poly_offset_offset = A4XX_GRAS_SU_POLY_OFFSET_OFFSET(cso->offset_units * 2.0f);
A4XX_GRAS_SU_POLY_OFFSET_OFFSET(cso->offset_units * 2.0f); so->gras_su_poly_offset_clamp =
so->gras_su_poly_offset_clamp = A4XX_GRAS_SU_POLY_OFFSET_CLAMP(cso->offset_clamp);
A4XX_GRAS_SU_POLY_OFFSET_CLAMP(cso->offset_clamp);
so->gras_su_mode_control = so->gras_su_mode_control =
A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(cso->line_width/2.0); A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(cso->line_width / 2.0);
so->pc_prim_vtx_cntl2 = so->pc_prim_vtx_cntl2 = A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE(
A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE(fd_polygon_mode(cso->fill_front)) | fd_polygon_mode(cso->fill_front)) |
A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE(fd_polygon_mode(cso->fill_back)); A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE(
fd_polygon_mode(cso->fill_back));
if (cso->fill_front != PIPE_POLYGON_MODE_FILL || if (cso->fill_front != PIPE_POLYGON_MODE_FILL ||
cso->fill_back != PIPE_POLYGON_MODE_FILL) cso->fill_back != PIPE_POLYGON_MODE_FILL)
so->pc_prim_vtx_cntl2 |= A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_ENABLE; so->pc_prim_vtx_cntl2 |= A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_ENABLE;
if (cso->cull_face & PIPE_FACE_FRONT) if (cso->cull_face & PIPE_FACE_FRONT)
so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_CULL_FRONT; so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_CULL_FRONT;
if (cso->cull_face & PIPE_FACE_BACK) if (cso->cull_face & PIPE_FACE_BACK)
so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_CULL_BACK; so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_CULL_BACK;
if (!cso->front_ccw) if (!cso->front_ccw)
so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_FRONT_CW; so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_FRONT_CW;
if (!cso->flatshade_first) if (!cso->flatshade_first)
so->pc_prim_vtx_cntl |= A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST; so->pc_prim_vtx_cntl |= A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST;
if (cso->offset_tri) if (cso->offset_tri)
so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET; so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET;
if (!cso->depth_clip_near) if (!cso->depth_clip_near)
so->gras_cl_clip_cntl |= A4XX_GRAS_CL_CLIP_CNTL_ZNEAR_CLIP_DISABLE | so->gras_cl_clip_cntl |= A4XX_GRAS_CL_CLIP_CNTL_ZNEAR_CLIP_DISABLE |
A4XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE; A4XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE;
if (cso->clip_halfz) if (cso->clip_halfz)
so->gras_cl_clip_cntl |= A4XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z; so->gras_cl_clip_cntl |= A4XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z;
return so; return so;
} }

View file

@ -27,30 +27,30 @@
#ifndef FD4_RASTERIZER_H_ #ifndef FD4_RASTERIZER_H_
#define FD4_RASTERIZER_H_ #define FD4_RASTERIZER_H_
#include "pipe/p_state.h"
#include "pipe/p_context.h" #include "pipe/p_context.h"
#include "pipe/p_state.h"
struct fd4_rasterizer_stateobj { struct fd4_rasterizer_stateobj {
struct pipe_rasterizer_state base; struct pipe_rasterizer_state base;
uint32_t gras_su_point_minmax; uint32_t gras_su_point_minmax;
uint32_t gras_su_point_size; uint32_t gras_su_point_size;
uint32_t gras_su_poly_offset_scale; uint32_t gras_su_poly_offset_scale;
uint32_t gras_su_poly_offset_offset; uint32_t gras_su_poly_offset_offset;
uint32_t gras_su_poly_offset_clamp; uint32_t gras_su_poly_offset_clamp;
uint32_t gras_su_mode_control; uint32_t gras_su_mode_control;
uint32_t gras_cl_clip_cntl; uint32_t gras_cl_clip_cntl;
uint32_t pc_prim_vtx_cntl; uint32_t pc_prim_vtx_cntl;
uint32_t pc_prim_vtx_cntl2; uint32_t pc_prim_vtx_cntl2;
}; };
static inline struct fd4_rasterizer_stateobj * static inline struct fd4_rasterizer_stateobj *
fd4_rasterizer_stateobj(struct pipe_rasterizer_state *rast) fd4_rasterizer_stateobj(struct pipe_rasterizer_state *rast)
{ {
return (struct fd4_rasterizer_stateobj *)rast; return (struct fd4_rasterizer_stateobj *)rast;
} }
void * fd4_rasterizer_state_create(struct pipe_context *pctx, void *fd4_rasterizer_state_create(struct pipe_context *pctx,
const struct pipe_rasterizer_state *cso); const struct pipe_rasterizer_state *cso);
#endif /* FD4_RASTERIZER_H_ */ #endif /* FD4_RASTERIZER_H_ */

View file

@ -30,54 +30,54 @@
uint32_t uint32_t
fd4_setup_slices(struct fd_resource *rsc) fd4_setup_slices(struct fd_resource *rsc)
{ {
struct pipe_resource *prsc = &rsc->b.b; struct pipe_resource *prsc = &rsc->b.b;
enum pipe_format format = prsc->format; enum pipe_format format = prsc->format;
uint32_t level, size = 0; uint32_t level, size = 0;
uint32_t width = prsc->width0; uint32_t width = prsc->width0;
uint32_t height = prsc->height0; uint32_t height = prsc->height0;
uint32_t depth = prsc->depth0; uint32_t depth = prsc->depth0;
/* in layer_first layout, the level (slice) contains just one /* in layer_first layout, the level (slice) contains just one
* layer (since in fact the layer contains the slices) * layer (since in fact the layer contains the slices)
*/ */
uint32_t layers_in_level, alignment; uint32_t layers_in_level, alignment;
if (prsc->target == PIPE_TEXTURE_3D) { if (prsc->target == PIPE_TEXTURE_3D) {
rsc->layout.layer_first = false; rsc->layout.layer_first = false;
layers_in_level = prsc->array_size; layers_in_level = prsc->array_size;
alignment = 4096; alignment = 4096;
} else { } else {
rsc->layout.layer_first = true; rsc->layout.layer_first = true;
layers_in_level = 1; layers_in_level = 1;
alignment = 1; alignment = 1;
} }
/* 32 pixel alignment */ /* 32 pixel alignment */
fdl_set_pitchalign(&rsc->layout, fdl_cpp_shift(&rsc->layout) + 5); fdl_set_pitchalign(&rsc->layout, fdl_cpp_shift(&rsc->layout) + 5);
for (level = 0; level <= prsc->last_level; level++) { for (level = 0; level <= prsc->last_level; level++) {
struct fdl_slice *slice = fd_resource_slice(rsc, level); struct fdl_slice *slice = fd_resource_slice(rsc, level);
uint32_t pitch = fdl_pitch(&rsc->layout, level); uint32_t pitch = fdl_pitch(&rsc->layout, level);
uint32_t nblocksy = util_format_get_nblocksy(format, height); uint32_t nblocksy = util_format_get_nblocksy(format, height);
slice->offset = size; slice->offset = size;
/* 3d textures can have different layer sizes for high levels, but the /* 3d textures can have different layer sizes for high levels, but the
* hw auto-sizer is buggy (or at least different than what this code * hw auto-sizer is buggy (or at least different than what this code
* does), so as soon as the layer size range gets into range, we stop * does), so as soon as the layer size range gets into range, we stop
* reducing it. * reducing it.
*/ */
if (prsc->target == PIPE_TEXTURE_3D && if (prsc->target == PIPE_TEXTURE_3D &&
(level > 1 && fd_resource_slice(rsc, level - 1)->size0 <= 0xf000)) (level > 1 && fd_resource_slice(rsc, level - 1)->size0 <= 0xf000))
slice->size0 = fd_resource_slice(rsc, level - 1)->size0; slice->size0 = fd_resource_slice(rsc, level - 1)->size0;
else else
slice->size0 = align(nblocksy * pitch, alignment); slice->size0 = align(nblocksy * pitch, alignment);
size += slice->size0 * depth * layers_in_level; size += slice->size0 * depth * layers_in_level;
width = u_minify(width, 1); width = u_minify(width, 1);
height = u_minify(height, 1); height = u_minify(height, 1);
depth = u_minify(depth, 1); depth = u_minify(depth, 1);
} }
return size; return size;
} }

View file

@ -27,91 +27,85 @@
#include "pipe/p_screen.h" #include "pipe/p_screen.h"
#include "util/format/u_format.h" #include "util/format/u_format.h"
#include "fd4_screen.h"
#include "fd4_context.h" #include "fd4_context.h"
#include "fd4_emit.h" #include "fd4_emit.h"
#include "fd4_format.h" #include "fd4_format.h"
#include "fd4_resource.h" #include "fd4_resource.h"
#include "fd4_screen.h"
#include "ir3/ir3_compiler.h" #include "ir3/ir3_compiler.h"
static bool static bool
fd4_screen_is_format_supported(struct pipe_screen *pscreen, fd4_screen_is_format_supported(struct pipe_screen *pscreen,
enum pipe_format format, enum pipe_format format,
enum pipe_texture_target target, enum pipe_texture_target target,
unsigned sample_count, unsigned sample_count,
unsigned storage_sample_count, unsigned storage_sample_count, unsigned usage)
unsigned usage)
{ {
unsigned retval = 0; unsigned retval = 0;
if ((target >= PIPE_MAX_TEXTURE_TYPES) || if ((target >= PIPE_MAX_TEXTURE_TYPES) ||
(sample_count > 1)) { /* TODO add MSAA */ (sample_count > 1)) { /* TODO add MSAA */
DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x", DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x",
util_format_name(format), target, sample_count, usage); util_format_name(format), target, sample_count, usage);
return false; return false;
} }
if (MAX2(1, sample_count) != MAX2(1, storage_sample_count)) if (MAX2(1, sample_count) != MAX2(1, storage_sample_count))
return false; return false;
if ((usage & PIPE_BIND_VERTEX_BUFFER) && if ((usage & PIPE_BIND_VERTEX_BUFFER) &&
(fd4_pipe2vtx(format) != VFMT4_NONE)) { (fd4_pipe2vtx(format) != VFMT4_NONE)) {
retval |= PIPE_BIND_VERTEX_BUFFER; retval |= PIPE_BIND_VERTEX_BUFFER;
} }
if ((usage & PIPE_BIND_SAMPLER_VIEW) && if ((usage & PIPE_BIND_SAMPLER_VIEW) &&
(fd4_pipe2tex(format) != TFMT4_NONE) && (fd4_pipe2tex(format) != TFMT4_NONE) &&
(target == PIPE_BUFFER || (target == PIPE_BUFFER || util_format_get_blocksize(format) != 12)) {
util_format_get_blocksize(format) != 12)) { retval |= PIPE_BIND_SAMPLER_VIEW;
retval |= PIPE_BIND_SAMPLER_VIEW; }
}
if ((usage & (PIPE_BIND_RENDER_TARGET | if ((usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET |
PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | PIPE_BIND_SHARED)) &&
PIPE_BIND_SCANOUT | (fd4_pipe2color(format) != RB4_NONE) &&
PIPE_BIND_SHARED)) && (fd4_pipe2tex(format) != TFMT4_NONE)) {
(fd4_pipe2color(format) != RB4_NONE) && retval |= usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET |
(fd4_pipe2tex(format) != TFMT4_NONE)) { PIPE_BIND_SCANOUT | PIPE_BIND_SHARED);
retval |= usage & (PIPE_BIND_RENDER_TARGET | }
PIPE_BIND_DISPLAY_TARGET |
PIPE_BIND_SCANOUT |
PIPE_BIND_SHARED);
}
/* For ARB_framebuffer_no_attachments: */ /* For ARB_framebuffer_no_attachments: */
if ((usage & PIPE_BIND_RENDER_TARGET) && (format == PIPE_FORMAT_NONE)) { if ((usage & PIPE_BIND_RENDER_TARGET) && (format == PIPE_FORMAT_NONE)) {
retval |= usage & PIPE_BIND_RENDER_TARGET; retval |= usage & PIPE_BIND_RENDER_TARGET;
} }
if ((usage & PIPE_BIND_DEPTH_STENCIL) && if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
(fd4_pipe2depth(format) != (enum a4xx_depth_format)~0) && (fd4_pipe2depth(format) != (enum a4xx_depth_format) ~0) &&
(fd4_pipe2tex(format) != TFMT4_NONE)) { (fd4_pipe2tex(format) != TFMT4_NONE)) {
retval |= PIPE_BIND_DEPTH_STENCIL; retval |= PIPE_BIND_DEPTH_STENCIL;
} }
if ((usage & PIPE_BIND_INDEX_BUFFER) && if ((usage & PIPE_BIND_INDEX_BUFFER) &&
(fd_pipe2index(format) != (enum pc_di_index_size)~0)) { (fd_pipe2index(format) != (enum pc_di_index_size) ~0)) {
retval |= PIPE_BIND_INDEX_BUFFER; retval |= PIPE_BIND_INDEX_BUFFER;
} }
if (retval != usage) { if (retval != usage) {
DBG("not supported: format=%s, target=%d, sample_count=%d, " DBG("not supported: format=%s, target=%d, sample_count=%d, "
"usage=%x, retval=%x", util_format_name(format), "usage=%x, retval=%x",
target, sample_count, usage, retval); util_format_name(format), target, sample_count, usage, retval);
} }
return retval == usage; return retval == usage;
} }
void void
fd4_screen_init(struct pipe_screen *pscreen) fd4_screen_init(struct pipe_screen *pscreen)
{ {
struct fd_screen *screen = fd_screen(pscreen); struct fd_screen *screen = fd_screen(pscreen);
screen->max_rts = A4XX_MAX_RENDER_TARGETS; screen->max_rts = A4XX_MAX_RENDER_TARGETS;
screen->setup_slices = fd4_setup_slices; screen->setup_slices = fd4_setup_slices;
pscreen->context_create = fd4_context_create; pscreen->context_create = fd4_context_create;
pscreen->is_format_supported = fd4_screen_is_format_supported; pscreen->is_format_supported = fd4_screen_is_format_supported;
fd4_emit_init_screen(pscreen); fd4_emit_init_screen(pscreen);
ir3_screen_init(pscreen); ir3_screen_init(pscreen);
} }

View file

@ -25,264 +25,257 @@
*/ */
#include "pipe/p_state.h" #include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/format/u_format.h" #include "util/format/u_format.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_string.h"
#include "fd4_texture.h"
#include "fd4_format.h" #include "fd4_format.h"
#include "fd4_texture.h"
static enum a4xx_tex_clamp static enum a4xx_tex_clamp
tex_clamp(unsigned wrap, bool *needs_border) tex_clamp(unsigned wrap, bool *needs_border)
{ {
switch (wrap) { switch (wrap) {
case PIPE_TEX_WRAP_REPEAT: case PIPE_TEX_WRAP_REPEAT:
return A4XX_TEX_REPEAT; return A4XX_TEX_REPEAT;
case PIPE_TEX_WRAP_CLAMP_TO_EDGE: case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
return A4XX_TEX_CLAMP_TO_EDGE; return A4XX_TEX_CLAMP_TO_EDGE;
case PIPE_TEX_WRAP_CLAMP_TO_BORDER: case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
*needs_border = true; *needs_border = true;
return A4XX_TEX_CLAMP_TO_BORDER; return A4XX_TEX_CLAMP_TO_BORDER;
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
/* only works for PoT.. need to emulate otherwise! */ /* only works for PoT.. need to emulate otherwise! */
return A4XX_TEX_MIRROR_CLAMP; return A4XX_TEX_MIRROR_CLAMP;
case PIPE_TEX_WRAP_MIRROR_REPEAT: case PIPE_TEX_WRAP_MIRROR_REPEAT:
return A4XX_TEX_MIRROR_REPEAT; return A4XX_TEX_MIRROR_REPEAT;
case PIPE_TEX_WRAP_MIRROR_CLAMP: case PIPE_TEX_WRAP_MIRROR_CLAMP:
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
/* these two we could perhaps emulate, but we currently /* these two we could perhaps emulate, but we currently
* just don't advertise PIPE_CAP_TEXTURE_MIRROR_CLAMP * just don't advertise PIPE_CAP_TEXTURE_MIRROR_CLAMP
*/ */
default: default:
DBG("invalid wrap: %u", wrap); DBG("invalid wrap: %u", wrap);
return 0; return 0;
} }
} }
static enum a4xx_tex_filter static enum a4xx_tex_filter
tex_filter(unsigned filter, bool aniso) tex_filter(unsigned filter, bool aniso)
{ {
switch (filter) { switch (filter) {
case PIPE_TEX_FILTER_NEAREST: case PIPE_TEX_FILTER_NEAREST:
return A4XX_TEX_NEAREST; return A4XX_TEX_NEAREST;
case PIPE_TEX_FILTER_LINEAR: case PIPE_TEX_FILTER_LINEAR:
return aniso ? A4XX_TEX_ANISO : A4XX_TEX_LINEAR; return aniso ? A4XX_TEX_ANISO : A4XX_TEX_LINEAR;
default: default:
DBG("invalid filter: %u", filter); DBG("invalid filter: %u", filter);
return 0; return 0;
} }
} }
static void * static void *
fd4_sampler_state_create(struct pipe_context *pctx, fd4_sampler_state_create(struct pipe_context *pctx,
const struct pipe_sampler_state *cso) const struct pipe_sampler_state *cso)
{ {
struct fd4_sampler_stateobj *so = CALLOC_STRUCT(fd4_sampler_stateobj); struct fd4_sampler_stateobj *so = CALLOC_STRUCT(fd4_sampler_stateobj);
unsigned aniso = util_last_bit(MIN2(cso->max_anisotropy >> 1, 8)); unsigned aniso = util_last_bit(MIN2(cso->max_anisotropy >> 1, 8));
bool miplinear = false; bool miplinear = false;
if (!so) if (!so)
return NULL; return NULL;
if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR) if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
miplinear = true; miplinear = true;
so->base = *cso; so->base = *cso;
so->needs_border = false; so->needs_border = false;
so->texsamp0 = so->texsamp0 =
COND(miplinear, A4XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR) | COND(miplinear, A4XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR) |
A4XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter, aniso)) | A4XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter, aniso)) |
A4XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter, aniso)) | A4XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter, aniso)) |
A4XX_TEX_SAMP_0_ANISO(aniso) | A4XX_TEX_SAMP_0_ANISO(aniso) |
A4XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s, &so->needs_border)) | A4XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s, &so->needs_border)) |
A4XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t, &so->needs_border)) | A4XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t, &so->needs_border)) |
A4XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r, &so->needs_border)); A4XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r, &so->needs_border));
so->texsamp1 = so->texsamp1 =
// COND(miplinear, A4XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR) | // COND(miplinear, A4XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR) |
COND(!cso->seamless_cube_map, A4XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF) | COND(!cso->seamless_cube_map, A4XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF) |
COND(!cso->normalized_coords, A4XX_TEX_SAMP_1_UNNORM_COORDS); COND(!cso->normalized_coords, A4XX_TEX_SAMP_1_UNNORM_COORDS);
if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) { if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
so->texsamp0 |= A4XX_TEX_SAMP_0_LOD_BIAS(cso->lod_bias); so->texsamp0 |= A4XX_TEX_SAMP_0_LOD_BIAS(cso->lod_bias);
so->texsamp1 |= so->texsamp1 |= A4XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) |
A4XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) | A4XX_TEX_SAMP_1_MAX_LOD(cso->max_lod);
A4XX_TEX_SAMP_1_MAX_LOD(cso->max_lod); }
}
if (cso->compare_mode) if (cso->compare_mode)
so->texsamp1 |= A4XX_TEX_SAMP_1_COMPARE_FUNC(cso->compare_func); /* maps 1:1 */ so->texsamp1 |=
A4XX_TEX_SAMP_1_COMPARE_FUNC(cso->compare_func); /* maps 1:1 */
return so; return so;
} }
static enum a4xx_tex_type static enum a4xx_tex_type
tex_type(unsigned target) tex_type(unsigned target)
{ {
switch (target) { switch (target) {
default: default:
assert(0); assert(0);
case PIPE_BUFFER: case PIPE_BUFFER:
case PIPE_TEXTURE_1D: case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_1D_ARRAY: case PIPE_TEXTURE_1D_ARRAY:
return A4XX_TEX_1D; return A4XX_TEX_1D;
case PIPE_TEXTURE_RECT: case PIPE_TEXTURE_RECT:
case PIPE_TEXTURE_2D: case PIPE_TEXTURE_2D:
case PIPE_TEXTURE_2D_ARRAY: case PIPE_TEXTURE_2D_ARRAY:
return A4XX_TEX_2D; return A4XX_TEX_2D;
case PIPE_TEXTURE_3D: case PIPE_TEXTURE_3D:
return A4XX_TEX_3D; return A4XX_TEX_3D;
case PIPE_TEXTURE_CUBE: case PIPE_TEXTURE_CUBE:
case PIPE_TEXTURE_CUBE_ARRAY: case PIPE_TEXTURE_CUBE_ARRAY:
return A4XX_TEX_CUBE; return A4XX_TEX_CUBE;
} }
} }
static bool static bool
use_astc_srgb_workaround(struct pipe_context *pctx, enum pipe_format format) use_astc_srgb_workaround(struct pipe_context *pctx, enum pipe_format format)
{ {
return (fd_screen(pctx->screen)->gpu_id == 420) && return (fd_screen(pctx->screen)->gpu_id == 420) &&
(util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_ASTC); (util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_ASTC);
} }
static struct pipe_sampler_view * static struct pipe_sampler_view *
fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
const struct pipe_sampler_view *cso) const struct pipe_sampler_view *cso)
{ {
struct fd4_pipe_sampler_view *so = CALLOC_STRUCT(fd4_pipe_sampler_view); struct fd4_pipe_sampler_view *so = CALLOC_STRUCT(fd4_pipe_sampler_view);
struct fd_resource *rsc = fd_resource(prsc); struct fd_resource *rsc = fd_resource(prsc);
enum pipe_format format = cso->format; enum pipe_format format = cso->format;
unsigned lvl, layers = 0; unsigned lvl, layers = 0;
if (!so) if (!so)
return NULL; return NULL;
if (format == PIPE_FORMAT_X32_S8X24_UINT) { if (format == PIPE_FORMAT_X32_S8X24_UINT) {
rsc = rsc->stencil; rsc = rsc->stencil;
format = rsc->b.b.format; format = rsc->b.b.format;
} }
so->base = *cso; so->base = *cso;
pipe_reference(NULL, &prsc->reference); pipe_reference(NULL, &prsc->reference);
so->base.texture = prsc; so->base.texture = prsc;
so->base.reference.count = 1; so->base.reference.count = 1;
so->base.context = pctx; so->base.context = pctx;
so->texconst0 = so->texconst0 = A4XX_TEX_CONST_0_TYPE(tex_type(cso->target)) |
A4XX_TEX_CONST_0_TYPE(tex_type(cso->target)) | A4XX_TEX_CONST_0_FMT(fd4_pipe2tex(format)) |
A4XX_TEX_CONST_0_FMT(fd4_pipe2tex(format)) | fd4_tex_swiz(format, cso->swizzle_r, cso->swizzle_g,
fd4_tex_swiz(format, cso->swizzle_r, cso->swizzle_g, cso->swizzle_b, cso->swizzle_a);
cso->swizzle_b, cso->swizzle_a);
if (util_format_is_srgb(format)) { if (util_format_is_srgb(format)) {
if (use_astc_srgb_workaround(pctx, format)) if (use_astc_srgb_workaround(pctx, format))
so->astc_srgb = true; so->astc_srgb = true;
so->texconst0 |= A4XX_TEX_CONST_0_SRGB; so->texconst0 |= A4XX_TEX_CONST_0_SRGB;
} }
if (cso->target == PIPE_BUFFER) { if (cso->target == PIPE_BUFFER) {
unsigned elements = cso->u.buf.size / util_format_get_blocksize(format); unsigned elements = cso->u.buf.size / util_format_get_blocksize(format);
lvl = 0; lvl = 0;
so->texconst1 = so->texconst1 =
A4XX_TEX_CONST_1_WIDTH(elements) | A4XX_TEX_CONST_1_WIDTH(elements) | A4XX_TEX_CONST_1_HEIGHT(1);
A4XX_TEX_CONST_1_HEIGHT(1); so->texconst2 = A4XX_TEX_CONST_2_PITCH(elements * rsc->layout.cpp);
so->texconst2 = so->offset = cso->u.buf.offset;
A4XX_TEX_CONST_2_PITCH(elements * rsc->layout.cpp); } else {
so->offset = cso->u.buf.offset; unsigned miplevels;
} else {
unsigned miplevels;
lvl = fd_sampler_first_level(cso); lvl = fd_sampler_first_level(cso);
miplevels = fd_sampler_last_level(cso) - lvl; miplevels = fd_sampler_last_level(cso) - lvl;
layers = cso->u.tex.last_layer - cso->u.tex.first_layer + 1; layers = cso->u.tex.last_layer - cso->u.tex.first_layer + 1;
so->texconst0 |= A4XX_TEX_CONST_0_MIPLVLS(miplevels); so->texconst0 |= A4XX_TEX_CONST_0_MIPLVLS(miplevels);
so->texconst1 = so->texconst1 = A4XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) |
A4XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) | A4XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
A4XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl)); so->texconst2 = A4XX_TEX_CONST_2_PITCHALIGN(rsc->layout.pitchalign - 5) |
so->texconst2 = A4XX_TEX_CONST_2_PITCH(fd_resource_pitch(rsc, lvl));
A4XX_TEX_CONST_2_PITCHALIGN(rsc->layout.pitchalign - 5) | so->offset = fd_resource_offset(rsc, lvl, cso->u.tex.first_layer);
A4XX_TEX_CONST_2_PITCH(fd_resource_pitch(rsc, lvl)); }
so->offset = fd_resource_offset(rsc, lvl, cso->u.tex.first_layer);
}
/* NOTE: since we sample z24s8 using 8888_UINT format, the swizzle /* NOTE: since we sample z24s8 using 8888_UINT format, the swizzle
* we get isn't quite right. Use SWAP(XYZW) as a cheap and cheerful * we get isn't quite right. Use SWAP(XYZW) as a cheap and cheerful
* way to re-arrange things so stencil component is where the swiz * way to re-arrange things so stencil component is where the swiz
* expects. * expects.
* *
* Note that gallium expects stencil sampler to return (s,s,s,s) * Note that gallium expects stencil sampler to return (s,s,s,s)
* which isn't quite true. To make that happen we'd have to massage * which isn't quite true. To make that happen we'd have to massage
* the swizzle. But in practice only the .x component is used. * the swizzle. But in practice only the .x component is used.
*/ */
if (format == PIPE_FORMAT_X24S8_UINT) if (format == PIPE_FORMAT_X24S8_UINT)
so->texconst2 |= A4XX_TEX_CONST_2_SWAP(XYZW); so->texconst2 |= A4XX_TEX_CONST_2_SWAP(XYZW);
switch (cso->target) { switch (cso->target) {
case PIPE_TEXTURE_1D_ARRAY: case PIPE_TEXTURE_1D_ARRAY:
case PIPE_TEXTURE_2D_ARRAY: case PIPE_TEXTURE_2D_ARRAY:
so->texconst3 = so->texconst3 = A4XX_TEX_CONST_3_DEPTH(layers) |
A4XX_TEX_CONST_3_DEPTH(layers) | A4XX_TEX_CONST_3_LAYERSZ(rsc->layout.layer_size);
A4XX_TEX_CONST_3_LAYERSZ(rsc->layout.layer_size); break;
break; case PIPE_TEXTURE_CUBE:
case PIPE_TEXTURE_CUBE: case PIPE_TEXTURE_CUBE_ARRAY:
case PIPE_TEXTURE_CUBE_ARRAY: so->texconst3 = A4XX_TEX_CONST_3_DEPTH(layers / 6) |
so->texconst3 = A4XX_TEX_CONST_3_LAYERSZ(rsc->layout.layer_size);
A4XX_TEX_CONST_3_DEPTH(layers / 6) | break;
A4XX_TEX_CONST_3_LAYERSZ(rsc->layout.layer_size); case PIPE_TEXTURE_3D:
break; so->texconst3 =
case PIPE_TEXTURE_3D: A4XX_TEX_CONST_3_DEPTH(u_minify(prsc->depth0, lvl)) |
so->texconst3 = A4XX_TEX_CONST_3_LAYERSZ(fd_resource_slice(rsc, lvl)->size0);
A4XX_TEX_CONST_3_DEPTH(u_minify(prsc->depth0, lvl)) | so->texconst4 = A4XX_TEX_CONST_4_LAYERSZ(
A4XX_TEX_CONST_3_LAYERSZ(fd_resource_slice(rsc, lvl)->size0); fd_resource_slice(rsc, prsc->last_level)->size0);
so->texconst4 = A4XX_TEX_CONST_4_LAYERSZ( break;
fd_resource_slice(rsc, prsc->last_level)->size0); default:
break; so->texconst3 = 0x00000000;
default: break;
so->texconst3 = 0x00000000; }
break;
}
return &so->base; return &so->base;
} }
static void static void
fd4_set_sampler_views(struct pipe_context *pctx, enum pipe_shader_type shader, fd4_set_sampler_views(struct pipe_context *pctx, enum pipe_shader_type shader,
unsigned start, unsigned nr, unsigned unbind_num_trailing_slots, unsigned start, unsigned nr,
struct pipe_sampler_view **views) unsigned unbind_num_trailing_slots,
struct pipe_sampler_view **views)
{ {
struct fd_context *ctx = fd_context(pctx); struct fd_context *ctx = fd_context(pctx);
struct fd4_context *fd4_ctx = fd4_context(ctx); struct fd4_context *fd4_ctx = fd4_context(ctx);
uint16_t astc_srgb = 0; uint16_t astc_srgb = 0;
unsigned i; unsigned i;
for (i = 0; i < nr; i++) { for (i = 0; i < nr; i++) {
if (views[i]) { if (views[i]) {
struct fd4_pipe_sampler_view *view = struct fd4_pipe_sampler_view *view = fd4_pipe_sampler_view(views[i]);
fd4_pipe_sampler_view(views[i]); if (view->astc_srgb)
if (view->astc_srgb) astc_srgb |= (1 << i);
astc_srgb |= (1 << i); }
} }
}
fd_set_sampler_views(pctx, shader, start, nr, unbind_num_trailing_slots, views); fd_set_sampler_views(pctx, shader, start, nr, unbind_num_trailing_slots,
views);
if (shader == PIPE_SHADER_FRAGMENT) { if (shader == PIPE_SHADER_FRAGMENT) {
fd4_ctx->fastc_srgb = astc_srgb; fd4_ctx->fastc_srgb = astc_srgb;
} else if (shader == PIPE_SHADER_VERTEX) { } else if (shader == PIPE_SHADER_VERTEX) {
fd4_ctx->vastc_srgb = astc_srgb; fd4_ctx->vastc_srgb = astc_srgb;
} }
} }
void void
fd4_texture_init(struct pipe_context *pctx) fd4_texture_init(struct pipe_context *pctx)
{ {
pctx->create_sampler_state = fd4_sampler_state_create; pctx->create_sampler_state = fd4_sampler_state_create;
pctx->bind_sampler_states = fd_sampler_states_bind; pctx->bind_sampler_states = fd_sampler_states_bind;
pctx->create_sampler_view = fd4_sampler_view_create; pctx->create_sampler_view = fd4_sampler_view_create;
pctx->set_sampler_views = fd4_set_sampler_views; pctx->set_sampler_views = fd4_set_sampler_views;
} }

View file

@ -29,39 +29,39 @@
#include "pipe/p_context.h" #include "pipe/p_context.h"
#include "freedreno_texture.h"
#include "freedreno_resource.h" #include "freedreno_resource.h"
#include "freedreno_texture.h"
#include "fd4_context.h" #include "fd4_context.h"
#include "fd4_format.h" #include "fd4_format.h"
struct fd4_sampler_stateobj { struct fd4_sampler_stateobj {
struct pipe_sampler_state base; struct pipe_sampler_state base;
uint32_t texsamp0, texsamp1; uint32_t texsamp0, texsamp1;
bool needs_border; bool needs_border;
}; };
static inline struct fd4_sampler_stateobj * static inline struct fd4_sampler_stateobj *
fd4_sampler_stateobj(struct pipe_sampler_state *samp) fd4_sampler_stateobj(struct pipe_sampler_state *samp)
{ {
return (struct fd4_sampler_stateobj *)samp; return (struct fd4_sampler_stateobj *)samp;
} }
struct fd4_pipe_sampler_view { struct fd4_pipe_sampler_view {
struct pipe_sampler_view base; struct pipe_sampler_view base;
uint32_t texconst0, texconst1, texconst2, texconst3, texconst4; uint32_t texconst0, texconst1, texconst2, texconst3, texconst4;
uint32_t offset; uint32_t offset;
bool astc_srgb; bool astc_srgb;
}; };
static inline struct fd4_pipe_sampler_view * static inline struct fd4_pipe_sampler_view *
fd4_pipe_sampler_view(struct pipe_sampler_view *pview) fd4_pipe_sampler_view(struct pipe_sampler_view *pview)
{ {
return (struct fd4_pipe_sampler_view *)pview; return (struct fd4_pipe_sampler_view *)pview;
} }
unsigned fd4_get_const_idx(struct fd_context *ctx, unsigned fd4_get_const_idx(struct fd_context *ctx,
struct fd_texture_stateobj *tex, unsigned samp_id); struct fd_texture_stateobj *tex, unsigned samp_id);
void fd4_texture_init(struct pipe_context *pctx); void fd4_texture_init(struct pipe_context *pctx);

View file

@ -24,82 +24,77 @@
* Rob Clark <robclark@freedesktop.org> * Rob Clark <robclark@freedesktop.org>
*/ */
#include "pipe/p_state.h" #include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h" #include "util/u_memory.h"
#include "util/u_string.h"
#include "fd4_zsa.h"
#include "fd4_context.h" #include "fd4_context.h"
#include "fd4_format.h" #include "fd4_format.h"
#include "fd4_zsa.h"
void * void *
fd4_zsa_state_create(struct pipe_context *pctx, fd4_zsa_state_create(struct pipe_context *pctx,
const struct pipe_depth_stencil_alpha_state *cso) const struct pipe_depth_stencil_alpha_state *cso)
{ {
struct fd4_zsa_stateobj *so; struct fd4_zsa_stateobj *so;
so = CALLOC_STRUCT(fd4_zsa_stateobj); so = CALLOC_STRUCT(fd4_zsa_stateobj);
if (!so) if (!so)
return NULL; return NULL;
so->base = *cso; so->base = *cso;
so->rb_depth_control |= so->rb_depth_control |=
A4XX_RB_DEPTH_CONTROL_ZFUNC(cso->depth_func); /* maps 1:1 */ A4XX_RB_DEPTH_CONTROL_ZFUNC(cso->depth_func); /* maps 1:1 */
if (cso->depth_enabled) if (cso->depth_enabled)
so->rb_depth_control |= so->rb_depth_control |=
A4XX_RB_DEPTH_CONTROL_Z_ENABLE | A4XX_RB_DEPTH_CONTROL_Z_ENABLE | A4XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE;
A4XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE;
if (cso->depth_writemask) if (cso->depth_writemask)
so->rb_depth_control |= A4XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE; so->rb_depth_control |= A4XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE;
if (cso->stencil[0].enabled) { if (cso->stencil[0].enabled) {
const struct pipe_stencil_state *s = &cso->stencil[0]; const struct pipe_stencil_state *s = &cso->stencil[0];
so->rb_stencil_control |= so->rb_stencil_control |=
A4XX_RB_STENCIL_CONTROL_STENCIL_READ | A4XX_RB_STENCIL_CONTROL_STENCIL_READ |
A4XX_RB_STENCIL_CONTROL_STENCIL_ENABLE | A4XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
A4XX_RB_STENCIL_CONTROL_FUNC(s->func) | /* maps 1:1 */ A4XX_RB_STENCIL_CONTROL_FUNC(s->func) | /* maps 1:1 */
A4XX_RB_STENCIL_CONTROL_FAIL(fd_stencil_op(s->fail_op)) | A4XX_RB_STENCIL_CONTROL_FAIL(fd_stencil_op(s->fail_op)) |
A4XX_RB_STENCIL_CONTROL_ZPASS(fd_stencil_op(s->zpass_op)) | A4XX_RB_STENCIL_CONTROL_ZPASS(fd_stencil_op(s->zpass_op)) |
A4XX_RB_STENCIL_CONTROL_ZFAIL(fd_stencil_op(s->zfail_op)); A4XX_RB_STENCIL_CONTROL_ZFAIL(fd_stencil_op(s->zfail_op));
so->rb_stencil_control2 |= so->rb_stencil_control2 |= A4XX_RB_STENCIL_CONTROL2_STENCIL_BUFFER;
A4XX_RB_STENCIL_CONTROL2_STENCIL_BUFFER; so->rb_stencilrefmask |=
so->rb_stencilrefmask |= 0xff000000 | /* ??? */
0xff000000 | /* ??? */ A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(s->writemask) |
A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(s->writemask) | A4XX_RB_STENCILREFMASK_STENCILMASK(s->valuemask);
A4XX_RB_STENCILREFMASK_STENCILMASK(s->valuemask);
if (cso->stencil[1].enabled) { if (cso->stencil[1].enabled) {
const struct pipe_stencil_state *bs = &cso->stencil[1]; const struct pipe_stencil_state *bs = &cso->stencil[1];
so->rb_stencil_control |= so->rb_stencil_control |=
A4XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF | A4XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
A4XX_RB_STENCIL_CONTROL_FUNC_BF(bs->func) | /* maps 1:1 */ A4XX_RB_STENCIL_CONTROL_FUNC_BF(bs->func) | /* maps 1:1 */
A4XX_RB_STENCIL_CONTROL_FAIL_BF(fd_stencil_op(bs->fail_op)) | A4XX_RB_STENCIL_CONTROL_FAIL_BF(fd_stencil_op(bs->fail_op)) |
A4XX_RB_STENCIL_CONTROL_ZPASS_BF(fd_stencil_op(bs->zpass_op)) | A4XX_RB_STENCIL_CONTROL_ZPASS_BF(fd_stencil_op(bs->zpass_op)) |
A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(fd_stencil_op(bs->zfail_op)); A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(fd_stencil_op(bs->zfail_op));
so->rb_stencilrefmask_bf |= so->rb_stencilrefmask_bf |=
0xff000000 | /* ??? */ 0xff000000 | /* ??? */
A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(bs->writemask) | A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(bs->writemask) |
A4XX_RB_STENCILREFMASK_BF_STENCILMASK(bs->valuemask); A4XX_RB_STENCILREFMASK_BF_STENCILMASK(bs->valuemask);
} }
} }
if (cso->alpha_enabled) { if (cso->alpha_enabled) {
uint32_t ref = cso->alpha_ref_value * 255.0; uint32_t ref = cso->alpha_ref_value * 255.0;
so->gras_alpha_control = so->gras_alpha_control = A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE;
A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE; so->rb_alpha_control =
so->rb_alpha_control = A4XX_RB_ALPHA_CONTROL_ALPHA_TEST |
A4XX_RB_ALPHA_CONTROL_ALPHA_TEST | A4XX_RB_ALPHA_CONTROL_ALPHA_REF(ref) |
A4XX_RB_ALPHA_CONTROL_ALPHA_REF(ref) | A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(cso->alpha_func);
A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(cso->alpha_func); so->rb_depth_control |= A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
so->rb_depth_control |= }
A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
}
return so; return so;
} }

View file

@ -27,30 +27,29 @@
#ifndef FD4_ZSA_H_ #ifndef FD4_ZSA_H_
#define FD4_ZSA_H_ #define FD4_ZSA_H_
#include "pipe/p_state.h"
#include "pipe/p_context.h" #include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "freedreno_util.h" #include "freedreno_util.h"
struct fd4_zsa_stateobj { struct fd4_zsa_stateobj {
struct pipe_depth_stencil_alpha_state base; struct pipe_depth_stencil_alpha_state base;
uint32_t gras_alpha_control; uint32_t gras_alpha_control;
uint32_t rb_alpha_control; uint32_t rb_alpha_control;
uint32_t rb_depth_control; uint32_t rb_depth_control;
uint32_t rb_stencil_control; uint32_t rb_stencil_control;
uint32_t rb_stencil_control2; uint32_t rb_stencil_control2;
uint32_t rb_stencilrefmask; uint32_t rb_stencilrefmask;
uint32_t rb_stencilrefmask_bf; uint32_t rb_stencilrefmask_bf;
}; };
static inline struct fd4_zsa_stateobj * static inline struct fd4_zsa_stateobj *
fd4_zsa_stateobj(struct pipe_depth_stencil_alpha_state *zsa) fd4_zsa_stateobj(struct pipe_depth_stencil_alpha_state *zsa)
{ {
return (struct fd4_zsa_stateobj *)zsa; return (struct fd4_zsa_stateobj *)zsa;
} }
void * fd4_zsa_state_create(struct pipe_context *pctx, void *fd4_zsa_state_create(struct pipe_context *pctx,
const struct pipe_depth_stencil_alpha_state *cso); const struct pipe_depth_stencil_alpha_state *cso);
#endif /* FD4_ZSA_H_ */ #endif /* FD4_ZSA_H_ */

View file

@ -26,8 +26,8 @@
#include "pipe/p_state.h" #include "pipe/p_state.h"
#include "util/u_blend.h" #include "util/u_blend.h"
#include "util/u_string.h"
#include "util/u_memory.h" #include "util/u_memory.h"
#include "util/u_string.h"
#include "fd5_blend.h" #include "fd5_blend.h"
#include "fd5_context.h" #include "fd5_context.h"
@ -37,90 +37,99 @@
static enum a3xx_rb_blend_opcode static enum a3xx_rb_blend_opcode
blend_func(unsigned func) blend_func(unsigned func)
{ {
switch (func) { switch (func) {
case PIPE_BLEND_ADD: case PIPE_BLEND_ADD:
return BLEND_DST_PLUS_SRC; return BLEND_DST_PLUS_SRC;
case PIPE_BLEND_MIN: case PIPE_BLEND_MIN:
return BLEND_MIN_DST_SRC; return BLEND_MIN_DST_SRC;
case PIPE_BLEND_MAX: case PIPE_BLEND_MAX:
return BLEND_MAX_DST_SRC; return BLEND_MAX_DST_SRC;
case PIPE_BLEND_SUBTRACT: case PIPE_BLEND_SUBTRACT:
return BLEND_SRC_MINUS_DST; return BLEND_SRC_MINUS_DST;
case PIPE_BLEND_REVERSE_SUBTRACT: case PIPE_BLEND_REVERSE_SUBTRACT:
return BLEND_DST_MINUS_SRC; return BLEND_DST_MINUS_SRC;
default: default:
DBG("invalid blend func: %x", func); DBG("invalid blend func: %x", func);
return 0; return 0;
} }
} }
void * void *
fd5_blend_state_create(struct pipe_context *pctx, fd5_blend_state_create(struct pipe_context *pctx,
const struct pipe_blend_state *cso) const struct pipe_blend_state *cso)
{ {
struct fd5_blend_stateobj *so; struct fd5_blend_stateobj *so;
enum a3xx_rop_code rop = ROP_COPY; enum a3xx_rop_code rop = ROP_COPY;
bool reads_dest = false; bool reads_dest = false;
unsigned i, mrt_blend = 0; unsigned i, mrt_blend = 0;
if (cso->logicop_enable) { if (cso->logicop_enable) {
rop = cso->logicop_func; /* maps 1:1 */ rop = cso->logicop_func; /* maps 1:1 */
reads_dest = util_logicop_reads_dest(cso->logicop_func); reads_dest = util_logicop_reads_dest(cso->logicop_func);
} }
so = CALLOC_STRUCT(fd5_blend_stateobj); so = CALLOC_STRUCT(fd5_blend_stateobj);
if (!so) if (!so)
return NULL; return NULL;
so->base = *cso; so->base = *cso;
so->lrz_write = true; /* unless blend enabled for any MRT */ so->lrz_write = true; /* unless blend enabled for any MRT */
for (i = 0; i < ARRAY_SIZE(so->rb_mrt); i++) { for (i = 0; i < ARRAY_SIZE(so->rb_mrt); i++) {
const struct pipe_rt_blend_state *rt; const struct pipe_rt_blend_state *rt;
if (cso->independent_blend_enable) if (cso->independent_blend_enable)
rt = &cso->rt[i]; rt = &cso->rt[i];
else else
rt = &cso->rt[0]; rt = &cso->rt[0];
so->rb_mrt[i].blend_control = so->rb_mrt[i].blend_control =
A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(rt->rgb_src_factor)) | A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(
A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) | fd_blend_factor(rt->rgb_src_factor)) |
A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(rt->rgb_dst_factor)) | A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) |
A5XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(fd_blend_factor(rt->alpha_src_factor)) | A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(
A5XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(blend_func(rt->alpha_func)) | fd_blend_factor(rt->rgb_dst_factor)) |
A5XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(fd_blend_factor(rt->alpha_dst_factor)); A5XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(
fd_blend_factor(rt->alpha_src_factor)) |
A5XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(
blend_func(rt->alpha_func)) |
A5XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(
fd_blend_factor(rt->alpha_dst_factor));
so->rb_mrt[i].control = so->rb_mrt[i].control =
A5XX_RB_MRT_CONTROL_ROP_CODE(rop) | A5XX_RB_MRT_CONTROL_ROP_CODE(rop) |
COND(cso->logicop_enable, A5XX_RB_MRT_CONTROL_ROP_ENABLE) | COND(cso->logicop_enable, A5XX_RB_MRT_CONTROL_ROP_ENABLE) |
A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE(rt->colormask); A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE(rt->colormask);
if (rt->blend_enable) { if (rt->blend_enable) {
so->rb_mrt[i].control |= so->rb_mrt[i].control |=
// A5XX_RB_MRT_CONTROL_READ_DEST_ENABLE | // A5XX_RB_MRT_CONTROL_READ_DEST_ENABLE
A5XX_RB_MRT_CONTROL_BLEND | //|
A5XX_RB_MRT_CONTROL_BLEND2; A5XX_RB_MRT_CONTROL_BLEND | A5XX_RB_MRT_CONTROL_BLEND2;
mrt_blend |= (1 << i); mrt_blend |= (1 << i);
so->lrz_write = false; so->lrz_write = false;
} }
if (reads_dest) { if (reads_dest) {
// so->rb_mrt[i].control |= A5XX_RB_MRT_CONTROL_READ_DEST_ENABLE; // so->rb_mrt[i].control |=
mrt_blend |= (1 << i); //A5XX_RB_MRT_CONTROL_READ_DEST_ENABLE;
} mrt_blend |= (1 << i);
}
// if (cso->dither) // if (cso->dither)
// so->rb_mrt[i].buf_info |= A5XX_RB_MRT_BUF_INFO_DITHER_MODE(DITHER_ALWAYS); // so->rb_mrt[i].buf_info |=
} //A5XX_RB_MRT_BUF_INFO_DITHER_MODE(DITHER_ALWAYS);
}
so->rb_blend_cntl = A5XX_RB_BLEND_CNTL_ENABLE_BLEND(mrt_blend) | so->rb_blend_cntl =
COND(cso->alpha_to_coverage, A5XX_RB_BLEND_CNTL_ALPHA_TO_COVERAGE) | A5XX_RB_BLEND_CNTL_ENABLE_BLEND(mrt_blend) |
COND(cso->independent_blend_enable, A5XX_RB_BLEND_CNTL_INDEPENDENT_BLEND); COND(cso->alpha_to_coverage, A5XX_RB_BLEND_CNTL_ALPHA_TO_COVERAGE) |
so->sp_blend_cntl = A5XX_SP_BLEND_CNTL_UNK8 | COND(cso->independent_blend_enable, A5XX_RB_BLEND_CNTL_INDEPENDENT_BLEND);
COND(cso->alpha_to_coverage, A5XX_SP_BLEND_CNTL_ALPHA_TO_COVERAGE) | so->sp_blend_cntl =
COND(mrt_blend, A5XX_SP_BLEND_CNTL_ENABLED); A5XX_SP_BLEND_CNTL_UNK8 |
COND(cso->alpha_to_coverage, A5XX_SP_BLEND_CNTL_ALPHA_TO_COVERAGE) |
COND(mrt_blend, A5XX_SP_BLEND_CNTL_ENABLED);
return so; return so;
} }

View file

@ -27,31 +27,31 @@
#ifndef FD5_BLEND_H_ #ifndef FD5_BLEND_H_
#define FD5_BLEND_H_ #define FD5_BLEND_H_
#include "pipe/p_state.h"
#include "pipe/p_context.h" #include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "freedreno_util.h" #include "freedreno_util.h"
struct fd5_blend_stateobj { struct fd5_blend_stateobj {
struct pipe_blend_state base; struct pipe_blend_state base;
struct { struct {
uint32_t control; uint32_t control;
uint32_t buf_info; uint32_t buf_info;
uint32_t blend_control; uint32_t blend_control;
} rb_mrt[A5XX_MAX_RENDER_TARGETS]; } rb_mrt[A5XX_MAX_RENDER_TARGETS];
uint32_t rb_blend_cntl; uint32_t rb_blend_cntl;
uint32_t sp_blend_cntl; uint32_t sp_blend_cntl;
bool lrz_write; bool lrz_write;
}; };
static inline struct fd5_blend_stateobj * static inline struct fd5_blend_stateobj *
fd5_blend_stateobj(struct pipe_blend_state *blend) fd5_blend_stateobj(struct pipe_blend_state *blend)
{ {
return (struct fd5_blend_stateobj *)blend; return (struct fd5_blend_stateobj *)blend;
} }
void * fd5_blend_state_create(struct pipe_context *pctx, void *fd5_blend_state_create(struct pipe_context *pctx,
const struct pipe_blend_state *cso); const struct pipe_blend_state *cso);
#endif /* FD5_BLEND_H_ */ #endif /* FD5_BLEND_H_ */

View file

@ -28,8 +28,8 @@
#include "freedreno_resource.h" #include "freedreno_resource.h"
#include "fd5_blitter.h" #include "fd5_blitter.h"
#include "fd5_format.h"
#include "fd5_emit.h" #include "fd5_emit.h"
#include "fd5_format.h"
/* Make sure none of the requested dimensions extend beyond the size of the /* Make sure none of the requested dimensions extend beyond the size of the
* resource. Not entirely sure why this happens, but sometimes it does, and * resource. Not entirely sure why this happens, but sometimes it does, and
@ -39,9 +39,9 @@
static bool static bool
ok_dims(const struct pipe_resource *r, const struct pipe_box *b, int lvl) ok_dims(const struct pipe_resource *r, const struct pipe_box *b, int lvl)
{ {
return (b->x >= 0) && (b->x + b->width <= u_minify(r->width0, lvl)) && return (b->x >= 0) && (b->x + b->width <= u_minify(r->width0, lvl)) &&
(b->y >= 0) && (b->y + b->height <= u_minify(r->height0, lvl)) && (b->y >= 0) && (b->y + b->height <= u_minify(r->height0, lvl)) &&
(b->z >= 0) && (b->z + b->depth <= u_minify(r->depth0, lvl)); (b->z >= 0) && (b->z + b->depth <= u_minify(r->depth0, lvl));
} }
/* Not sure if format restrictions differ for src and dst, or if /* Not sure if format restrictions differ for src and dst, or if
@ -52,136 +52,136 @@ ok_dims(const struct pipe_resource *r, const struct pipe_box *b, int lvl)
static bool static bool
ok_format(enum pipe_format fmt) ok_format(enum pipe_format fmt)
{ {
if (util_format_is_compressed(fmt)) if (util_format_is_compressed(fmt))
return false; return false;
switch (fmt) { switch (fmt) {
case PIPE_FORMAT_R10G10B10A2_SSCALED: case PIPE_FORMAT_R10G10B10A2_SSCALED:
case PIPE_FORMAT_R10G10B10A2_SNORM: case PIPE_FORMAT_R10G10B10A2_SNORM:
case PIPE_FORMAT_B10G10R10A2_USCALED: case PIPE_FORMAT_B10G10R10A2_USCALED:
case PIPE_FORMAT_B10G10R10A2_SSCALED: case PIPE_FORMAT_B10G10R10A2_SSCALED:
case PIPE_FORMAT_B10G10R10A2_SNORM: case PIPE_FORMAT_B10G10R10A2_SNORM:
case PIPE_FORMAT_R10G10B10A2_UNORM: case PIPE_FORMAT_R10G10B10A2_UNORM:
case PIPE_FORMAT_R10G10B10A2_USCALED: case PIPE_FORMAT_R10G10B10A2_USCALED:
case PIPE_FORMAT_B10G10R10A2_UNORM: case PIPE_FORMAT_B10G10R10A2_UNORM:
case PIPE_FORMAT_R10SG10SB10SA2U_NORM: case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
case PIPE_FORMAT_B10G10R10A2_UINT: case PIPE_FORMAT_B10G10R10A2_UINT:
case PIPE_FORMAT_R10G10B10A2_UINT: case PIPE_FORMAT_R10G10B10A2_UINT:
return false; return false;
default: default:
break; break;
} }
if (fd5_pipe2color(fmt) == RB5_NONE) if (fd5_pipe2color(fmt) == RB5_NONE)
return false; return false;
return true; return true;
} }
static bool static bool
can_do_blit(const struct pipe_blit_info *info) can_do_blit(const struct pipe_blit_info *info)
{ {
/* I think we can do scaling, but not in z dimension since that would /* I think we can do scaling, but not in z dimension since that would
* require blending.. * require blending..
*/ */
if (info->dst.box.depth != info->src.box.depth) if (info->dst.box.depth != info->src.box.depth)
return false; return false;
if (!ok_format(info->dst.format)) if (!ok_format(info->dst.format))
return false; return false;
if (!ok_format(info->src.format)) if (!ok_format(info->src.format))
return false; return false;
/* hw ignores {SRC,DST}_INFO.COLOR_SWAP if {SRC,DST}_INFO.TILE_MODE /* hw ignores {SRC,DST}_INFO.COLOR_SWAP if {SRC,DST}_INFO.TILE_MODE
* is set (not linear). We can kind of get around that when tiling/ * is set (not linear). We can kind of get around that when tiling/
* untiling by setting both src and dst COLOR_SWAP=WZYX, but that * untiling by setting both src and dst COLOR_SWAP=WZYX, but that
* means the formats must match: * means the formats must match:
*/ */
if ((fd_resource(info->dst.resource)->layout.tile_mode || if ((fd_resource(info->dst.resource)->layout.tile_mode ||
fd_resource(info->src.resource)->layout.tile_mode) && fd_resource(info->src.resource)->layout.tile_mode) &&
info->dst.format != info->src.format) info->dst.format != info->src.format)
return false; return false;
/* until we figure out a few more registers: */ /* until we figure out a few more registers: */
if ((info->dst.box.width != info->src.box.width) || if ((info->dst.box.width != info->src.box.width) ||
(info->dst.box.height != info->src.box.height)) (info->dst.box.height != info->src.box.height))
return false; return false;
/* src box can be inverted, which we don't support.. dst box cannot: */ /* src box can be inverted, which we don't support.. dst box cannot: */
if ((info->src.box.width < 0) || (info->src.box.height < 0)) if ((info->src.box.width < 0) || (info->src.box.height < 0))
return false; return false;
if (!ok_dims(info->src.resource, &info->src.box, info->src.level)) if (!ok_dims(info->src.resource, &info->src.box, info->src.level))
return false; return false;
if (!ok_dims(info->dst.resource, &info->dst.box, info->dst.level)) if (!ok_dims(info->dst.resource, &info->dst.box, info->dst.level))
return false; return false;
debug_assert(info->dst.box.width >= 0); debug_assert(info->dst.box.width >= 0);
debug_assert(info->dst.box.height >= 0); debug_assert(info->dst.box.height >= 0);
debug_assert(info->dst.box.depth >= 0); debug_assert(info->dst.box.depth >= 0);
if ((info->dst.resource->nr_samples > 1) || if ((info->dst.resource->nr_samples > 1) ||
(info->src.resource->nr_samples > 1)) (info->src.resource->nr_samples > 1))
return false; return false;
if (info->scissor_enable) if (info->scissor_enable)
return false; return false;
if (info->window_rectangle_include) if (info->window_rectangle_include)
return false; return false;
if (info->render_condition_enable) if (info->render_condition_enable)
return false; return false;
if (info->alpha_blend) if (info->alpha_blend)
return false; return false;
if (info->filter != PIPE_TEX_FILTER_NEAREST) if (info->filter != PIPE_TEX_FILTER_NEAREST)
return false; return false;
if (info->mask != util_format_get_mask(info->src.format)) if (info->mask != util_format_get_mask(info->src.format))
return false; return false;
if (info->mask != util_format_get_mask(info->dst.format)) if (info->mask != util_format_get_mask(info->dst.format))
return false; return false;
return true; return true;
} }
static void static void
emit_setup(struct fd_ringbuffer *ring) emit_setup(struct fd_ringbuffer *ring)
{ {
OUT_PKT4(ring, REG_A5XX_RB_RENDER_CNTL, 1); OUT_PKT4(ring, REG_A5XX_RB_RENDER_CNTL, 1);
OUT_RING(ring, 0x00000008); OUT_RING(ring, 0x00000008);
OUT_PKT4(ring, REG_A5XX_UNKNOWN_2100, 1); OUT_PKT4(ring, REG_A5XX_UNKNOWN_2100, 1);
OUT_RING(ring, 0x86000000); /* UNKNOWN_2100 */ OUT_RING(ring, 0x86000000); /* UNKNOWN_2100 */
OUT_PKT4(ring, REG_A5XX_UNKNOWN_2180, 1); OUT_PKT4(ring, REG_A5XX_UNKNOWN_2180, 1);
OUT_RING(ring, 0x86000000); /* UNKNOWN_2180 */ OUT_RING(ring, 0x86000000); /* UNKNOWN_2180 */
OUT_PKT4(ring, REG_A5XX_UNKNOWN_2184, 1); OUT_PKT4(ring, REG_A5XX_UNKNOWN_2184, 1);
OUT_RING(ring, 0x00000009); /* UNKNOWN_2184 */ OUT_RING(ring, 0x00000009); /* UNKNOWN_2184 */
OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1); OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
OUT_RING(ring, A5XX_RB_CNTL_BYPASS); OUT_RING(ring, A5XX_RB_CNTL_BYPASS);
OUT_PKT4(ring, REG_A5XX_RB_MODE_CNTL, 1); OUT_PKT4(ring, REG_A5XX_RB_MODE_CNTL, 1);
OUT_RING(ring, 0x00000004); /* RB_MODE_CNTL */ OUT_RING(ring, 0x00000004); /* RB_MODE_CNTL */
OUT_PKT4(ring, REG_A5XX_SP_MODE_CNTL, 1); OUT_PKT4(ring, REG_A5XX_SP_MODE_CNTL, 1);
OUT_RING(ring, 0x0000000c); /* SP_MODE_CNTL */ OUT_RING(ring, 0x0000000c); /* SP_MODE_CNTL */
OUT_PKT4(ring, REG_A5XX_TPL1_MODE_CNTL, 1); OUT_PKT4(ring, REG_A5XX_TPL1_MODE_CNTL, 1);
OUT_RING(ring, 0x00000344); /* TPL1_MODE_CNTL */ OUT_RING(ring, 0x00000344); /* TPL1_MODE_CNTL */
OUT_PKT4(ring, REG_A5XX_HLSQ_MODE_CNTL, 1); OUT_PKT4(ring, REG_A5XX_HLSQ_MODE_CNTL, 1);
OUT_RING(ring, 0x00000002); /* HLSQ_MODE_CNTL */ OUT_RING(ring, 0x00000002); /* HLSQ_MODE_CNTL */
OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1); OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1);
OUT_RING(ring, 0x00000181); /* GRAS_CL_CNTL */ OUT_RING(ring, 0x00000181); /* GRAS_CL_CNTL */
} }
/* buffers need to be handled specially since x/width can exceed the bounds /* buffers need to be handled specially since x/width can exceed the bounds
@ -190,297 +190,297 @@ emit_setup(struct fd_ringbuffer *ring)
static void static void
emit_blit_buffer(struct fd_ringbuffer *ring, const struct pipe_blit_info *info) emit_blit_buffer(struct fd_ringbuffer *ring, const struct pipe_blit_info *info)
{ {
const struct pipe_box *sbox = &info->src.box; const struct pipe_box *sbox = &info->src.box;
const struct pipe_box *dbox = &info->dst.box; const struct pipe_box *dbox = &info->dst.box;
struct fd_resource *src, *dst; struct fd_resource *src, *dst;
unsigned sshift, dshift; unsigned sshift, dshift;
src = fd_resource(info->src.resource); src = fd_resource(info->src.resource);
dst = fd_resource(info->dst.resource); dst = fd_resource(info->dst.resource);
debug_assert(src->layout.cpp == 1); debug_assert(src->layout.cpp == 1);
debug_assert(dst->layout.cpp == 1); debug_assert(dst->layout.cpp == 1);
debug_assert(info->src.resource->format == info->dst.resource->format); debug_assert(info->src.resource->format == info->dst.resource->format);
debug_assert((sbox->y == 0) && (sbox->height == 1)); debug_assert((sbox->y == 0) && (sbox->height == 1));
debug_assert((dbox->y == 0) && (dbox->height == 1)); debug_assert((dbox->y == 0) && (dbox->height == 1));
debug_assert((sbox->z == 0) && (sbox->depth == 1)); debug_assert((sbox->z == 0) && (sbox->depth == 1));
debug_assert((dbox->z == 0) && (dbox->depth == 1)); debug_assert((dbox->z == 0) && (dbox->depth == 1));
debug_assert(sbox->width == dbox->width); debug_assert(sbox->width == dbox->width);
debug_assert(info->src.level == 0); debug_assert(info->src.level == 0);
debug_assert(info->dst.level == 0); debug_assert(info->dst.level == 0);
/* /*
* Buffers can have dimensions bigger than max width, remap into * Buffers can have dimensions bigger than max width, remap into
* multiple 1d blits to fit within max dimension * multiple 1d blits to fit within max dimension
* *
* Note that blob uses .ARRAY_PITCH=128 for blitting buffers, which * Note that blob uses .ARRAY_PITCH=128 for blitting buffers, which
* seems to prevent overfetch related faults. Not quite sure what * seems to prevent overfetch related faults. Not quite sure what
* the deal is there. * the deal is there.
* *
* Low 6 bits of SRC/DST addresses need to be zero (ie. address * Low 6 bits of SRC/DST addresses need to be zero (ie. address
* aligned to 64) so we need to shift src/dst x1/x2 to make up the * aligned to 64) so we need to shift src/dst x1/x2 to make up the
* difference. On top of already splitting up the blit so width * difference. On top of already splitting up the blit so width
* isn't > 16k. * isn't > 16k.
* *
* We perhaps could do a bit better, if src and dst are aligned but * We perhaps could do a bit better, if src and dst are aligned but
* in the worst case this means we have to split the copy up into * in the worst case this means we have to split the copy up into
* 16k (0x4000) minus 64 (0x40). * 16k (0x4000) minus 64 (0x40).
*/ */
sshift = sbox->x & 0x3f; sshift = sbox->x & 0x3f;
dshift = dbox->x & 0x3f; dshift = dbox->x & 0x3f;
for (unsigned off = 0; off < sbox->width; off += (0x4000 - 0x40)) { for (unsigned off = 0; off < sbox->width; off += (0x4000 - 0x40)) {
unsigned soff, doff, w, p; unsigned soff, doff, w, p;
soff = (sbox->x + off) & ~0x3f; soff = (sbox->x + off) & ~0x3f;
doff = (dbox->x + off) & ~0x3f; doff = (dbox->x + off) & ~0x3f;
w = MIN2(sbox->width - off, (0x4000 - 0x40)); w = MIN2(sbox->width - off, (0x4000 - 0x40));
p = align(w, 64); p = align(w, 64);
debug_assert((soff + w) <= fd_bo_size(src->bo)); debug_assert((soff + w) <= fd_bo_size(src->bo));
debug_assert((doff + w) <= fd_bo_size(dst->bo)); debug_assert((doff + w) <= fd_bo_size(dst->bo));
OUT_PKT7(ring, CP_SET_RENDER_MODE, 1); OUT_PKT7(ring, CP_SET_RENDER_MODE, 1);
OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(BLIT2D)); OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(BLIT2D));
/* /*
* Emit source: * Emit source:
*/ */
OUT_PKT4(ring, REG_A5XX_RB_2D_SRC_INFO, 9); OUT_PKT4(ring, REG_A5XX_RB_2D_SRC_INFO, 9);
OUT_RING(ring, A5XX_RB_2D_SRC_INFO_COLOR_FORMAT(RB5_R8_UNORM) | OUT_RING(ring, A5XX_RB_2D_SRC_INFO_COLOR_FORMAT(RB5_R8_UNORM) |
A5XX_RB_2D_SRC_INFO_TILE_MODE(TILE5_LINEAR) | A5XX_RB_2D_SRC_INFO_TILE_MODE(TILE5_LINEAR) |
A5XX_RB_2D_SRC_INFO_COLOR_SWAP(WZYX)); A5XX_RB_2D_SRC_INFO_COLOR_SWAP(WZYX));
OUT_RELOC(ring, src->bo, soff, 0, 0); /* RB_2D_SRC_LO/HI */ OUT_RELOC(ring, src->bo, soff, 0, 0); /* RB_2D_SRC_LO/HI */
OUT_RING(ring, A5XX_RB_2D_SRC_SIZE_PITCH(p) | OUT_RING(ring, A5XX_RB_2D_SRC_SIZE_PITCH(p) |
A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH(128)); A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH(128));
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_PKT4(ring, REG_A5XX_GRAS_2D_SRC_INFO, 1); OUT_PKT4(ring, REG_A5XX_GRAS_2D_SRC_INFO, 1);
OUT_RING(ring, A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT(RB5_R8_UNORM) | OUT_RING(ring, A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT(RB5_R8_UNORM) |
A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP(WZYX)); A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP(WZYX));
/* /*
* Emit destination: * Emit destination:
*/ */
OUT_PKT4(ring, REG_A5XX_RB_2D_DST_INFO, 9); OUT_PKT4(ring, REG_A5XX_RB_2D_DST_INFO, 9);
OUT_RING(ring, A5XX_RB_2D_DST_INFO_COLOR_FORMAT(RB5_R8_UNORM) | OUT_RING(ring, A5XX_RB_2D_DST_INFO_COLOR_FORMAT(RB5_R8_UNORM) |
A5XX_RB_2D_DST_INFO_TILE_MODE(TILE5_LINEAR) | A5XX_RB_2D_DST_INFO_TILE_MODE(TILE5_LINEAR) |
A5XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX)); A5XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
OUT_RELOC(ring, dst->bo, doff, 0, 0); /* RB_2D_DST_LO/HI */ OUT_RELOC(ring, dst->bo, doff, 0, 0); /* RB_2D_DST_LO/HI */
OUT_RING(ring, A5XX_RB_2D_DST_SIZE_PITCH(p) | OUT_RING(ring, A5XX_RB_2D_DST_SIZE_PITCH(p) |
A5XX_RB_2D_DST_SIZE_ARRAY_PITCH(128)); A5XX_RB_2D_DST_SIZE_ARRAY_PITCH(128));
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_PKT4(ring, REG_A5XX_GRAS_2D_DST_INFO, 1); OUT_PKT4(ring, REG_A5XX_GRAS_2D_DST_INFO, 1);
OUT_RING(ring, A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT(RB5_R8_UNORM) | OUT_RING(ring, A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT(RB5_R8_UNORM) |
A5XX_GRAS_2D_DST_INFO_COLOR_SWAP(WZYX)); A5XX_GRAS_2D_DST_INFO_COLOR_SWAP(WZYX));
/* /*
* Blit command: * Blit command:
*/ */
OUT_PKT7(ring, CP_BLIT, 5); OUT_PKT7(ring, CP_BLIT, 5);
OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_COPY)); OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_COPY));
OUT_RING(ring, CP_BLIT_1_SRC_X1(sshift) | CP_BLIT_1_SRC_Y1(0)); OUT_RING(ring, CP_BLIT_1_SRC_X1(sshift) | CP_BLIT_1_SRC_Y1(0));
OUT_RING(ring, CP_BLIT_2_SRC_X2(sshift+w-1) | CP_BLIT_2_SRC_Y2(0)); OUT_RING(ring, CP_BLIT_2_SRC_X2(sshift + w - 1) | CP_BLIT_2_SRC_Y2(0));
OUT_RING(ring, CP_BLIT_3_DST_X1(dshift) | CP_BLIT_3_DST_Y1(0)); OUT_RING(ring, CP_BLIT_3_DST_X1(dshift) | CP_BLIT_3_DST_Y1(0));
OUT_RING(ring, CP_BLIT_4_DST_X2(dshift+w-1) | CP_BLIT_4_DST_Y2(0)); OUT_RING(ring, CP_BLIT_4_DST_X2(dshift + w - 1) | CP_BLIT_4_DST_Y2(0));
OUT_PKT7(ring, CP_SET_RENDER_MODE, 1); OUT_PKT7(ring, CP_SET_RENDER_MODE, 1);
OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(END2D)); OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(END2D));
OUT_WFI5(ring); OUT_WFI5(ring);
} }
} }
static void static void
emit_blit(struct fd_ringbuffer *ring, const struct pipe_blit_info *info) emit_blit(struct fd_ringbuffer *ring, const struct pipe_blit_info *info)
{ {
const struct pipe_box *sbox = &info->src.box; const struct pipe_box *sbox = &info->src.box;
const struct pipe_box *dbox = &info->dst.box; const struct pipe_box *dbox = &info->dst.box;
struct fd_resource *src, *dst; struct fd_resource *src, *dst;
struct fdl_slice *sslice, *dslice; struct fdl_slice *sslice, *dslice;
enum a5xx_color_fmt sfmt, dfmt; enum a5xx_color_fmt sfmt, dfmt;
enum a5xx_tile_mode stile, dtile; enum a5xx_tile_mode stile, dtile;
enum a3xx_color_swap sswap, dswap; enum a3xx_color_swap sswap, dswap;
unsigned ssize, dsize, spitch, dpitch; unsigned ssize, dsize, spitch, dpitch;
unsigned sx1, sy1, sx2, sy2; unsigned sx1, sy1, sx2, sy2;
unsigned dx1, dy1, dx2, dy2; unsigned dx1, dy1, dx2, dy2;
src = fd_resource(info->src.resource); src = fd_resource(info->src.resource);
dst = fd_resource(info->dst.resource); dst = fd_resource(info->dst.resource);
sslice = fd_resource_slice(src, info->src.level); sslice = fd_resource_slice(src, info->src.level);
dslice = fd_resource_slice(dst, info->dst.level); dslice = fd_resource_slice(dst, info->dst.level);
sfmt = fd5_pipe2color(info->src.format); sfmt = fd5_pipe2color(info->src.format);
dfmt = fd5_pipe2color(info->dst.format); dfmt = fd5_pipe2color(info->dst.format);
stile = fd_resource_tile_mode(info->src.resource, info->src.level); stile = fd_resource_tile_mode(info->src.resource, info->src.level);
dtile = fd_resource_tile_mode(info->dst.resource, info->dst.level); dtile = fd_resource_tile_mode(info->dst.resource, info->dst.level);
sswap = fd5_pipe2swap(info->src.format); sswap = fd5_pipe2swap(info->src.format);
dswap = fd5_pipe2swap(info->dst.format); dswap = fd5_pipe2swap(info->dst.format);
spitch = fd_resource_pitch(src, info->src.level); spitch = fd_resource_pitch(src, info->src.level);
dpitch = fd_resource_pitch(dst, info->dst.level); dpitch = fd_resource_pitch(dst, info->dst.level);
/* if dtile, then dswap ignored by hw, and likewise if stile then sswap /* if dtile, then dswap ignored by hw, and likewise if stile then sswap
* ignored by hw.. but in this case we have already rejected the blit * ignored by hw.. but in this case we have already rejected the blit
* if src and dst formats differ, so just use WZYX for both src and * if src and dst formats differ, so just use WZYX for both src and
* dst swap mode (so we don't change component order) * dst swap mode (so we don't change component order)
*/ */
if (stile || dtile) { if (stile || dtile) {
debug_assert(info->src.format == info->dst.format); debug_assert(info->src.format == info->dst.format);
sswap = dswap = WZYX; sswap = dswap = WZYX;
} }
sx1 = sbox->x; sx1 = sbox->x;
sy1 = sbox->y; sy1 = sbox->y;
sx2 = sbox->x + sbox->width - 1; sx2 = sbox->x + sbox->width - 1;
sy2 = sbox->y + sbox->height - 1; sy2 = sbox->y + sbox->height - 1;
dx1 = dbox->x; dx1 = dbox->x;
dy1 = dbox->y; dy1 = dbox->y;
dx2 = dbox->x + dbox->width - 1; dx2 = dbox->x + dbox->width - 1;
dy2 = dbox->y + dbox->height - 1; dy2 = dbox->y + dbox->height - 1;
if (info->src.resource->target == PIPE_TEXTURE_3D) if (info->src.resource->target == PIPE_TEXTURE_3D)
ssize = sslice->size0; ssize = sslice->size0;
else else
ssize = src->layout.layer_size; ssize = src->layout.layer_size;
if (info->dst.resource->target == PIPE_TEXTURE_3D) if (info->dst.resource->target == PIPE_TEXTURE_3D)
dsize = dslice->size0; dsize = dslice->size0;
else else
dsize = dst->layout.layer_size; dsize = dst->layout.layer_size;
for (unsigned i = 0; i < info->dst.box.depth; i++) { for (unsigned i = 0; i < info->dst.box.depth; i++) {
unsigned soff = fd_resource_offset(src, info->src.level, sbox->z + i); unsigned soff = fd_resource_offset(src, info->src.level, sbox->z + i);
unsigned doff = fd_resource_offset(dst, info->dst.level, dbox->z + i); unsigned doff = fd_resource_offset(dst, info->dst.level, dbox->z + i);
debug_assert((soff + (sbox->height * spitch)) <= fd_bo_size(src->bo)); debug_assert((soff + (sbox->height * spitch)) <= fd_bo_size(src->bo));
debug_assert((doff + (dbox->height * dpitch)) <= fd_bo_size(dst->bo)); debug_assert((doff + (dbox->height * dpitch)) <= fd_bo_size(dst->bo));
OUT_PKT7(ring, CP_SET_RENDER_MODE, 1); OUT_PKT7(ring, CP_SET_RENDER_MODE, 1);
OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(BLIT2D)); OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(BLIT2D));
/* /*
* Emit source: * Emit source:
*/ */
OUT_PKT4(ring, REG_A5XX_RB_2D_SRC_INFO, 9); OUT_PKT4(ring, REG_A5XX_RB_2D_SRC_INFO, 9);
OUT_RING(ring, A5XX_RB_2D_SRC_INFO_COLOR_FORMAT(sfmt) | OUT_RING(ring, A5XX_RB_2D_SRC_INFO_COLOR_FORMAT(sfmt) |
A5XX_RB_2D_SRC_INFO_TILE_MODE(stile) | A5XX_RB_2D_SRC_INFO_TILE_MODE(stile) |
A5XX_RB_2D_SRC_INFO_COLOR_SWAP(sswap)); A5XX_RB_2D_SRC_INFO_COLOR_SWAP(sswap));
OUT_RELOC(ring, src->bo, soff, 0, 0); /* RB_2D_SRC_LO/HI */ OUT_RELOC(ring, src->bo, soff, 0, 0); /* RB_2D_SRC_LO/HI */
OUT_RING(ring, A5XX_RB_2D_SRC_SIZE_PITCH(spitch) | OUT_RING(ring, A5XX_RB_2D_SRC_SIZE_PITCH(spitch) |
A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH(ssize)); A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH(ssize));
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_PKT4(ring, REG_A5XX_GRAS_2D_SRC_INFO, 1); OUT_PKT4(ring, REG_A5XX_GRAS_2D_SRC_INFO, 1);
OUT_RING(ring, A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT(sfmt) | OUT_RING(ring, A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT(sfmt) |
A5XX_GRAS_2D_SRC_INFO_TILE_MODE(stile) | A5XX_GRAS_2D_SRC_INFO_TILE_MODE(stile) |
A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP(sswap)); A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP(sswap));
/* /*
* Emit destination: * Emit destination:
*/ */
OUT_PKT4(ring, REG_A5XX_RB_2D_DST_INFO, 9); OUT_PKT4(ring, REG_A5XX_RB_2D_DST_INFO, 9);
OUT_RING(ring, A5XX_RB_2D_DST_INFO_COLOR_FORMAT(dfmt) | OUT_RING(ring, A5XX_RB_2D_DST_INFO_COLOR_FORMAT(dfmt) |
A5XX_RB_2D_DST_INFO_TILE_MODE(dtile) | A5XX_RB_2D_DST_INFO_TILE_MODE(dtile) |
A5XX_RB_2D_DST_INFO_COLOR_SWAP(dswap)); A5XX_RB_2D_DST_INFO_COLOR_SWAP(dswap));
OUT_RELOC(ring, dst->bo, doff, 0, 0); /* RB_2D_DST_LO/HI */ OUT_RELOC(ring, dst->bo, doff, 0, 0); /* RB_2D_DST_LO/HI */
OUT_RING(ring, A5XX_RB_2D_DST_SIZE_PITCH(dpitch) | OUT_RING(ring, A5XX_RB_2D_DST_SIZE_PITCH(dpitch) |
A5XX_RB_2D_DST_SIZE_ARRAY_PITCH(dsize)); A5XX_RB_2D_DST_SIZE_ARRAY_PITCH(dsize));
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_PKT4(ring, REG_A5XX_GRAS_2D_DST_INFO, 1); OUT_PKT4(ring, REG_A5XX_GRAS_2D_DST_INFO, 1);
OUT_RING(ring, A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT(dfmt) | OUT_RING(ring, A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT(dfmt) |
A5XX_GRAS_2D_DST_INFO_TILE_MODE(dtile) | A5XX_GRAS_2D_DST_INFO_TILE_MODE(dtile) |
A5XX_GRAS_2D_DST_INFO_COLOR_SWAP(dswap)); A5XX_GRAS_2D_DST_INFO_COLOR_SWAP(dswap));
/* /*
* Blit command: * Blit command:
*/ */
OUT_PKT7(ring, CP_BLIT, 5); OUT_PKT7(ring, CP_BLIT, 5);
OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_COPY)); OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_COPY));
OUT_RING(ring, CP_BLIT_1_SRC_X1(sx1) | CP_BLIT_1_SRC_Y1(sy1)); OUT_RING(ring, CP_BLIT_1_SRC_X1(sx1) | CP_BLIT_1_SRC_Y1(sy1));
OUT_RING(ring, CP_BLIT_2_SRC_X2(sx2) | CP_BLIT_2_SRC_Y2(sy2)); OUT_RING(ring, CP_BLIT_2_SRC_X2(sx2) | CP_BLIT_2_SRC_Y2(sy2));
OUT_RING(ring, CP_BLIT_3_DST_X1(dx1) | CP_BLIT_3_DST_Y1(dy1)); OUT_RING(ring, CP_BLIT_3_DST_X1(dx1) | CP_BLIT_3_DST_Y1(dy1));
OUT_RING(ring, CP_BLIT_4_DST_X2(dx2) | CP_BLIT_4_DST_Y2(dy2)); OUT_RING(ring, CP_BLIT_4_DST_X2(dx2) | CP_BLIT_4_DST_Y2(dy2));
OUT_PKT7(ring, CP_SET_RENDER_MODE, 1); OUT_PKT7(ring, CP_SET_RENDER_MODE, 1);
OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(END2D)); OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(END2D));
} }
} }
bool bool
fd5_blitter_blit(struct fd_context *ctx, const struct pipe_blit_info *info) fd5_blitter_blit(struct fd_context *ctx,
assert_dt const struct pipe_blit_info *info) assert_dt
{ {
struct fd_batch *batch; struct fd_batch *batch;
if (!can_do_blit(info)) { if (!can_do_blit(info)) {
return false; return false;
} }
batch = fd_bc_alloc_batch(&ctx->screen->batch_cache, ctx, true); batch = fd_bc_alloc_batch(&ctx->screen->batch_cache, ctx, true);
fd_batch_update_queries(batch); fd_batch_update_queries(batch);
emit_setup(batch->draw); emit_setup(batch->draw);
if ((info->src.resource->target == PIPE_BUFFER) && if ((info->src.resource->target == PIPE_BUFFER) &&
(info->dst.resource->target == PIPE_BUFFER)) { (info->dst.resource->target == PIPE_BUFFER)) {
assert(fd_resource(info->src.resource)->layout.tile_mode == TILE5_LINEAR); assert(fd_resource(info->src.resource)->layout.tile_mode == TILE5_LINEAR);
assert(fd_resource(info->dst.resource)->layout.tile_mode == TILE5_LINEAR); assert(fd_resource(info->dst.resource)->layout.tile_mode == TILE5_LINEAR);
emit_blit_buffer(batch->draw, info); emit_blit_buffer(batch->draw, info);
} else { } else {
/* I don't *think* we need to handle blits between buffer <-> !buffer */ /* I don't *think* we need to handle blits between buffer <-> !buffer */
debug_assert(info->src.resource->target != PIPE_BUFFER); debug_assert(info->src.resource->target != PIPE_BUFFER);
debug_assert(info->dst.resource->target != PIPE_BUFFER); debug_assert(info->dst.resource->target != PIPE_BUFFER);
emit_blit(batch->draw, info); emit_blit(batch->draw, info);
} }
fd_resource(info->dst.resource)->valid = true; fd_resource(info->dst.resource)->valid = true;
batch->needs_flush = true; batch->needs_flush = true;
fd_batch_flush(batch); fd_batch_flush(batch);
fd_batch_reference(&batch, NULL); fd_batch_reference(&batch, NULL);
/* Acc query state will have been dirtied by our fd_batch_update_queries, so /* Acc query state will have been dirtied by our fd_batch_update_queries, so
* the ctx->batch may need to turn its queries back on. * the ctx->batch may need to turn its queries back on.
*/ */
ctx->update_active_queries = true; ctx->update_active_queries = true;
return true; return true;
} }
unsigned unsigned
fd5_tile_mode(const struct pipe_resource *tmpl) fd5_tile_mode(const struct pipe_resource *tmpl)
{ {
/* basically just has to be a format we can blit, so uploads/downloads /* basically just has to be a format we can blit, so uploads/downloads
* via linear staging buffer works: * via linear staging buffer works:
*/ */
if (ok_format(tmpl->format)) if (ok_format(tmpl->format))
return TILE5_3; return TILE5_3;
return TILE5_LINEAR; return TILE5_LINEAR;
} }

View file

@ -31,7 +31,8 @@
#include "freedreno_context.h" #include "freedreno_context.h"
bool fd5_blitter_blit(struct fd_context *ctx, const struct pipe_blit_info *info); bool fd5_blitter_blit(struct fd_context *ctx,
const struct pipe_blit_info *info);
unsigned fd5_tile_mode(const struct pipe_resource *tmpl); unsigned fd5_tile_mode(const struct pipe_resource *tmpl);
#endif /* FD5_BLIT_H_ */ #endif /* FD5_BLIT_H_ */

View file

@ -32,160 +32,167 @@
#include "fd5_context.h" #include "fd5_context.h"
#include "fd5_emit.h" #include "fd5_emit.h"
/* maybe move to fd5_program? */ /* maybe move to fd5_program? */
static void static void
cs_program_emit(struct fd_ringbuffer *ring, struct ir3_shader_variant *v) cs_program_emit(struct fd_ringbuffer *ring, struct ir3_shader_variant *v)
{ {
const struct ir3_info *i = &v->info; const struct ir3_info *i = &v->info;
enum a3xx_threadsize thrsz = i->double_threadsize ? FOUR_QUADS : TWO_QUADS; enum a3xx_threadsize thrsz = i->double_threadsize ? FOUR_QUADS : TWO_QUADS;
unsigned instrlen = v->instrlen; unsigned instrlen = v->instrlen;
/* if shader is more than 32*16 instructions, don't preload it. Similar /* if shader is more than 32*16 instructions, don't preload it. Similar
* to the combined restriction of 64*16 for VS+FS * to the combined restriction of 64*16 for VS+FS
*/ */
if (instrlen > 32) if (instrlen > 32)
instrlen = 0; instrlen = 0;
OUT_PKT4(ring, REG_A5XX_SP_SP_CNTL, 1); OUT_PKT4(ring, REG_A5XX_SP_SP_CNTL, 1);
OUT_RING(ring, 0x00000000); /* SP_SP_CNTL */ OUT_RING(ring, 0x00000000); /* SP_SP_CNTL */
OUT_PKT4(ring, REG_A5XX_HLSQ_CONTROL_0_REG, 1); OUT_PKT4(ring, REG_A5XX_HLSQ_CONTROL_0_REG, 1);
OUT_RING(ring, A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(TWO_QUADS) | OUT_RING(ring, A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(TWO_QUADS) |
A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE(thrsz) | A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE(thrsz) |
0x00000880 /* XXX */); 0x00000880 /* XXX */);
OUT_PKT4(ring, REG_A5XX_SP_CS_CTRL_REG0, 1); OUT_PKT4(ring, REG_A5XX_SP_CS_CTRL_REG0, 1);
OUT_RING(ring, A5XX_SP_CS_CTRL_REG0_THREADSIZE(thrsz) | OUT_RING(ring,
A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(i->max_half_reg + 1) | A5XX_SP_CS_CTRL_REG0_THREADSIZE(thrsz) |
A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(i->max_reg + 1) | A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(i->max_half_reg + 1) |
A5XX_SP_CS_CTRL_REG0_BRANCHSTACK(0x3) | // XXX need to figure this out somehow.. A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(i->max_reg + 1) |
0x6 /* XXX */); A5XX_SP_CS_CTRL_REG0_BRANCHSTACK(
0x3) | // XXX need to figure this out somehow..
0x6 /* XXX */);
OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CONFIG, 1); OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CONFIG, 1);
OUT_RING(ring, A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET(0) | OUT_RING(ring, A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET(0) |
A5XX_HLSQ_CS_CONFIG_SHADEROBJOFFSET(0) | A5XX_HLSQ_CS_CONFIG_SHADEROBJOFFSET(0) |
A5XX_HLSQ_CS_CONFIG_ENABLED); A5XX_HLSQ_CS_CONFIG_ENABLED);
OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CNTL, 1); OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CNTL, 1);
OUT_RING(ring, A5XX_HLSQ_CS_CNTL_INSTRLEN(instrlen) | OUT_RING(ring, A5XX_HLSQ_CS_CNTL_INSTRLEN(instrlen) |
COND(v->has_ssbo, A5XX_HLSQ_CS_CNTL_SSBO_ENABLE)); COND(v->has_ssbo, A5XX_HLSQ_CS_CNTL_SSBO_ENABLE));
OUT_PKT4(ring, REG_A5XX_SP_CS_CONFIG, 1); OUT_PKT4(ring, REG_A5XX_SP_CS_CONFIG, 1);
OUT_RING(ring, A5XX_SP_CS_CONFIG_CONSTOBJECTOFFSET(0) | OUT_RING(ring, A5XX_SP_CS_CONFIG_CONSTOBJECTOFFSET(0) |
A5XX_SP_CS_CONFIG_SHADEROBJOFFSET(0) | A5XX_SP_CS_CONFIG_SHADEROBJOFFSET(0) |
A5XX_SP_CS_CONFIG_ENABLED); A5XX_SP_CS_CONFIG_ENABLED);
assert(v->constlen % 4 == 0); assert(v->constlen % 4 == 0);
unsigned constlen = v->constlen / 4; unsigned constlen = v->constlen / 4;
OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CONSTLEN, 2); OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CONSTLEN, 2);
OUT_RING(ring, constlen); /* HLSQ_CS_CONSTLEN */ OUT_RING(ring, constlen); /* HLSQ_CS_CONSTLEN */
OUT_RING(ring, instrlen); /* HLSQ_CS_INSTRLEN */ OUT_RING(ring, instrlen); /* HLSQ_CS_INSTRLEN */
OUT_PKT4(ring, REG_A5XX_SP_CS_OBJ_START_LO, 2); OUT_PKT4(ring, REG_A5XX_SP_CS_OBJ_START_LO, 2);
OUT_RELOC(ring, v->bo, 0, 0, 0); /* SP_CS_OBJ_START_LO/HI */ OUT_RELOC(ring, v->bo, 0, 0, 0); /* SP_CS_OBJ_START_LO/HI */
OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1); OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1);
OUT_RING(ring, 0x1f00000); OUT_RING(ring, 0x1f00000);
uint32_t local_invocation_id, work_group_id; uint32_t local_invocation_id, work_group_id;
local_invocation_id = ir3_find_sysval_regid(v, SYSTEM_VALUE_LOCAL_INVOCATION_ID); local_invocation_id =
work_group_id = ir3_find_sysval_regid(v, SYSTEM_VALUE_WORK_GROUP_ID); ir3_find_sysval_regid(v, SYSTEM_VALUE_LOCAL_INVOCATION_ID);
work_group_id = ir3_find_sysval_regid(v, SYSTEM_VALUE_WORK_GROUP_ID);
OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CNTL_0, 2); OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CNTL_0, 2);
OUT_RING(ring, A5XX_HLSQ_CS_CNTL_0_WGIDCONSTID(work_group_id) | OUT_RING(ring, A5XX_HLSQ_CS_CNTL_0_WGIDCONSTID(work_group_id) |
A5XX_HLSQ_CS_CNTL_0_UNK0(regid(63, 0)) | A5XX_HLSQ_CS_CNTL_0_UNK0(regid(63, 0)) |
A5XX_HLSQ_CS_CNTL_0_UNK1(regid(63, 0)) | A5XX_HLSQ_CS_CNTL_0_UNK1(regid(63, 0)) |
A5XX_HLSQ_CS_CNTL_0_LOCALIDREGID(local_invocation_id)); A5XX_HLSQ_CS_CNTL_0_LOCALIDREGID(local_invocation_id));
OUT_RING(ring, 0x1); /* HLSQ_CS_CNTL_1 */ OUT_RING(ring, 0x1); /* HLSQ_CS_CNTL_1 */
if (instrlen > 0) if (instrlen > 0)
fd5_emit_shader(ring, v); fd5_emit_shader(ring, v);
} }
static void static void
fd5_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info) fd5_launch_grid(struct fd_context *ctx,
assert_dt const struct pipe_grid_info *info) assert_dt
{ {
struct ir3_shader_key key = {}; struct ir3_shader_key key = {};
struct ir3_shader_variant *v; struct ir3_shader_variant *v;
struct fd_ringbuffer *ring = ctx->batch->draw; struct fd_ringbuffer *ring = ctx->batch->draw;
unsigned nglobal = 0; unsigned nglobal = 0;
v = ir3_shader_variant(ir3_get_shader(ctx->compute), key, false, &ctx->debug); v =
if (!v) ir3_shader_variant(ir3_get_shader(ctx->compute), key, false, &ctx->debug);
return; if (!v)
return;
if (ctx->dirty_shader[PIPE_SHADER_COMPUTE] & FD_DIRTY_SHADER_PROG) if (ctx->dirty_shader[PIPE_SHADER_COMPUTE] & FD_DIRTY_SHADER_PROG)
cs_program_emit(ring, v); cs_program_emit(ring, v);
fd5_emit_cs_state(ctx, ring, v); fd5_emit_cs_state(ctx, ring, v);
fd5_emit_cs_consts(v, ring, ctx, info); fd5_emit_cs_consts(v, ring, ctx, info);
u_foreach_bit(i, ctx->global_bindings.enabled_mask) u_foreach_bit (i, ctx->global_bindings.enabled_mask)
nglobal++; nglobal++;
if (nglobal > 0) { if (nglobal > 0) {
/* global resources don't otherwise get an OUT_RELOC(), since /* global resources don't otherwise get an OUT_RELOC(), since
* the raw ptr address is emitted in ir3_emit_cs_consts(). * the raw ptr address is emitted in ir3_emit_cs_consts().
* So to make the kernel aware that these buffers are referenced * So to make the kernel aware that these buffers are referenced
* by the batch, emit dummy reloc's as part of a no-op packet * by the batch, emit dummy reloc's as part of a no-op packet
* payload: * payload:
*/ */
OUT_PKT7(ring, CP_NOP, 2 * nglobal); OUT_PKT7(ring, CP_NOP, 2 * nglobal);
u_foreach_bit(i, ctx->global_bindings.enabled_mask) { u_foreach_bit (i, ctx->global_bindings.enabled_mask) {
struct pipe_resource *prsc = ctx->global_bindings.buf[i]; struct pipe_resource *prsc = ctx->global_bindings.buf[i];
OUT_RELOC(ring, fd_resource(prsc)->bo, 0, 0, 0); OUT_RELOC(ring, fd_resource(prsc)->bo, 0, 0, 0);
} }
} }
const unsigned *local_size = info->block; // v->shader->nir->info->cs.local_size; const unsigned *local_size =
const unsigned *num_groups = info->grid; info->block; // v->shader->nir->info->cs.local_size;
/* for some reason, mesa/st doesn't set info->work_dim, so just assume 3: */ const unsigned *num_groups = info->grid;
const unsigned work_dim = info->work_dim ? info->work_dim : 3; /* for some reason, mesa/st doesn't set info->work_dim, so just assume 3: */
OUT_PKT4(ring, REG_A5XX_HLSQ_CS_NDRANGE_0, 7); const unsigned work_dim = info->work_dim ? info->work_dim : 3;
OUT_RING(ring, A5XX_HLSQ_CS_NDRANGE_0_KERNELDIM(work_dim) | OUT_PKT4(ring, REG_A5XX_HLSQ_CS_NDRANGE_0, 7);
A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX(local_size[0] - 1) | OUT_RING(ring, A5XX_HLSQ_CS_NDRANGE_0_KERNELDIM(work_dim) |
A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY(local_size[1] - 1) | A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX(local_size[0] - 1) |
A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ(local_size[2] - 1)); A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY(local_size[1] - 1) |
OUT_RING(ring, A5XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X(local_size[0] * num_groups[0])); A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ(local_size[2] - 1));
OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_2_GLOBALOFF_X */ OUT_RING(ring,
OUT_RING(ring, A5XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y(local_size[1] * num_groups[1])); A5XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X(local_size[0] * num_groups[0]));
OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_4_GLOBALOFF_Y */ OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_2_GLOBALOFF_X */
OUT_RING(ring, A5XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z(local_size[2] * num_groups[2])); OUT_RING(ring,
OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_6_GLOBALOFF_Z */ A5XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y(local_size[1] * num_groups[1]));
OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_4_GLOBALOFF_Y */
OUT_RING(ring,
A5XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z(local_size[2] * num_groups[2]));
OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_6_GLOBALOFF_Z */
OUT_PKT4(ring, REG_A5XX_HLSQ_CS_KERNEL_GROUP_X, 3); OUT_PKT4(ring, REG_A5XX_HLSQ_CS_KERNEL_GROUP_X, 3);
OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_X */ OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_X */
OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_Y */ OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_Y */
OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_Z */ OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_Z */
if (info->indirect) { if (info->indirect) {
struct fd_resource *rsc = fd_resource(info->indirect); struct fd_resource *rsc = fd_resource(info->indirect);
fd5_emit_flush(ctx, ring); fd5_emit_flush(ctx, ring);
OUT_PKT7(ring, CP_EXEC_CS_INDIRECT, 4); OUT_PKT7(ring, CP_EXEC_CS_INDIRECT, 4);
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_RELOC(ring, rsc->bo, info->indirect_offset, 0, 0); /* ADDR_LO/HI */ OUT_RELOC(ring, rsc->bo, info->indirect_offset, 0, 0); /* ADDR_LO/HI */
OUT_RING(ring, A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX(local_size[0] - 1) | OUT_RING(ring,
A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY(local_size[1] - 1) | A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX(local_size[0] - 1) |
A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ(local_size[2] - 1)); A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY(local_size[1] - 1) |
} else { A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ(local_size[2] - 1));
OUT_PKT7(ring, CP_EXEC_CS, 4); } else {
OUT_RING(ring, 0x00000000); OUT_PKT7(ring, CP_EXEC_CS, 4);
OUT_RING(ring, CP_EXEC_CS_1_NGROUPS_X(info->grid[0])); OUT_RING(ring, 0x00000000);
OUT_RING(ring, CP_EXEC_CS_2_NGROUPS_Y(info->grid[1])); OUT_RING(ring, CP_EXEC_CS_1_NGROUPS_X(info->grid[0]));
OUT_RING(ring, CP_EXEC_CS_3_NGROUPS_Z(info->grid[2])); OUT_RING(ring, CP_EXEC_CS_2_NGROUPS_Y(info->grid[1]));
} OUT_RING(ring, CP_EXEC_CS_3_NGROUPS_Z(info->grid[2]));
}
} }
void void
fd5_compute_init(struct pipe_context *pctx) fd5_compute_init(struct pipe_context *pctx) disable_thread_safety_analysis
disable_thread_safety_analysis
{ {
struct fd_context *ctx = fd_context(pctx); struct fd_context *ctx = fd_context(pctx);
ctx->launch_grid = fd5_launch_grid; ctx->launch_grid = fd5_launch_grid;
pctx->create_compute_state = ir3_shader_compute_state_create; pctx->create_compute_state = ir3_shader_compute_state_create;
pctx->delete_compute_state = ir3_shader_state_delete; pctx->delete_compute_state = ir3_shader_state_delete;
} }

View file

@ -26,10 +26,10 @@
#include "freedreno_query_acc.h" #include "freedreno_query_acc.h"
#include "fd5_context.h"
#include "fd5_blend.h" #include "fd5_blend.h"
#include "fd5_blitter.h" #include "fd5_blitter.h"
#include "fd5_compute.h" #include "fd5_compute.h"
#include "fd5_context.h"
#include "fd5_draw.h" #include "fd5_draw.h"
#include "fd5_emit.h" #include "fd5_emit.h"
#include "fd5_gmem.h" #include "fd5_gmem.h"
@ -40,22 +40,21 @@
#include "fd5_zsa.h" #include "fd5_zsa.h"
static void static void
fd5_context_destroy(struct pipe_context *pctx) fd5_context_destroy(struct pipe_context *pctx) in_dt
in_dt
{ {
struct fd5_context *fd5_ctx = fd5_context(fd_context(pctx)); struct fd5_context *fd5_ctx = fd5_context(fd_context(pctx));
u_upload_destroy(fd5_ctx->border_color_uploader); u_upload_destroy(fd5_ctx->border_color_uploader);
pipe_resource_reference(&fd5_ctx->border_color_buf, NULL); pipe_resource_reference(&fd5_ctx->border_color_buf, NULL);
fd_context_destroy(pctx); fd_context_destroy(pctx);
fd_bo_del(fd5_ctx->vsc_size_mem); fd_bo_del(fd5_ctx->vsc_size_mem);
fd_bo_del(fd5_ctx->blit_mem); fd_bo_del(fd5_ctx->blit_mem);
fd_context_cleanup_common_vbos(&fd5_ctx->base); fd_context_cleanup_common_vbos(&fd5_ctx->base);
free(fd5_ctx); free(fd5_ctx);
} }
/* clang-format off */ /* clang-format off */
@ -72,56 +71,56 @@ static const uint8_t primtypes[] = {
/* clang-format on */ /* clang-format on */
struct pipe_context * struct pipe_context *
fd5_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) fd5_context_create(struct pipe_screen *pscreen, void *priv,
disable_thread_safety_analysis unsigned flags) disable_thread_safety_analysis
{ {
struct fd_screen *screen = fd_screen(pscreen); struct fd_screen *screen = fd_screen(pscreen);
struct fd5_context *fd5_ctx = CALLOC_STRUCT(fd5_context); struct fd5_context *fd5_ctx = CALLOC_STRUCT(fd5_context);
struct pipe_context *pctx; struct pipe_context *pctx;
if (!fd5_ctx) if (!fd5_ctx)
return NULL; return NULL;
pctx = &fd5_ctx->base.base; pctx = &fd5_ctx->base.base;
pctx->screen = pscreen; pctx->screen = pscreen;
fd5_ctx->base.dev = fd_device_ref(screen->dev); fd5_ctx->base.dev = fd_device_ref(screen->dev);
fd5_ctx->base.screen = fd_screen(pscreen); fd5_ctx->base.screen = fd_screen(pscreen);
fd5_ctx->base.last.key = &fd5_ctx->last_key; fd5_ctx->base.last.key = &fd5_ctx->last_key;
pctx->destroy = fd5_context_destroy; pctx->destroy = fd5_context_destroy;
pctx->create_blend_state = fd5_blend_state_create; pctx->create_blend_state = fd5_blend_state_create;
pctx->create_rasterizer_state = fd5_rasterizer_state_create; pctx->create_rasterizer_state = fd5_rasterizer_state_create;
pctx->create_depth_stencil_alpha_state = fd5_zsa_state_create; pctx->create_depth_stencil_alpha_state = fd5_zsa_state_create;
fd5_draw_init(pctx); fd5_draw_init(pctx);
fd5_compute_init(pctx); fd5_compute_init(pctx);
fd5_gmem_init(pctx); fd5_gmem_init(pctx);
fd5_texture_init(pctx); fd5_texture_init(pctx);
fd5_prog_init(pctx); fd5_prog_init(pctx);
fd5_emit_init(pctx); fd5_emit_init(pctx);
if (!FD_DBG(NOBLIT)) if (!FD_DBG(NOBLIT))
fd5_ctx->base.blit = fd5_blitter_blit; fd5_ctx->base.blit = fd5_blitter_blit;
pctx = fd_context_init(&fd5_ctx->base, pscreen, primtypes, priv, flags); pctx = fd_context_init(&fd5_ctx->base, pscreen, primtypes, priv, flags);
if (!pctx) if (!pctx)
return NULL; return NULL;
util_blitter_set_texture_multisample(fd5_ctx->base.blitter, true); util_blitter_set_texture_multisample(fd5_ctx->base.blitter, true);
fd5_ctx->vsc_size_mem = fd_bo_new(screen->dev, 0x1000, fd5_ctx->vsc_size_mem =
DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_size"); fd_bo_new(screen->dev, 0x1000, DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_size");
fd5_ctx->blit_mem = fd_bo_new(screen->dev, 0x1000, fd5_ctx->blit_mem =
DRM_FREEDRENO_GEM_TYPE_KMEM, "blit"); fd_bo_new(screen->dev, 0x1000, DRM_FREEDRENO_GEM_TYPE_KMEM, "blit");
fd_context_setup_common_vbos(&fd5_ctx->base); fd_context_setup_common_vbos(&fd5_ctx->base);
fd5_query_context_init(pctx); fd5_query_context_init(pctx);
fd5_ctx->border_color_uploader = u_upload_create(pctx, 4096, 0, fd5_ctx->border_color_uploader =
PIPE_USAGE_STREAM, 0); u_upload_create(pctx, 4096, 0, PIPE_USAGE_STREAM, 0);
return pctx; return pctx;
} }

View file

@ -34,55 +34,55 @@
#include "ir3/ir3_shader.h" #include "ir3/ir3_shader.h"
struct fd5_context { struct fd5_context {
struct fd_context base; struct fd_context base;
/* This only needs to be 4 * num_of_pipes bytes (ie. 32 bytes). We /* This only needs to be 4 * num_of_pipes bytes (ie. 32 bytes). We
* could combine it with another allocation. * could combine it with another allocation.
*/ */
struct fd_bo *vsc_size_mem; struct fd_bo *vsc_size_mem;
/* TODO not sure what this is for.. probably similar to /* TODO not sure what this is for.. probably similar to
* CACHE_FLUSH_TS on kernel side, where value gets written * CACHE_FLUSH_TS on kernel side, where value gets written
* to this address synchronized w/ 3d (ie. a way to * to this address synchronized w/ 3d (ie. a way to
* synchronize when the CP is running far ahead) * synchronize when the CP is running far ahead)
*/ */
struct fd_bo *blit_mem; struct fd_bo *blit_mem;
struct u_upload_mgr *border_color_uploader; struct u_upload_mgr *border_color_uploader;
struct pipe_resource *border_color_buf; struct pipe_resource *border_color_buf;
/* bitmask of samplers which need astc srgb workaround: */ /* bitmask of samplers which need astc srgb workaround: */
uint16_t vastc_srgb, fastc_srgb; uint16_t vastc_srgb, fastc_srgb;
/* storage for ctx->last.key: */ /* storage for ctx->last.key: */
struct ir3_shader_key last_key; struct ir3_shader_key last_key;
/* number of active samples-passed queries: */ /* number of active samples-passed queries: */
int samples_passed_queries; int samples_passed_queries;
/* cached state about current emitted shader program (3d): */ /* cached state about current emitted shader program (3d): */
unsigned max_loc; unsigned max_loc;
}; };
static inline struct fd5_context * static inline struct fd5_context *
fd5_context(struct fd_context *ctx) fd5_context(struct fd_context *ctx)
{ {
return (struct fd5_context *)ctx; return (struct fd5_context *)ctx;
} }
struct pipe_context * struct pipe_context *fd5_context_create(struct pipe_screen *pscreen, void *priv,
fd5_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags); unsigned flags);
/* helper for places where we need to stall CP to wait for previous draws: */ /* helper for places where we need to stall CP to wait for previous draws: */
static inline void static inline void
fd5_emit_flush(struct fd_context *ctx, struct fd_ringbuffer *ring) fd5_emit_flush(struct fd_context *ctx, struct fd_ringbuffer *ring)
{ {
OUT_PKT7(ring, CP_EVENT_WRITE, 4); OUT_PKT7(ring, CP_EVENT_WRITE, 4);
OUT_RING(ring, CACHE_FLUSH_TS); OUT_RING(ring, CACHE_FLUSH_TS);
OUT_RELOC(ring, fd5_context(ctx)->blit_mem, 0, 0, 0); /* ADDR_LO/HI */ OUT_RELOC(ring, fd5_context(ctx)->blit_mem, 0, 0, 0); /* ADDR_LO/HI */
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_WFI5(ring); OUT_WFI5(ring);
} }
#endif /* FD5_CONTEXT_H_ */ #endif /* FD5_CONTEXT_H_ */

View file

@ -25,343 +25,341 @@
*/ */
#include "pipe/p_state.h" #include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h" #include "util/u_memory.h"
#include "util/u_prim.h" #include "util/u_prim.h"
#include "util/u_string.h"
#include "freedreno_state.h"
#include "freedreno_resource.h" #include "freedreno_resource.h"
#include "freedreno_state.h"
#include "fd5_draw.h"
#include "fd5_context.h" #include "fd5_context.h"
#include "fd5_draw.h"
#include "fd5_emit.h" #include "fd5_emit.h"
#include "fd5_program.h"
#include "fd5_format.h" #include "fd5_format.h"
#include "fd5_program.h"
#include "fd5_zsa.h" #include "fd5_zsa.h"
static void static void
draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring, draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct fd5_emit *emit, unsigned index_offset) struct fd5_emit *emit, unsigned index_offset) assert_dt
assert_dt
{ {
const struct pipe_draw_info *info = emit->info; const struct pipe_draw_info *info = emit->info;
enum pc_di_primtype primtype = ctx->primtypes[info->mode]; enum pc_di_primtype primtype = ctx->primtypes[info->mode];
fd5_emit_state(ctx, ring, emit); fd5_emit_state(ctx, ring, emit);
if (emit->dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE)) if (emit->dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE))
fd5_emit_vertex_bufs(ring, emit); fd5_emit_vertex_bufs(ring, emit);
OUT_PKT4(ring, REG_A5XX_VFD_INDEX_OFFSET, 2); OUT_PKT4(ring, REG_A5XX_VFD_INDEX_OFFSET, 2);
OUT_RING(ring, info->index_size ? info->index_bias : emit->draw->start); /* VFD_INDEX_OFFSET */ OUT_RING(ring, info->index_size ? info->index_bias
OUT_RING(ring, info->start_instance); /* VFD_INSTANCE_START_OFFSET */ : emit->draw->start); /* VFD_INDEX_OFFSET */
OUT_RING(ring, info->start_instance); /* VFD_INSTANCE_START_OFFSET */
OUT_PKT4(ring, REG_A5XX_PC_RESTART_INDEX, 1); OUT_PKT4(ring, REG_A5XX_PC_RESTART_INDEX, 1);
OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */ OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
info->restart_index : 0xffffffff); info->restart_index
: 0xffffffff);
fd5_emit_render_cntl(ctx, false, emit->binning_pass); fd5_emit_render_cntl(ctx, false, emit->binning_pass);
fd5_draw_emit(ctx->batch, ring, primtype, fd5_draw_emit(ctx->batch, ring, primtype,
emit->binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY, emit->binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY, info,
info, emit->indirect, emit->draw, index_offset); emit->indirect, emit->draw, index_offset);
} }
static bool static bool
fd5_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info, fd5_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
const struct pipe_draw_indirect_info *indirect, const struct pipe_draw_indirect_info *indirect,
const struct pipe_draw_start_count *draw, const struct pipe_draw_start_count *draw,
unsigned index_offset) unsigned index_offset) in_dt
in_dt
{ {
struct fd5_context *fd5_ctx = fd5_context(ctx); struct fd5_context *fd5_ctx = fd5_context(ctx);
struct fd5_emit emit = { struct fd5_emit emit = {
.debug = &ctx->debug, .debug = &ctx->debug,
.vtx = &ctx->vtx, .vtx = &ctx->vtx,
.info = info, .info = info,
.indirect = indirect, .indirect = indirect,
.draw = draw, .draw = draw,
.key = { .key =
.vs = ctx->prog.vs, {
.fs = ctx->prog.fs, .vs = ctx->prog.vs,
.key = { .fs = ctx->prog.fs,
.rasterflat = ctx->rasterizer->flatshade, .key =
.has_per_samp = fd5_ctx->fastc_srgb || fd5_ctx->vastc_srgb, {
.vastc_srgb = fd5_ctx->vastc_srgb, .rasterflat = ctx->rasterizer->flatshade,
.fastc_srgb = fd5_ctx->fastc_srgb, .has_per_samp = fd5_ctx->fastc_srgb || fd5_ctx->vastc_srgb,
}, .vastc_srgb = fd5_ctx->vastc_srgb,
}, .fastc_srgb = fd5_ctx->fastc_srgb,
.rasterflat = ctx->rasterizer->flatshade, },
.sprite_coord_enable = ctx->rasterizer->sprite_coord_enable, },
.sprite_coord_mode = ctx->rasterizer->sprite_coord_mode, .rasterflat = ctx->rasterizer->flatshade,
}; .sprite_coord_enable = ctx->rasterizer->sprite_coord_enable,
.sprite_coord_mode = ctx->rasterizer->sprite_coord_mode,
};
/* Technically a5xx should not require this, but it avoids a crash in /* Technically a5xx should not require this, but it avoids a crash in
* piglit 'spec@!opengl 1.1@ppgtt_memory_alignment' due to a draw with * piglit 'spec@!opengl 1.1@ppgtt_memory_alignment' due to a draw with
* no VBO bound but a VS that expects an input. The draw is a single * no VBO bound but a VS that expects an input. The draw is a single
* vertex with PIPE_PRIM_TRIANGLES so the u_trim_pipe_prim() causes it * vertex with PIPE_PRIM_TRIANGLES so the u_trim_pipe_prim() causes it
* to be skipped. * to be skipped.
*/ */
if (info->mode != PIPE_PRIM_MAX && if (info->mode != PIPE_PRIM_MAX && !indirect && !info->primitive_restart &&
!indirect && !u_trim_pipe_prim(info->mode, (unsigned *)&draw->count))
!info->primitive_restart && return false;
!u_trim_pipe_prim(info->mode, (unsigned*)&draw->count))
return false;
ir3_fixup_shader_state(&ctx->base, &emit.key.key); ir3_fixup_shader_state(&ctx->base, &emit.key.key);
unsigned dirty = ctx->dirty; unsigned dirty = ctx->dirty;
emit.prog = fd5_program_state(ir3_cache_lookup(ctx->shader_cache, &emit.key, &ctx->debug)); emit.prog = fd5_program_state(
ir3_cache_lookup(ctx->shader_cache, &emit.key, &ctx->debug));
/* bail if compile failed: */ /* bail if compile failed: */
if (!emit.prog) if (!emit.prog)
return false; return false;
const struct ir3_shader_variant *vp = fd5_emit_get_vp(&emit); const struct ir3_shader_variant *vp = fd5_emit_get_vp(&emit);
const struct ir3_shader_variant *fp = fd5_emit_get_fp(&emit); const struct ir3_shader_variant *fp = fd5_emit_get_fp(&emit);
ir3_update_max_tf_vtx(ctx, vp); ir3_update_max_tf_vtx(ctx, vp);
/* do regular pass first: */ /* do regular pass first: */
if (unlikely(ctx->stats_users > 0)) { if (unlikely(ctx->stats_users > 0)) {
ctx->stats.vs_regs += ir3_shader_halfregs(vp); ctx->stats.vs_regs += ir3_shader_halfregs(vp);
ctx->stats.fs_regs += ir3_shader_halfregs(fp); ctx->stats.fs_regs += ir3_shader_halfregs(fp);
} }
/* figure out whether we need to disable LRZ write for binning /* figure out whether we need to disable LRZ write for binning
* pass using draw pass's fp: * pass using draw pass's fp:
*/ */
emit.no_lrz_write = fp->writes_pos || fp->no_earlyz || fp->has_kill; emit.no_lrz_write = fp->writes_pos || fp->no_earlyz || fp->has_kill;
emit.binning_pass = false; emit.binning_pass = false;
emit.dirty = dirty; emit.dirty = dirty;
draw_impl(ctx, ctx->batch->draw, &emit, index_offset); draw_impl(ctx, ctx->batch->draw, &emit, index_offset);
/* and now binning pass: */ /* and now binning pass: */
emit.binning_pass = true; emit.binning_pass = true;
emit.dirty = dirty & ~(FD_DIRTY_BLEND); emit.dirty = dirty & ~(FD_DIRTY_BLEND);
emit.vs = NULL; /* we changed key so need to refetch vp */ emit.vs = NULL; /* we changed key so need to refetch vp */
emit.fs = NULL; emit.fs = NULL;
draw_impl(ctx, ctx->batch->binning, &emit, index_offset); draw_impl(ctx, ctx->batch->binning, &emit, index_offset);
if (emit.streamout_mask) { if (emit.streamout_mask) {
struct fd_ringbuffer *ring = ctx->batch->draw; struct fd_ringbuffer *ring = ctx->batch->draw;
for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) { for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
if (emit.streamout_mask & (1 << i)) { if (emit.streamout_mask & (1 << i)) {
fd5_event_write(ctx->batch, ring, FLUSH_SO_0 + i, false); fd5_event_write(ctx->batch, ring, FLUSH_SO_0 + i, false);
} }
} }
} }
fd_context_all_clean(ctx); fd_context_all_clean(ctx);
return true; return true;
} }
static bool is_z32(enum pipe_format format) static bool
is_z32(enum pipe_format format)
{ {
switch (format) { switch (format) {
case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
case PIPE_FORMAT_Z32_UNORM: case PIPE_FORMAT_Z32_UNORM:
case PIPE_FORMAT_Z32_FLOAT: case PIPE_FORMAT_Z32_FLOAT:
return true; return true;
default: default:
return false; return false;
} }
} }
static void static void
fd5_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth) fd5_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth)
{ {
struct fd_ringbuffer *ring; struct fd_ringbuffer *ring;
uint32_t clear = util_pack_z(PIPE_FORMAT_Z16_UNORM, depth); uint32_t clear = util_pack_z(PIPE_FORMAT_Z16_UNORM, depth);
ring = fd_batch_get_prologue(batch); ring = fd_batch_get_prologue(batch);
OUT_WFI5(ring); OUT_WFI5(ring);
OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1); OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
OUT_RING(ring, 0x10000000); OUT_RING(ring, 0x10000000);
OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1); OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1);
OUT_RING(ring, 0x20fffff); OUT_RING(ring, 0x20fffff);
OUT_PKT4(ring, REG_A5XX_GRAS_SU_CNTL, 1); OUT_PKT4(ring, REG_A5XX_GRAS_SU_CNTL, 1);
OUT_RING(ring, A5XX_GRAS_SU_CNTL_LINEHALFWIDTH(0.0) | OUT_RING(ring,
COND(zsbuf->b.b.nr_samples > 1, A5XX_GRAS_SU_CNTL_MSAA_ENABLE)); A5XX_GRAS_SU_CNTL_LINEHALFWIDTH(0.0) |
COND(zsbuf->b.b.nr_samples > 1, A5XX_GRAS_SU_CNTL_MSAA_ENABLE));
OUT_PKT4(ring, REG_A5XX_GRAS_CNTL, 1); OUT_PKT4(ring, REG_A5XX_GRAS_CNTL, 1);
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1); OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1);
OUT_RING(ring, 0x00000181); OUT_RING(ring, 0x00000181);
OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1); OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1);
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(0), 5); OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(0), 5);
OUT_RING(ring, A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(RB5_R16_UNORM) | OUT_RING(ring, A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(RB5_R16_UNORM) |
A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(TILE5_LINEAR) | A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(TILE5_LINEAR) |
A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(WZYX)); A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(WZYX));
OUT_RING(ring, A5XX_RB_MRT_PITCH(zsbuf->lrz_pitch * 2)); OUT_RING(ring, A5XX_RB_MRT_PITCH(zsbuf->lrz_pitch * 2));
OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(fd_bo_size(zsbuf->lrz))); OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(fd_bo_size(zsbuf->lrz)));
OUT_RELOC(ring, zsbuf->lrz, 0x1000, 0, 0); OUT_RELOC(ring, zsbuf->lrz, 0x1000, 0, 0);
OUT_PKT4(ring, REG_A5XX_RB_RENDER_CNTL, 1); OUT_PKT4(ring, REG_A5XX_RB_RENDER_CNTL, 1);
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_PKT4(ring, REG_A5XX_RB_DEST_MSAA_CNTL, 1); OUT_PKT4(ring, REG_A5XX_RB_DEST_MSAA_CNTL, 1);
OUT_RING(ring, A5XX_RB_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE)); OUT_RING(ring, A5XX_RB_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE));
OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1); OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(BLIT_MRT0)); OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(BLIT_MRT0));
OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1); OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1);
OUT_RING(ring, A5XX_RB_CLEAR_CNTL_FAST_CLEAR | OUT_RING(ring, A5XX_RB_CLEAR_CNTL_FAST_CLEAR | A5XX_RB_CLEAR_CNTL_MASK(0xf));
A5XX_RB_CLEAR_CNTL_MASK(0xf));
OUT_PKT4(ring, REG_A5XX_RB_CLEAR_COLOR_DW0, 1); OUT_PKT4(ring, REG_A5XX_RB_CLEAR_COLOR_DW0, 1);
OUT_RING(ring, clear); /* RB_CLEAR_COLOR_DW0 */ OUT_RING(ring, clear); /* RB_CLEAR_COLOR_DW0 */
OUT_PKT4(ring, REG_A5XX_VSC_RESOLVE_CNTL, 2); OUT_PKT4(ring, REG_A5XX_VSC_RESOLVE_CNTL, 2);
OUT_RING(ring, A5XX_VSC_RESOLVE_CNTL_X(zsbuf->lrz_width) | OUT_RING(ring, A5XX_VSC_RESOLVE_CNTL_X(zsbuf->lrz_width) |
A5XX_VSC_RESOLVE_CNTL_Y(zsbuf->lrz_height)); A5XX_VSC_RESOLVE_CNTL_Y(zsbuf->lrz_height));
OUT_RING(ring, 0x00000000); // XXX UNKNOWN_0CDE OUT_RING(ring, 0x00000000); // XXX UNKNOWN_0CDE
OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1); OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
OUT_RING(ring, A5XX_RB_CNTL_BYPASS); OUT_RING(ring, A5XX_RB_CNTL_BYPASS);
OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2); OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(0) | OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(0) | A5XX_RB_RESOLVE_CNTL_1_Y(0));
A5XX_RB_RESOLVE_CNTL_1_Y(0)); OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(zsbuf->lrz_width - 1) |
OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(zsbuf->lrz_width - 1) | A5XX_RB_RESOLVE_CNTL_2_Y(zsbuf->lrz_height - 1));
A5XX_RB_RESOLVE_CNTL_2_Y(zsbuf->lrz_height - 1));
fd5_emit_blit(batch, ring); fd5_emit_blit(batch, ring);
} }
static bool static bool
fd5_clear(struct fd_context *ctx, unsigned buffers, fd5_clear(struct fd_context *ctx, unsigned buffers,
const union pipe_color_union *color, double depth, unsigned stencil) const union pipe_color_union *color, double depth,
assert_dt unsigned stencil) assert_dt
{ {
struct fd_ringbuffer *ring = ctx->batch->draw; struct fd_ringbuffer *ring = ctx->batch->draw;
struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer; struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
if ((buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) && if ((buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) &&
is_z32(pfb->zsbuf->format)) is_z32(pfb->zsbuf->format))
return false; return false;
fd5_emit_render_cntl(ctx, true, false); fd5_emit_render_cntl(ctx, true, false);
if (buffers & PIPE_CLEAR_COLOR) { if (buffers & PIPE_CLEAR_COLOR) {
for (int i = 0; i < pfb->nr_cbufs; i++) { for (int i = 0; i < pfb->nr_cbufs; i++) {
union util_color uc = {0}; union util_color uc = {0};
if (!pfb->cbufs[i]) if (!pfb->cbufs[i])
continue; continue;
if (!(buffers & (PIPE_CLEAR_COLOR0 << i))) if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
continue; continue;
enum pipe_format pfmt = pfb->cbufs[i]->format; enum pipe_format pfmt = pfb->cbufs[i]->format;
// XXX I think RB_CLEAR_COLOR_DWn wants to take into account SWAP?? // XXX I think RB_CLEAR_COLOR_DWn wants to take into account SWAP??
union pipe_color_union swapped; union pipe_color_union swapped;
switch (fd5_pipe2swap(pfmt)) { switch (fd5_pipe2swap(pfmt)) {
case WZYX: case WZYX:
swapped.ui[0] = color->ui[0]; swapped.ui[0] = color->ui[0];
swapped.ui[1] = color->ui[1]; swapped.ui[1] = color->ui[1];
swapped.ui[2] = color->ui[2]; swapped.ui[2] = color->ui[2];
swapped.ui[3] = color->ui[3]; swapped.ui[3] = color->ui[3];
break; break;
case WXYZ: case WXYZ:
swapped.ui[2] = color->ui[0]; swapped.ui[2] = color->ui[0];
swapped.ui[1] = color->ui[1]; swapped.ui[1] = color->ui[1];
swapped.ui[0] = color->ui[2]; swapped.ui[0] = color->ui[2];
swapped.ui[3] = color->ui[3]; swapped.ui[3] = color->ui[3];
break; break;
case ZYXW: case ZYXW:
swapped.ui[3] = color->ui[0]; swapped.ui[3] = color->ui[0];
swapped.ui[0] = color->ui[1]; swapped.ui[0] = color->ui[1];
swapped.ui[1] = color->ui[2]; swapped.ui[1] = color->ui[2];
swapped.ui[2] = color->ui[3]; swapped.ui[2] = color->ui[3];
break; break;
case XYZW: case XYZW:
swapped.ui[3] = color->ui[0]; swapped.ui[3] = color->ui[0];
swapped.ui[2] = color->ui[1]; swapped.ui[2] = color->ui[1];
swapped.ui[1] = color->ui[2]; swapped.ui[1] = color->ui[2];
swapped.ui[0] = color->ui[3]; swapped.ui[0] = color->ui[3];
break; break;
} }
util_pack_color_union(pfmt, &uc, &swapped); util_pack_color_union(pfmt, &uc, &swapped);
OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1); OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(BLIT_MRT0 + i)); OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(BLIT_MRT0 + i));
OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1); OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1);
OUT_RING(ring, A5XX_RB_CLEAR_CNTL_FAST_CLEAR | OUT_RING(ring,
A5XX_RB_CLEAR_CNTL_MASK(0xf)); A5XX_RB_CLEAR_CNTL_FAST_CLEAR | A5XX_RB_CLEAR_CNTL_MASK(0xf));
OUT_PKT4(ring, REG_A5XX_RB_CLEAR_COLOR_DW0, 4); OUT_PKT4(ring, REG_A5XX_RB_CLEAR_COLOR_DW0, 4);
OUT_RING(ring, uc.ui[0]); /* RB_CLEAR_COLOR_DW0 */ OUT_RING(ring, uc.ui[0]); /* RB_CLEAR_COLOR_DW0 */
OUT_RING(ring, uc.ui[1]); /* RB_CLEAR_COLOR_DW1 */ OUT_RING(ring, uc.ui[1]); /* RB_CLEAR_COLOR_DW1 */
OUT_RING(ring, uc.ui[2]); /* RB_CLEAR_COLOR_DW2 */ OUT_RING(ring, uc.ui[2]); /* RB_CLEAR_COLOR_DW2 */
OUT_RING(ring, uc.ui[3]); /* RB_CLEAR_COLOR_DW3 */ OUT_RING(ring, uc.ui[3]); /* RB_CLEAR_COLOR_DW3 */
fd5_emit_blit(ctx->batch, ring); fd5_emit_blit(ctx->batch, ring);
} }
} }
if (pfb->zsbuf && (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) { if (pfb->zsbuf && (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
uint32_t clear = uint32_t clear = util_pack_z_stencil(pfb->zsbuf->format, depth, stencil);
util_pack_z_stencil(pfb->zsbuf->format, depth, stencil); uint32_t mask = 0;
uint32_t mask = 0;
if (buffers & PIPE_CLEAR_DEPTH) if (buffers & PIPE_CLEAR_DEPTH)
mask |= 0x1; mask |= 0x1;
if (buffers & PIPE_CLEAR_STENCIL) if (buffers & PIPE_CLEAR_STENCIL)
mask |= 0x2; mask |= 0x2;
OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1); OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(BLIT_ZS)); OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(BLIT_ZS));
OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1); OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1);
OUT_RING(ring, A5XX_RB_CLEAR_CNTL_FAST_CLEAR | OUT_RING(ring,
A5XX_RB_CLEAR_CNTL_MASK(mask)); A5XX_RB_CLEAR_CNTL_FAST_CLEAR | A5XX_RB_CLEAR_CNTL_MASK(mask));
OUT_PKT4(ring, REG_A5XX_RB_CLEAR_COLOR_DW0, 1); OUT_PKT4(ring, REG_A5XX_RB_CLEAR_COLOR_DW0, 1);
OUT_RING(ring, clear); /* RB_CLEAR_COLOR_DW0 */ OUT_RING(ring, clear); /* RB_CLEAR_COLOR_DW0 */
fd5_emit_blit(ctx->batch, ring); fd5_emit_blit(ctx->batch, ring);
if (pfb->zsbuf && (buffers & PIPE_CLEAR_DEPTH)) { if (pfb->zsbuf && (buffers & PIPE_CLEAR_DEPTH)) {
struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture); struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture);
if (zsbuf->lrz) { if (zsbuf->lrz) {
zsbuf->lrz_valid = true; zsbuf->lrz_valid = true;
fd5_clear_lrz(ctx->batch, zsbuf, depth); fd5_clear_lrz(ctx->batch, zsbuf, depth);
} }
} }
} }
/* disable fast clear to not interfere w/ gmem->mem, etc.. */ /* disable fast clear to not interfere w/ gmem->mem, etc.. */
OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1); OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1);
OUT_RING(ring, 0x00000000); /* RB_CLEAR_CNTL */ OUT_RING(ring, 0x00000000); /* RB_CLEAR_CNTL */
return true; return true;
} }
void void
fd5_draw_init(struct pipe_context *pctx) fd5_draw_init(struct pipe_context *pctx) disable_thread_safety_analysis
disable_thread_safety_analysis
{ {
struct fd_context *ctx = fd_context(pctx); struct fd_context *ctx = fd_context(pctx);
ctx->draw_vbo = fd5_draw_vbo; ctx->draw_vbo = fd5_draw_vbo;
ctx->clear = fd5_clear; ctx->clear = fd5_clear;
} }

View file

@ -41,107 +41,103 @@ void fd5_draw_init(struct pipe_context *pctx);
static inline void static inline void
fd5_draw(struct fd_batch *batch, struct fd_ringbuffer *ring, fd5_draw(struct fd_batch *batch, struct fd_ringbuffer *ring,
enum pc_di_primtype primtype, enum pc_di_primtype primtype, enum pc_di_vis_cull_mode vismode,
enum pc_di_vis_cull_mode vismode, enum pc_di_src_sel src_sel, uint32_t count, uint32_t instances,
enum pc_di_src_sel src_sel, uint32_t count, enum a4xx_index_size idx_type, uint32_t max_indices,
uint32_t instances, enum a4xx_index_size idx_type, uint32_t idx_offset, struct pipe_resource *idx_buffer)
uint32_t max_indices, uint32_t idx_offset,
struct pipe_resource *idx_buffer)
{ {
/* for debug after a lock up, write a unique counter value /* for debug after a lock up, write a unique counter value
* to scratch7 for each draw, to make it easier to match up * to scratch7 for each draw, to make it easier to match up
* register dumps to cmdstream. The combination of IB * register dumps to cmdstream. The combination of IB
* (scratch6) and DRAW is enough to "triangulate" the * (scratch6) and DRAW is enough to "triangulate" the
* particular draw that caused lockup. * particular draw that caused lockup.
*/ */
emit_marker5(ring, 7); emit_marker5(ring, 7);
OUT_PKT7(ring, CP_DRAW_INDX_OFFSET, idx_buffer ? 7 : 3); OUT_PKT7(ring, CP_DRAW_INDX_OFFSET, idx_buffer ? 7 : 3);
if (vismode == USE_VISIBILITY) { if (vismode == USE_VISIBILITY) {
/* leave vis mode blank for now, it will be patched up when /* leave vis mode blank for now, it will be patched up when
* we know if we are binning or not * we know if we are binning or not
*/ */
OUT_RINGP(ring, DRAW4(primtype, src_sel, idx_type, 0), OUT_RINGP(ring, DRAW4(primtype, src_sel, idx_type, 0),
&batch->draw_patches); &batch->draw_patches);
} else { } else {
OUT_RING(ring, DRAW4(primtype, src_sel, idx_type, vismode)); OUT_RING(ring, DRAW4(primtype, src_sel, idx_type, vismode));
} }
OUT_RING(ring, instances); /* NumInstances */ OUT_RING(ring, instances); /* NumInstances */
OUT_RING(ring, count); /* NumIndices */ OUT_RING(ring, count); /* NumIndices */
if (idx_buffer) { if (idx_buffer) {
OUT_RING(ring, 0x0); /* XXX */ OUT_RING(ring, 0x0); /* XXX */
OUT_RELOC(ring, fd_resource(idx_buffer)->bo, idx_offset, 0, 0); OUT_RELOC(ring, fd_resource(idx_buffer)->bo, idx_offset, 0, 0);
OUT_RING (ring, max_indices); OUT_RING(ring, max_indices);
} }
emit_marker5(ring, 7); emit_marker5(ring, 7);
fd_reset_wfi(batch); fd_reset_wfi(batch);
} }
static inline void static inline void
fd5_draw_emit(struct fd_batch *batch, struct fd_ringbuffer *ring, fd5_draw_emit(struct fd_batch *batch, struct fd_ringbuffer *ring,
enum pc_di_primtype primtype, enum pc_di_primtype primtype, enum pc_di_vis_cull_mode vismode,
enum pc_di_vis_cull_mode vismode, const struct pipe_draw_info *info,
const struct pipe_draw_info *info,
const struct pipe_draw_indirect_info *indirect, const struct pipe_draw_indirect_info *indirect,
const struct pipe_draw_start_count *draw, const struct pipe_draw_start_count *draw, unsigned index_offset)
unsigned index_offset)
{ {
struct pipe_resource *idx_buffer = NULL; struct pipe_resource *idx_buffer = NULL;
enum a4xx_index_size idx_type; enum a4xx_index_size idx_type;
enum pc_di_src_sel src_sel; enum pc_di_src_sel src_sel;
uint32_t max_indices, idx_offset; uint32_t max_indices, idx_offset;
if (indirect && indirect->buffer) { if (indirect && indirect->buffer) {
struct fd_resource *ind = fd_resource(indirect->buffer); struct fd_resource *ind = fd_resource(indirect->buffer);
emit_marker5(ring, 7); emit_marker5(ring, 7);
if (info->index_size) { if (info->index_size) {
struct pipe_resource *idx = info->index.resource; struct pipe_resource *idx = info->index.resource;
max_indices = idx->width0 / info->index_size; max_indices = idx->width0 / info->index_size;
OUT_PKT7(ring, CP_DRAW_INDX_INDIRECT, 6); OUT_PKT7(ring, CP_DRAW_INDX_INDIRECT, 6);
OUT_RINGP(ring, DRAW4(primtype, DI_SRC_SEL_DMA, OUT_RINGP(ring,
fd4_size2indextype(info->index_size), 0), DRAW4(primtype, DI_SRC_SEL_DMA,
&batch->draw_patches); fd4_size2indextype(info->index_size), 0),
OUT_RELOC(ring, fd_resource(idx)->bo, &batch->draw_patches);
index_offset, 0, 0); OUT_RELOC(ring, fd_resource(idx)->bo, index_offset, 0, 0);
OUT_RING(ring, A5XX_CP_DRAW_INDX_INDIRECT_3_MAX_INDICES(max_indices)); OUT_RING(ring, A5XX_CP_DRAW_INDX_INDIRECT_3_MAX_INDICES(max_indices));
OUT_RELOC(ring, ind->bo, indirect->offset, 0, 0); OUT_RELOC(ring, ind->bo, indirect->offset, 0, 0);
} else { } else {
OUT_PKT7(ring, CP_DRAW_INDIRECT, 3); OUT_PKT7(ring, CP_DRAW_INDIRECT, 3);
OUT_RINGP(ring, DRAW4(primtype, DI_SRC_SEL_AUTO_INDEX, 0, 0), OUT_RINGP(ring, DRAW4(primtype, DI_SRC_SEL_AUTO_INDEX, 0, 0),
&batch->draw_patches); &batch->draw_patches);
OUT_RELOC(ring, ind->bo, indirect->offset, 0, 0); OUT_RELOC(ring, ind->bo, indirect->offset, 0, 0);
} }
emit_marker5(ring, 7); emit_marker5(ring, 7);
fd_reset_wfi(batch); fd_reset_wfi(batch);
return; return;
} }
if (info->index_size) { if (info->index_size) {
assert(!info->has_user_indices); assert(!info->has_user_indices);
idx_buffer = info->index.resource; idx_buffer = info->index.resource;
idx_type = fd4_size2indextype(info->index_size); idx_type = fd4_size2indextype(info->index_size);
max_indices = idx_buffer->width0 / info->index_size; max_indices = idx_buffer->width0 / info->index_size;
idx_offset = index_offset + draw->start * info->index_size; idx_offset = index_offset + draw->start * info->index_size;
src_sel = DI_SRC_SEL_DMA; src_sel = DI_SRC_SEL_DMA;
} else { } else {
idx_buffer = NULL; idx_buffer = NULL;
idx_type = INDEX4_SIZE_32_BIT; idx_type = INDEX4_SIZE_32_BIT;
max_indices = 0; max_indices = 0;
idx_offset = 0; idx_offset = 0;
src_sel = DI_SRC_SEL_AUTO_INDEX; src_sel = DI_SRC_SEL_AUTO_INDEX;
} }
fd5_draw(batch, ring, primtype, vismode, src_sel, fd5_draw(batch, ring, primtype, vismode, src_sel, draw->count,
draw->count, info->instance_count, info->instance_count, idx_type, max_indices, idx_offset,
idx_type, max_indices, idx_offset, idx_buffer); idx_buffer);
} }
#endif /* FD5_DRAW_H_ */ #endif /* FD5_DRAW_H_ */

File diff suppressed because it is too large Load diff

View file

@ -29,186 +29,191 @@
#include "pipe/p_context.h" #include "pipe/p_context.h"
#include "freedreno_batch.h"
#include "freedreno_context.h"
#include "fd5_context.h" #include "fd5_context.h"
#include "fd5_format.h" #include "fd5_format.h"
#include "fd5_program.h" #include "fd5_program.h"
#include "fd5_screen.h" #include "fd5_screen.h"
#include "freedreno_batch.h"
#include "freedreno_context.h"
#include "ir3_gallium.h" #include "ir3_gallium.h"
struct fd_ringbuffer; struct fd_ringbuffer;
/* Emit-state bundle shared by the program, vertex and state emit paths. */
struct fd5_emit {
   struct pipe_debug_callback *debug;
   const struct fd_vertex_state *vtx;
   const struct fd5_program_state *prog;
   const struct pipe_draw_info *info;
   const struct pipe_draw_indirect_info *indirect;
   const struct pipe_draw_start_count *draw;
   bool binning_pass;
   struct ir3_cache_key key;
   enum fd_dirty_3d_state dirty;

   uint32_t sprite_coord_enable; /* bitmask */
   bool sprite_coord_mode;
   bool rasterflat;

   /* The binning pass has no real fragment shader, so it cannot tell
    * whether the real draw disqualifies LRZ write.  That is decided
    * up-front and stashed here.
    */
   bool no_lrz_write;

   /* Cached variants, so the same lookup is not repeated: */
   const struct ir3_shader_variant *vs;
   const struct ir3_shader_variant *fs;
   /* TODO: other shader stages.. */

   unsigned streamout_mask;
};
static inline enum a5xx_color_fmt fd5_emit_format(struct pipe_surface *surf) static inline enum a5xx_color_fmt
fd5_emit_format(struct pipe_surface *surf)
{ {
if (!surf) if (!surf)
return 0; return 0;
return fd5_pipe2color(surf->format); return fd5_pipe2color(surf->format);
} }
static inline const struct ir3_shader_variant * static inline const struct ir3_shader_variant *
fd5_emit_get_vp(struct fd5_emit *emit) fd5_emit_get_vp(struct fd5_emit *emit)
{ {
if (!emit->vs) { if (!emit->vs) {
/* We use nonbinning VS during binning when TFB is enabled because that /* We use nonbinning VS during binning when TFB is enabled because that
* is what has all the outputs that might be involved in TFB. * is what has all the outputs that might be involved in TFB.
*/ */
if (emit->binning_pass && !emit->prog->vs->shader->stream_output.num_outputs) if (emit->binning_pass &&
emit->vs = emit->prog->bs; !emit->prog->vs->shader->stream_output.num_outputs)
else emit->vs = emit->prog->bs;
emit->vs = emit->prog->vs; else
} emit->vs = emit->prog->vs;
return emit->vs; }
return emit->vs;
} }
static inline const struct ir3_shader_variant * static inline const struct ir3_shader_variant *
fd5_emit_get_fp(struct fd5_emit *emit) fd5_emit_get_fp(struct fd5_emit *emit)
{ {
if (!emit->fs) { if (!emit->fs) {
if (emit->binning_pass) { if (emit->binning_pass) {
/* use dummy stateobj to simplify binning vs non-binning: */ /* use dummy stateobj to simplify binning vs non-binning: */
static const struct ir3_shader_variant binning_fs = {}; static const struct ir3_shader_variant binning_fs = {};
emit->fs = &binning_fs; emit->fs = &binning_fs;
} else { } else {
emit->fs = emit->prog->fs; emit->fs = emit->prog->fs;
} }
} }
return emit->fs; return emit->fs;
} }
static inline void static inline void
fd5_cache_flush(struct fd_batch *batch, struct fd_ringbuffer *ring) fd5_cache_flush(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt
assert_dt
{ {
fd_reset_wfi(batch); fd_reset_wfi(batch);
OUT_PKT4(ring, REG_A5XX_UCHE_CACHE_INVALIDATE_MIN_LO, 5); OUT_PKT4(ring, REG_A5XX_UCHE_CACHE_INVALIDATE_MIN_LO, 5);
OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MIN_LO */ OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MIN_LO */
OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MIN_HI */ OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MIN_HI */
OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MAX_LO */ OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MAX_LO */
OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MAX_HI */ OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MAX_HI */
OUT_RING(ring, 0x00000012); /* UCHE_CACHE_INVALIDATE */ OUT_RING(ring, 0x00000012); /* UCHE_CACHE_INVALIDATE */
fd_wfi(batch, ring); fd_wfi(batch, ring);
} }
static inline void static inline void
fd5_set_render_mode(struct fd_context *ctx, struct fd_ringbuffer *ring, fd5_set_render_mode(struct fd_context *ctx, struct fd_ringbuffer *ring,
enum render_mode_cmd mode) enum render_mode_cmd mode)
{ {
/* TODO add preemption support, gmem bypass, etc */ /* TODO add preemption support, gmem bypass, etc */
emit_marker5(ring, 7); emit_marker5(ring, 7);
OUT_PKT7(ring, CP_SET_RENDER_MODE, 5); OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(mode)); OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(mode));
OUT_RING(ring, 0x00000000); /* ADDR_LO */ OUT_RING(ring, 0x00000000); /* ADDR_LO */
OUT_RING(ring, 0x00000000); /* ADDR_HI */ OUT_RING(ring, 0x00000000); /* ADDR_HI */
OUT_RING(ring, COND(mode == GMEM, CP_SET_RENDER_MODE_3_GMEM_ENABLE) | OUT_RING(ring, COND(mode == GMEM, CP_SET_RENDER_MODE_3_GMEM_ENABLE) |
COND(mode == BINNING, CP_SET_RENDER_MODE_3_VSC_ENABLE)); COND(mode == BINNING, CP_SET_RENDER_MODE_3_VSC_ENABLE));
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
emit_marker5(ring, 7); emit_marker5(ring, 7);
} }
static inline void static inline void
fd5_event_write(struct fd_batch *batch, struct fd_ringbuffer *ring, fd5_event_write(struct fd_batch *batch, struct fd_ringbuffer *ring,
enum vgt_event_type evt, bool timestamp) enum vgt_event_type evt, bool timestamp)
{ {
OUT_PKT7(ring, CP_EVENT_WRITE, timestamp ? 4 : 1); OUT_PKT7(ring, CP_EVENT_WRITE, timestamp ? 4 : 1);
OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(evt)); OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(evt));
if (timestamp) { if (timestamp) {
OUT_RELOC(ring, fd5_context(batch->ctx)->blit_mem, 0, 0, 0); /* ADDR_LO/HI */ OUT_RELOC(ring, fd5_context(batch->ctx)->blit_mem, 0, 0,
OUT_RING(ring, 0x00000000); 0); /* ADDR_LO/HI */
} OUT_RING(ring, 0x00000000);
}
} }
static inline void static inline void
fd5_emit_blit(struct fd_batch *batch, struct fd_ringbuffer *ring) fd5_emit_blit(struct fd_batch *batch, struct fd_ringbuffer *ring)
{ {
emit_marker5(ring, 7); emit_marker5(ring, 7);
fd5_event_write(batch, ring, BLIT, true); fd5_event_write(batch, ring, BLIT, true);
emit_marker5(ring, 7); emit_marker5(ring, 7);
} }
static inline void static inline void
fd5_emit_render_cntl(struct fd_context *ctx, bool blit, bool binning) fd5_emit_render_cntl(struct fd_context *ctx, bool blit, bool binning) assert_dt
assert_dt
{ {
struct fd_ringbuffer *ring = binning ? ctx->batch->binning : ctx->batch->draw; struct fd_ringbuffer *ring =
binning ? ctx->batch->binning : ctx->batch->draw;
/* TODO eventually this partially depends on the pfb state, ie. /* TODO eventually this partially depends on the pfb state, ie.
* which of the cbuf(s)/zsbuf has an UBWC flag buffer.. that part * which of the cbuf(s)/zsbuf has an UBWC flag buffer.. that part
* we could probably cache and just regenerate if framebuffer * we could probably cache and just regenerate if framebuffer
* state is dirty (or something like that).. * state is dirty (or something like that)..
* *
* Other bits seem to depend on query state, like if samples-passed * Other bits seem to depend on query state, like if samples-passed
* query is active. * query is active.
*/ */
bool samples_passed = (fd5_context(ctx)->samples_passed_queries > 0); bool samples_passed = (fd5_context(ctx)->samples_passed_queries > 0);
OUT_PKT4(ring, REG_A5XX_RB_RENDER_CNTL, 1); OUT_PKT4(ring, REG_A5XX_RB_RENDER_CNTL, 1);
OUT_RING(ring, 0x00000000 | /* RB_RENDER_CNTL */ OUT_RING(ring, 0x00000000 | /* RB_RENDER_CNTL */
COND(binning, A5XX_RB_RENDER_CNTL_BINNING_PASS) | COND(binning, A5XX_RB_RENDER_CNTL_BINNING_PASS) |
COND(binning, A5XX_RB_RENDER_CNTL_DISABLE_COLOR_PIPE) | COND(binning, A5XX_RB_RENDER_CNTL_DISABLE_COLOR_PIPE) |
COND(samples_passed, A5XX_RB_RENDER_CNTL_SAMPLES_PASSED) | COND(samples_passed, A5XX_RB_RENDER_CNTL_SAMPLES_PASSED) |
COND(!blit, 0x8)); COND(!blit, 0x8));
OUT_PKT4(ring, REG_A5XX_GRAS_SC_CNTL, 1); OUT_PKT4(ring, REG_A5XX_GRAS_SC_CNTL, 1);
OUT_RING(ring, 0x00000008 | /* GRAS_SC_CNTL */ OUT_RING(ring, 0x00000008 | /* GRAS_SC_CNTL */
COND(binning, A5XX_GRAS_SC_CNTL_BINNING_PASS) | COND(binning, A5XX_GRAS_SC_CNTL_BINNING_PASS) |
COND(samples_passed, A5XX_GRAS_SC_CNTL_SAMPLES_PASSED)); COND(samples_passed, A5XX_GRAS_SC_CNTL_SAMPLES_PASSED));
} }
static inline void static inline void
fd5_emit_lrz_flush(struct fd_batch *batch, struct fd_ringbuffer *ring) fd5_emit_lrz_flush(struct fd_batch *batch, struct fd_ringbuffer *ring)
{ {
/* TODO I think the extra writes to GRAS_LRZ_CNTL are probably /* TODO I think the extra writes to GRAS_LRZ_CNTL are probably
* a workaround and not needed on all a5xx. * a workaround and not needed on all a5xx.
*/ */
OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1); OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1);
OUT_RING(ring, A5XX_GRAS_LRZ_CNTL_ENABLE); OUT_RING(ring, A5XX_GRAS_LRZ_CNTL_ENABLE);
fd5_event_write(batch, ring, LRZ_FLUSH, false); fd5_event_write(batch, ring, LRZ_FLUSH, false);
OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1); OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1);
OUT_RING(ring, 0x0); OUT_RING(ring, 0x0);
} }
void fd5_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd5_emit *emit) assert_dt; void fd5_emit_vertex_bufs(struct fd_ringbuffer *ring,
struct fd5_emit *emit) assert_dt;
void fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, void fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct fd5_emit *emit) assert_dt; struct fd5_emit *emit) assert_dt;
void fd5_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring, void fd5_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct ir3_shader_variant *cp) assert_dt; struct ir3_shader_variant *cp) assert_dt;
void fd5_emit_cs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, void fd5_emit_cs_consts(const struct ir3_shader_variant *v,
struct fd_context *ctx, const struct pipe_grid_info *info) assert_dt; struct fd_ringbuffer *ring, struct fd_context *ctx,
const struct pipe_grid_info *info) assert_dt;
void fd5_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt; void fd5_emit_restore(struct fd_batch *batch,
struct fd_ringbuffer *ring) assert_dt;
void fd5_emit_init_screen(struct pipe_screen *pscreen); void fd5_emit_init_screen(struct pipe_screen *pscreen);
void fd5_emit_init(struct pipe_context *pctx); void fd5_emit_init(struct pipe_context *pctx);
@ -216,15 +221,15 @@ void fd5_emit_init(struct pipe_context *pctx);
/* Emit an IB from ring to target, with a scratch6 marker on either side. */
static inline void
fd5_emit_ib(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
{
   /* For debugging after a lock up: a unique counter value is written to
    * scratch6 for each IB, which makes it easier to match register dumps
    * against the cmdstream.  Together with the DRAW marker (scratch7) this
    * is enough to "triangulate" the particular draw that caused the lockup.
    */
   emit_marker5(ring, 6);

   __OUT_IB5(ring, target);

   emit_marker5(ring, 6);
}
#endif /* FD5_EMIT_H */ #endif /* FD5_EMIT_H */

View file

@ -29,48 +29,41 @@
#include "fd5_format.h" #include "fd5_format.h"
/* Specifies the table of all the formats and their features. Also supplies /* Specifies the table of all the formats and their features. Also supplies
* the helpers that look up various data in those tables. * the helpers that look up various data in those tables.
*/ */
/* One entry of the per-pipe_format lookup table. */
struct fd5_format {
   enum a5xx_vtx_fmt vtx;     /* vertex fetch format */
   enum a5xx_tex_fmt tex;     /* texture sampler format */
   enum a5xx_color_fmt rb;    /* render-target format */
   enum a3xx_color_swap swap; /* component swap */
   boolean present;           /* set by the VT/_T/V_ table macros */
};
/* Table-entry helpers.  Each expands to a designated initializer for
 * formats[PIPE_FORMAT_<pipe>] with .present set.
 */

/* vertex + texture: fmt names both the VFMT5_ and the TFMT5_ format */
#define VT(pipe, fmt, rbfmt, swapfmt) \
   [PIPE_FORMAT_##pipe] = { \
      .vtx = VFMT5_##fmt, \
      .tex = TFMT5_##fmt, \
      .rb = RB5_##rbfmt, \
      .swap = swapfmt, \
      .present = 1, \
   }

/* texture-only: the vertex format is VFMT5_NONE */
#define _T(pipe, fmt, rbfmt, swapfmt) \
   [PIPE_FORMAT_##pipe] = { \
      .vtx = VFMT5_NONE, \
      .tex = TFMT5_##fmt, \
      .rb = RB5_##rbfmt, \
      .swap = swapfmt, \
      .present = 1, \
   }

/* vertex-only: the texture format is TFMT5_NONE */
#define V_(pipe, fmt, rbfmt, swapfmt) \
   [PIPE_FORMAT_##pipe] = { \
      .vtx = VFMT5_##fmt, \
      .tex = TFMT5_NONE, \
      .rb = RB5_##rbfmt, \
      .swap = swapfmt, \
      .present = 1, \
   }
/* clang-format off */ /* clang-format off */
static struct fd5_format formats[PIPE_FORMAT_COUNT] = { static struct fd5_format formats[PIPE_FORMAT_COUNT] = {
@ -343,84 +336,94 @@ static struct fd5_format formats[PIPE_FORMAT_COUNT] = {
enum a5xx_vtx_fmt enum a5xx_vtx_fmt
fd5_pipe2vtx(enum pipe_format format) fd5_pipe2vtx(enum pipe_format format)
{ {
if (!formats[format].present) if (!formats[format].present)
return VFMT5_NONE; return VFMT5_NONE;
return formats[format].vtx; return formats[format].vtx;
} }
/* convert pipe format to texture sampler format: */ /* convert pipe format to texture sampler format: */
enum a5xx_tex_fmt enum a5xx_tex_fmt
fd5_pipe2tex(enum pipe_format format) fd5_pipe2tex(enum pipe_format format)
{ {
if (!formats[format].present) if (!formats[format].present)
return TFMT5_NONE; return TFMT5_NONE;
return formats[format].tex; return formats[format].tex;
} }
/* convert pipe format to MRT / copydest format used for render-target: */ /* convert pipe format to MRT / copydest format used for render-target: */
enum a5xx_color_fmt enum a5xx_color_fmt
fd5_pipe2color(enum pipe_format format) fd5_pipe2color(enum pipe_format format)
{ {
if (!formats[format].present) if (!formats[format].present)
return RB5_NONE; return RB5_NONE;
return formats[format].rb; return formats[format].rb;
} }
enum a3xx_color_swap enum a3xx_color_swap
fd5_pipe2swap(enum pipe_format format) fd5_pipe2swap(enum pipe_format format)
{ {
if (!formats[format].present) if (!formats[format].present)
return WZYX; return WZYX;
return formats[format].swap; return formats[format].swap;
} }
enum a5xx_depth_format enum a5xx_depth_format
fd5_pipe2depth(enum pipe_format format) fd5_pipe2depth(enum pipe_format format)
{ {
switch (format) { switch (format) {
case PIPE_FORMAT_Z16_UNORM: case PIPE_FORMAT_Z16_UNORM:
return DEPTH5_16; return DEPTH5_16;
case PIPE_FORMAT_Z24X8_UNORM: case PIPE_FORMAT_Z24X8_UNORM:
case PIPE_FORMAT_Z24_UNORM_S8_UINT: case PIPE_FORMAT_Z24_UNORM_S8_UINT:
case PIPE_FORMAT_X8Z24_UNORM: case PIPE_FORMAT_X8Z24_UNORM:
case PIPE_FORMAT_S8_UINT_Z24_UNORM: case PIPE_FORMAT_S8_UINT_Z24_UNORM:
return DEPTH5_24_8; return DEPTH5_24_8;
case PIPE_FORMAT_Z32_FLOAT: case PIPE_FORMAT_Z32_FLOAT:
case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
return DEPTH5_32; return DEPTH5_32;
default: default:
return ~0; return ~0;
} }
} }
static inline enum a5xx_tex_swiz static inline enum a5xx_tex_swiz
tex_swiz(unsigned swiz) tex_swiz(unsigned swiz)
{ {
switch (swiz) { switch (swiz) {
default: default:
case PIPE_SWIZZLE_X: return A5XX_TEX_X; case PIPE_SWIZZLE_X:
case PIPE_SWIZZLE_Y: return A5XX_TEX_Y; return A5XX_TEX_X;
case PIPE_SWIZZLE_Z: return A5XX_TEX_Z; case PIPE_SWIZZLE_Y:
case PIPE_SWIZZLE_W: return A5XX_TEX_W; return A5XX_TEX_Y;
case PIPE_SWIZZLE_0: return A5XX_TEX_ZERO; case PIPE_SWIZZLE_Z:
case PIPE_SWIZZLE_1: return A5XX_TEX_ONE; return A5XX_TEX_Z;
} case PIPE_SWIZZLE_W:
return A5XX_TEX_W;
case PIPE_SWIZZLE_0:
return A5XX_TEX_ZERO;
case PIPE_SWIZZLE_1:
return A5XX_TEX_ONE;
}
} }
uint32_t uint32_t
fd5_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g, fd5_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g,
unsigned swizzle_b, unsigned swizzle_a) unsigned swizzle_b, unsigned swizzle_a)
{ {
const struct util_format_description *desc = const struct util_format_description *desc = util_format_description(format);
util_format_description(format); unsigned char swiz[4] =
unsigned char swiz[4] = { {
swizzle_r, swizzle_g, swizzle_b, swizzle_a, swizzle_r,
}, rswiz[4]; swizzle_g,
swizzle_b,
swizzle_a,
},
rswiz[4];
util_format_compose_swizzles(desc->swizzle, swiz, rswiz); util_format_compose_swizzles(desc->swizzle, swiz, rswiz);
return A5XX_TEX_CONST_0_SWIZ_X(tex_swiz(rswiz[0])) | return A5XX_TEX_CONST_0_SWIZ_X(tex_swiz(rswiz[0])) |
A5XX_TEX_CONST_0_SWIZ_Y(tex_swiz(rswiz[1])) | A5XX_TEX_CONST_0_SWIZ_Y(tex_swiz(rswiz[1])) |
A5XX_TEX_CONST_0_SWIZ_Z(tex_swiz(rswiz[2])) | A5XX_TEX_CONST_0_SWIZ_Z(tex_swiz(rswiz[2])) |
A5XX_TEX_CONST_0_SWIZ_W(tex_swiz(rswiz[3])); A5XX_TEX_CONST_0_SWIZ_W(tex_swiz(rswiz[3]));
} }

View file

@ -38,6 +38,7 @@ enum a3xx_color_swap fd5_pipe2swap(enum pipe_format format);
enum a5xx_depth_format fd5_pipe2depth(enum pipe_format format); enum a5xx_depth_format fd5_pipe2depth(enum pipe_format format);
uint32_t fd5_tex_swiz(enum pipe_format format, unsigned swizzle_r, uint32_t fd5_tex_swiz(enum pipe_format format, unsigned swizzle_r,
unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a); unsigned swizzle_g, unsigned swizzle_b,
unsigned swizzle_a);
#endif /* FD5_UTIL_H_ */ #endif /* FD5_UTIL_H_ */

File diff suppressed because it is too large Load diff

View file

@ -26,181 +26,184 @@
#include "pipe/p_state.h" #include "pipe/p_state.h"
#include "freedreno_resource.h"
#include "fd5_image.h"
#include "fd5_format.h" #include "fd5_format.h"
#include "fd5_image.h"
#include "fd5_texture.h" #include "fd5_texture.h"
#include "freedreno_resource.h"
static enum a4xx_state_block texsb[] = { static enum a4xx_state_block texsb[] = {
[PIPE_SHADER_COMPUTE] = SB4_CS_TEX, [PIPE_SHADER_COMPUTE] = SB4_CS_TEX,
[PIPE_SHADER_FRAGMENT] = SB4_FS_TEX, [PIPE_SHADER_FRAGMENT] = SB4_FS_TEX,
}; };
static enum a4xx_state_block imgsb[] = { static enum a4xx_state_block imgsb[] = {
[PIPE_SHADER_COMPUTE] = SB4_CS_SSBO, [PIPE_SHADER_COMPUTE] = SB4_CS_SSBO,
[PIPE_SHADER_FRAGMENT] = SB4_SSBO, [PIPE_SHADER_FRAGMENT] = SB4_SSBO,
}; };
struct fd5_image { struct fd5_image {
enum pipe_format pfmt; enum pipe_format pfmt;
enum a5xx_tex_fmt fmt; enum a5xx_tex_fmt fmt;
enum a5xx_tex_type type; enum a5xx_tex_type type;
bool srgb; bool srgb;
uint32_t cpp; uint32_t cpp;
uint32_t width; uint32_t width;
uint32_t height; uint32_t height;
uint32_t depth; uint32_t depth;
uint32_t pitch; uint32_t pitch;
uint32_t array_pitch; uint32_t array_pitch;
struct fd_bo *bo; struct fd_bo *bo;
uint32_t offset; uint32_t offset;
bool buffer; bool buffer;
}; };
static void translate_image(struct fd5_image *img, struct pipe_image_view *pimg) static void
translate_image(struct fd5_image *img, struct pipe_image_view *pimg)
{ {
enum pipe_format format = pimg->format; enum pipe_format format = pimg->format;
struct pipe_resource *prsc = pimg->resource; struct pipe_resource *prsc = pimg->resource;
struct fd_resource *rsc = fd_resource(prsc); struct fd_resource *rsc = fd_resource(prsc);
if (!pimg->resource) { if (!pimg->resource) {
memset(img, 0, sizeof(*img)); memset(img, 0, sizeof(*img));
return; return;
} }
img->pfmt = format; img->pfmt = format;
img->fmt = fd5_pipe2tex(format); img->fmt = fd5_pipe2tex(format);
img->type = fd5_tex_type(prsc->target); img->type = fd5_tex_type(prsc->target);
img->srgb = util_format_is_srgb(format); img->srgb = util_format_is_srgb(format);
img->cpp = rsc->layout.cpp; img->cpp = rsc->layout.cpp;
img->bo = rsc->bo; img->bo = rsc->bo;
/* Treat cube textures as 2d-array: */ /* Treat cube textures as 2d-array: */
if (img->type == A5XX_TEX_CUBE) if (img->type == A5XX_TEX_CUBE)
img->type = A5XX_TEX_2D; img->type = A5XX_TEX_2D;
if (prsc->target == PIPE_BUFFER) { if (prsc->target == PIPE_BUFFER) {
img->buffer = true; img->buffer = true;
img->offset = pimg->u.buf.offset; img->offset = pimg->u.buf.offset;
img->pitch = 0; img->pitch = 0;
img->array_pitch = 0; img->array_pitch = 0;
/* size is encoded with low 15b in WIDTH and high bits in /* size is encoded with low 15b in WIDTH and high bits in
* HEIGHT, in units of elements: * HEIGHT, in units of elements:
*/ */
unsigned sz = pimg->u.buf.size / util_format_get_blocksize(format); unsigned sz = pimg->u.buf.size / util_format_get_blocksize(format);
img->width = sz & MASK(15); img->width = sz & MASK(15);
img->height = sz >> 15; img->height = sz >> 15;
img->depth = 0; img->depth = 0;
} else { } else {
img->buffer = false; img->buffer = false;
unsigned lvl = pimg->u.tex.level; unsigned lvl = pimg->u.tex.level;
img->offset = fd_resource_offset(rsc, lvl, pimg->u.tex.first_layer); img->offset = fd_resource_offset(rsc, lvl, pimg->u.tex.first_layer);
img->pitch = fd_resource_pitch(rsc, lvl); img->pitch = fd_resource_pitch(rsc, lvl);
img->width = u_minify(prsc->width0, lvl); img->width = u_minify(prsc->width0, lvl);
img->height = u_minify(prsc->height0, lvl); img->height = u_minify(prsc->height0, lvl);
unsigned layers = pimg->u.tex.last_layer - pimg->u.tex.first_layer + 1; unsigned layers = pimg->u.tex.last_layer - pimg->u.tex.first_layer + 1;
switch (prsc->target) { switch (prsc->target) {
case PIPE_TEXTURE_RECT: case PIPE_TEXTURE_RECT:
case PIPE_TEXTURE_1D: case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_2D: case PIPE_TEXTURE_2D:
img->array_pitch = rsc->layout.layer_size; img->array_pitch = rsc->layout.layer_size;
img->depth = 1; img->depth = 1;
break; break;
case PIPE_TEXTURE_1D_ARRAY: case PIPE_TEXTURE_1D_ARRAY:
case PIPE_TEXTURE_2D_ARRAY: case PIPE_TEXTURE_2D_ARRAY:
img->array_pitch = rsc->layout.layer_size; img->array_pitch = rsc->layout.layer_size;
img->depth = layers; img->depth = layers;
break; break;
case PIPE_TEXTURE_CUBE: case PIPE_TEXTURE_CUBE:
case PIPE_TEXTURE_CUBE_ARRAY: case PIPE_TEXTURE_CUBE_ARRAY:
img->array_pitch = rsc->layout.layer_size; img->array_pitch = rsc->layout.layer_size;
img->depth = layers; img->depth = layers;
break; break;
case PIPE_TEXTURE_3D: case PIPE_TEXTURE_3D:
img->array_pitch = fd_resource_slice(rsc, lvl)->size0; img->array_pitch = fd_resource_slice(rsc, lvl)->size0;
img->depth = u_minify(prsc->depth0, lvl); img->depth = u_minify(prsc->depth0, lvl);
break; break;
default: default:
img->array_pitch = 0; img->array_pitch = 0;
img->depth = 0; img->depth = 0;
break; break;
} }
} }
} }
static void emit_image_tex(struct fd_ringbuffer *ring, unsigned slot, static void
struct fd5_image *img, enum pipe_shader_type shader) emit_image_tex(struct fd_ringbuffer *ring, unsigned slot, struct fd5_image *img,
enum pipe_shader_type shader)
{ {
OUT_PKT7(ring, CP_LOAD_STATE4, 3 + 12); OUT_PKT7(ring, CP_LOAD_STATE4, 3 + 12);
OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(slot) | OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(slot) |
CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
CP_LOAD_STATE4_0_STATE_BLOCK(texsb[shader]) | CP_LOAD_STATE4_0_STATE_BLOCK(texsb[shader]) |
CP_LOAD_STATE4_0_NUM_UNIT(1)); CP_LOAD_STATE4_0_NUM_UNIT(1));
OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS) | OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS) |
CP_LOAD_STATE4_1_EXT_SRC_ADDR(0)); CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0)); OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
OUT_RING(ring, A5XX_TEX_CONST_0_FMT(img->fmt) | OUT_RING(ring, A5XX_TEX_CONST_0_FMT(img->fmt) |
fd5_tex_swiz(img->pfmt, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, fd5_tex_swiz(img->pfmt, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W) | PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W) |
COND(img->srgb, A5XX_TEX_CONST_0_SRGB)); COND(img->srgb, A5XX_TEX_CONST_0_SRGB));
OUT_RING(ring, A5XX_TEX_CONST_1_WIDTH(img->width) | OUT_RING(ring, A5XX_TEX_CONST_1_WIDTH(img->width) |
A5XX_TEX_CONST_1_HEIGHT(img->height)); A5XX_TEX_CONST_1_HEIGHT(img->height));
OUT_RING(ring, OUT_RING(ring,
COND(img->buffer, A5XX_TEX_CONST_2_UNK4 | A5XX_TEX_CONST_2_UNK31) | COND(img->buffer, A5XX_TEX_CONST_2_UNK4 | A5XX_TEX_CONST_2_UNK31) |
A5XX_TEX_CONST_2_TYPE(img->type) | A5XX_TEX_CONST_2_TYPE(img->type) |
A5XX_TEX_CONST_2_PITCH(img->pitch)); A5XX_TEX_CONST_2_PITCH(img->pitch));
OUT_RING(ring, A5XX_TEX_CONST_3_ARRAY_PITCH(img->array_pitch)); OUT_RING(ring, A5XX_TEX_CONST_3_ARRAY_PITCH(img->array_pitch));
if (img->bo) { if (img->bo) {
OUT_RELOC(ring, img->bo, img->offset, OUT_RELOC(ring, img->bo, img->offset,
(uint64_t)A5XX_TEX_CONST_5_DEPTH(img->depth) << 32, 0); (uint64_t)A5XX_TEX_CONST_5_DEPTH(img->depth) << 32, 0);
} else { } else {
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_RING(ring, A5XX_TEX_CONST_5_DEPTH(img->depth)); OUT_RING(ring, A5XX_TEX_CONST_5_DEPTH(img->depth));
} }
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
} }
static void emit_image_ssbo(struct fd_ringbuffer *ring, unsigned slot, static void
struct fd5_image *img, enum pipe_shader_type shader) emit_image_ssbo(struct fd_ringbuffer *ring, unsigned slot,
struct fd5_image *img, enum pipe_shader_type shader)
{ {
OUT_PKT7(ring, CP_LOAD_STATE4, 3 + 2); OUT_PKT7(ring, CP_LOAD_STATE4, 3 + 2);
OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(slot) | OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(slot) |
CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
CP_LOAD_STATE4_0_STATE_BLOCK(imgsb[shader]) | CP_LOAD_STATE4_0_STATE_BLOCK(imgsb[shader]) |
CP_LOAD_STATE4_0_NUM_UNIT(1)); CP_LOAD_STATE4_0_NUM_UNIT(1));
OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(1) | OUT_RING(ring,
CP_LOAD_STATE4_1_EXT_SRC_ADDR(0)); CP_LOAD_STATE4_1_STATE_TYPE(1) | CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0)); OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
OUT_RING(ring, A5XX_SSBO_1_0_FMT(img->fmt) | OUT_RING(ring,
A5XX_SSBO_1_0_WIDTH(img->width)); A5XX_SSBO_1_0_FMT(img->fmt) | A5XX_SSBO_1_0_WIDTH(img->width));
OUT_RING(ring, A5XX_SSBO_1_1_HEIGHT(img->height) | OUT_RING(ring, A5XX_SSBO_1_1_HEIGHT(img->height) |
A5XX_SSBO_1_1_DEPTH(img->depth)); A5XX_SSBO_1_1_DEPTH(img->depth));
OUT_PKT7(ring, CP_LOAD_STATE4, 3 + 2); OUT_PKT7(ring, CP_LOAD_STATE4, 3 + 2);
OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(slot) | OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(slot) |
CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
CP_LOAD_STATE4_0_STATE_BLOCK(imgsb[shader]) | CP_LOAD_STATE4_0_STATE_BLOCK(imgsb[shader]) |
CP_LOAD_STATE4_0_NUM_UNIT(1)); CP_LOAD_STATE4_0_NUM_UNIT(1));
OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(2) | OUT_RING(ring,
CP_LOAD_STATE4_1_EXT_SRC_ADDR(0)); CP_LOAD_STATE4_1_STATE_TYPE(2) | CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0)); OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
if (img->bo) { if (img->bo) {
OUT_RELOC(ring, img->bo, img->offset, 0, 0); OUT_RELOC(ring, img->bo, img->offset, 0, 0);
} else { } else {
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
} }
} }
/* Emit required "SSBO" and sampler state. The sampler state is used by the /* Emit required "SSBO" and sampler state. The sampler state is used by the
@ -209,19 +212,21 @@ static void emit_image_ssbo(struct fd_ringbuffer *ring, unsigned slot,
*/ */
void void
fd5_emit_images(struct fd_context *ctx, struct fd_ringbuffer *ring, fd5_emit_images(struct fd_context *ctx, struct fd_ringbuffer *ring,
enum pipe_shader_type shader, const struct ir3_shader_variant *v) enum pipe_shader_type shader,
const struct ir3_shader_variant *v)
{ {
struct fd_shaderimg_stateobj *so = &ctx->shaderimg[shader]; struct fd_shaderimg_stateobj *so = &ctx->shaderimg[shader];
unsigned enabled_mask = so->enabled_mask; unsigned enabled_mask = so->enabled_mask;
const struct ir3_ibo_mapping *m = &v->image_mapping; const struct ir3_ibo_mapping *m = &v->image_mapping;
while (enabled_mask) { while (enabled_mask) {
unsigned index = u_bit_scan(&enabled_mask); unsigned index = u_bit_scan(&enabled_mask);
struct fd5_image img; struct fd5_image img;
translate_image(&img, &so->si[index]); translate_image(&img, &so->si[index]);
emit_image_tex(ring, m->image_to_tex[index] + m->tex_base, &img, shader); emit_image_tex(ring, m->image_to_tex[index] + m->tex_base, &img, shader);
emit_image_ssbo(ring, v->shader->nir->info.num_ssbos + index, &img, shader); emit_image_ssbo(ring, v->shader->nir->info.num_ssbos + index, &img,
} shader);
}
} }

View file

@ -31,6 +31,7 @@
struct ir3_shader_variant; struct ir3_shader_variant;
void fd5_emit_images(struct fd_context *ctx, struct fd_ringbuffer *ring, void fd5_emit_images(struct fd_context *ctx, struct fd_ringbuffer *ring,
enum pipe_shader_type shader, const struct ir3_shader_variant *v); enum pipe_shader_type shader,
const struct ir3_shader_variant *v);
#endif /* FD5_IMAGE_H_ */ #endif /* FD5_IMAGE_H_ */

File diff suppressed because it is too large Load diff

View file

@ -36,22 +36,23 @@
struct fd5_emit; struct fd5_emit;
struct fd5_program_state { struct fd5_program_state {
struct ir3_program_state base; struct ir3_program_state base;
struct ir3_shader_variant *bs; /* VS for when emit->binning */ struct ir3_shader_variant *bs; /* VS for when emit->binning */
struct ir3_shader_variant *vs; struct ir3_shader_variant *vs;
struct ir3_shader_variant *fs; /* FS for when !emit->binning */ struct ir3_shader_variant *fs; /* FS for when !emit->binning */
}; };
static inline struct fd5_program_state * static inline struct fd5_program_state *
fd5_program_state(struct ir3_program_state *state) fd5_program_state(struct ir3_program_state *state)
{ {
return (struct fd5_program_state *)state; return (struct fd5_program_state *)state;
} }
void fd5_emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so); void fd5_emit_shader(struct fd_ringbuffer *ring,
const struct ir3_shader_variant *so);
void fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, void fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct fd5_emit *emit); struct fd5_emit *emit);
void fd5_prog_init(struct pipe_context *pctx); void fd5_prog_init(struct pipe_context *pctx);

View file

@ -35,21 +35,20 @@
#include "fd5_query.h" #include "fd5_query.h"
struct PACKED fd5_query_sample { struct PACKED fd5_query_sample {
uint64_t start; uint64_t start;
uint64_t result; uint64_t result;
uint64_t stop; uint64_t stop;
}; };
/* offset of a single field of an array of fd5_query_sample: */ /* offset of a single field of an array of fd5_query_sample: */
#define query_sample_idx(aq, idx, field) \ #define query_sample_idx(aq, idx, field) \
fd_resource((aq)->prsc)->bo, \ fd_resource((aq)->prsc)->bo, \
(idx * sizeof(struct fd5_query_sample)) + \ (idx * sizeof(struct fd5_query_sample)) + \
offsetof(struct fd5_query_sample, field), \ offsetof(struct fd5_query_sample, field), \
0, 0 0, 0
/* offset of a single field of fd5_query_sample: */ /* offset of a single field of fd5_query_sample: */
#define query_sample(aq, field) \ #define query_sample(aq, field) query_sample_idx(aq, 0, field)
query_sample_idx(aq, 0, field)
/* /*
* Occlusion Query: * Occlusion Query:
@ -61,98 +60,97 @@ struct PACKED fd5_query_sample {
static void static void
occlusion_resume(struct fd_acc_query *aq, struct fd_batch *batch) occlusion_resume(struct fd_acc_query *aq, struct fd_batch *batch)
{ {
struct fd_ringbuffer *ring = batch->draw; struct fd_ringbuffer *ring = batch->draw;
OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_CONTROL, 1); OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_CONTROL, 1);
OUT_RING(ring, A5XX_RB_SAMPLE_COUNT_CONTROL_COPY); OUT_RING(ring, A5XX_RB_SAMPLE_COUNT_CONTROL_COPY);
OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO, 2); OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO, 2);
OUT_RELOC(ring, query_sample(aq, start)); OUT_RELOC(ring, query_sample(aq, start));
fd5_event_write(batch, ring, ZPASS_DONE, false); fd5_event_write(batch, ring, ZPASS_DONE, false);
fd_reset_wfi(batch); fd_reset_wfi(batch);
fd5_context(batch->ctx)->samples_passed_queries++; fd5_context(batch->ctx)->samples_passed_queries++;
} }
static void static void
occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch) occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch)
{ {
struct fd_ringbuffer *ring = batch->draw; struct fd_ringbuffer *ring = batch->draw;
OUT_PKT7(ring, CP_MEM_WRITE, 4); OUT_PKT7(ring, CP_MEM_WRITE, 4);
OUT_RELOC(ring, query_sample(aq, stop)); OUT_RELOC(ring, query_sample(aq, stop));
OUT_RING(ring, 0xffffffff); OUT_RING(ring, 0xffffffff);
OUT_RING(ring, 0xffffffff); OUT_RING(ring, 0xffffffff);
OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0); OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);
OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_CONTROL, 1); OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_CONTROL, 1);
OUT_RING(ring, A5XX_RB_SAMPLE_COUNT_CONTROL_COPY); OUT_RING(ring, A5XX_RB_SAMPLE_COUNT_CONTROL_COPY);
OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO, 2); OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO, 2);
OUT_RELOC(ring, query_sample(aq, stop)); OUT_RELOC(ring, query_sample(aq, stop));
fd5_event_write(batch, ring, ZPASS_DONE, false); fd5_event_write(batch, ring, ZPASS_DONE, false);
fd_reset_wfi(batch); fd_reset_wfi(batch);
OUT_PKT7(ring, CP_WAIT_REG_MEM, 6); OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
OUT_RING(ring, 0x00000014); // XXX OUT_RING(ring, 0x00000014); // XXX
OUT_RELOC(ring, query_sample(aq, stop)); OUT_RELOC(ring, query_sample(aq, stop));
OUT_RING(ring, 0xffffffff); OUT_RING(ring, 0xffffffff);
OUT_RING(ring, 0xffffffff); OUT_RING(ring, 0xffffffff);
OUT_RING(ring, 0x00000010); // XXX OUT_RING(ring, 0x00000010); // XXX
/* result += stop - start: */ /* result += stop - start: */
OUT_PKT7(ring, CP_MEM_TO_MEM, 9); OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
CP_MEM_TO_MEM_0_NEG_C); OUT_RELOC(ring, query_sample(aq, result)); /* dst */
OUT_RELOC(ring, query_sample(aq, result)); /* dst */ OUT_RELOC(ring, query_sample(aq, result)); /* srcA */
OUT_RELOC(ring, query_sample(aq, result)); /* srcA */ OUT_RELOC(ring, query_sample(aq, stop)); /* srcB */
OUT_RELOC(ring, query_sample(aq, stop)); /* srcB */ OUT_RELOC(ring, query_sample(aq, start)); /* srcC */
OUT_RELOC(ring, query_sample(aq, start)); /* srcC */
fd5_context(batch->ctx)->samples_passed_queries--; fd5_context(batch->ctx)->samples_passed_queries--;
} }
static void static void
occlusion_counter_result(struct fd_acc_query *aq, void *buf, occlusion_counter_result(struct fd_acc_query *aq, void *buf,
union pipe_query_result *result) union pipe_query_result *result)
{ {
struct fd5_query_sample *sp = buf; struct fd5_query_sample *sp = buf;
result->u64 = sp->result; result->u64 = sp->result;
} }
static void static void
occlusion_predicate_result(struct fd_acc_query *aq, void *buf, occlusion_predicate_result(struct fd_acc_query *aq, void *buf,
union pipe_query_result *result) union pipe_query_result *result)
{ {
struct fd5_query_sample *sp = buf; struct fd5_query_sample *sp = buf;
result->b = !!sp->result; result->b = !!sp->result;
} }
static const struct fd_acc_sample_provider occlusion_counter = { static const struct fd_acc_sample_provider occlusion_counter = {
.query_type = PIPE_QUERY_OCCLUSION_COUNTER, .query_type = PIPE_QUERY_OCCLUSION_COUNTER,
.size = sizeof(struct fd5_query_sample), .size = sizeof(struct fd5_query_sample),
.resume = occlusion_resume, .resume = occlusion_resume,
.pause = occlusion_pause, .pause = occlusion_pause,
.result = occlusion_counter_result, .result = occlusion_counter_result,
}; };
static const struct fd_acc_sample_provider occlusion_predicate = { static const struct fd_acc_sample_provider occlusion_predicate = {
.query_type = PIPE_QUERY_OCCLUSION_PREDICATE, .query_type = PIPE_QUERY_OCCLUSION_PREDICATE,
.size = sizeof(struct fd5_query_sample), .size = sizeof(struct fd5_query_sample),
.resume = occlusion_resume, .resume = occlusion_resume,
.pause = occlusion_pause, .pause = occlusion_pause,
.result = occlusion_predicate_result, .result = occlusion_predicate_result,
}; };
static const struct fd_acc_sample_provider occlusion_predicate_conservative = { static const struct fd_acc_sample_provider occlusion_predicate_conservative = {
.query_type = PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE, .query_type = PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE,
.size = sizeof(struct fd5_query_sample), .size = sizeof(struct fd5_query_sample),
.resume = occlusion_resume, .resume = occlusion_resume,
.pause = occlusion_pause, .pause = occlusion_pause,
.result = occlusion_predicate_result, .result = occlusion_predicate_result,
}; };
/* /*
@ -160,78 +158,75 @@ static const struct fd_acc_sample_provider occlusion_predicate_conservative = {
*/ */
static void static void
timestamp_resume(struct fd_acc_query *aq, struct fd_batch *batch) timestamp_resume(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
assert_dt
{ {
struct fd_ringbuffer *ring = batch->draw; struct fd_ringbuffer *ring = batch->draw;
OUT_PKT7(ring, CP_EVENT_WRITE, 4); OUT_PKT7(ring, CP_EVENT_WRITE, 4);
OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) | OUT_RING(ring,
CP_EVENT_WRITE_0_TIMESTAMP); CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) | CP_EVENT_WRITE_0_TIMESTAMP);
OUT_RELOC(ring, query_sample(aq, start)); OUT_RELOC(ring, query_sample(aq, start));
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
fd_reset_wfi(batch); fd_reset_wfi(batch);
} }
static void static void
timestamp_pause(struct fd_acc_query *aq, struct fd_batch *batch) timestamp_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
assert_dt
{ {
struct fd_ringbuffer *ring = batch->draw; struct fd_ringbuffer *ring = batch->draw;
OUT_PKT7(ring, CP_EVENT_WRITE, 4); OUT_PKT7(ring, CP_EVENT_WRITE, 4);
OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) | OUT_RING(ring,
CP_EVENT_WRITE_0_TIMESTAMP); CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) | CP_EVENT_WRITE_0_TIMESTAMP);
OUT_RELOC(ring, query_sample(aq, stop)); OUT_RELOC(ring, query_sample(aq, stop));
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
fd_reset_wfi(batch); fd_reset_wfi(batch);
fd_wfi(batch, ring); fd_wfi(batch, ring);
/* result += stop - start: */ /* result += stop - start: */
OUT_PKT7(ring, CP_MEM_TO_MEM, 9); OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
CP_MEM_TO_MEM_0_NEG_C); OUT_RELOC(ring, query_sample(aq, result)); /* dst */
OUT_RELOC(ring, query_sample(aq, result)); /* dst */ OUT_RELOC(ring, query_sample(aq, result)); /* srcA */
OUT_RELOC(ring, query_sample(aq, result)); /* srcA */ OUT_RELOC(ring, query_sample(aq, stop)); /* srcB */
OUT_RELOC(ring, query_sample(aq, stop)); /* srcB */ OUT_RELOC(ring, query_sample(aq, start)); /* srcC */
OUT_RELOC(ring, query_sample(aq, start)); /* srcC */
} }
static uint64_t static uint64_t
ticks_to_ns(uint32_t ts) ticks_to_ns(uint32_t ts)
{ {
/* This is based on the 19.2MHz always-on rbbm timer. /* This is based on the 19.2MHz always-on rbbm timer.
* *
* TODO we should probably query this value from kernel.. * TODO we should probably query this value from kernel..
*/ */
return ts * (1000000000 / 19200000); return ts * (1000000000 / 19200000);
} }
static void static void
time_elapsed_accumulate_result(struct fd_acc_query *aq, void *buf, time_elapsed_accumulate_result(struct fd_acc_query *aq, void *buf,
union pipe_query_result *result) union pipe_query_result *result)
{ {
struct fd5_query_sample *sp = buf; struct fd5_query_sample *sp = buf;
result->u64 = ticks_to_ns(sp->result); result->u64 = ticks_to_ns(sp->result);
} }
static void static void
timestamp_accumulate_result(struct fd_acc_query *aq, void *buf, timestamp_accumulate_result(struct fd_acc_query *aq, void *buf,
union pipe_query_result *result) union pipe_query_result *result)
{ {
struct fd5_query_sample *sp = buf; struct fd5_query_sample *sp = buf;
result->u64 = ticks_to_ns(sp->result); result->u64 = ticks_to_ns(sp->result);
} }
static const struct fd_acc_sample_provider time_elapsed = { static const struct fd_acc_sample_provider time_elapsed = {
.query_type = PIPE_QUERY_TIME_ELAPSED, .query_type = PIPE_QUERY_TIME_ELAPSED,
.always = true, .always = true,
.size = sizeof(struct fd5_query_sample), .size = sizeof(struct fd5_query_sample),
.resume = timestamp_resume, .resume = timestamp_resume,
.pause = timestamp_pause, .pause = timestamp_pause,
.result = time_elapsed_accumulate_result, .result = time_elapsed_accumulate_result,
}; };
/* NOTE: timestamp query isn't going to give terribly sensible results /* NOTE: timestamp query isn't going to give terribly sensible results
@ -242,12 +237,12 @@ static const struct fd_acc_sample_provider time_elapsed = {
*/ */
static const struct fd_acc_sample_provider timestamp = { static const struct fd_acc_sample_provider timestamp = {
.query_type = PIPE_QUERY_TIMESTAMP, .query_type = PIPE_QUERY_TIMESTAMP,
.always = true, .always = true,
.size = sizeof(struct fd5_query_sample), .size = sizeof(struct fd5_query_sample),
.resume = timestamp_resume, .resume = timestamp_resume,
.pause = timestamp_pause, .pause = timestamp_pause,
.result = timestamp_accumulate_result, .result = timestamp_accumulate_result,
}; };
/* /*
@ -260,208 +255,204 @@ static const struct fd_acc_sample_provider timestamp = {
*/ */
struct fd_batch_query_entry { struct fd_batch_query_entry {
uint8_t gid; /* group-id */ uint8_t gid; /* group-id */
uint8_t cid; /* countable-id within the group */ uint8_t cid; /* countable-id within the group */
}; };
struct fd_batch_query_data { struct fd_batch_query_data {
struct fd_screen *screen; struct fd_screen *screen;
unsigned num_query_entries; unsigned num_query_entries;
struct fd_batch_query_entry query_entries[]; struct fd_batch_query_entry query_entries[];
}; };
static void static void
perfcntr_resume(struct fd_acc_query *aq, struct fd_batch *batch) perfcntr_resume(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
assert_dt
{ {
struct fd_batch_query_data *data = aq->query_data; struct fd_batch_query_data *data = aq->query_data;
struct fd_screen *screen = data->screen; struct fd_screen *screen = data->screen;
struct fd_ringbuffer *ring = batch->draw; struct fd_ringbuffer *ring = batch->draw;
unsigned counters_per_group[screen->num_perfcntr_groups]; unsigned counters_per_group[screen->num_perfcntr_groups];
memset(counters_per_group, 0, sizeof(counters_per_group)); memset(counters_per_group, 0, sizeof(counters_per_group));
fd_wfi(batch, ring); fd_wfi(batch, ring);
/* configure performance counters for the requested queries: */ /* configure performance counters for the requested queries: */
for (unsigned i = 0; i < data->num_query_entries; i++) { for (unsigned i = 0; i < data->num_query_entries; i++) {
struct fd_batch_query_entry *entry = &data->query_entries[i]; struct fd_batch_query_entry *entry = &data->query_entries[i];
const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid]; const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
unsigned counter_idx = counters_per_group[entry->gid]++; unsigned counter_idx = counters_per_group[entry->gid]++;
debug_assert(counter_idx < g->num_counters); debug_assert(counter_idx < g->num_counters);
OUT_PKT4(ring, g->counters[counter_idx].select_reg, 1); OUT_PKT4(ring, g->counters[counter_idx].select_reg, 1);
OUT_RING(ring, g->countables[entry->cid].selector); OUT_RING(ring, g->countables[entry->cid].selector);
} }
memset(counters_per_group, 0, sizeof(counters_per_group)); memset(counters_per_group, 0, sizeof(counters_per_group));
/* and snapshot the start values */ /* and snapshot the start values */
for (unsigned i = 0; i < data->num_query_entries; i++) { for (unsigned i = 0; i < data->num_query_entries; i++) {
struct fd_batch_query_entry *entry = &data->query_entries[i]; struct fd_batch_query_entry *entry = &data->query_entries[i];
const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid]; const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
unsigned counter_idx = counters_per_group[entry->gid]++; unsigned counter_idx = counters_per_group[entry->gid]++;
const struct fd_perfcntr_counter *counter = &g->counters[counter_idx]; const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];
OUT_PKT7(ring, CP_REG_TO_MEM, 3); OUT_PKT7(ring, CP_REG_TO_MEM, 3);
OUT_RING(ring, CP_REG_TO_MEM_0_64B | OUT_RING(ring, CP_REG_TO_MEM_0_64B |
CP_REG_TO_MEM_0_REG(counter->counter_reg_lo)); CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
OUT_RELOC(ring, query_sample_idx(aq, i, start)); OUT_RELOC(ring, query_sample_idx(aq, i, start));
} }
} }
static void static void
perfcntr_pause(struct fd_acc_query *aq, struct fd_batch *batch) perfcntr_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
assert_dt
{ {
struct fd_batch_query_data *data = aq->query_data; struct fd_batch_query_data *data = aq->query_data;
struct fd_screen *screen = data->screen; struct fd_screen *screen = data->screen;
struct fd_ringbuffer *ring = batch->draw; struct fd_ringbuffer *ring = batch->draw;
unsigned counters_per_group[screen->num_perfcntr_groups]; unsigned counters_per_group[screen->num_perfcntr_groups];
memset(counters_per_group, 0, sizeof(counters_per_group)); memset(counters_per_group, 0, sizeof(counters_per_group));
fd_wfi(batch, ring); fd_wfi(batch, ring);
/* TODO do we need to bother to turn anything off? */ /* TODO do we need to bother to turn anything off? */
/* snapshot the end values: */ /* snapshot the end values: */
for (unsigned i = 0; i < data->num_query_entries; i++) { for (unsigned i = 0; i < data->num_query_entries; i++) {
struct fd_batch_query_entry *entry = &data->query_entries[i]; struct fd_batch_query_entry *entry = &data->query_entries[i];
const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid]; const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
unsigned counter_idx = counters_per_group[entry->gid]++; unsigned counter_idx = counters_per_group[entry->gid]++;
const struct fd_perfcntr_counter *counter = &g->counters[counter_idx]; const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];
OUT_PKT7(ring, CP_REG_TO_MEM, 3); OUT_PKT7(ring, CP_REG_TO_MEM, 3);
OUT_RING(ring, CP_REG_TO_MEM_0_64B | OUT_RING(ring, CP_REG_TO_MEM_0_64B |
CP_REG_TO_MEM_0_REG(counter->counter_reg_lo)); CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
OUT_RELOC(ring, query_sample_idx(aq, i, stop)); OUT_RELOC(ring, query_sample_idx(aq, i, stop));
} }
/* and compute the result: */ /* and compute the result: */
for (unsigned i = 0; i < data->num_query_entries; i++) { for (unsigned i = 0; i < data->num_query_entries; i++) {
/* result += stop - start: */ /* result += stop - start: */
OUT_PKT7(ring, CP_MEM_TO_MEM, 9); OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
CP_MEM_TO_MEM_0_NEG_C); OUT_RELOC(ring, query_sample_idx(aq, i, result)); /* dst */
OUT_RELOC(ring, query_sample_idx(aq, i, result)); /* dst */ OUT_RELOC(ring, query_sample_idx(aq, i, result)); /* srcA */
OUT_RELOC(ring, query_sample_idx(aq, i, result)); /* srcA */ OUT_RELOC(ring, query_sample_idx(aq, i, stop)); /* srcB */
OUT_RELOC(ring, query_sample_idx(aq, i, stop)); /* srcB */ OUT_RELOC(ring, query_sample_idx(aq, i, start)); /* srcC */
OUT_RELOC(ring, query_sample_idx(aq, i, start)); /* srcC */ }
}
} }
static void static void
perfcntr_accumulate_result(struct fd_acc_query *aq, void *buf, perfcntr_accumulate_result(struct fd_acc_query *aq, void *buf,
union pipe_query_result *result) union pipe_query_result *result)
{ {
struct fd_batch_query_data *data = aq->query_data; struct fd_batch_query_data *data = aq->query_data;
struct fd5_query_sample *sp = buf; struct fd5_query_sample *sp = buf;
for (unsigned i = 0; i < data->num_query_entries; i++) { for (unsigned i = 0; i < data->num_query_entries; i++) {
result->batch[i].u64 = sp[i].result; result->batch[i].u64 = sp[i].result;
} }
} }
static const struct fd_acc_sample_provider perfcntr = { static const struct fd_acc_sample_provider perfcntr = {
.query_type = FD_QUERY_FIRST_PERFCNTR, .query_type = FD_QUERY_FIRST_PERFCNTR,
.always = true, .always = true,
.resume = perfcntr_resume, .resume = perfcntr_resume,
.pause = perfcntr_pause, .pause = perfcntr_pause,
.result = perfcntr_accumulate_result, .result = perfcntr_accumulate_result,
}; };
static struct pipe_query * static struct pipe_query *
fd5_create_batch_query(struct pipe_context *pctx, fd5_create_batch_query(struct pipe_context *pctx, unsigned num_queries,
unsigned num_queries, unsigned *query_types) unsigned *query_types)
{ {
struct fd_context *ctx = fd_context(pctx); struct fd_context *ctx = fd_context(pctx);
struct fd_screen *screen = ctx->screen; struct fd_screen *screen = ctx->screen;
struct fd_query *q; struct fd_query *q;
struct fd_acc_query *aq; struct fd_acc_query *aq;
struct fd_batch_query_data *data; struct fd_batch_query_data *data;
data = CALLOC_VARIANT_LENGTH_STRUCT(fd_batch_query_data, data = CALLOC_VARIANT_LENGTH_STRUCT(
num_queries * sizeof(data->query_entries[0])); fd_batch_query_data, num_queries * sizeof(data->query_entries[0]));
data->screen = screen; data->screen = screen;
data->num_query_entries = num_queries; data->num_query_entries = num_queries;
/* validate the requested query_types and ensure we don't try /* validate the requested query_types and ensure we don't try
* to request more query_types of a given group than we have * to request more query_types of a given group than we have
* counters: * counters:
*/ */
unsigned counters_per_group[screen->num_perfcntr_groups]; unsigned counters_per_group[screen->num_perfcntr_groups];
memset(counters_per_group, 0, sizeof(counters_per_group)); memset(counters_per_group, 0, sizeof(counters_per_group));
for (unsigned i = 0; i < num_queries; i++) { for (unsigned i = 0; i < num_queries; i++) {
unsigned idx = query_types[i] - FD_QUERY_FIRST_PERFCNTR; unsigned idx = query_types[i] - FD_QUERY_FIRST_PERFCNTR;
/* verify valid query_type, ie. is it actually a perfcntr? */ /* verify valid query_type, ie. is it actually a perfcntr? */
if ((query_types[i] < FD_QUERY_FIRST_PERFCNTR) || if ((query_types[i] < FD_QUERY_FIRST_PERFCNTR) ||
(idx >= screen->num_perfcntr_queries)) { (idx >= screen->num_perfcntr_queries)) {
mesa_loge("invalid batch query query_type: %u", query_types[i]); mesa_loge("invalid batch query query_type: %u", query_types[i]);
goto error; goto error;
} }
struct fd_batch_query_entry *entry = &data->query_entries[i]; struct fd_batch_query_entry *entry = &data->query_entries[i];
struct pipe_driver_query_info *pq = &screen->perfcntr_queries[idx]; struct pipe_driver_query_info *pq = &screen->perfcntr_queries[idx];
entry->gid = pq->group_id; entry->gid = pq->group_id;
/* the perfcntr_queries[] table flattens all the countables /* the perfcntr_queries[] table flattens all the countables
* for each group in series, ie: * for each group in series, ie:
* *
* (G0,C0), .., (G0,Cn), (G1,C0), .., (G1,Cm), ... * (G0,C0), .., (G0,Cn), (G1,C0), .., (G1,Cm), ...
* *
* So to find the countable index just step back through the * So to find the countable index just step back through the
* table to find the first entry with the same group-id. * table to find the first entry with the same group-id.
*/ */
while (pq > screen->perfcntr_queries) { while (pq > screen->perfcntr_queries) {
pq--; pq--;
if (pq->group_id == entry->gid) if (pq->group_id == entry->gid)
entry->cid++; entry->cid++;
} }
if (counters_per_group[entry->gid] >= if (counters_per_group[entry->gid] >=
screen->perfcntr_groups[entry->gid].num_counters) { screen->perfcntr_groups[entry->gid].num_counters) {
mesa_loge("too many counters for group %u\n", entry->gid); mesa_loge("too many counters for group %u\n", entry->gid);
goto error; goto error;
} }
counters_per_group[entry->gid]++; counters_per_group[entry->gid]++;
} }
q = fd_acc_create_query2(ctx, 0, 0, &perfcntr); q = fd_acc_create_query2(ctx, 0, 0, &perfcntr);
aq = fd_acc_query(q); aq = fd_acc_query(q);
/* sample buffer size is based on # of queries: */ /* sample buffer size is based on # of queries: */
aq->size = num_queries * sizeof(struct fd5_query_sample); aq->size = num_queries * sizeof(struct fd5_query_sample);
aq->query_data = data; aq->query_data = data;
return (struct pipe_query *)q; return (struct pipe_query *)q;
error: error:
free(data); free(data);
return NULL; return NULL;
} }
void void
fd5_query_context_init(struct pipe_context *pctx) fd5_query_context_init(struct pipe_context *pctx) disable_thread_safety_analysis
disable_thread_safety_analysis
{ {
struct fd_context *ctx = fd_context(pctx); struct fd_context *ctx = fd_context(pctx);
ctx->create_query = fd_acc_create_query; ctx->create_query = fd_acc_create_query;
ctx->query_update_batch = fd_acc_query_update_batch; ctx->query_update_batch = fd_acc_query_update_batch;
pctx->create_batch_query = fd5_create_batch_query; pctx->create_batch_query = fd5_create_batch_query;
fd_acc_query_register_provider(pctx, &occlusion_counter); fd_acc_query_register_provider(pctx, &occlusion_counter);
fd_acc_query_register_provider(pctx, &occlusion_predicate); fd_acc_query_register_provider(pctx, &occlusion_predicate);
fd_acc_query_register_provider(pctx, &occlusion_predicate_conservative); fd_acc_query_register_provider(pctx, &occlusion_predicate_conservative);
fd_acc_query_register_provider(pctx, &time_elapsed); fd_acc_query_register_provider(pctx, &time_elapsed);
fd_acc_query_register_provider(pctx, &timestamp); fd_acc_query_register_provider(pctx, &timestamp);
} }

View file

@ -24,75 +24,73 @@
* Rob Clark <robclark@freedesktop.org> * Rob Clark <robclark@freedesktop.org>
*/ */
#include "pipe/p_state.h" #include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h" #include "util/u_memory.h"
#include "util/u_string.h"
#include "fd5_rasterizer.h"
#include "fd5_context.h" #include "fd5_context.h"
#include "fd5_format.h" #include "fd5_format.h"
#include "fd5_rasterizer.h"
void * void *
fd5_rasterizer_state_create(struct pipe_context *pctx, fd5_rasterizer_state_create(struct pipe_context *pctx,
const struct pipe_rasterizer_state *cso) const struct pipe_rasterizer_state *cso)
{ {
struct fd5_rasterizer_stateobj *so; struct fd5_rasterizer_stateobj *so;
float psize_min, psize_max; float psize_min, psize_max;
so = CALLOC_STRUCT(fd5_rasterizer_stateobj); so = CALLOC_STRUCT(fd5_rasterizer_stateobj);
if (!so) if (!so)
return NULL; return NULL;
so->base = *cso; so->base = *cso;
if (cso->point_size_per_vertex) { if (cso->point_size_per_vertex) {
psize_min = util_get_min_point_size(cso); psize_min = util_get_min_point_size(cso);
psize_max = 4092; psize_max = 4092;
} else { } else {
/* Force the point size to be as if the vertex output was disabled. */ /* Force the point size to be as if the vertex output was disabled. */
psize_min = cso->point_size; psize_min = cso->point_size;
psize_max = cso->point_size; psize_max = cso->point_size;
} }
so->gras_su_point_minmax = so->gras_su_point_minmax = A5XX_GRAS_SU_POINT_MINMAX_MIN(psize_min) |
A5XX_GRAS_SU_POINT_MINMAX_MIN(psize_min) | A5XX_GRAS_SU_POINT_MINMAX_MAX(psize_max);
A5XX_GRAS_SU_POINT_MINMAX_MAX(psize_max); so->gras_su_point_size = A5XX_GRAS_SU_POINT_SIZE(cso->point_size);
so->gras_su_point_size = A5XX_GRAS_SU_POINT_SIZE(cso->point_size); so->gras_su_poly_offset_scale =
so->gras_su_poly_offset_scale = A5XX_GRAS_SU_POLY_OFFSET_SCALE(cso->offset_scale);
A5XX_GRAS_SU_POLY_OFFSET_SCALE(cso->offset_scale); so->gras_su_poly_offset_offset =
so->gras_su_poly_offset_offset = A5XX_GRAS_SU_POLY_OFFSET_OFFSET(cso->offset_units);
A5XX_GRAS_SU_POLY_OFFSET_OFFSET(cso->offset_units); so->gras_su_poly_offset_clamp =
so->gras_su_poly_offset_clamp = A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(cso->offset_clamp);
A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(cso->offset_clamp);
so->gras_su_cntl = so->gras_su_cntl = A5XX_GRAS_SU_CNTL_LINEHALFWIDTH(cso->line_width / 2.0);
A5XX_GRAS_SU_CNTL_LINEHALFWIDTH(cso->line_width/2.0); so->pc_raster_cntl =
so->pc_raster_cntl = A5XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE(
A5XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE(fd_polygon_mode(cso->fill_front)) | fd_polygon_mode(cso->fill_front)) |
A5XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE(fd_polygon_mode(cso->fill_back)); A5XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE(fd_polygon_mode(cso->fill_back));
if (cso->fill_front != PIPE_POLYGON_MODE_FILL || if (cso->fill_front != PIPE_POLYGON_MODE_FILL ||
cso->fill_back != PIPE_POLYGON_MODE_FILL) cso->fill_back != PIPE_POLYGON_MODE_FILL)
so->pc_raster_cntl |= A5XX_PC_RASTER_CNTL_POLYMODE_ENABLE; so->pc_raster_cntl |= A5XX_PC_RASTER_CNTL_POLYMODE_ENABLE;
if (cso->cull_face & PIPE_FACE_FRONT) if (cso->cull_face & PIPE_FACE_FRONT)
so->gras_su_cntl |= A5XX_GRAS_SU_CNTL_CULL_FRONT; so->gras_su_cntl |= A5XX_GRAS_SU_CNTL_CULL_FRONT;
if (cso->cull_face & PIPE_FACE_BACK) if (cso->cull_face & PIPE_FACE_BACK)
so->gras_su_cntl |= A5XX_GRAS_SU_CNTL_CULL_BACK; so->gras_su_cntl |= A5XX_GRAS_SU_CNTL_CULL_BACK;
if (!cso->front_ccw) if (!cso->front_ccw)
so->gras_su_cntl |= A5XX_GRAS_SU_CNTL_FRONT_CW; so->gras_su_cntl |= A5XX_GRAS_SU_CNTL_FRONT_CW;
if (cso->offset_tri) if (cso->offset_tri)
so->gras_su_cntl |= A5XX_GRAS_SU_CNTL_POLY_OFFSET; so->gras_su_cntl |= A5XX_GRAS_SU_CNTL_POLY_OFFSET;
if (!cso->flatshade_first) if (!cso->flatshade_first)
so->pc_primitive_cntl |= A5XX_PC_PRIMITIVE_CNTL_PROVOKING_VTX_LAST; so->pc_primitive_cntl |= A5XX_PC_PRIMITIVE_CNTL_PROVOKING_VTX_LAST;
// if (!cso->depth_clip) // if (!cso->depth_clip)
// so->gras_cl_clip_cntl |= A5XX_GRAS_CL_CLIP_CNTL_ZNEAR_CLIP_DISABLE | // so->gras_cl_clip_cntl |= A5XX_GRAS_CL_CLIP_CNTL_ZNEAR_CLIP_DISABLE
// A5XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE; //| A5XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE;
if (cso->clip_halfz) if (cso->clip_halfz)
so->gras_cl_clip_cntl |= A5XX_GRAS_CL_CNTL_ZERO_GB_SCALE_Z; so->gras_cl_clip_cntl |= A5XX_GRAS_CL_CNTL_ZERO_GB_SCALE_Z;
return so; return so;
} }

View file

@ -27,31 +27,31 @@
#ifndef FD5_RASTERIZER_H_ #ifndef FD5_RASTERIZER_H_
#define FD5_RASTERIZER_H_ #define FD5_RASTERIZER_H_
#include "pipe/p_state.h"
#include "pipe/p_context.h" #include "pipe/p_context.h"
#include "pipe/p_state.h"
struct fd5_rasterizer_stateobj { struct fd5_rasterizer_stateobj {
struct pipe_rasterizer_state base; struct pipe_rasterizer_state base;
uint32_t gras_su_point_minmax; uint32_t gras_su_point_minmax;
uint32_t gras_su_point_size; uint32_t gras_su_point_size;
uint32_t gras_su_poly_offset_scale; uint32_t gras_su_poly_offset_scale;
uint32_t gras_su_poly_offset_offset; uint32_t gras_su_poly_offset_offset;
uint32_t gras_su_poly_offset_clamp; uint32_t gras_su_poly_offset_clamp;
uint32_t gras_su_cntl; uint32_t gras_su_cntl;
uint32_t gras_cl_clip_cntl; uint32_t gras_cl_clip_cntl;
uint32_t pc_primitive_cntl; uint32_t pc_primitive_cntl;
uint32_t pc_raster_cntl; uint32_t pc_raster_cntl;
}; };
static inline struct fd5_rasterizer_stateobj * static inline struct fd5_rasterizer_stateobj *
fd5_rasterizer_stateobj(struct pipe_rasterizer_state *rast) fd5_rasterizer_stateobj(struct pipe_rasterizer_state *rast)
{ {
return (struct fd5_rasterizer_stateobj *)rast; return (struct fd5_rasterizer_stateobj *)rast;
} }
void * fd5_rasterizer_state_create(struct pipe_context *pctx, void *fd5_rasterizer_state_create(struct pipe_context *pctx,
const struct pipe_rasterizer_state *cso); const struct pipe_rasterizer_state *cso);
#endif /* FD5_RASTERIZER_H_ */ #endif /* FD5_RASTERIZER_H_ */

View file

@ -29,43 +29,42 @@
static void static void
setup_lrz(struct fd_resource *rsc) setup_lrz(struct fd_resource *rsc)
{ {
struct fd_screen *screen = fd_screen(rsc->b.b.screen); struct fd_screen *screen = fd_screen(rsc->b.b.screen);
const uint32_t flags = DRM_FREEDRENO_GEM_CACHE_WCOMBINE | const uint32_t flags =
DRM_FREEDRENO_GEM_TYPE_KMEM; /* TODO */ DRM_FREEDRENO_GEM_CACHE_WCOMBINE | DRM_FREEDRENO_GEM_TYPE_KMEM; /* TODO */
unsigned lrz_pitch = align(DIV_ROUND_UP(rsc->b.b.width0, 8), 64); unsigned lrz_pitch = align(DIV_ROUND_UP(rsc->b.b.width0, 8), 64);
unsigned lrz_height = DIV_ROUND_UP(rsc->b.b.height0, 8); unsigned lrz_height = DIV_ROUND_UP(rsc->b.b.height0, 8);
/* LRZ buffer is super-sampled: */ /* LRZ buffer is super-sampled: */
switch (rsc->b.b.nr_samples) { switch (rsc->b.b.nr_samples) {
case 4: case 4:
lrz_pitch *= 2; lrz_pitch *= 2;
FALLTHROUGH; FALLTHROUGH;
case 2: case 2:
lrz_height *= 2; lrz_height *= 2;
} }
unsigned size = lrz_pitch * lrz_height * 2; unsigned size = lrz_pitch * lrz_height * 2;
size += 0x1000; /* for GRAS_LRZ_FAST_CLEAR_BUFFER */ size += 0x1000; /* for GRAS_LRZ_FAST_CLEAR_BUFFER */
rsc->lrz_height = lrz_height; rsc->lrz_height = lrz_height;
rsc->lrz_width = lrz_pitch; rsc->lrz_width = lrz_pitch;
rsc->lrz_pitch = lrz_pitch; rsc->lrz_pitch = lrz_pitch;
rsc->lrz = fd_bo_new(screen->dev, size, flags, "lrz"); rsc->lrz = fd_bo_new(screen->dev, size, flags, "lrz");
} }
uint32_t uint32_t
fd5_setup_slices(struct fd_resource *rsc) fd5_setup_slices(struct fd_resource *rsc)
{ {
struct pipe_resource *prsc = &rsc->b.b; struct pipe_resource *prsc = &rsc->b.b;
if (FD_DBG(LRZ) && has_depth(rsc->b.b.format)) if (FD_DBG(LRZ) && has_depth(rsc->b.b.format))
setup_lrz(rsc); setup_lrz(rsc);
fdl5_layout(&rsc->layout, prsc->format, fd_resource_nr_samples(prsc), fdl5_layout(&rsc->layout, prsc->format, fd_resource_nr_samples(prsc),
prsc->width0, prsc->height0, prsc->depth0, prsc->width0, prsc->height0, prsc->depth0, prsc->last_level + 1,
prsc->last_level + 1, prsc->array_size, prsc->array_size, prsc->target == PIPE_TEXTURE_3D);
prsc->target == PIPE_TEXTURE_3D);
return rsc->layout.size; return rsc->layout.size;
} }

View file

@ -27,112 +27,106 @@
#include "pipe/p_screen.h" #include "pipe/p_screen.h"
#include "util/format/u_format.h" #include "util/format/u_format.h"
#include "fd5_screen.h"
#include "fd5_blitter.h" #include "fd5_blitter.h"
#include "fd5_context.h" #include "fd5_context.h"
#include "fd5_format.h"
#include "fd5_emit.h" #include "fd5_emit.h"
#include "fd5_format.h"
#include "fd5_resource.h" #include "fd5_resource.h"
#include "fd5_screen.h"
#include "ir3/ir3_compiler.h" #include "ir3/ir3_compiler.h"
static bool static bool
valid_sample_count(unsigned sample_count) valid_sample_count(unsigned sample_count)
{ {
switch (sample_count) { switch (sample_count) {
case 0: case 0:
case 1: case 1:
case 2: case 2:
case 4: case 4:
return true; return true;
default: default:
return false; return false;
} }
} }
static bool static bool
fd5_screen_is_format_supported(struct pipe_screen *pscreen, fd5_screen_is_format_supported(struct pipe_screen *pscreen,
enum pipe_format format, enum pipe_format format,
enum pipe_texture_target target, enum pipe_texture_target target,
unsigned sample_count, unsigned sample_count,
unsigned storage_sample_count, unsigned storage_sample_count, unsigned usage)
unsigned usage)
{ {
unsigned retval = 0; unsigned retval = 0;
if ((target >= PIPE_MAX_TEXTURE_TYPES) || if ((target >= PIPE_MAX_TEXTURE_TYPES) ||
!valid_sample_count(sample_count)) { !valid_sample_count(sample_count)) {
DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x", DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x",
util_format_name(format), target, sample_count, usage); util_format_name(format), target, sample_count, usage);
return false; return false;
} }
if (MAX2(1, sample_count) != MAX2(1, storage_sample_count)) if (MAX2(1, sample_count) != MAX2(1, storage_sample_count))
return false; return false;
if ((usage & PIPE_BIND_VERTEX_BUFFER) && if ((usage & PIPE_BIND_VERTEX_BUFFER) &&
(fd5_pipe2vtx(format) != VFMT5_NONE)) { (fd5_pipe2vtx(format) != VFMT5_NONE)) {
retval |= PIPE_BIND_VERTEX_BUFFER; retval |= PIPE_BIND_VERTEX_BUFFER;
} }
if ((usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE)) && if ((usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE)) &&
(fd5_pipe2tex(format) != TFMT5_NONE) && (fd5_pipe2tex(format) != TFMT5_NONE) &&
(target == PIPE_BUFFER || (target == PIPE_BUFFER || util_format_get_blocksize(format) != 12)) {
util_format_get_blocksize(format) != 12)) { retval |= usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE);
retval |= usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE); }
}
if ((usage & (PIPE_BIND_RENDER_TARGET | if ((usage &
PIPE_BIND_DISPLAY_TARGET | (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET |
PIPE_BIND_SCANOUT | PIPE_BIND_SCANOUT | PIPE_BIND_SHARED | PIPE_BIND_COMPUTE_RESOURCE)) &&
PIPE_BIND_SHARED | (fd5_pipe2color(format) != RB5_NONE) &&
PIPE_BIND_COMPUTE_RESOURCE)) && (fd5_pipe2tex(format) != TFMT5_NONE)) {
(fd5_pipe2color(format) != RB5_NONE) && retval |= usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET |
(fd5_pipe2tex(format) != TFMT5_NONE)) { PIPE_BIND_SCANOUT | PIPE_BIND_SHARED |
retval |= usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_COMPUTE_RESOURCE);
PIPE_BIND_DISPLAY_TARGET | }
PIPE_BIND_SCANOUT |
PIPE_BIND_SHARED |
PIPE_BIND_COMPUTE_RESOURCE);
}
/* For ARB_framebuffer_no_attachments: */ /* For ARB_framebuffer_no_attachments: */
if ((usage & PIPE_BIND_RENDER_TARGET) && (format == PIPE_FORMAT_NONE)) { if ((usage & PIPE_BIND_RENDER_TARGET) && (format == PIPE_FORMAT_NONE)) {
retval |= usage & PIPE_BIND_RENDER_TARGET; retval |= usage & PIPE_BIND_RENDER_TARGET;
} }
if ((usage & PIPE_BIND_DEPTH_STENCIL) && if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
(fd5_pipe2depth(format) != (enum a5xx_depth_format)~0) && (fd5_pipe2depth(format) != (enum a5xx_depth_format) ~0) &&
(fd5_pipe2tex(format) != TFMT5_NONE)) { (fd5_pipe2tex(format) != TFMT5_NONE)) {
retval |= PIPE_BIND_DEPTH_STENCIL; retval |= PIPE_BIND_DEPTH_STENCIL;
} }
if ((usage & PIPE_BIND_INDEX_BUFFER) && if ((usage & PIPE_BIND_INDEX_BUFFER) &&
(fd_pipe2index(format) != (enum pc_di_index_size)~0)) { (fd_pipe2index(format) != (enum pc_di_index_size) ~0)) {
retval |= PIPE_BIND_INDEX_BUFFER; retval |= PIPE_BIND_INDEX_BUFFER;
} }
if (retval != usage) { if (retval != usage) {
DBG("not supported: format=%s, target=%d, sample_count=%d, " DBG("not supported: format=%s, target=%d, sample_count=%d, "
"usage=%x, retval=%x", util_format_name(format), "usage=%x, retval=%x",
target, sample_count, usage, retval); util_format_name(format), target, sample_count, usage, retval);
} }
return retval == usage; return retval == usage;
} }
void void
fd5_screen_init(struct pipe_screen *pscreen) fd5_screen_init(struct pipe_screen *pscreen)
{ {
struct fd_screen *screen = fd_screen(pscreen); struct fd_screen *screen = fd_screen(pscreen);
screen->max_rts = A5XX_MAX_RENDER_TARGETS; screen->max_rts = A5XX_MAX_RENDER_TARGETS;
pscreen->context_create = fd5_context_create; pscreen->context_create = fd5_context_create;
pscreen->is_format_supported = fd5_screen_is_format_supported; pscreen->is_format_supported = fd5_screen_is_format_supported;
screen->setup_slices = fd5_setup_slices; screen->setup_slices = fd5_setup_slices;
if (FD_DBG(TTILE)) if (FD_DBG(TTILE))
screen->tile_mode = fd5_tile_mode; screen->tile_mode = fd5_tile_mode;
fd5_emit_init_screen(pscreen); fd5_emit_init_screen(pscreen);
ir3_screen_init(pscreen); ir3_screen_init(pscreen);
} }

View file

@ -38,13 +38,13 @@ void fd5_screen_init(struct pipe_screen *pscreen);
static inline void static inline void
emit_marker5(struct fd_ringbuffer *ring, int scratch_idx) emit_marker5(struct fd_ringbuffer *ring, int scratch_idx)
{ {
extern int32_t marker_cnt; extern int32_t marker_cnt;
unsigned reg = REG_A5XX_CP_SCRATCH_REG(scratch_idx); unsigned reg = REG_A5XX_CP_SCRATCH_REG(scratch_idx);
if (__EMIT_MARKER) { if (__EMIT_MARKER) {
OUT_WFI5(ring); OUT_WFI5(ring);
OUT_PKT4(ring, reg, 1); OUT_PKT4(ring, reg, 1);
OUT_RING(ring, p_atomic_inc_return(&marker_cnt)); OUT_RING(ring, p_atomic_inc_return(&marker_cnt));
} }
} }
#endif /* FD5_SCREEN_H_ */ #endif /* FD5_SCREEN_H_ */

View file

@ -25,263 +25,250 @@
*/ */
#include "pipe/p_state.h" #include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/format/u_format.h" #include "util/format/u_format.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_string.h"
#include "fd5_texture.h"
#include "fd5_format.h" #include "fd5_format.h"
#include "fd5_texture.h"
static enum a5xx_tex_clamp static enum a5xx_tex_clamp
tex_clamp(unsigned wrap, bool *needs_border) tex_clamp(unsigned wrap, bool *needs_border)
{ {
switch (wrap) { switch (wrap) {
case PIPE_TEX_WRAP_REPEAT: case PIPE_TEX_WRAP_REPEAT:
return A5XX_TEX_REPEAT; return A5XX_TEX_REPEAT;
case PIPE_TEX_WRAP_CLAMP_TO_EDGE: case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
return A5XX_TEX_CLAMP_TO_EDGE; return A5XX_TEX_CLAMP_TO_EDGE;
case PIPE_TEX_WRAP_CLAMP_TO_BORDER: case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
*needs_border = true; *needs_border = true;
return A5XX_TEX_CLAMP_TO_BORDER; return A5XX_TEX_CLAMP_TO_BORDER;
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
/* only works for PoT.. need to emulate otherwise! */ /* only works for PoT.. need to emulate otherwise! */
return A5XX_TEX_MIRROR_CLAMP; return A5XX_TEX_MIRROR_CLAMP;
case PIPE_TEX_WRAP_MIRROR_REPEAT: case PIPE_TEX_WRAP_MIRROR_REPEAT:
return A5XX_TEX_MIRROR_REPEAT; return A5XX_TEX_MIRROR_REPEAT;
case PIPE_TEX_WRAP_MIRROR_CLAMP: case PIPE_TEX_WRAP_MIRROR_CLAMP:
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
/* these two we could perhaps emulate, but we currently /* these two we could perhaps emulate, but we currently
* just don't advertise PIPE_CAP_TEXTURE_MIRROR_CLAMP * just don't advertise PIPE_CAP_TEXTURE_MIRROR_CLAMP
*/ */
default: default:
DBG("invalid wrap: %u", wrap); DBG("invalid wrap: %u", wrap);
return 0; return 0;
} }
} }
static enum a5xx_tex_filter static enum a5xx_tex_filter
tex_filter(unsigned filter, bool aniso) tex_filter(unsigned filter, bool aniso)
{ {
switch (filter) { switch (filter) {
case PIPE_TEX_FILTER_NEAREST: case PIPE_TEX_FILTER_NEAREST:
return A5XX_TEX_NEAREST; return A5XX_TEX_NEAREST;
case PIPE_TEX_FILTER_LINEAR: case PIPE_TEX_FILTER_LINEAR:
return aniso ? A5XX_TEX_ANISO : A5XX_TEX_LINEAR; return aniso ? A5XX_TEX_ANISO : A5XX_TEX_LINEAR;
default: default:
DBG("invalid filter: %u", filter); DBG("invalid filter: %u", filter);
return 0; return 0;
} }
} }
static void * static void *
fd5_sampler_state_create(struct pipe_context *pctx, fd5_sampler_state_create(struct pipe_context *pctx,
const struct pipe_sampler_state *cso) const struct pipe_sampler_state *cso)
{ {
struct fd5_sampler_stateobj *so = CALLOC_STRUCT(fd5_sampler_stateobj); struct fd5_sampler_stateobj *so = CALLOC_STRUCT(fd5_sampler_stateobj);
unsigned aniso = util_last_bit(MIN2(cso->max_anisotropy >> 1, 8)); unsigned aniso = util_last_bit(MIN2(cso->max_anisotropy >> 1, 8));
bool miplinear = false; bool miplinear = false;
if (!so) if (!so)
return NULL; return NULL;
so->base = *cso; so->base = *cso;
if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR) if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
miplinear = true; miplinear = true;
so->needs_border = false; so->needs_border = false;
so->texsamp0 = so->texsamp0 =
COND(miplinear, A5XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR) | COND(miplinear, A5XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR) |
A5XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter, aniso)) | A5XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter, aniso)) |
A5XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter, aniso)) | A5XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter, aniso)) |
A5XX_TEX_SAMP_0_ANISO(aniso) | A5XX_TEX_SAMP_0_ANISO(aniso) |
A5XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s, &so->needs_border)) | A5XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s, &so->needs_border)) |
A5XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t, &so->needs_border)) | A5XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t, &so->needs_border)) |
A5XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r, &so->needs_border)); A5XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r, &so->needs_border));
so->texsamp1 = so->texsamp1 =
COND(!cso->seamless_cube_map, A5XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF) | COND(!cso->seamless_cube_map, A5XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF) |
COND(!cso->normalized_coords, A5XX_TEX_SAMP_1_UNNORM_COORDS); COND(!cso->normalized_coords, A5XX_TEX_SAMP_1_UNNORM_COORDS);
so->texsamp0 |= A5XX_TEX_SAMP_0_LOD_BIAS(cso->lod_bias); so->texsamp0 |= A5XX_TEX_SAMP_0_LOD_BIAS(cso->lod_bias);
if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) { if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
so->texsamp1 |= so->texsamp1 |= A5XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) |
A5XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) | A5XX_TEX_SAMP_1_MAX_LOD(cso->max_lod);
A5XX_TEX_SAMP_1_MAX_LOD(cso->max_lod); } else {
} else { /* If we're not doing mipmap filtering, we still need a slightly > 0
/* If we're not doing mipmap filtering, we still need a slightly > 0 * LOD clamp so the HW can decide between min and mag filtering of
* LOD clamp so the HW can decide between min and mag filtering of * level 0.
* level 0. */
*/ so->texsamp1 |= A5XX_TEX_SAMP_1_MIN_LOD(MIN2(cso->min_lod, 0.125)) |
so->texsamp1 |= A5XX_TEX_SAMP_1_MAX_LOD(MIN2(cso->max_lod, 0.125));
A5XX_TEX_SAMP_1_MIN_LOD(MIN2(cso->min_lod, 0.125)) | }
A5XX_TEX_SAMP_1_MAX_LOD(MIN2(cso->max_lod, 0.125));
}
if (cso->compare_mode) if (cso->compare_mode)
so->texsamp1 |= A5XX_TEX_SAMP_1_COMPARE_FUNC(cso->compare_func); /* maps 1:1 */ so->texsamp1 |=
A5XX_TEX_SAMP_1_COMPARE_FUNC(cso->compare_func); /* maps 1:1 */
return so; return so;
} }
static bool static bool
use_astc_srgb_workaround(struct pipe_context *pctx, enum pipe_format format) use_astc_srgb_workaround(struct pipe_context *pctx, enum pipe_format format)
{ {
return false; // TODO check if this is still needed on a5xx return false; // TODO check if this is still needed on a5xx
} }
static struct pipe_sampler_view * static struct pipe_sampler_view *
fd5_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, fd5_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
const struct pipe_sampler_view *cso) const struct pipe_sampler_view *cso)
{ {
struct fd5_pipe_sampler_view *so = CALLOC_STRUCT(fd5_pipe_sampler_view); struct fd5_pipe_sampler_view *so = CALLOC_STRUCT(fd5_pipe_sampler_view);
struct fd_resource *rsc = fd_resource(prsc); struct fd_resource *rsc = fd_resource(prsc);
enum pipe_format format = cso->format; enum pipe_format format = cso->format;
unsigned lvl, layers = 0; unsigned lvl, layers = 0;
if (!so) if (!so)
return NULL; return NULL;
if (format == PIPE_FORMAT_X32_S8X24_UINT) { if (format == PIPE_FORMAT_X32_S8X24_UINT) {
rsc = rsc->stencil; rsc = rsc->stencil;
format = rsc->b.b.format; format = rsc->b.b.format;
} }
so->base = *cso; so->base = *cso;
pipe_reference(NULL, &prsc->reference); pipe_reference(NULL, &prsc->reference);
so->base.texture = prsc; so->base.texture = prsc;
so->base.reference.count = 1; so->base.reference.count = 1;
so->base.context = pctx; so->base.context = pctx;
so->texconst0 = so->texconst0 = A5XX_TEX_CONST_0_FMT(fd5_pipe2tex(format)) |
A5XX_TEX_CONST_0_FMT(fd5_pipe2tex(format)) | A5XX_TEX_CONST_0_SAMPLES(fd_msaa_samples(prsc->nr_samples)) |
A5XX_TEX_CONST_0_SAMPLES(fd_msaa_samples(prsc->nr_samples)) | fd5_tex_swiz(format, cso->swizzle_r, cso->swizzle_g,
fd5_tex_swiz(format, cso->swizzle_r, cso->swizzle_g, cso->swizzle_b, cso->swizzle_a);
cso->swizzle_b, cso->swizzle_a);
/* NOTE: since we sample z24s8 using 8888_UINT format, the swizzle /* NOTE: since we sample z24s8 using 8888_UINT format, the swizzle
* we get isn't quite right. Use SWAP(XYZW) as a cheap and cheerful * we get isn't quite right. Use SWAP(XYZW) as a cheap and cheerful
* way to re-arrange things so stencil component is where the swiz * way to re-arrange things so stencil component is where the swiz
* expects. * expects.
* *
* Note that gallium expects stencil sampler to return (s,s,s,s) * Note that gallium expects stencil sampler to return (s,s,s,s)
* which isn't quite true. To make that happen we'd have to massage * which isn't quite true. To make that happen we'd have to massage
* the swizzle. But in practice only the .x component is used. * the swizzle. But in practice only the .x component is used.
*/ */
if (format == PIPE_FORMAT_X24S8_UINT) { if (format == PIPE_FORMAT_X24S8_UINT) {
so->texconst0 |= A5XX_TEX_CONST_0_SWAP(XYZW); so->texconst0 |= A5XX_TEX_CONST_0_SWAP(XYZW);
} }
if (util_format_is_srgb(format)) { if (util_format_is_srgb(format)) {
if (use_astc_srgb_workaround(pctx, format)) if (use_astc_srgb_workaround(pctx, format))
so->astc_srgb = true; so->astc_srgb = true;
so->texconst0 |= A5XX_TEX_CONST_0_SRGB; so->texconst0 |= A5XX_TEX_CONST_0_SRGB;
} }
if (cso->target == PIPE_BUFFER) { if (cso->target == PIPE_BUFFER) {
unsigned elements = cso->u.buf.size / util_format_get_blocksize(format); unsigned elements = cso->u.buf.size / util_format_get_blocksize(format);
lvl = 0; lvl = 0;
so->texconst1 = so->texconst1 = A5XX_TEX_CONST_1_WIDTH(elements & MASK(15)) |
A5XX_TEX_CONST_1_WIDTH(elements & MASK(15)) | A5XX_TEX_CONST_1_HEIGHT(elements >> 15);
A5XX_TEX_CONST_1_HEIGHT(elements >> 15); so->texconst2 = A5XX_TEX_CONST_2_UNK4 | A5XX_TEX_CONST_2_UNK31;
so->texconst2 = so->offset = cso->u.buf.offset;
A5XX_TEX_CONST_2_UNK4 | } else {
A5XX_TEX_CONST_2_UNK31; unsigned miplevels;
so->offset = cso->u.buf.offset;
} else {
unsigned miplevels;
lvl = fd_sampler_first_level(cso); lvl = fd_sampler_first_level(cso);
miplevels = fd_sampler_last_level(cso) - lvl; miplevels = fd_sampler_last_level(cso) - lvl;
layers = cso->u.tex.last_layer - cso->u.tex.first_layer + 1; layers = cso->u.tex.last_layer - cso->u.tex.first_layer + 1;
so->texconst0 |= A5XX_TEX_CONST_0_MIPLVLS(miplevels); so->texconst0 |= A5XX_TEX_CONST_0_MIPLVLS(miplevels);
so->texconst1 = so->texconst1 = A5XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) |
A5XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) | A5XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
A5XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl)); so->texconst2 = A5XX_TEX_CONST_2_PITCHALIGN(rsc->layout.pitchalign - 6) |
so->texconst2 = A5XX_TEX_CONST_2_PITCH(fd_resource_pitch(rsc, lvl));
A5XX_TEX_CONST_2_PITCHALIGN(rsc->layout.pitchalign - 6) | so->offset = fd_resource_offset(rsc, lvl, cso->u.tex.first_layer);
A5XX_TEX_CONST_2_PITCH(fd_resource_pitch(rsc, lvl)); }
so->offset = fd_resource_offset(rsc, lvl, cso->u.tex.first_layer);
}
so->texconst2 |= A5XX_TEX_CONST_2_TYPE(fd5_tex_type(cso->target)); so->texconst2 |= A5XX_TEX_CONST_2_TYPE(fd5_tex_type(cso->target));
switch (cso->target) { switch (cso->target) {
case PIPE_TEXTURE_RECT: case PIPE_TEXTURE_RECT:
case PIPE_TEXTURE_1D: case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_2D: case PIPE_TEXTURE_2D:
so->texconst3 = so->texconst3 = A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->layout.layer_size);
A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->layout.layer_size); so->texconst5 = A5XX_TEX_CONST_5_DEPTH(1);
so->texconst5 = break;
A5XX_TEX_CONST_5_DEPTH(1); case PIPE_TEXTURE_1D_ARRAY:
break; case PIPE_TEXTURE_2D_ARRAY:
case PIPE_TEXTURE_1D_ARRAY: so->texconst3 = A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->layout.layer_size);
case PIPE_TEXTURE_2D_ARRAY: so->texconst5 = A5XX_TEX_CONST_5_DEPTH(layers);
so->texconst3 = break;
A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->layout.layer_size); case PIPE_TEXTURE_CUBE:
so->texconst5 = case PIPE_TEXTURE_CUBE_ARRAY:
A5XX_TEX_CONST_5_DEPTH(layers); so->texconst3 = A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->layout.layer_size);
break; so->texconst5 = A5XX_TEX_CONST_5_DEPTH(layers / 6);
case PIPE_TEXTURE_CUBE: break;
case PIPE_TEXTURE_CUBE_ARRAY: case PIPE_TEXTURE_3D:
so->texconst3 = so->texconst3 =
A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->layout.layer_size); A5XX_TEX_CONST_3_MIN_LAYERSZ(
so->texconst5 = fd_resource_slice(rsc, prsc->last_level)->size0) |
A5XX_TEX_CONST_5_DEPTH(layers / 6); A5XX_TEX_CONST_3_ARRAY_PITCH(fd_resource_slice(rsc, lvl)->size0);
break; so->texconst5 = A5XX_TEX_CONST_5_DEPTH(u_minify(prsc->depth0, lvl));
case PIPE_TEXTURE_3D: break;
so->texconst3 = default:
A5XX_TEX_CONST_3_MIN_LAYERSZ( so->texconst3 = 0x00000000;
fd_resource_slice(rsc, prsc->last_level)->size0) | break;
A5XX_TEX_CONST_3_ARRAY_PITCH(fd_resource_slice(rsc, lvl)->size0); }
so->texconst5 =
A5XX_TEX_CONST_5_DEPTH(u_minify(prsc->depth0, lvl));
break;
default:
so->texconst3 = 0x00000000;
break;
}
return &so->base; return &so->base;
} }
static void static void
fd5_set_sampler_views(struct pipe_context *pctx, enum pipe_shader_type shader, fd5_set_sampler_views(struct pipe_context *pctx, enum pipe_shader_type shader,
unsigned start, unsigned nr, unsigned unbind_num_trailing_slots, unsigned start, unsigned nr,
struct pipe_sampler_view **views) unsigned unbind_num_trailing_slots,
struct pipe_sampler_view **views)
{ {
struct fd_context *ctx = fd_context(pctx); struct fd_context *ctx = fd_context(pctx);
struct fd5_context *fd5_ctx = fd5_context(ctx); struct fd5_context *fd5_ctx = fd5_context(ctx);
uint16_t astc_srgb = 0; uint16_t astc_srgb = 0;
unsigned i; unsigned i;
for (i = 0; i < nr; i++) { for (i = 0; i < nr; i++) {
if (views[i]) { if (views[i]) {
struct fd5_pipe_sampler_view *view = struct fd5_pipe_sampler_view *view = fd5_pipe_sampler_view(views[i]);
fd5_pipe_sampler_view(views[i]); if (view->astc_srgb)
if (view->astc_srgb) astc_srgb |= (1 << i);
astc_srgb |= (1 << i); }
} }
}
fd_set_sampler_views(pctx, shader, start, nr, unbind_num_trailing_slots, views); fd_set_sampler_views(pctx, shader, start, nr, unbind_num_trailing_slots,
views);
if (shader == PIPE_SHADER_FRAGMENT) { if (shader == PIPE_SHADER_FRAGMENT) {
fd5_ctx->fastc_srgb = astc_srgb; fd5_ctx->fastc_srgb = astc_srgb;
} else if (shader == PIPE_SHADER_VERTEX) { } else if (shader == PIPE_SHADER_VERTEX) {
fd5_ctx->vastc_srgb = astc_srgb; fd5_ctx->vastc_srgb = astc_srgb;
} }
} }
void void
fd5_texture_init(struct pipe_context *pctx) fd5_texture_init(struct pipe_context *pctx)
{ {
pctx->create_sampler_state = fd5_sampler_state_create; pctx->create_sampler_state = fd5_sampler_state_create;
pctx->bind_sampler_states = fd_sampler_states_bind; pctx->bind_sampler_states = fd_sampler_states_bind;
pctx->create_sampler_view = fd5_sampler_view_create; pctx->create_sampler_view = fd5_sampler_view_create;
pctx->set_sampler_views = fd5_set_sampler_views; pctx->set_sampler_views = fd5_set_sampler_views;
} }

View file

@ -29,61 +29,60 @@
#include "pipe/p_context.h" #include "pipe/p_context.h"
#include "freedreno_texture.h"
#include "freedreno_resource.h" #include "freedreno_resource.h"
#include "freedreno_texture.h"
#include "fd5_context.h" #include "fd5_context.h"
#include "fd5_format.h" #include "fd5_format.h"
struct fd5_sampler_stateobj { struct fd5_sampler_stateobj {
struct pipe_sampler_state base; struct pipe_sampler_state base;
uint32_t texsamp0, texsamp1, texsamp2, texsamp3; uint32_t texsamp0, texsamp1, texsamp2, texsamp3;
bool needs_border; bool needs_border;
}; };
static inline struct fd5_sampler_stateobj * static inline struct fd5_sampler_stateobj *
fd5_sampler_stateobj(struct pipe_sampler_state *samp) fd5_sampler_stateobj(struct pipe_sampler_state *samp)
{ {
return (struct fd5_sampler_stateobj *)samp; return (struct fd5_sampler_stateobj *)samp;
} }
struct fd5_pipe_sampler_view { struct fd5_pipe_sampler_view {
struct pipe_sampler_view base; struct pipe_sampler_view base;
uint32_t texconst0, texconst1, texconst2, texconst3, texconst5; uint32_t texconst0, texconst1, texconst2, texconst3, texconst5;
uint32_t texconst6, texconst7, texconst8, texconst9, texconst10, texconst11; uint32_t texconst6, texconst7, texconst8, texconst9, texconst10, texconst11;
uint32_t offset; uint32_t offset;
bool astc_srgb; bool astc_srgb;
}; };
static inline struct fd5_pipe_sampler_view * static inline struct fd5_pipe_sampler_view *
fd5_pipe_sampler_view(struct pipe_sampler_view *pview) fd5_pipe_sampler_view(struct pipe_sampler_view *pview)
{ {
return (struct fd5_pipe_sampler_view *)pview; return (struct fd5_pipe_sampler_view *)pview;
} }
void fd5_texture_init(struct pipe_context *pctx); void fd5_texture_init(struct pipe_context *pctx);
static inline enum a5xx_tex_type static inline enum a5xx_tex_type
fd5_tex_type(unsigned target) fd5_tex_type(unsigned target)
{ {
switch (target) { switch (target) {
default: default:
assert(0); assert(0);
case PIPE_BUFFER: case PIPE_BUFFER:
case PIPE_TEXTURE_1D: case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_1D_ARRAY: case PIPE_TEXTURE_1D_ARRAY:
return A5XX_TEX_1D; return A5XX_TEX_1D;
case PIPE_TEXTURE_RECT: case PIPE_TEXTURE_RECT:
case PIPE_TEXTURE_2D: case PIPE_TEXTURE_2D:
case PIPE_TEXTURE_2D_ARRAY: case PIPE_TEXTURE_2D_ARRAY:
return A5XX_TEX_2D; return A5XX_TEX_2D;
case PIPE_TEXTURE_3D: case PIPE_TEXTURE_3D:
return A5XX_TEX_3D; return A5XX_TEX_3D;
case PIPE_TEXTURE_CUBE: case PIPE_TEXTURE_CUBE:
case PIPE_TEXTURE_CUBE_ARRAY: case PIPE_TEXTURE_CUBE_ARRAY:
return A5XX_TEX_CUBE; return A5XX_TEX_CUBE;
} }
} }
#endif /* FD5_TEXTURE_H_ */ #endif /* FD5_TEXTURE_H_ */

View file

@ -24,96 +24,95 @@
* Rob Clark <robclark@freedesktop.org> * Rob Clark <robclark@freedesktop.org>
*/ */
#include "pipe/p_state.h" #include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h" #include "util/u_memory.h"
#include "util/u_string.h"
#include "fd5_zsa.h"
#include "fd5_context.h" #include "fd5_context.h"
#include "fd5_format.h" #include "fd5_format.h"
#include "fd5_zsa.h"
void * void *
fd5_zsa_state_create(struct pipe_context *pctx, fd5_zsa_state_create(struct pipe_context *pctx,
const struct pipe_depth_stencil_alpha_state *cso) const struct pipe_depth_stencil_alpha_state *cso)
{ {
struct fd5_zsa_stateobj *so; struct fd5_zsa_stateobj *so;
so = CALLOC_STRUCT(fd5_zsa_stateobj); so = CALLOC_STRUCT(fd5_zsa_stateobj);
if (!so) if (!so)
return NULL; return NULL;
so->base = *cso; so->base = *cso;
switch (cso->depth_func) { switch (cso->depth_func) {
case PIPE_FUNC_LESS: case PIPE_FUNC_LESS:
case PIPE_FUNC_LEQUAL: case PIPE_FUNC_LEQUAL:
so->gras_lrz_cntl = A5XX_GRAS_LRZ_CNTL_ENABLE; so->gras_lrz_cntl = A5XX_GRAS_LRZ_CNTL_ENABLE;
break; break;
case PIPE_FUNC_GREATER: case PIPE_FUNC_GREATER:
case PIPE_FUNC_GEQUAL: case PIPE_FUNC_GEQUAL:
so->gras_lrz_cntl = A5XX_GRAS_LRZ_CNTL_ENABLE | A5XX_GRAS_LRZ_CNTL_GREATER; so->gras_lrz_cntl =
break; A5XX_GRAS_LRZ_CNTL_ENABLE | A5XX_GRAS_LRZ_CNTL_GREATER;
break;
default: default:
/* LRZ not enabled */ /* LRZ not enabled */
so->gras_lrz_cntl = 0; so->gras_lrz_cntl = 0;
break; break;
} }
if (!(cso->stencil->enabled || cso->alpha_enabled || !cso->depth_writemask)) if (!(cso->stencil->enabled || cso->alpha_enabled || !cso->depth_writemask))
so->lrz_write = true; so->lrz_write = true;
so->rb_depth_cntl |= so->rb_depth_cntl |=
A5XX_RB_DEPTH_CNTL_ZFUNC(cso->depth_func); /* maps 1:1 */ A5XX_RB_DEPTH_CNTL_ZFUNC(cso->depth_func); /* maps 1:1 */
if (cso->depth_enabled) if (cso->depth_enabled)
so->rb_depth_cntl |= so->rb_depth_cntl |=
A5XX_RB_DEPTH_CNTL_Z_ENABLE | A5XX_RB_DEPTH_CNTL_Z_ENABLE | A5XX_RB_DEPTH_CNTL_Z_TEST_ENABLE;
A5XX_RB_DEPTH_CNTL_Z_TEST_ENABLE;
if (cso->depth_writemask) if (cso->depth_writemask)
so->rb_depth_cntl |= A5XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE; so->rb_depth_cntl |= A5XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE;
if (cso->stencil[0].enabled) { if (cso->stencil[0].enabled) {
const struct pipe_stencil_state *s = &cso->stencil[0]; const struct pipe_stencil_state *s = &cso->stencil[0];
so->rb_stencil_control |= so->rb_stencil_control |=
A5XX_RB_STENCIL_CONTROL_STENCIL_READ | A5XX_RB_STENCIL_CONTROL_STENCIL_READ |
A5XX_RB_STENCIL_CONTROL_STENCIL_ENABLE | A5XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
A5XX_RB_STENCIL_CONTROL_FUNC(s->func) | /* maps 1:1 */ A5XX_RB_STENCIL_CONTROL_FUNC(s->func) | /* maps 1:1 */
A5XX_RB_STENCIL_CONTROL_FAIL(fd_stencil_op(s->fail_op)) | A5XX_RB_STENCIL_CONTROL_FAIL(fd_stencil_op(s->fail_op)) |
A5XX_RB_STENCIL_CONTROL_ZPASS(fd_stencil_op(s->zpass_op)) | A5XX_RB_STENCIL_CONTROL_ZPASS(fd_stencil_op(s->zpass_op)) |
A5XX_RB_STENCIL_CONTROL_ZFAIL(fd_stencil_op(s->zfail_op)); A5XX_RB_STENCIL_CONTROL_ZFAIL(fd_stencil_op(s->zfail_op));
so->rb_stencilrefmask |= so->rb_stencilrefmask |=
A5XX_RB_STENCILREFMASK_STENCILWRITEMASK(s->writemask) | A5XX_RB_STENCILREFMASK_STENCILWRITEMASK(s->writemask) |
A5XX_RB_STENCILREFMASK_STENCILMASK(s->valuemask); A5XX_RB_STENCILREFMASK_STENCILMASK(s->valuemask);
if (cso->stencil[1].enabled) { if (cso->stencil[1].enabled) {
const struct pipe_stencil_state *bs = &cso->stencil[1]; const struct pipe_stencil_state *bs = &cso->stencil[1];
so->rb_stencil_control |= so->rb_stencil_control |=
A5XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF | A5XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
A5XX_RB_STENCIL_CONTROL_FUNC_BF(bs->func) | /* maps 1:1 */ A5XX_RB_STENCIL_CONTROL_FUNC_BF(bs->func) | /* maps 1:1 */
A5XX_RB_STENCIL_CONTROL_FAIL_BF(fd_stencil_op(bs->fail_op)) | A5XX_RB_STENCIL_CONTROL_FAIL_BF(fd_stencil_op(bs->fail_op)) |
A5XX_RB_STENCIL_CONTROL_ZPASS_BF(fd_stencil_op(bs->zpass_op)) | A5XX_RB_STENCIL_CONTROL_ZPASS_BF(fd_stencil_op(bs->zpass_op)) |
A5XX_RB_STENCIL_CONTROL_ZFAIL_BF(fd_stencil_op(bs->zfail_op)); A5XX_RB_STENCIL_CONTROL_ZFAIL_BF(fd_stencil_op(bs->zfail_op));
so->rb_stencilrefmask_bf |= so->rb_stencilrefmask_bf |=
A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(bs->writemask) | A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(bs->writemask) |
A5XX_RB_STENCILREFMASK_BF_STENCILMASK(bs->valuemask); A5XX_RB_STENCILREFMASK_BF_STENCILMASK(bs->valuemask);
} }
} }
if (cso->alpha_enabled) { if (cso->alpha_enabled) {
uint32_t ref = cso->alpha_ref_value * 255.0; uint32_t ref = cso->alpha_ref_value * 255.0;
so->rb_alpha_control = so->rb_alpha_control =
A5XX_RB_ALPHA_CONTROL_ALPHA_TEST | A5XX_RB_ALPHA_CONTROL_ALPHA_TEST |
A5XX_RB_ALPHA_CONTROL_ALPHA_REF(ref) | A5XX_RB_ALPHA_CONTROL_ALPHA_REF(ref) |
A5XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(cso->alpha_func); A5XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(cso->alpha_func);
// so->rb_depth_control |= // so->rb_depth_control |=
// A5XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE; // A5XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
} }
return so; return so;
} }

View file

@ -27,31 +27,30 @@
#ifndef FD5_ZSA_H_ #ifndef FD5_ZSA_H_
#define FD5_ZSA_H_ #define FD5_ZSA_H_
#include "pipe/p_state.h"
#include "pipe/p_context.h" #include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "freedreno_util.h" #include "freedreno_util.h"
struct fd5_zsa_stateobj { struct fd5_zsa_stateobj {
struct pipe_depth_stencil_alpha_state base; struct pipe_depth_stencil_alpha_state base;
uint32_t rb_alpha_control; uint32_t rb_alpha_control;
uint32_t rb_depth_cntl; uint32_t rb_depth_cntl;
uint32_t rb_stencil_control; uint32_t rb_stencil_control;
uint32_t rb_stencilrefmask; uint32_t rb_stencilrefmask;
uint32_t rb_stencilrefmask_bf; uint32_t rb_stencilrefmask_bf;
uint32_t gras_lrz_cntl; uint32_t gras_lrz_cntl;
bool lrz_write; bool lrz_write;
}; };
static inline struct fd5_zsa_stateobj * static inline struct fd5_zsa_stateobj *
fd5_zsa_stateobj(struct pipe_depth_stencil_alpha_state *zsa) fd5_zsa_stateobj(struct pipe_depth_stencil_alpha_state *zsa)
{ {
return (struct fd5_zsa_stateobj *)zsa; return (struct fd5_zsa_stateobj *)zsa;
} }
void * fd5_zsa_state_create(struct pipe_context *pctx, void *fd5_zsa_state_create(struct pipe_context *pctx,
const struct pipe_depth_stencil_alpha_state *cso); const struct pipe_depth_stencil_alpha_state *cso);
#endif /* FD5_ZSA_H_ */ #endif /* FD5_ZSA_H_ */

Some files were not shown because too many files have changed in this diff Show more